def is_word_char(char):
    """Return True if *char* should be kept when cleaning a line of text.

    A character is kept unless it is one of the punctuation/symbol
    characters listed in the literal below.
    """
    return char not in '".,:;\'@$%^&*/+-=<>[]{}'


def freq_bigrams(freq_dict, word):
    """Add the frequencies of all bigrams of *word* to *freq_dict*.

    The dictionary is modified in place: for every pair of adjacent
    characters in *word*, the count stored under that two-character key is
    incremented (missing keys start at 0). Words shorter than two
    characters contribute nothing.

    Args:
        freq_dict: Mapping from bigram to its count so far; updated in place.
        word: The string whose bigrams are counted.
    """
    # The last bigram starts at index len(word) - 2.
    for i in range(len(word) - 1):
        # Use the parameter, not a global: the function must work for any
        # caller, regardless of what the caller's loop variable is named.
        bigram = word[i:i + 2]
        freq_dict[bigram] = freq_dict.get(bigram, 0) + 1


def main():
    """Print the 10 most frequent bigrams found in 'alice.txt'.

    Each line is stripped of the characters rejected by is_word_char,
    lower-cased, and split into words; bigrams never span word boundaries.
    """
    freq = {}
    # The context manager closes the file even if processing raises.
    with open('alice.txt', 'r') as f:
        for line in f:
            line = "".join(filter(is_word_char, line))
            line = line.lower()
            # split() with no argument splits on any run of whitespace.
            for w in line.split():
                freq_bigrams(freq, w)

    k = 10  # number of most frequent bigrams to output
    # sorted() is stable, so bigrams with equal counts keep the order in
    # which they were first seen.
    for bigram in sorted(freq, key=freq.get, reverse=True)[:k]:
        print(bigram, "\t", freq[bigram])


if __name__ == "__main__":
    main()