from collections import Counter

# Input file read when this module is run as a script.
DATA_PATH = "./examiner-date-text.csv"
# Maximum number of most-frequent words printed by the script.
TOP_N = 100


def preprocess_data(file):
    """Parse ``date,title`` lines into a list of dictionaries.

    Each line is split at its first comma only, so commas that appear
    inside the title are preserved. The line terminator is removed, so
    it never becomes part of the title (or of the date on a line that
    has no comma).

    Args:
        file: An open text file, or any iterable yielding lines of text.

    Returns:
        A list with one dict per input line, in input order. Each dict
        has the keys ``"publication_date"`` (text before the first comma)
        and ``"title"`` (text after it, or ``""`` if the line has no
        comma). No row is treated specially; skipping a header row is
        left to the caller.
    """
    parsed_data = []
    for line in file:
        # Strip only the line ending; other whitespace is left untouched.
        publication_date, _, title = line.rstrip("\r\n").partition(",")
        parsed_data.append(
            {"publication_date": publication_date, "title": title}
        )
    return parsed_data


def count_words(rows):
    """Count case-insensitive word occurrences across the rows' titles.

    Titles are split on runs of whitespace, so repeated spaces and line
    endings never produce empty or newline-suffixed "words". Punctuation
    is not stripped: ``"hello,"`` and ``"hello"`` are distinct words.

    Args:
        rows: An iterable of dicts that each have a ``"title"`` string.

    Returns:
        A ``collections.Counter`` mapping each lower-cased word to the
        number of times it occurs.
    """
    counts = Counter()
    for row in rows:
        counts.update(word.lower() for word in row["title"].split())
    return counts


if __name__ == "__main__":
    with open(DATA_PATH, "r") as data:
        processed_data = preprocess_data(data)

    # Skip the first row (presumably the CSV header line).
    word_counts = count_words(processed_data[1:])

    # Highest count first; words with equal counts keep first-seen order.
    sorted_counts = word_counts.most_common()

    # Slicing prints at most TOP_N entries and cannot run past the end
    # when the data holds fewer distinct words than that.
    for word, freq in sorted_counts[:TOP_N]:
        print(f"{word} freq: {freq}")