import random
import re
from collections import Counter, defaultdict

# One or more sentence-terminating characters; used to cut a text into sentences.
_SENTENCE_END = re.compile(r"[.!?]+")


def _read_text(filename):
    """Return the whole content of the text file *filename* (decoded as UTF-8)."""
    with open(filename, encoding="utf-8") as source:
        return source.read()


def print_first_row(filename):
    """Read the file *filename* and print its first line.

    The trailing newline of the line is not printed twice; an empty file
    prints an empty line.

    Example from the assignment: ``print_first_row("slovnik.txt")`` is
    expected to print ``poradni``.
    """
    with open(filename, encoding="utf-8") as source:
        print(source.readline().rstrip("\n"))


def print_avg_sentence_len(filename):
    """Print the average sentence length (in words) of the file *filename*.

    A word is anything separated by whitespace.  Sentences are delimited by
    runs of ``.``, ``!`` or ``?``; fragments that contain no word (for example
    the text after the final full stop) are ignored.  Prints ``0.0`` when the
    file contains no words at all.

    Example from the assignment:
    ``print_avg_sentence_len("sherlock-holmes.txt")`` is quoted as
    ``8.564444270751915``.
    NOTE(review): that figure depends on the exact sentence-splitting rule the
    reference solution used, which is not stated; confirm before relying on it.
    """
    fragments = _SENTENCE_END.split(_read_text(filename))
    word_counts = [len(fragment.split()) for fragment in fragments]
    # Drop empty fragments so they do not drag the average down.
    word_counts = [count for count in word_counts if count]
    if not word_counts:
        print(0.0)
        return
    print(sum(word_counts) / len(word_counts))


def print_n_most_bigrams(filename, n):
    """Print the *n* most frequent bigrams of the file *filename*.

    A bigram is a pair of directly consecutive whitespace-separated words.
    Each bigram is printed on its own line as the tuple
    ``(first_word, second_word, count)``, most frequent first.

    Example from the assignment:
    ``print_n_most_bigrams("sherlock-holmes.txt", 5)`` is quoted as::

        ('of', 'the', 4220)
        ('in', 'the', 2910)
        ('to', 'the', 1753)
        ('at', 'the', 1271)
        ('that', 'I', 1240)
    """
    words = _read_text(filename).split()
    bigram_counts = Counter(zip(words, words[1:]))
    for (first, second), count in bigram_counts.most_common(n):
        print((first, second, count))


def _write_matching_words(in_filename, out_filename, pattern):
    """Write every word of *in_filename* that matches *pattern* to *out_filename*.

    Words are whitespace-separated tokens of the input; matching is
    case-sensitive and uses ``re.search``.  The output holds one word per
    line, in input order.
    """
    matcher = re.compile(pattern)
    words = _read_text(in_filename).split()
    with open(out_filename, "w", encoding="utf-8") as target:
        target.writelines(word + "\n" for word in words if matcher.search(word))


def output_oo_words(in_filename, out_filename):
    """Write the words of *in_filename* containing ``"oo"`` to *out_filename*.

    Example: ``output_oo_words("slovnik.txt", "results_oo.txt")``.
    """
    _write_matching_words(in_filename, out_filename, r"oo")


def output_axxxa_words(in_filename, out_filename):
    """Write the words with ``"a"`` at the first and the last position.

    The one-letter word ``"a"`` qualifies, because its first position is also
    its last one.

    Example: ``output_axxxa_words("slovnik.txt", "results_axxxa.txt")``.
    """
    _write_matching_words(in_filename, out_filename, r"^a(.*a)?$")


def output_aeio_words(in_filename, out_filename):
    """Write the words containing ``a``, ``e``, ``i``, ``o`` in this order.

    The letters do not have to be adjacent.

    Example: ``output_aeio_words("slovnik.txt", "results_aeio.txt")``.
    """
    _write_matching_words(in_filename, out_filename, r"a.*e.*i.*o")


def output_rst4_words(in_filename, out_filename):
    """Write the words with four consecutive letters from ``r``, ``s``, ``t``.

    Example: ``output_rst4_words("slovnik.txt", "results_rst4.txt")``.
    """
    _write_matching_words(in_filename, out_filename, r"[rst]{4}")


def print_max_name_2002():
    """Print the name with the most occurrences in the year 2002.

    The data come from the file ``"jmena.csv"``, which has to be opened with
    ``encoding="utf-8"``.

    Example: ``print_max_name_2002()``.
    """
    # TODO: not implemented - the column layout of "jmena.csv" (delimiter,
    # header, where the per-year counts live) is not visible from this file.
    pass


def print_n_max_names(n):
    """Print the *n* names with the most occurrences summed over all years.

    The data come from the file ``"jmena.csv"``, which has to be opened with
    ``encoding="utf-8"``.

    Example: ``print_n_max_names(8)``.
    """
    # TODO: not implemented - the column layout of "jmena.csv" (delimiter,
    # header, where the per-year counts live) is not visible from this file.
    pass


def imitate_text(filename, n):
    """Print *n* words of random text imitating the file *filename*.

    The text is generated from the frequencies of consecutive word pairs:
    every next word is drawn at random from the words that follow the current
    word in the source, so frequent pairs are proportionally more likely.
    The first word is drawn at random from the whole source.  The words are
    printed on one line separated by single spaces.  Nothing is printed when
    the source has no words or *n* is not positive.

    Example: ``imitate_text("sherlock-holmes.txt", 150)``.
    """
    words = _read_text(filename).split()
    if not words or n <= 0:
        return

    # Successor lists keep duplicates on purpose: drawing uniformly from the
    # list then reproduces the bigram frequencies of the source.
    followers = defaultdict(list)
    for current, following in zip(words, words[1:]):
        followers[current].append(following)

    word = random.choice(words)
    generated = [word]
    while len(generated) < n:
        candidates = followers.get(word)
        # The last word of the source may have no successor; restart from a
        # random word instead of stopping early.
        word = random.choice(candidates) if candidates else random.choice(words)
        generated.append(word)
    print(" ".join(generated))