{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Text Analysis (cont'd)\n", "The aim of the notebook is to begin with quantitative analysis of text data. We select a Czech text, split it into tokens, perform frequency analysis, and observe the nature of the data." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[nltk_data] Downloading package punkt to /home/zuzana/nltk_data...\n", "[nltk_data] Package punkt is already up-to-date!\n" ] } ], "source": [ "import pandas as pd\n", "import nltk\n", "nltk.download('punkt')\n", "from nltk.tokenize import word_tokenize\n", "from collections import Counter\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "text = None\n", "with open('../01-DH/maj.txt') as f: # modify the path if needed\n", " text = f.read()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "import requests\n", "import json" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = {\"call\": \"tagger\", \n", " \"lang\": \"cs\",\n", " \"output\": \"json\",\n", " \"text\": text.replace(';', ',')\n", " }\n", "uri = \"https://nlp.fi.muni.cz/languageservices/service.py\"\n", "r = requests.post(uri, params=data)\n", "r" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'vertical': [[''],\n", " ['Byl', 'být', 'k5eAaImAgInS'],\n", " ['pozdní', 'pozdní', 'k2eAgInSc1d1'],\n", " ['večer', 'večer', 'k1gInSc1'],\n", " ['–', '–', 'k?'],\n", " ['první', 'první', 'k4xOgInSc4'],\n", " ['máj', 'máj', 'k1gInSc1'],\n", " ['–', '–', 'k?'],\n", " ['večerní', 'večerní', 'k2eAgInSc4d1'],\n", " ['máj', 'máj', 'k1gInSc1'],\n", " ['–', '–', 'k?'],\n", " ['byl', 'být', 'k5eAaImAgInS'],\n", " ['lásky', 'láska', 'k1gFnSc2'],\n", " ['čas', 'čas', 'k1gInSc1'],\n", " [''],\n", " ['.', '.', 'kIx.'],\n", " [''],\n", " [''],\n", " ['Hrdliččin', 'hrdliččin', 'k2eAgInSc1d1'],\n", " ['zval', 'zvát', 'k5eAaImAgInS'],\n", " ['ku', 'k', 'k7c3'],\n", " ['lásce', 'láska', 'k1gFnSc3'],\n", " ['hlas', 'hlas', 'k1gInSc1'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['kde', 'kde', 'k6eAd1'],\n", " ['borový', 'borový', 'k2eAgMnSc1d1'],\n", " ['zaváněl', 'zavánět', 'k5eAaImAgInS'],\n", " ['háj', 'háj', 'k1gInSc1'],\n", " [''],\n", " ['.', '.', 'kIx.'],\n", " [''],\n", " [''],\n", " ['O', 'o', 'k7c6'],\n", " ['lásce', 'láska', 'k1gFnSc6'],\n", " ['šeptal', 'šeptat', 'k5eAaImAgInS'],\n", " ['tichý', 'tichý', 'k2eAgInSc1d1'],\n", " ['mech', 'mech', 'k1gInSc1'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['květoucí', 'květoucí', 'k2eAgInSc1d1'],\n", " ['strom', 'strom', 'k1gInSc1'],\n", " ['lhal', 'lhát', 'k5eAaImAgInS'],\n", " ['lásky', 'láska', 'k1gFnPc4'],\n", " ['žel', 'želet', 'k5eAaImRp2nS'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['svou', 'svůj', 'k3xOyFgFnSc4'],\n", " ['lásku', 'láska', 'k1gFnSc4'],\n", " ['slavík', 'slavík', 'k1gMnSc1'],\n", " ['růži', 'růže', 'k1gFnSc4'],\n", " ['pěl', 'pět', 'k5eAaImAgMnS'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['růžinu', 'růžina', 'k1gFnSc4'],\n", " ['jevil', 'jevit', 'k5eAaImAgInS'],\n", " ['vonný', 'vonný', 'k2eAgInSc1d1'],\n", " ['vzdech', 'vzdech', 'k1gInSc1'],\n", " [''],\n", " ['.', '.', 'kIx.'],\n", " [''],\n", " [''],\n", " ['Jezero', 'jezero', 'k1gNnSc1'],\n", " ['hladké', 'hladký', 'k2eAgNnSc1d1'],\n", " ['v', 'v', 'k7c6'],\n", " ['křovích', 'křoev', 'k1gFnPc6'],\n", " ['stinných', 'stinný', 'k2eAgMnPc2d1'],\n", " ['zvučelo', 'zvučet', 'k5eAaImAgNnS'],\n", " ['temně', 'temně', 'k6eAd1'],\n", " ['tajný', 'tajný', 'k2eAgInSc4d1'],\n", " ['bol', 'bol', 'k1gInSc4'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['břeh', 'břeh', 'k1gInSc1'],\n", " ['je', 'on', 'k3xPp3gMnPc4'],\n", " ['objímal', 'objímat', 'k5eAaImAgInS'],\n", " ['kol', 'kolit', 'k5eAaImRp2nS'],\n", " ['a', 'a', 'k8xC'],\n", " ['kol', 'kolit', 'k5eAaImRp2nS'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['a', 'a', 'k8xC'],\n", " ['slunce', 'slunce', 'k1gNnPc1'],\n", " ['jasná', 'jasný', 'k2eAgNnPc1d1'],\n", " ['světů', 'svět', 'k1gInPc2'],\n", " ['jiných', 'jiný', 'k1gMnPc2'],\n", " ['bloudila', 'bloudit', 'k5eAaImAgNnP'],\n", " ['blankytnými', 'blankytný', 'k2eAgMnPc7d1'],\n", " ['pásky', 'pásek', 'k1gInPc7'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['planoucí', 'planoucí', 'k2eAgFnSc6d1'],\n", " ['tam', 'tam', 'k6eAd1'],\n", " ['co', 'co', 'k9'],\n", " ['slzy', 'slza', 'k1gFnSc2'],\n", " ['lásky', 'láska', 'k1gFnSc2'],\n", " [''],\n", " ['.', '.', 'kIx.'],\n", " [''],\n", " [''],\n", " ['I', 'i', 'k8xC'],\n", " ['světy', 'svět', 'k1gInPc1'],\n", " ['jich', 'on', 'k3xPp3gMnPc2'],\n", " ['v', 'v', 'k7c4'],\n", " ['oblohu', 'obloha', 'k1gFnSc4'],\n", " ['skvoucí', 'skvoucí', 'k2eAgFnSc4d1'],\n", " ['co', 'co', 'k8xS'],\n", " ['ve', 'v', 'k7c4'],\n", " ['chrám', 'chrám', 'k1gInSc4'],\n", " ['věčné', 'věčný', 'k2eAgFnSc2d1'],\n", " ['lásky', 'láska', 'k1gFnSc2'],\n", " ['vzešly', 'vzejít', 'k5eAaPmAgFnP'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['až', 'až', 'k8xS'],\n", " ['se', 'se', 'k3xPyFc4'],\n", " ['–', '–', 'k?'],\n", " ['milostí', 'milost', 'k1gFnPc2'],\n", " ['k', 'k', 'k7c3'],\n", " ['sobě', 'se', 'k3xPyFc3'],\n", " ['vroucí', 'vroucí', 'k2eAgMnPc1d1'],\n", " ['změnivše', 'změnit', 'k5eAaPmDgFnP'],\n", " ['se', 'se', 'k3xPyFc4'],\n", " ['v', 'v', 'k7c4'],\n", " ['jiskry', 'jiskra', 'k1gFnPc4'],\n", " ['hasnoucí', 'hasnoucí', 'k2eAgFnPc4d1'],\n", " ['–', '–', 'k?'],\n", " ['bloudící', 'bloudící', 'k2eAgMnPc1d1'],\n", " ['co', 'co', 'k9'],\n", " ['milenci', 'milenec', 'k1gMnPc1'],\n", " ['sešly', 'sejít', 'k5eAaPmAgInP'],\n", " [''],\n", " ['.', '.', 'kIx.'],\n", " [''],\n", " [''],\n", " ['Ouplné', 'Ouplný', 'k2eAgFnPc4d1'],\n", " ['lůny', 'lůno', 'k1gNnPc7'],\n", " ['krásná', 'krásný', 'k2eAgFnSc1d1'],\n", " ['tvář', 'tvář', 'k1gFnSc1'],\n", " ['–', '–', 'k?'],\n", " ['tak', 'tak', 'k9'],\n", " ['bledě', 'bledě', 'k6eAd1'],\n", " ['jasná', 'jasný', 'k2eAgFnSc1d1'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['jasně', 'jasně', 'k6eAd1'],\n", " ['bledá', 'bledý', 'k2eAgNnPc1d1'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['jak', 'jak', 'k8xC,k8xS'],\n", " ['milence', 'milenka', 'k1gFnSc6'],\n", " ['milenka', 'milenka', 'k1gFnSc1'],\n", " ['hledá', 'hledat', 'k5eAaImIp3nS'],\n", " ['–', '–', 'k?'],\n", " ['ve', 'v', 'k7c6'],\n", " ['růžovou', 'růžový', 'k2eAgFnSc4d1'],\n", " ['vzplanula', 'vzplanout', 'k5eAaPmAgFnS'],\n", " ['zář', 'zář', 'k1gFnSc4'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['na', 'na', 'k7c6'],\n", " ['vodách', 'voda', 'k1gFnPc6'],\n", " ['obrazy', 'obraz', 'k1gInPc4'],\n", " ['své', 'svůj', 'k3xOyFgInPc4'],\n", " ['zřela', 'zřít', 'k5eAaImAgFnS'],\n", " ['a', 'a', 'k8xC'],\n", " ['sama', 'sám', 'k3xTgFnSc1'],\n", " ['k', 'k', 'k7c3'],\n", " ['sobě', 'se', 'k3xPyFc3'],\n", " ['láskou', 'láska', 'k1gFnSc7'],\n", " ['mřela', 'mřít', 'k5eAaImAgFnS'],\n", " [''],\n", " ['.', '.', 'kIx.'],\n", " [''],\n", " [''],\n", " ['Dál', 'daleko', 'k6eAd2'],\n", " ['blyštil', 'blyštit', 'k5eAaImAgInS'],\n", " ['bledý', 'bledý', 'k2eAgInSc1d1'],\n", " ['dvorů', 'dvůr', 'k1gInPc2'],\n", " ['stín', 'stín', 'k1gInSc1'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['jenž', 'jenž', 'k3xRgMnSc1'],\n", " ['k', 'k', 'k7c3'],\n", " ['sobě', 'se', 'k3xPyFc3'],\n", " ['šly', 'jít', 'k5eAaImAgInP'],\n", " ['vzdy', 'vzdy', 'k6eAd1xTwZ'],\n", " ['blíž', 'blíž', 'k1gFnSc4'],\n", " ['a', 'a', 'k8xC'],\n", " ['blíž', 'blíž', 'k1gFnSc4'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['jak', 'jak', 'k8xC,k8xS'],\n", " ['v', 'v', 'k7c6'],\n", " ['objetí', 'objetí', 'k1gNnSc6'],\n", " ['by', 'by', 'kYmCp3nS'],\n", " ['níž', 'nízce', 'k6eAd2'],\n", " ['a', 'a', 'k8xC'],\n", " ['níž', 'nízce', 'k6eAd2'],\n", " ['se', 'se', 'k3xPyFc4'],\n", " ['vinuly', 'vinout', 'k5eAaImAgInP'],\n", " ['v', 'v', 'k7c6'],\n", " ['soumraku', 'soumrak', 'k1gInSc6'],\n", " ['klín', 'klín', 'k1gInSc4'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['až', 'až', 'k8xS'],\n", " ['posléze', 'posléze', 'k6eAd1'],\n", " ['šerem', 'šero', 'k1gNnSc7'],\n", " ['v', 'v', 'k7c4'],\n", " ['jedno', 'jeden', 'k4xCgNnSc4'],\n", " ['splynou', 'splynout', 'k5eAaPmIp3nP'],\n", " [''],\n", " ['.', '.', 'kIx.'],\n", " [''],\n", " [''],\n", " ['S', 's', 'k7c7'],\n", " ['nimi', 'on', 'k3xPp3gFnPc7'],\n", " ['se', 'se', 'k3xPyFc4'],\n", " ['stromy', 'strom', 'k1gInPc1'],\n", " ['k', 'k', 'k7c3'],\n", " ['stromům', 'strom', 'k1gInPc3'],\n", " ['vinou', 'vinout', 'k5eAaImIp3nP'],\n", " [''],\n", " ['.', '.', 'kIx.'],\n", " ['–', '–', 'k?'],\n", " ['Nejzáze', 'Nejzáze', 'k1gFnPc1'],\n", " ['stíní', 'stínit', 'k5eAaImIp3nP'],\n", " ['šero', 'šero', 'k1gNnSc4'],\n", " ['hor', 'hora', 'k1gFnPc2'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['tam', 'tam', 'k6eAd1'],\n", " ['bříza', 'bříza', 'k1gFnSc1'],\n", " ['k', 'k', 'k7c3'],\n", " ['boru', 'bor', 'k1gInSc3'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['k', 'k', 'k7c3'],\n", " ['bříze', 'bříza', 'k1gFnSc3'],\n", " ['bor', 'bor', 'k1gInSc1'],\n", " ['se', 'se', 'k3xPyFc4'],\n", " ['kloní', 'klonit', 'k5eAaImIp3nS'],\n", " [''],\n", " ['.', '.', 'kIx.'],\n", " [''],\n", " [''],\n", " ['Vlna', 'vlna', 'k1gFnSc1'],\n", " ['za', 'za', 'k7c7'],\n", " ['vlnou', 'vlna', 'k1gFnSc7'],\n", " ['potokem', 'potok', 'k1gInSc7'],\n", " ['spěchá', 'spěchat', 'k5eAaImIp3nS'],\n", " [''],\n", " ['.', '.', 'kIx.'],\n", " [''],\n", " [''],\n", " ['Vře', 'vřít', 'k5eAaImIp3nS'],\n", " ['plnou', 'plný', 'k2eAgFnSc7d1'],\n", " ['–', '–', 'k?'],\n", " ['v', 'v', 'k7c4'],\n", " ['čas', 'čas', 'k1gInSc4'],\n", " ['lásky', 'láska', 'k1gFnSc2'],\n", " ['–', '–', 'k?'],\n", " ['láskou', 'láska', 'k1gFnSc7'],\n", " ['každý', 'každý', 'k3xTgMnSc1'],\n", " ['tvor', 'tvor', 'k1gMnSc1'],\n", " [''],\n", " ['.', '.', 'kIx.'],\n", " [''],\n", " [''],\n", " ['Za', 'za', 'k7c2'],\n", " ['růžového', 'růžový', 'k2eAgInSc2d1'],\n", " ['večera', 'večer', 'k1gInSc2'],\n", " ['pod', 'pod', 'k7c7'],\n", " ['dubem', 'dub', 'k1gInSc7'],\n", " ['sličná', 'sličný', 'k2eAgFnSc1d1'],\n", " ['děva', 'děva', 'k1gFnSc1'],\n", " ['sedí', 'sedět', 'k5eAaImIp3nS'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['se', 'se', 'k3xPyFc4'],\n", " ['skály', 'skála', 'k1gFnPc1'],\n", " ['v', 'v', 'k7c6'],\n", " ['břehu', 'břeh', 'k1gInSc6'],\n", " ['jezera', 'jezero', 'k1gNnSc2'],\n", " ['daleko', 'daleko', 'k6eAd1'],\n", " ['přes', 'přes', 'k7c4'],\n", " ['jezero', 'jezero', 'k1gNnSc4'],\n", " ['hledí', 'hledět', 'k5eAaImIp3nP'],\n", " [''],\n", " ['.', '.', 'kIx.'],\n", " [''],\n", " [''],\n", " ['To', 'ten', 'k3xDgNnSc1'],\n", " ['se', 'se', 'k3xPyFc4'],\n", " ['jí', 'on', 'k3xPp3gFnSc7'],\n", " ['modro', 'modro', 'k1gNnSc1'],\n", " ['k', 'k', 'k7c3'],\n", " ['nohoum', 'nohoum', 'k1gInSc1'],\n", " ['vine', 'vinout', 'k5eAaImIp3nS'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['dále', 'daleko', 'k6eAd2'],\n", " ['zeleně', 'zeleň', 'k1gFnSc2'],\n", " ['zakvítá', 'zakvítat', 'k5eAaImIp3nS'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['vzdy', 'vzdy', 'k6eAd1xTwZ'],\n", " ['zeleněji', 'zeleně', 'k6eAd2'],\n", " ['prosvítá', 'prosvítat', 'k5eAaImIp3nS'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['až', 'až', 'k8xS'],\n", " ['v', 'v', 'k7c6'],\n", " ['dálce', 'dálka', 'k1gFnSc6'],\n", " ['v', 'v', 'k7c4'],\n", " ['bledé', 'bledý', 'k2eAgNnSc4d1'],\n", " ['jasno', 'jasno', 'k1gNnSc4'],\n", " ['splyne', 'splynout', 'k5eAaPmIp3nS'],\n", " [''],\n", " ['.', '.', 'kIx.'],\n", " [''],\n", " [''],\n", " ['Po', 'po', 'k7c6'],\n", " ['šírošíré', 'šírošírý', 'k2eAgFnSc6d1'],\n", " ['hladině', 'hladina', 'k1gFnSc6'],\n", " ['umdlelý', 'umdlelý', 'k2eAgInSc1d1'],\n", " ['dívka', 'dívka', 'k1gFnSc1'],\n", " ['zrak', 'zrak', 'k1gInSc4'],\n", " ['upírá', 'upírat', 'k5eAaImIp3nS'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['po', 'po', 'k7c6'],\n", " ['šírošíré', 'šírošírý', 'k2eAgFnSc6d1'],\n", " ['hladině', 'hladina', 'k1gFnSc6'],\n", " ['nic', 'nic', 'k3yNnSc1'],\n", " ['mimo', 'mimo', 'k7c4'],\n", " ['promyk', 'promyk', 'k1gInSc4'],\n", " ['hvězd', 'hvězda', 'k1gFnPc2'],\n", " ['nezírá', 'zírat', 'k5eNaImIp3nS'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['Dívčina', 'dívčina', 'k1gFnSc1'],\n", " ['krásná', 'krásný', 'k2eAgFnSc1d1'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['anjel', 'anjel', 'k1gMnSc1'],\n", " ['padlý', 'padlý', 'k1gMnSc1'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['co', 'co', 'k3yQnSc4,k3yRnSc4,k3yInSc4'],\n", " ['amarant', 'amarant', 'k1gInSc4'],\n", " ['na', 'na', 'k7c4'],\n", " ['jaro', 'jaro', 'k1gNnSc4'],\n", " ['svadlý', 'svadlý', 'k2eAgInSc1d1'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['v', 'v', 'k7c6'],\n", " ['ubledlých', 'ubledlý', 'k2eAgInPc6d1'],\n", " ['lících', 'líc', 'k1gInPc6'],\n", " ['krásy', 'krása', 'k1gFnSc2'],\n", " ['spějí', 'spět', 'k5eAaImIp3nP'],\n", " [''],\n", " ['.', '.', 'kIx.'],\n", " [''],\n", " [''],\n", " ['Hodina', 'hodina', 'k1gFnSc1'],\n", " ['jenž', 'jenž', 'k3xRgMnSc1'],\n", " ['jí', 'on', 'k3xPp3gFnSc3'],\n", " ['všecko', 'všecek', 'k3gNnSc4xT'],\n", " ['vzala', 'vzít', 'k5eAaPmAgFnS'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['ta', 'ten', 'k3xDgNnPc4'],\n", " ['v', 'v', 'k7c4'],\n", " ['usta', 'ustum', 'k1gNnPc4'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['zraky', 'zrak', 'k1gInPc7'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['čelo', 'čelo', 'k1gNnSc4'],\n", " ['její', 'její', 'k3xOp3gInSc4'],\n", " ['půvabný', 'půvabný', 'k2eAgInSc4d1'],\n", " ['žal', 'žal', 'k1gInSc4'],\n", " ['i', 'i', 'k8xC'],\n", " ['smutek', 'smutek', 'k1gInSc4'],\n", " ['psala', 'psát', 'k5eAaImAgFnS'],\n", " [''],\n", " ['.', '.', 'kIx.'],\n", " ['–', '–', 'k?'],\n", " ['Tak', 'tak', 'k9'],\n", " ['zašel', 'zajít', 'k5eAaPmAgInS'],\n", " ['dnes', 'dnes', 'k6eAd1'],\n", " ['dvacátý', 'dvacátý', 'k4xOgInSc1'],\n", " ['den', 'den', 'k1gInSc1'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['v', 'v', 'k7c4'],\n", " ['krajinu', 'krajina', 'k1gFnSc4'],\n", " ['tichou', 'tichý', 'k2eAgFnSc4d1'],\n", " ['kráčí', 'kráčet', 'k5eAaImIp3nS'],\n", " ['sen', 'sen', 'k1gInSc1'],\n", " [''],\n", " ['.', '.', 'kIx.'],\n", " [''],\n", " [''],\n", " ['Poslední', 'poslední', 'k2eAgInSc1d1'],\n", " ['požár', 'požár', 'k1gInSc1'],\n", " ['kvapně', 'kvapně', 'k6eAd1'],\n", " ['hasne', 'hasnout', 'k5eAaImIp3nS'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['i', 'i', 'k8xC'],\n", " ['nebe', 'nebe', 'k1gNnSc1'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['jenž', 'jenž', 'k3xRgMnSc1'],\n", " ['se', 'se', 'k3xPyFc4'],\n", " ['růžojasné', 'růžojasný', 'k2eAgInPc1d1'],\n", " ['nad', 'nad', 'k7c7'],\n", " ['modrými', 'modrý', 'k2eAgFnPc7d1'],\n", " ['horami', 'hora', 'k1gFnPc7'],\n", " ['míhá', 'míhat', 'k5eAaImIp3nS'],\n", " [''],\n", " ['.', '.', 'kIx.'],\n", " ['„', '„', 'k?'],\n", " [''],\n", " ['On', 'on', 'k3xPp3gMnSc1'],\n", " ['nejde', 'jít', 'k5eNaImIp3nS'],\n", " ['–', '–', 'k?'],\n", " ['již', 'již', 'k6eAd1'],\n", " ['se', 'se', 'k3xPyFc4'],\n", " ['nevrátí', 'vrátit', 'k5eNaPmIp3nS'],\n", " [''],\n", " ['!', '!', 'kIx.'],\n", " [''],\n", " [''],\n", " ['–', '–', 'k?'],\n", " ['Svedenou', 'svedený', 'k2eAgFnSc4d1'],\n", " ['žel', 'žel', 'k9'],\n", " ['tu', 'ten', 'k3xDgFnSc4'],\n", " ['zachvátí', 'zachvátit', 'k5eAaPmIp3nP'],\n", " [''],\n", " ['!', '!', 'kIx.'],\n", " [''],\n", " [''],\n", " [''],\n", " ['“', '“', 'k?'],\n", " ['Hluboký', 'hluboký', 'k2eAgInSc1d1'],\n", " ['vzdech', 'vzdech', 'k1gInSc1'],\n", " ['jí', 'jíst', 'k5eAaImIp3nS'],\n", " ['ňadra', 'ňadro', 'k1gNnPc4'],\n", " ['zdvíhá', 'zdvíhat', 'k5eAaImIp3nS'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['bolestný', 'bolestný', 'k2eAgMnSc1d1'],\n", " ['srdcem', 'srdce', 'k1gNnSc7'],\n", " ['bije', 'bít', 'k5eAaImIp3nS'],\n", " ['cit', 'cit', 'k1gInSc1'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['a', 'a', 'k8xC'],\n", " ['u', 'u', 'k7c2'],\n", " ['tajemné', 'tajemný', 'k2eAgFnSc2d1'],\n", " ['vod', 'voda', 'k1gFnPc2'],\n", " ['stonání', 'stonání', 'k1gNnSc2'],\n", " ['mísí', 'mísit', 'k5eAaImIp3nP'],\n", " ['se', 'se', 'k3xPyFc4'],\n", " ['dívky', 'dívka', 'k1gFnPc1'],\n", " ['pláč', 'pláč', 'k1gInSc1'],\n", " ['a', 'a', 'k8xC'],\n", " ['lkání', 'lkánět', 'k5eAaPmIp3nS,k5eAaImIp3nS'],\n", " [''],\n", " ['.', '.', 'kIx.'],\n", " [''],\n", " [''],\n", " ['V', 'v', 'k7c6'],\n", " ['slzích', 'slze', 'k1gFnPc6wNwZ'],\n", " ['se', 'se', 'k3xPyFc4'],\n", " ['zhlíží', 'zhlížet', 'k5eAaImIp3nS'],\n", " ['hvězdný', 'hvězdný', 'k2eAgInSc1d1'],\n", " ['svit', 'svit', 'k1gInSc1'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['jenž', 'jenž', 'k3xRgInSc1'],\n", " ['po', 'po', 'k7c6'],\n", " ['lících', 'líc', 'k1gInPc6'],\n", " ['co', 'co', 'k8xS'],\n", " ['jiskry', 'jiskra', 'k1gFnPc1'],\n", " ['plynou', 'plynout', 'k5eAaImIp3nP'],\n", " [''],\n", " ['.', '.', 'kIx.'],\n", " [''],\n", " [''],\n", " ['Vřelé', 'vřelý', 'k2eAgFnPc1d1'],\n", " ['ty', 'ten', 'k3xDgFnPc1'],\n", " ['jiskry', 'jiskra', 'k1gFnPc1'],\n", " ['tváře', 'tvář', 'k1gFnSc2'],\n", " ['chladné', 'chladný', 'k2eAgFnPc1d1'],\n", " ['co', 'co', 'k8xS'],\n", " ['padající', 'padající', 'k2eAgFnPc1d1'],\n", " ['hvězdy', 'hvězda', 'k1gFnPc1'],\n", " ['hynou', 'hynout', 'k5eAaImIp3nP'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['kam', 'kam', 'k6eAd1'],\n", " ['zapadnou', 'zapadnout', 'k5eAaPmIp3nP'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['tam', 'tam', 'k6eAd1'],\n", " ['květ', 'květ', 'k1gInSc1'],\n", " ['uvadne', 'uvadnout', 'k5eAaPmIp3nS'],\n", " [''],\n", " ['.', '.', 'kIx.'],\n", " [''],\n", " [''],\n", " ['Viz', 'vidět', 'k5eAaImRp2nS'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['mihla', 'mihnout', 'k5eAaPmAgFnS'],\n", " ['se', 'se', 'k3xPyFc4'],\n", " ['u', 'u', 'k7c2'],\n", " ['skály', 'skála', 'k1gFnSc2'],\n", " ['kraje', 'kraj', 'k1gInSc2'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['daleko', 'daleko', 'k6eAd1'],\n", " ['přes', 'přes', 'k7c4'],\n", " ['ní', 'on', 'k3xPp3gFnSc7'],\n", " ['nahnuté', 'nahnutý', 'k2eAgFnPc1d1'],\n", " ['větýrek', 'větýrek', 'k1gInSc1'],\n", " ['bílým', 'bílý', 'k2eAgInSc7d1'],\n", " ['šatem', 'šat', 'k1gInSc7'],\n", " ['vlaje', 'vlát', 'k5eAaImIp3nS'],\n", " [''],\n", " ['.', '.', 'kIx.'],\n", " [''],\n", " [''],\n", " ['Oko', 'oko', 'k1gNnSc1'],\n", " ['má', 'mít', 'k5eAaImIp3nS'],\n", " ['v', 'v', 'k7c4'],\n", " ['dálku', 'dálka', 'k1gFnSc4'],\n", " ['napnuté', 'napnutý', 'k2eAgFnSc2d1'],\n", " [''],\n", " ['.', '.', 'kIx.'],\n", " ['–', '–', 'k?'],\n", " ['Teď', 'teď', 'k6eAd1'],\n", " ['slzy', 'slza', 'k1gFnPc4'],\n", " ['rychle', 'rychle', 'k6eAd1'],\n", " ['utírá', 'utírat', 'k5eAaImIp3nS'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['rukou', 'ruka', 'k1gFnPc6'],\n", " ['si', 'se', 'k3xPyFc3'],\n", " ['zraky', 'zrak', 'k1gInPc7'],\n", " ['zastírá', 'zastírat', 'k5eAaImIp3nS'],\n", " ['upírajíc', 'upírat', 'k5eAaImSgFnS'],\n", " ['je', 'on', 'k3xPp3gFnPc4'],\n", " ['v', 'v', 'k7c4'],\n", " ['dálné', 'dálný', 'k2eAgInPc4d1'],\n", " ['kraje', 'kraj', 'k1gInPc4'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['kde', 'kde', 'k6eAd1'],\n", " ['jezero', 'jezero', 'k1gNnSc1'],\n", " ['se', 'se', 'k3xPyFc4'],\n", " ['v', 'v', 'k7c4'],\n", " ['hory', 'hora', 'k1gFnPc4'],\n", " ['kloní', 'klonit', 'k5eAaImIp3nS'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['po', 'po', 'k7c6'],\n", " ['vlnách', 'vlna', 'k1gFnPc6'],\n", " ['jiskra', 'jiskra', 'k1gFnSc1'],\n", " ['jiskru', 'jiskra', 'k1gFnSc4'],\n", " ['honí', 'honit', 'k5eAaImIp3nS'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['po', 'po', 'k7c6'],\n", " ['vodě', 'voda', 'k1gFnSc6'],\n", " ['hvězda', 'hvězda', 'k1gFnSc1'],\n", " ['s', 's', 'k7c7'],\n", " ['hvězdou', 'hvězda', 'k1gFnSc7'],\n", " ['hraje', 'hrát', 'k5eAaImIp3nS'],\n", " [''],\n", " ['.', '.', 'kIx.'],\n", " [''],\n", " [''],\n", " ['Jak', 'jak', 'k8xC,k8xS'],\n", " ['holoubátko', 'holoubátko', 'k1gNnSc1'],\n", " ['sněhobílé', 'sněhobílý', 'k2eAgNnSc1d1'],\n", " ['pod', 'pod', 'k7c7'],\n", " ['černým', 'černý', 'k2eAgNnSc7d1'],\n", " ['mračnem', 'mračno', 'k1gNnSc7'],\n", " ['přelétá', 'přelétat', 'k5eAaImIp3nS,k5eAaPmIp3nS'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['lílie', 'lílie', 'k1gFnSc1'],\n", " ['vodní', 'vodní', 'k2eAgNnSc1d1'],\n", " ['zakvétá', 'zakvétat', 'k5eAaImIp3nS'],\n", " ['nad', 'nad', 'k7c4'],\n", " ['temné', 'temný', 'k2eAgNnSc4d1'],\n", " ['modro', 'modro', 'k1gNnSc4'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['tak', 'tak', 'k8xC,k8xS'],\n", " ['se', 'se', 'k3xPyFc4'],\n", " ['číle', 'čílit', 'k5eAaImSgMnS'],\n", " ['–', '–', 'k?'],\n", " ['kde', 'kde', 'k6eAd1'],\n", " ['jezero', 'jezero', 'k1gNnSc1'],\n", " ['se', 'se', 'k3xPyFc4'],\n", " ['v', 'v', 'k7c4'],\n", " ['hory', 'hora', 'k1gFnPc4'],\n", " ['níží', 'níže', 'k1gFnPc2'],\n", " ['–', '–', 'k?'],\n", " ['po', 'po', 'k7c6'],\n", " ['temných', 'temný', 'k2eAgFnPc6d1'],\n", " ['vlnách', 'vlna', 'k1gFnPc6'],\n", " ['cosi', 'cosi', 'k3yInSc1'],\n", " ['blíží', 'blížit', 'k5eAaImIp3nS'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['rychle', 'rychle', 'k6eAd1'],\n", " ['se', 'se', 'k3xPyFc4'],\n", " ['blíží', 'blížit', 'k5eAaImIp3nS'],\n", " [''],\n", " ['.', '.', 'kIx.'],\n", " [''],\n", " [''],\n", " ['Malá', 'malý', 'k2eAgFnSc1d1'],\n", " ['chvíle', 'chvíle', 'k1gFnSc1'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['a', 'a', 'k8xC'],\n", " ['již', 'již', 'k9'],\n", " ['co', 'co', 'k9'],\n", " ['čápa', 'čáp', 'k1gMnSc4'],\n", " ['vážný', 'vážný', 'k2eAgInSc1d1'],\n", " ['let', 'let', 'k1gInSc1'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['ne', 'ne', 'k9'],\n", " ['již', 'již', 'k9'],\n", " ['holoubě', 'holoubě', 'k1gNnSc1'],\n", " ['či', 'či', 'k8xC'],\n", " ['lílie', 'lílie', 'k1gFnSc1'],\n", " ['květ', 'květ', 'k1gInSc1'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['bílá', 'bílý', 'k2eAgFnSc1d1'],\n", " ['se', 'se', 'k3xPyFc4'],\n", " ['plachta', 'plachta', 'k1gFnSc1'],\n", " ['větrem', 'vítr', 'k1gInSc7'],\n", " ['houpá', 'houpat', 'k5eAaImIp3nS'],\n", " [''],\n", " ['.', '.', 'kIx.'],\n", " [''],\n", " [''],\n", " ['Štíhlé', 'štíhlý', 'k2eAgNnSc1d1'],\n", " ['se', 'se', 'k3xPyFc4'],\n", " ['veslo', 'veslo', 'k1gNnSc1'],\n", " ['v', 'v', 'k7c6'],\n", " ['modru', 'modro', 'k1gNnSc6'],\n", " ['koupá', 'koupat', 'k5eAaImIp3nS'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['a', 'a', 'k8xC'],\n", " ['dlouhé', 'dlouhý', 'k2eAgInPc4d1'],\n", " ['pruhy', 'pruh', 'k1gInPc4'],\n", " ['kolem', 'kolo', 'k1gNnSc7'],\n", " ['tvoří', 'tvořit', 'k5eAaImIp3nP'],\n", " [''],\n", " ['.', '.', 'kIx.'],\n", " [''],\n", " [''],\n", " ['Těm', 'ten', 'k3xDgInPc3'],\n", " ['zlaté', 'zlatý', 'k2eAgFnPc1d1'],\n", " ['růže', 'růže', 'k1gFnPc1'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['jenž', 'jenž', 'k3xRgMnSc1'],\n", " ['při', 'při', 'k7c6'],\n", " ['doubí', 'doubí', 'k1gNnSc6'],\n", " ['tam', 'tam', 'k6eAd1'],\n", " ['na', 'na', 'k7c6'],\n", " ['horách', 'hora', 'k1gFnPc6'],\n", " ['po', 'po', 'k7c6'],\n", " ['nebi', 'nebe', 'k1gNnSc6'],\n", " ['hoří', 'hořet', 'k5eAaImIp3nP'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['růžovým', 'růžový', 'k2eAgNnSc7d1'],\n", " ['zlatem', 'zlato', 'k1gNnSc7'],\n", " ['čela', 'čelo', 'k1gNnSc2'],\n", " ['broubí', 'broubit', 'k5eAaPmIp3nP'],\n", " [''],\n", " ['.', '.', 'kIx.'],\n", " ['„', '„', 'k?'],\n", " [''],\n", " ['Rychlý', 'Rychlý', 'k1gMnSc1'],\n", " ['to', 'ten', 'k3xDgNnSc1'],\n", " ['člůnek', 'člůnek', 'k1gMnSc1'],\n", " [''],\n", " ['!', '!', 'kIx.'],\n", " [''],\n", " [''],\n", " ['blíž', 'blízko', 'k6eAd2'],\n", " ['a', 'a', 'k8xC'],\n", " ['blíže', 'blízko', 'k6eAd2'],\n", " [''],\n", " ['!', '!', 'kIx.'],\n", " [''],\n", " [''],\n", " ['To', 'ten', 'k3xDgNnSc1'],\n", " ['on', 'on', 'k3xPp3gMnSc1'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['to', 'ten', 'k3xDgNnSc1'],\n", " ['on', 'on', 'k3xPp3gMnSc1'],\n", " [''],\n", " ['!', '!', 'kIx.'],\n", " [''],\n", " [''],\n", " ['Ty', 'ten', 'k3xDgInPc4'],\n", " ['péra', 'péro', 'k1gNnPc1'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['kvítí', 'kvítí', 'k1gNnSc3'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['klobouk', 'klobouk', 'k1gInSc1'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['oko', 'oko', 'k1gNnSc1'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['jenž', 'jenž', 'k3xRgMnSc1'],\n", " ['pod', 'pod', 'k7c7'],\n", " ['ním', 'on', 'k3xPp3gMnSc7'],\n", " ['svítí', 'svítit', 'k5eAaImIp3nS'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['ten', 'ten', 'k3xDgInSc1'],\n", " ['plášť', 'plášť', 'k1gInSc1'],\n", " [''],\n", " ['!', '!', 'kIx.'],\n", " [''],\n", " [''],\n", " [''],\n", " ['“', '“', 'k?'],\n", " ['Již', 'již', 'k9'],\n", " ['člůn', 'člůna', 'k1gFnPc2'],\n", " ['pod', 'pod', 'k7c7'],\n", " ['skalou', 'skalý', 'k2eAgFnSc7d1'],\n", " ['víže', 'víže', 'k1gFnSc7'],\n", " [''],\n", " ['.', '.', 'kIx.'],\n", " [''],\n", " [''],\n", " ['Vzhůru', 'vzhůru', 'k6eAd1'],\n", " ['po', 'po', 'k7c6'],\n", " ['skále', 'skála', 'k1gFnSc6'],\n", " ['lehký', 'lehký', 'k2eAgInSc4d1'],\n", " ['krok', 'krok', 'k1gInSc4'],\n", " ['uzounkou', 'uzounký', 'k2eAgFnSc7d1'],\n", " ['stezkou', 'stezka', 'k1gFnSc7'],\n", " ['plavce', 'plavec', 'k1gMnSc2'],\n", " ['vede', 'vést', 'k5eAaImIp3nS'],\n", " [''],\n", " ['.', '.', 'kIx.'],\n", " [''],\n", " [''],\n", " ['Dívce', 'dívka', 'k1gFnSc6'],\n", " ['se', 'se', 'k3xPyFc4'],\n", " ['zardí', 'zardít', 'k5eAaPmIp3nP'],\n", " ['tváře', 'tvář', 'k1gFnPc1'],\n", " ['bledé', 'bledý', 'k2eAgFnPc1d1'],\n", " ['za', 'za', 'k7c4'],\n", " ['dub', 'dub', 'k1gInSc4'],\n", " ['je', 'být', 'k5eAaImIp3nS'],\n", " ['skryta', 'skrýt', 'k5eAaPmNgFnS'],\n", " [''],\n", " ['.', '.', 'kIx.'],\n", " ['–', '–', 'k?'],\n", " ['Vstříc', 'vstříc', 'k7c3'],\n", " ['mu', 'on', 'k3xPp3gMnSc3'],\n", " ['běží', 'běžet', 'k5eAaImIp3nS'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['zaplesá', 'zaplesat', 'k5eAaPmIp3nS'],\n", " ['–', '–', 'k?'],\n", " ['běží', 'běžet', 'k5eAaImIp3nS'],\n", " ['–', '–', 'k?'],\n", " ['dlouhý', 'dlouhý', 'k2eAgInSc1d1'],\n", " ['skok', 'skok', 'k1gInSc4'],\n", " ['–', '–', 'k?'],\n", " ['již', 'již', 'k6eAd1'],\n", " ['plavci', 'plavec', 'k1gMnSc3'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['již', 'již', 'k6eAd1'],\n", " ['na', 'na', 'k7c6'],\n", " ['prsou', 'prsa', 'k1gNnPc6'],\n", " ['leží', 'ležet', 'k5eAaImIp3nP'],\n", " ['–', '–', 'k?'],\n", " ['„', '„', 'k?'],\n", " [''],\n", " ['Ha', 'ha', 'kA'],\n", " [''],\n", " ['!', '!', 'kIx.'],\n", " [''],\n", " [''],\n", " ['Běda', 'běda', 'k1gFnSc1'],\n", " ['mi', 'já', 'k3xPp1nSc3'],\n", " [''],\n", " ['!', '!', 'kIx.'],\n", " [''],\n", " [''],\n", " [''],\n", " ['“', '“', 'k?'],\n", " ['Vtom', 'vtom', 'k6eAd1'],\n", " ['lůny', 'lůno', 'k1gNnPc7'],\n", " ['zář', 'zář', 'k1gFnSc1'],\n", " ['jí', 'on', 'k3xPp3gFnSc7'],\n", " ['známou', 'známá', 'k1gFnSc7'],\n", " ['osvítila', 'osvítit', 'k5eAaPmAgFnS'],\n", " ['tvář', 'tvář', 'k1gFnSc1'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['hrůzou', 'hrůza', 'k1gFnSc7'],\n", " ['se', 'se', 'k3xPyFc4'],\n", " ['krev', 'krev', 'k1gFnSc1'],\n", " ['jí', 'on', 'k3xPp3gFnSc3'],\n", " ['v', 'v', 'k7c6'],\n", " ['žilách', 'žíla', 'k1gFnPc6'],\n", " ['staví', 'stavit', 'k5eAaBmIp3nS,k5eAaImIp3nS,k5eAaPmIp3nS'],\n", " [''],\n", " ['.', '.', 'kIx.'],\n", " ['„', '„', 'k?'],\n", " [''],\n", " ['Kde', 'kde', 'k6eAd1'],\n", " ['Vilém', 'Vilém', 'k1gMnSc1'],\n", " ['můj', 'můj', 'k3xOp1gMnSc1'],\n", " [''],\n", " ['?', '?', 'kIx.'],\n", " [''],\n", " [''],\n", " [''],\n", " ['“', '“', 'k?'],\n", " ['„', '„', 'k?'],\n", " [''],\n", " ['Viz', 'vidět', 'k5eAaImRp2nS'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " [''],\n", " ['“', '“', 'k?'],\n", " ['plavec', 'plavec', 'k1gMnSc1'],\n", " ['k', 'k', 'k7c3'],\n", " ['ní', 'on', 'k3xPp3gFnSc3'],\n", " ['tichými', 'tichý', 'k2eAgNnPc7d1'],\n", " ['slovy', 'slovo', 'k1gNnPc7'],\n", " ['šepce', 'šepce', 'k1gFnSc2'],\n", " ['praví', 'pravit', 'k5eAaBmIp3nS,k5eAaImIp3nS'],\n", " [''],\n", " [':', ':', 'kIx,'],\n", " ['„', '„', 'k?'],\n", " [''],\n", " ['Tam', 'tam', 'k6eAd1'],\n", " ['při', 'při', 'k7c6'],\n", " ['jezeru', 'jezero', 'k1gNnSc6'],\n", " ['vížka', 'vížka', 'k1gFnSc1'],\n", " ['ční', 'čnít', 'k5eAaImIp3nS'],\n", " ['nad', 'nad', 'k7c7'],\n", " ['stromů', 'strom', 'k1gInPc2'],\n", " ['noc', 'noc', 'k1gFnSc1'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['její', 'její', 'k3xOp3gInSc4'],\n", " ['bílý', 'bílý', 'k2eAgInSc4d1'],\n", " ['stín', 'stín', 'k1gInSc4'],\n", " ['hlubokoť', 'hlubokotit', 'k5eAaPmRp2nS'],\n", " ['stopen', 'stopit', 'k5eAaPmNgInS,k5eAaImNgInS'],\n", " ['v', 'v', 'k7c4'],\n", " ['jezera', 'jezero', 'k1gNnPc4'],\n", " ['klín', 'klín', 'k1gInSc4'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['však', 'však', 'k8xC'],\n", " ['hlouběji', 'hluboko', 'k6eAd2'],\n", " ['ještě', 'ještě', 'k9'],\n", " ['u', 'u', 'k7c2'],\n", " ['vodu', 'voda', 'k1gFnSc4'],\n", " ['vryt', 'vrýt', 'k5eAaPmNgInS'],\n", " ['je', 'být', 'k5eAaImIp3nS'],\n", " ['z', 'z', 'k7c2'],\n", " ['mala', 'mal', 'k1gInSc2'],\n", " ['okénka', 'okénko', 'k1gNnSc2'],\n", " ['lampy', 'lampa', 'k1gFnSc2'],\n", " ['svit', 'svita', 'k1gFnPc2'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['tam', 'tam', 'k6eAd1'],\n", " ['Vilém', 'Vilém', 'k1gMnSc1'],\n", " ['myšlenkou', 'myšlenka', 'k1gFnSc7'],\n", " ['se', 'se', 'k3xPyFc4'],\n", " ['baví', 'bavit', 'k5eAaImIp3nS'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['že', 'že', 'k8xS'],\n", " ['příští', 'příští', 'k2eAgInSc4d1'],\n", " ['den', 'den', 'k1gInSc4'],\n", " ['jej', 'on', 'k3xPp3gNnSc2'],\n", " ['žití', 'žití', 'k1gNnSc2'],\n", " ['zbaví', 'zbavit', 'k5eAaPmIp3nS'],\n", " [''],\n", " ['.', '.', 'kIx.'],\n", " [''],\n", " [''],\n", " ['On', 'on', 'k3xPp3gMnSc1'],\n", " ['hanu', 'hana', 'k1gFnSc4'],\n", " ['svou', 'svůj', 'k3xOyFgFnSc7'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['on', 'on', 'k3xPp3gMnSc1'],\n", " ['tvoji', 'tvůj', 'k3xOp2gFnSc4'],\n", " ['vinu', 'vina', 'k1gFnSc4'],\n", " ['se', 'se', 'k3xPyFc4'],\n", " ['dozvěděl', 'dozvědět', 'k5eAaPmAgMnS'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['on', 'on', 'k3xPp3gMnSc1'],\n", " ['svůdce', 'svůdce', 'k1gMnPc4'],\n", " ['tvého', 'tvůj', 'k1gMnSc2'],\n", " ['vraždě', 'vražda', 'k1gFnSc3'],\n", " ['zavraždil', 'zavraždit', 'k5eAaPmAgMnS'],\n", " ['otce', 'otec', 'k1gMnSc4'],\n", " ['svého', 'svůj', 'k1gMnSc4'],\n", " [''],\n", " ['.', '.', 'kIx.'],\n", " [''],\n", " [''],\n", " ['Msta', 'msta', 'k1gFnSc1'],\n", " ['v', 'v', 'k7c6'],\n", " ['patách', 'pata', 'k1gFnPc6'],\n", " ['kráčí', 'kráčet', 'k5eAaImIp3nS'],\n", " ['jeho', 'jeho', 'k3xOp3gInSc3'],\n", " ['činu', 'čin', 'k1gInSc3'],\n", " [''],\n", " ['.', '.', 'kIx.'],\n", " ['–', '–', 'k?'],\n", " ['Hanebně', 'hanebně', 'k6eAd1'],\n", " ['zemře', 'zemřít', 'k5eAaPmIp3nS'],\n", " [''],\n", " ['.', '.', 'kIx.'],\n", " ['–', '–', 'k?'],\n", " ['Poklid', 'poklid', 'k1gInSc1'],\n", " ['mu', 'on', 'k3xPp3gMnSc3'],\n", " ['dán', 'dát', 'k5eAaPmNgInS'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['až', 'až', 'k8xS'],\n", " ['tváře', 'tvář', 'k1gFnPc1'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['jenž', 'jenž', 'k3xRgMnSc1'],\n", " ['co', 'co', 'k3yRnSc1,k3yQnSc1,k3yInSc1'],\n", " ['růže', 'růže', 'k1gFnPc1'],\n", " ['květou', 'kvěíst', 'k5eAaPmIp3nP'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['zbledlé', 'zbledlý', 'k2eAgFnPc1d1'],\n", " ['nad', 'nad', 'k7c7'],\n", " ['kolem', 'kolo', 'k1gNnSc7'],\n", " ['obdrží', 'obdržet', 'k5eAaPmIp3nP'],\n", " ['stán', 'stán', 'k1gInSc4wZ'],\n", " [''],\n", " [',', ',', 'kIx,'],\n", " ['až', 'až', 'k8xS'],\n", " ['štíhlé', 'štíhlý', 'k2eAgInPc4d1'],\n", " ['oudy', 'oud', 'k1gInPc4'],\n", " ['v', 'v', 'k7c4'],\n", " ['kolo', 'kolo', 'k1gNnSc4'],\n", " ['vpletou', 'vplést', 'k5eAaPmIp3nP'],\n", " [''],\n", " ['.', '.', 'kIx.'],\n", " [''],\n", " [''],\n", " ['Tak', 'tak', 'k6eAd1'],\n", " ['skoná', 'skonat', 'k5eAaPmIp3nS'],\n", " ['strašný', 'strašný', 'k2eAgInSc1d1'],\n", " ['lesů', 'les', 'k1gInPc2'],\n", " ['pán', 'pán', 'k1gMnSc1'],\n", " [''],\n", " ['!', '!', 'kIx.'],\n", " [''],\n", " [''],\n", " ['–', '–', 'k?'],\n", " ['Za', 'za', 'k7c4'],\n", " ...]}" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = r.json()\n", "data" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
wordlemmatag
0Bylbýtk5eAaImAgInS
1pozdnípozdník2eAgInSc1d1
2večervečerk1gInSc1
3k?
4prvníprvník4xOgInSc4
5májmájk1gInSc1
6k?
7večernívečerník2eAgInSc4d1
8májmájk1gInSc1
9k?
10bylbýtk5eAaImAgInS
11láskyláskak1gFnSc2
12časčask1gInSc1
13..kIx.
14Hrdliččinhrdliččink2eAgInSc1d1
15zvalzvátk5eAaImAgInS
16kukk7c3
17lásceláskak1gFnSc3
18hlashlask1gInSc1
19,,kIx,
20kdekdek6eAd1
21borovýborovýk2eAgMnSc1d1
22zavánělzavánětk5eAaImAgInS
23hájhájk1gInSc1
24..kIx.
25Ook7c6
26lásceláskak1gFnSc6
27šeptalšeptatk5eAaImAgInS
28tichýtichýk2eAgInSc1d1
29mechmechk1gInSc1
............
2217Strážnéhostrážnýk1gMnSc4
2218vzbudilvzbuditk5eAaPmAgInS
2219strašnýstrašnýk2eAgInSc1d1
2220hřmothřmotk1gInSc1
2221,,kIx,
2222jejžjenžk3xRgInSc4
2223řetězůřetězk1gInPc2
2224činíčinitk5eAaImIp3nS
2225padánípadáník1gNnSc1
2226,,kIx,
2227sesek3xPyFc4
2228světlemsvětlok1gNnSc7
2229vstoupilvstoupitk5eAaPmAgInS
2230..kIx.
2231k?
2232Lehkýlehkýk2eAgInSc1d1
2233chodchodk1gInSc1
2234nevzbudilvzbuditk5eNaPmAgInS
2235vězněvězeňk1gMnPc4
2236zzk7c2
2237strašnýchstrašnýk2eAgNnPc2d1
2238zdánízdáník1gNnPc2
2239..kIx.
2240Ododk7c2
2241sloupusloupk1gInSc2
2242kkk7c3
2243sloupusloupk1gInSc3
2244lampylampak1gFnSc2
2245svitsvitk2eAgInSc1d1
2246dlouhdlouhk1gInSc1
\n", "

2247 rows × 3 columns

\n", "
" ], "text/plain": [ " word lemma tag\n", "0 Byl být k5eAaImAgInS\n", "1 pozdní pozdní k2eAgInSc1d1\n", "2 večer večer k1gInSc1\n", "3 – – k?\n", "4 první první k4xOgInSc4\n", "5 máj máj k1gInSc1\n", "6 – – k?\n", "7 večerní večerní k2eAgInSc4d1\n", "8 máj máj k1gInSc1\n", "9 – – k?\n", "10 byl být k5eAaImAgInS\n", "11 lásky láska k1gFnSc2\n", "12 čas čas k1gInSc1\n", "13 . . kIx.\n", "14 Hrdliččin hrdliččin k2eAgInSc1d1\n", "15 zval zvát k5eAaImAgInS\n", "16 ku k k7c3\n", "17 lásce láska k1gFnSc3\n", "18 hlas hlas k1gInSc1\n", "19 , , kIx,\n", "20 kde kde k6eAd1\n", "21 borový borový k2eAgMnSc1d1\n", "22 zaváněl zavánět k5eAaImAgInS\n", "23 háj háj k1gInSc1\n", "24 . . kIx.\n", "25 O o k7c6\n", "26 lásce láska k1gFnSc6\n", "27 šeptal šeptat k5eAaImAgInS\n", "28 tichý tichý k2eAgInSc1d1\n", "29 mech mech k1gInSc1\n", "... ... ... ...\n", "2217 Strážného strážný k1gMnSc4\n", "2218 vzbudil vzbudit k5eAaPmAgInS\n", "2219 strašný strašný k2eAgInSc1d1\n", "2220 hřmot hřmot k1gInSc1\n", "2221 , , kIx,\n", "2222 jejž jenž k3xRgInSc4\n", "2223 řetězů řetěz k1gInPc2\n", "2224 činí činit k5eAaImIp3nS\n", "2225 padání padání k1gNnSc1\n", "2226 , , kIx,\n", "2227 se se k3xPyFc4\n", "2228 světlem světlo k1gNnSc7\n", "2229 vstoupil vstoupit k5eAaPmAgInS\n", "2230 . . kIx.\n", "2231 – – k?\n", "2232 Lehký lehký k2eAgInSc1d1\n", "2233 chod chod k1gInSc1\n", "2234 nevzbudil vzbudit k5eNaPmAgInS\n", "2235 vězně vězeň k1gMnPc4\n", "2236 z z k7c2\n", "2237 strašných strašný k2eAgNnPc2d1\n", "2238 zdání zdání k1gNnPc2\n", "2239 . . kIx.\n", "2240 Od od k7c2\n", "2241 sloupu sloup k1gInSc2\n", "2242 k k k7c3\n", "2243 sloupu sloup k1gInSc3\n", "2244 lampy lampa k1gFnSc2\n", "2245 svit svit k2eAgInSc1d1\n", "2246 dlouh dlouh k1gInSc1\n", "\n", "[2247 rows x 3 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tokens = [token for token in data['vertical'] if len(token)==3]\n", "df = pd.DataFrame.from_dict({\"word\": [word for word, lemma, tag in tokens], \n", " \"lemma\": [lemma for word, lemma, tag in tokens], \n", " \"tag\": [tag for word, lemma, tag in tokens]\n", " })\n", "df" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
wordlemmatagpos
0Bylbýtk5eAaImAgInSk5
1pozdnípozdník2eAgInSc1d1k2
2večervečerk1gInSc1k1
3k?k?
4prvníprvník4xOgInSc4k4
5májmájk1gInSc1k1
6k?k?
7večernívečerník2eAgInSc4d1k2
8májmájk1gInSc1k1
9k?k?
10bylbýtk5eAaImAgInSk5
11láskyláskak1gFnSc2k1
12časčask1gInSc1k1
13..kIx.kI
14Hrdliččinhrdliččink2eAgInSc1d1k2
15zvalzvátk5eAaImAgInSk5
16kukk7c3k7
17lásceláskak1gFnSc3k1
18hlashlask1gInSc1k1
19,,kIx,kI
20kdekdek6eAd1k6
21borovýborovýk2eAgMnSc1d1k2
22zavánělzavánětk5eAaImAgInSk5
23hájhájk1gInSc1k1
24..kIx.kI
25Ook7c6k7
26lásceláskak1gFnSc6k1
27šeptalšeptatk5eAaImAgInSk5
28tichýtichýk2eAgInSc1d1k2
29mechmechk1gInSc1k1
...............
2217Strážnéhostrážnýk1gMnSc4k1
2218vzbudilvzbuditk5eAaPmAgInSk5
2219strašnýstrašnýk2eAgInSc1d1k2
2220hřmothřmotk1gInSc1k1
2221,,kIx,kI
2222jejžjenžk3xRgInSc4k3
2223řetězůřetězk1gInPc2k1
2224činíčinitk5eAaImIp3nSk5
2225padánípadáník1gNnSc1k1
2226,,kIx,kI
2227sesek3xPyFc4k3
2228světlemsvětlok1gNnSc7k1
2229vstoupilvstoupitk5eAaPmAgInSk5
2230..kIx.kI
2231k?k?
2232Lehkýlehkýk2eAgInSc1d1k2
2233chodchodk1gInSc1k1
2234nevzbudilvzbuditk5eNaPmAgInSk5
2235vězněvězeňk1gMnPc4k1
2236zzk7c2k7
2237strašnýchstrašnýk2eAgNnPc2d1k2
2238zdánízdáník1gNnPc2k1
2239..kIx.kI
2240Ododk7c2k7
2241sloupusloupk1gInSc2k1
2242kkk7c3k7
2243sloupusloupk1gInSc3k1
2244lampylampak1gFnSc2k1
2245svitsvitk2eAgInSc1d1k2
2246dlouhdlouhk1gInSc1k1
\n", "

2247 rows × 4 columns

\n", "
" ], "text/plain": [ " word lemma tag pos\n", "0 Byl být k5eAaImAgInS k5\n", "1 pozdní pozdní k2eAgInSc1d1 k2\n", "2 večer večer k1gInSc1 k1\n", "3 – – k? k?\n", "4 první první k4xOgInSc4 k4\n", "5 máj máj k1gInSc1 k1\n", "6 – – k? k?\n", "7 večerní večerní k2eAgInSc4d1 k2\n", "8 máj máj k1gInSc1 k1\n", "9 – – k? k?\n", "10 byl být k5eAaImAgInS k5\n", "11 lásky láska k1gFnSc2 k1\n", "12 čas čas k1gInSc1 k1\n", "13 . . kIx. kI\n", "14 Hrdliččin hrdliččin k2eAgInSc1d1 k2\n", "15 zval zvát k5eAaImAgInS k5\n", "16 ku k k7c3 k7\n", "17 lásce láska k1gFnSc3 k1\n", "18 hlas hlas k1gInSc1 k1\n", "19 , , kIx, kI\n", "20 kde kde k6eAd1 k6\n", "21 borový borový k2eAgMnSc1d1 k2\n", "22 zaváněl zavánět k5eAaImAgInS k5\n", "23 háj háj k1gInSc1 k1\n", "24 . . kIx. kI\n", "25 O o k7c6 k7\n", "26 lásce láska k1gFnSc6 k1\n", "27 šeptal šeptat k5eAaImAgInS k5\n", "28 tichý tichý k2eAgInSc1d1 k2\n", "29 mech mech k1gInSc1 k1\n", "... ... ... ... ..\n", "2217 Strážného strážný k1gMnSc4 k1\n", "2218 vzbudil vzbudit k5eAaPmAgInS k5\n", "2219 strašný strašný k2eAgInSc1d1 k2\n", "2220 hřmot hřmot k1gInSc1 k1\n", "2221 , , kIx, kI\n", "2222 jejž jenž k3xRgInSc4 k3\n", "2223 řetězů řetěz k1gInPc2 k1\n", "2224 činí činit k5eAaImIp3nS k5\n", "2225 padání padání k1gNnSc1 k1\n", "2226 , , kIx, kI\n", "2227 se se k3xPyFc4 k3\n", "2228 světlem světlo k1gNnSc7 k1\n", "2229 vstoupil vstoupit k5eAaPmAgInS k5\n", "2230 . . kIx. kI\n", "2231 – – k? k?\n", "2232 Lehký lehký k2eAgInSc1d1 k2\n", "2233 chod chod k1gInSc1 k1\n", "2234 nevzbudil vzbudit k5eNaPmAgInS k5\n", "2235 vězně vězeň k1gMnPc4 k1\n", "2236 z z k7c2 k7\n", "2237 strašných strašný k2eAgNnPc2d1 k2\n", "2238 zdání zdání k1gNnPc2 k1\n", "2239 . . kIx. kI\n", "2240 Od od k7c2 k7\n", "2241 sloupu sloup k1gInSc2 k1\n", "2242 k k k7c3 k7\n", "2243 sloupu sloup k1gInSc3 k1\n", "2244 lampy lampa k1gFnSc2 k1\n", "2245 svit svit k2eAgInSc1d1 k2\n", "2246 dlouh dlouh k1gInSc1 k1\n", "\n", "[2247 rows x 4 columns]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pos = [tag[0:2] for tag in df[\"tag\"]]\n", "df[\"pos\"] = pos\n", "df" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
count
lemma
noc17
vězeň14
láska13
čas13
hora12
den11
myšlenka11
hlas11
voda10
jezero9
tvář8
stín8
sen8
hvězda7
ticho7
konec7
klín7
svět6
zrak6
kolo6
jiskra6
Jarmila6
hrůza5
vlna5
les5
vina5
měsíc5
otec5
sloup5
dívka5
......
pásek1
péro1
rameno1
rouška1
růž1
růžina1
sbor1
pata1
paměť1
padání1
místo1
mal1
malus1
mech1
milenec1
milost1
mladost1
mrákota1
msta1
nebesa1
Kapek1
nejvejš1
nohoum1
níže1
objetí1
obloha1
obraz1
okénko1
oud1
žíla1
\n", "

253 rows × 1 columns

\n", "
" ], "text/plain": [ " count\n", "lemma \n", "noc 17\n", "vězeň 14\n", "láska 13\n", "čas 13\n", "hora 12\n", "den 11\n", "myšlenka 11\n", "hlas 11\n", "voda 10\n", "jezero 9\n", "tvář 8\n", "stín 8\n", "sen 8\n", "hvězda 7\n", "ticho 7\n", "konec 7\n", "klín 7\n", "svět 6\n", "zrak 6\n", "kolo 6\n", "jiskra 6\n", "Jarmila 6\n", "hrůza 5\n", "vlna 5\n", "les 5\n", "vina 5\n", "měsíc 5\n", "otec 5\n", "sloup 5\n", "dívka 5\n", "... ...\n", "pásek 1\n", "péro 1\n", "rameno 1\n", "rouška 1\n", "růž 1\n", "růžina 1\n", "sbor 1\n", "pata 1\n", "paměť 1\n", "padání 1\n", "místo 1\n", "mal 1\n", "malus 1\n", "mech 1\n", "milenec 1\n", "milost 1\n", "mladost 1\n", "mrákota 1\n", "msta 1\n", "nebesa 1\n", "Kapek 1\n", "nejvejš 1\n", "nohoum 1\n", "níže 1\n", "objetí 1\n", "obloha 1\n", "obraz 1\n", "okénko 1\n", "oud 1\n", "žíla 1\n", "\n", "[253 rows x 1 columns]" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "nouns = df[df[\"pos\"]==\"k1\"]\n", "verbs = df[df[\"pos\"]==\"k5\"]\n", "cn = nouns.groupby(by=\"lemma\").count()\n", "noun_lemmata = pd.DataFrame({\"count\":cn[\"word\"]})\n", "vn = verbs.groupby(by=\"lemma\").count()\n", "verb_lemmata = pd.DataFrame({\"count\":vn[\"word\"]})\n", "\n", "noun_lemmata.sort_values(\"count\", ascending=False)" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
count
lemma
být25
hynout6
mít5
znít5
umírat5
jít4
hrát4
vinout4
kolit4
změnit3
vědět3
vstát3
sklesnout3
splynout3
zvát3
opírat2
zdát2
zdvíhat2
kráčet2
míhat2
padat2
zavraždit2
ležet2
odpravit2
lkánět2
rozestírat2
zajít2
přikrývat2
vidět2
nastávat2
......
smrtit1
roznítit1
spěchat1
spět1
stavit1
stopit1
stát1
stínit1
střídat1
růst1
rozlít1
osvítit1
prolést1
plynout1
podávat1
pohrávat1
pomstit1
potopit1
poznat1
pravit1
proniknout1
rozlíhat1
pronést1
prosvítat1
psát1
pět1
přebývat1
přimrazit1
rozložit1
skvít1
\n", "

191 rows × 1 columns

\n", "
" ], "text/plain": [ " count\n", "lemma \n", "být 25\n", "hynout 6\n", "mít 5\n", "znít 5\n", "umírat 5\n", "jít 4\n", "hrát 4\n", "vinout 4\n", "kolit 4\n", "změnit 3\n", "vědět 3\n", "vstát 3\n", "sklesnout 3\n", "splynout 3\n", "zvát 3\n", "opírat 2\n", "zdát 2\n", "zdvíhat 2\n", "kráčet 2\n", "míhat 2\n", "padat 2\n", "zavraždit 2\n", "ležet 2\n", "odpravit 2\n", "lkánět 2\n", "rozestírat 2\n", "zajít 2\n", "přikrývat 2\n", "vidět 2\n", "nastávat 2\n", "... ...\n", "smrtit 1\n", "roznítit 1\n", "spěchat 1\n", "spět 1\n", "stavit 1\n", "stopit 1\n", "stát 1\n", "stínit 1\n", "střídat 1\n", "růst 1\n", "rozlít 1\n", "osvítit 1\n", "prolést 1\n", "plynout 1\n", "podávat 1\n", "pohrávat 1\n", "pomstit 1\n", "potopit 1\n", "poznat 1\n", "pravit 1\n", "proniknout 1\n", "rozlíhat 1\n", "pronést 1\n", "prosvítat 1\n", "psát 1\n", "pět 1\n", "přebývat 1\n", "přimrazit 1\n", "rozložit 1\n", "skvít 1\n", "\n", "[191 rows x 1 columns]" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "verb_lemmata.sort_values(\"count\", ascending=False)" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
wordlemmatagpos
625skrytaskrýtk5eAaPmNgFnSk5
702stopenstopitk5eAaPmNgInS,k5eAaImNgInSk5
712vrytvrýtk5eAaPmNgInSk5
765dándátk5eAaPmNgInSk5
1126odpravenodpravitk5eAaPmNgMnSk5
1166přimrazenpřimrazitk5eAaPmNgMnSk5
1258vyhnánvyhnatk5eAaPmNgMnSk5
1269zvánzvátk5eAaImNgMnSk5
1313dándátk5eAaPmNgInSk5
1319odpravenodpravitk5eAaPmNgMnSk5
1434vyvábenvyvábitk5eAaPmNgMnSk5
1960rozloženrozložitk5eAaPmNgInSk5
2061svitsvítk5eAaPmNgInSk5
2179uvedenuvéstk5eAaPmNgMnSk5
\n", "
" ], "text/plain": [ " word lemma tag pos\n", "625 skryta skrýt k5eAaPmNgFnS k5\n", "702 stopen stopit k5eAaPmNgInS,k5eAaImNgInS k5\n", "712 vryt vrýt k5eAaPmNgInS k5\n", "765 dán dát k5eAaPmNgInS k5\n", "1126 odpraven odpravit k5eAaPmNgMnS k5\n", "1166 přimrazen přimrazit k5eAaPmNgMnS k5\n", "1258 vyhnán vyhnat k5eAaPmNgMnS k5\n", "1269 zván zvát k5eAaImNgMnS k5\n", "1313 dán dát k5eAaPmNgInS k5\n", "1319 odpraven odpravit k5eAaPmNgMnS k5\n", "1434 vyváben vyvábit k5eAaPmNgMnS k5\n", "1960 rozložen rozložit k5eAaPmNgInS k5\n", "2061 svit svít k5eAaPmNgInS k5\n", "2179 uveden uvést k5eAaPmNgMnS k5" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "passive = df[df[\"tag\"].str.contains(\"mN\")]\n", "passive" ] }, { "cell_type": "code", "execution_count": 68, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(array([1., 0., 0., 0., 0., 0., 1., 2., 1., 0., 0., 0., 2., 1., 3., 1., 0.,\n", " 0., 0., 0., 0., 1., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n", " 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]),\n", " array([ 0. , 89.88, 179.76, 269.64, 359.52, 449.4 , 539.28,\n", " 629.16, 719.04, 808.92, 898.8 , 988.68, 1078.56, 1168.44,\n", " 1258.32, 1348.2 , 1438.08, 1527.96, 1617.84, 1707.72, 1797.6 ,\n", " 1887.48, 1977.36, 2067.24, 2157.12, 2247. , 2336.88, 2426.76,\n", " 2516.64, 2606.52, 2696.4 , 2786.28, 2876.16, 2966.04, 3055.92,\n", " 3145.8 , 3235.68, 3325.56, 3415.44, 3505.32, 3595.2 , 3685.08,\n", " 3774.96, 3864.84, 3954.72, 4044.6 , 4134.48, 4224.36, 4314.24,\n", " 4404.12, 4494. , 4583.88, 4673.76, 4763.64, 4853.52, 4943.4 ,\n", " 5033.28, 5123.16, 5213.04, 5302.92, 5392.8 , 5482.68, 5572.56,\n", " 5662.44, 5752.32, 5842.2 , 5932.08, 6021.96, 6111.84, 6201.72,\n", " 6291.6 , 6381.48, 6471.36, 6561.24, 6651.12, 6741. , 6830.88,\n", " 6920.76, 7010.64, 7100.52, 7190.4 , 7280.28, 7370.16, 7460.04,\n", " 7549.92, 7639.8 , 7729.68, 7819.56, 7909.44, 7999.32, 8089.2 ,\n", " 8179.08, 8268.96, 8358.84, 8448.72, 8538.6 , 8628.48, 8718.36,\n", " 8808.24, 8898.12, 8988. ]),\n", " )" ] }, "execution_count": 68, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD8CAYAAACMwORRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAD1ZJREFUeJzt3W2MHWd5xvH/VdtJeBNJ8ApSx2aDiCqFqpB0lQZRVREUyAvClRoko4qEFGSJggotUuWAFASfoKqghSAii6QkiEIgpNQFI+SWSMAHDGvXCYlNYIGUODKN84JDylvd3v1wJuZ4s86Z3T3rzT77/0lH+8zMszP3PJ69dnbOnHGqCklSW35ruQuQJI2f4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lq0Nrl2vD69etrcnJyuTYvSSvSnj17HqyqiVH9li3cJycnmZ6eXq7NS9KKlOQ/+/TzsowkNchwl6QGGe6S1CDDXZIaZLhLUoNGhnuS05J8K8kdSe5O8t45+pya5JYkM0l2J5lcimIlSf30OXP/FfDyqnox8BLgkiQXzerzJuCRqnoh8CHgA+MtU5I0HyPDvQYe6ybXda/Z/zffZuCmrn0r8IokGVuVkqR56XXNPcmaJPuAB4BdVbV7VpcNwH0AVXUUOAI8Z5yFSpL66/UJ1ar6X+AlSU4H/jnJ71bVXfPdWJKtwFaATZs2zffbV6TJbV861r73/ZcvYyWSVpN53S1TVT8FbgcumbXofmAjQJK1wLOBh+b4/u1VNVVVUxMTIx+NIElaoD53y0x0Z+wkeRrwSuC7s7rtAK7q2lcAX62q2dflJUknSZ/LMmcBNyVZw+CXwWer6otJ3gdMV9UO4Abgk0lmgIeBLUtWsSRppJHhXlV3AufPMf/aofYvgdeNtzRJ0kL5CVVJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNGhnuSTYmuT3J/iR3J3n7HH0uTnIkyb7ude3SlCtJ6mNtjz5HgXdW1d4kzwL2JNlVVftn9ft6Vb1m/CVKkuZr5Jl7VR2qqr1d+2fAAWDDUhcmSVq4eV1zTzIJnA/snmPxS5PckeTLSV50gu/fmmQ6yfThw4fnXawkqZ/e4Z7kmcDngXdU1aOzFu8Fnl9VLwY+AnxhrnVU1faqmqqqqYmJiYXWLEkaoVe4J1nHINg/VVW3zV5eVY9W1WNdeyewLsn6sVYqSeqtz90yAW4ADlTVB0/Q53ldP5Jc2K33oXEWKknqr8/dMi8D3gB8J8m+bt67gE0AVXU9cAXwliRHgV8AW6qqlqBeSVIPI8O9qr4BZESf64DrxlWUJGlx/ISqJDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDVoZLgn2Zjk9iT7k9yd5O1z9EmSDyeZSXJnkguWplxJUh9re/Q5CryzqvYmeRawJ8muqto/1OdS4Nzu9QfAx7qvkqRlMPLMvaoOVdXerv0z4ACwYVa3zcDNNfBN4PQkZ429WklSL/O65p5kEjgf2D1r0QbgvqHpgzzxF4Ak6STpc1kGgCTPBD4PvKOqHl3IxpJsBbYCbNq0aSGrWFKT2750rH3v+y8/ad8rSePW68w9yToGwf6pqrptji73AxuHps/u5h2nqrZX1VRVTU1MTCykXklSD33ulglwA3Cgqj54gm47gCu7u2YuAo5U1aEx1ilJmoc+l2VeBrwB+E6Sfd28dwGbAKrqemAncBkwA/wcuHr8pUqS+hoZ7lX1DSAj+hTw1nEVJUlaHD+hKkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUEjwz3JjUkeSHLXCZZfnORIkn3d69rxlylJmo+1Pfp8ArgOuPlJ+ny9ql4zlookSYs28sy9qr4GPHwSapEkjcm4rrm/NMkdSb6c5EUn6pRka5LpJNOHDx8e06YlSbONI9z3As+vqhcDHwG+cKKOVbW9qqaqampiYmIMm5YkzWXR4V5Vj1bVY117J7AuyfpFVyZJWrBFh3uS5yVJ176wW+dDi12vJGnhRt4tk+TTwMXA+iQHgfcA6wCq6nrgCuAtSY4CvwC2VFUtWcWSpJFGhntVvX7E8usY3CopSXqK8BOqktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktSgkeGe5MYkDyS56wTLk+TDSWaS3JnkgvGXKUmajz5n7p8ALnmS5ZcC53avrcDHFl+WJGkxRoZ7VX0NePhJumwGbq6BbwKnJzlrXAVKkuZvHNfcNwD3DU0f7OZJkpbJ2pO5sSRbGVy6YdOmTQtez+S2Lx1r3/v+yxdV0/C6xrWNpVhnn/UPr3N2n8WOk6TFGWdu9TGOM/f7gY1D02d3856gqrZX1VRVTU1MTIxh05KkuYwj3HcAV3Z3zVwEHKmqQ2NYryRpgUZelknyaeBiYH2Sg8B7gHUAVXU9sBO4DJgBfg5cvVTFSpL6GRnuVfX6EcsLeOvYKpIkLZqfUJWkBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktSgXuGe5JIk9ySZSbJtjuVvTHI4yb7u9ebxlypJ6mvtqA5J1gAfBV4JHAS+nWRHVe2f1fWWqnrbEtQoSZqnPmfuFwIzVfXDqvo18Blg89KWJUlajD7hvgG4b2j6YDdvtj9NcmeSW5NsnGtFSbYmmU4yffjw4QWUK0nqY1xvqP4rMFlVvwfsAm6aq1NVba+qqaqampiYGNOmJUmz9Qn3+4HhM/Gzu3nHVNVDVfWrbvLjwO+PpzxJ0kL0CfdvA+cmOSfJKcAWYMdwhyRnDU2+FjgwvhIlSfM18m6Zqjqa5G3AV4A1wI1VdXeS9wHTVbUD+MskrwWOAg8Db1zCmiVJI4wMd4Cq2gnsnDXv2qH2NcA14y1NkrRQfkJVkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhrUK9yTXJLkniQzSbbNsfzUJLd0y3cnmRx3oZKk/kaGe5I1wEeBS4HzgNcnOW9WtzcBj1TVC4EPAR8Yd6GSpP76nLlfCMxU1Q+r6tfAZ4DNs/psBm7q2rcCr0iS8ZUpSZqPPuG+AbhvaPpgN2/OPlV1FDgCPGccBUqS5m/tydxYkq3A1m7ysST3LHBV64EHAXISLgAtxTbGvM71wINPts6TMU5PIceODwGOx7CnxFgs8ufx+X069Qn3+4GNQ9Nnd/Pm6nMwyVrg2cBDs1dUVduB7X0KezJJpqtqarHraYXjcTzH43iOx2+sprHoc1nm28C5Sc5JcgqwBdgxq88O4KqufQXw1aqq8ZUpSZqPkWfuVXU0yduArwBrgBur6u4k7wOmq2oHcAPwySQzwMMMfgFIkpZJr2vuVbUT2Dlr3rVD7V8CrxtvaU9q0Zd2GuN4HM/xOJ7j8RurZizi1RNJao+PH5CkBq24cB/1KIQWJNmY5PYk+5PcneTt3fwzk+xK8v3u6xnd/CT5cDcmdya5YGhdV3X9v5/kqhNtcyVIsibJfyT5Yjd9Tve4i5nu8RendPNP+DiMJNd08+9J8url2ZPFS3J6kluTfDfJgSQvXa3HR5K/6n5O7kry6SSnreZj45iqWjEvBm/o/gB4AXAKcAdw3nLXtQT7eRZwQdd+FvA9Bo9++FtgWzd/G/CBrn0Z8GUgwEXA7m7+mcAPu69ndO0zlnv/FjEufw38E/DFbvqzwJaufT3wlq79F8D1XXsLcEvXPq87Zk4FzumOpTXLvV8LHIubgDd37VOA01fj8cHgA5Q/Ap42dEy8cTUfG4+/VtqZe59HIax4VXWoqvZ27Z8BBxgcxMOPebgJ+JOuvRm4uQa+CZye5Czg1cCuqnq4qh4BdgGXnMRdGZskZwOXAx/vpgO8nMHjLuCJ4zHX4zA2A5+pql9V1Y+AGQbH1IqS5NnAHzG4S42q+nVV/ZTVe3ysBZ7Wfcbm6cAhVumxMWylhXufRyE0pfuz8XxgN/DcqjrULfoJ8NyufaJxaWm8/h74G+D/uunnAD+tweMu4Ph9O9HjMFoZj3OAw8A/dpepPp7kGazC46Oq7gf+Dvgxg1A/Auxh9R4bx6y0cF9VkjwT+Dzwjqp6dHhZDf6WXBW3OiV5DfBAVe1Z7lqeItYCFwAfq6rzgf9mcBnmmNVyfHTvK2xm8Avvt4FnsDL/+hi7lRbufR6F0IQk6xgE+6eq6rZu9n91f07TfX2gm3+icWllvF4GvDbJvQwuxb0c+AcGlxce/6zG8L4d2+9Zj8NoZTwOAgeranc3fSuDsF+Nx8cfAz+qqsNV9T/AbQyOl9V6bByz0sK9z6MQVrzuGuANwIGq+uDQouHHPFwF/MvQ/Cu7uyIuAo50f55/BXhVkjO6M5xXdfNWlKq6pqrOrqpJBv/mX62qPwNuZ/C4C3jieMz1OIwdwJbujolzgHOBb52k3RibqvoJcF+S3+lmvQLYz+o8Pn4MXJTk6d3PzeNjsSqPjeMs9zu6830xeOf/ewzezX73ctezRPv4hwz+pL4T2Ne9LmNwbfDfge8D/wac2fUPg/9Q5QfAd4CpoXX9OYM3h2aAq5d738YwNhfzm7tlXsDgB3AG+Bxwajf/tG56plv+gqHvf3c3TvcAly73/ixiHF4CTHfHyBcY3O2yKo8P4L3Ad4G7gE8yuONl1R4bj7/8hKokNWilXZaRJPVguEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1KD/B1njRvZtD/yRAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "%matplotlib inline\n", "import matplotlib.pyplot as plt\n", "plt.hist(np.append(passive.index.values, [0, df.size]), bins=100)" ] }, { "cell_type": "code", "execution_count": 59, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "8988" ] }, "execution_count": 59, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.size" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.8" } }, "nbformat": 4, "nbformat_minor": 2 }