# Install instructions for Rling
#
# - [ ] install devtools
# - [ ] gunzip Rling_1.0.tar.gz
# - [ ] devtools::install_local("Rling_1.0.tar")
# - [ ] also works: install.packages(pkgs = file.choose(), repos = NULL, type = "source")
library (Rling)
#library(languageR)
# English 'black'
# Load the `colreg` dataset (presumably provided by Rling, attached above)
# and print it.
data("colreg")
colreg
spoken fiction academic press
black 20335 41118 26892 73080
blue 4693 22093 3605 21210
brown 1185 10914 1201 11539
gray 1168 12140 1289 6559
green 3860 14398 4477 26837
orange 931 3496 474 5766
pink 962 7312 584 6356
purple 613 3366 429 3403
red 7230 25111 5621 34596
white 14474 40745 26336 54883
yellow 1349 10553 1855 10382
Derivační morfologie?
Levshina: základní nápad – Deviation of Proportions, DP (Gries 2008; Lijffijt & Gries 2012)
více lingvistický přístup?
intuice:
. . Na obzoru se černaly mraky. Na obzoru se ??oranžovělo slunce.
. . Petr nabílil košili. Petr ??naoranžověl košili.
. . Petr začernil text. Petr ??zaokroval text.
DeriNet search: [pos="A" lemma="bílý"] [pos="V"]
vs. [pos="A" lemma="okrový"] [pos="V"]
DeriNet – v1
Korpusy
# Four reference frequencies (presumably the sizes of the register
# sub-corpora - TODO confirm). They are later compared position by position
# with the `colreg` columns spoken, fiction, academic, press.
freqreg <- c(
  95385672,
  90344134,
  91044778,
  187245672
)
freqreg
[1] 95385672 90344134 91044778 187245672
# Expected proportions: each element of `freqreg` as a share of their total.
exp_prop <- freqreg / sum(freqreg)
exp_prop
[1] 0.2055636 0.1946987 0.1962086 0.4035291
spoken fiction academic press
black 20335 41118 26892 73080
# Observed proportions for row 1 of `colreg` ("black"): every count divided
# by the row total.
black_obs <- colreg[1, ] / sum(colreg[1, ])
black_obs
spoken fiction academic press
black 0.1259718 0.2547189 0.1665913 0.452718
# Deviation of Proportions (DP) for "black": half of the summed absolute
# differences between observed and expected proportions.
DP_black <- 0.5 * sum(abs(black_obs - exp_prop))
DP_black
# Normalised DP: DP divided by (1 - smallest expected proportion).
DP_black_norm <- DP_black / (1 - min(exp_prop))
DP_black_norm
English “gray”
# Observed proportions for row 4 of `colreg` ("gray"): every count divided
# by the row total.
gray_obs <- colreg[4, ] / sum(colreg[4, ])
gray_obs
spoken fiction academic press
gray 0.05520892 0.5738325 0.06092834 0.3100303
# Deviation of Proportions (DP) for "gray": half of the summed absolute
# differences between observed and expected proportions.
DP_gray <- 0.5 * sum(abs(gray_obs - exp_prop))
DP_gray
# Normalised DP: DP divided by (1 - smallest expected proportion).
DP_gray_norm <- DP_gray / (1 - min(exp_prop))
DP_gray_norm
Czech “černý”
# Four raw counts for "cerny" (presumably one per register; the register
# order is not shown here - TODO confirm).
cerny_obs <- c(363, 181, 175, 124)
cerny_obs
# Four equal reference sizes, so every expected share is 0.25.
freq_cz_reg <- rep(1000000, times = 4)
exp_cz_prop <- freq_cz_reg / sum(freq_cz_reg)
exp_cz_prop
# Overwrite the raw counts with their proportions of the total.
cerny_obs <- cerny_obs / sum(cerny_obs)
cerny_obs
[1] 0.4306050 0.2147094 0.2075919 0.1470937
# Deviation of Proportions (DP) for "cerny": half of the summed absolute
# differences between observed and expected proportions.
DP_cerny <- 0.5 * sum(abs(cerny_obs - exp_cz_prop))
DP_cerny
# Normalised DP: DP divided by (1 - smallest expected proportion).
DP_cerny_norm <- DP_cerny / (1 - min(exp_cz_prop))
DP_cerny_norm
Czech “okrová”
# Four raw counts for "okrovy" (presumably one per register; the register
# order is not shown here - TODO confirm).
okrovy_obs <- c(0, 4, 2, 1)
okrovy_obs
# Four equal reference sizes, so every expected share is 0.25.
freq_cz_reg <- rep(1000000, times = 4)
exp_cz_prop <- freq_cz_reg / sum(freq_cz_reg)
exp_cz_prop
# Overwrite the raw counts with their proportions of the total.
okrovy_obs <- okrovy_obs / sum(okrovy_obs)
okrovy_obs
[1] 0.0000000 0.5714286 0.2857143 0.1428571
# Deviation of Proportions (DP) for "okrovy": half of the summed absolute
# differences between observed and expected proportions.
DP_okrovy <- 0.5 * sum(abs(okrovy_obs - exp_cz_prop))
DP_okrovy
# Normalised DP: DP divided by (1 - smallest expected proportion).
DP_okrovy_norm <- DP_okrovy / (1 - min(exp_cz_prop))
DP_okrovy_norm
# Grouped bar chart comparing the observed proportions of "okrovy" and
# "cerny" across the four registers.
library(ggplot2)
# NOTE(review): these labels are in alphabetical order, whereas the English
# data use spoken/fiction/academic/press. Confirm they match the order in
# which the Czech counts were entered.
labs <- c("academic", "fiction", "press", "spoken")
label <- rep(c("okrovy", "cerny"), each = 4)
barvy <- c(okrovy_obs, cerny_obs)
# Build the table with data.frame() rather than cbind(): cbind() on mixed
# character/numeric vectors yields a character matrix, which would turn the
# proportions into text and give the plot a discrete y axis.
df <- data.frame(
  labs = rep(labs, times = 2),
  barvy = barvy,
  label = label
)
ggplot(data = df, aes(x = labs, y = barvy, fill = label)) +
  geom_bar(stat = "identity", position = position_dodge())
Czech “oranžový”
# Four raw counts for "oranzovy" (presumably one per register; the register
# order is not shown here - TODO confirm).
oranzovy_obs <- c(23, 22, 16, 25)
oranzovy_obs
# Four equal reference sizes, so every expected share is 0.25.
freq_cz_reg <- rep(1000000, times = 4)
exp_cz_prop <- freq_cz_reg / sum(freq_cz_reg)
exp_cz_prop
# Overwrite the raw counts with their proportions of the total.
oranzovy_obs <- oranzovy_obs / sum(oranzovy_obs)
oranzovy_obs
[1] 0.2674419 0.2558140 0.1860465 0.2906977
# Deviation of Proportions (DP) for "oranzovy": half of the summed absolute
# differences between observed and expected proportions.
DP_oranzovy <- 0.5 * sum(abs(oranzovy_obs - exp_cz_prop))
DP_oranzovy
# Normalised DP: DP divided by (1 - smallest expected proportion).
DP_oranzovy_norm <- DP_oranzovy / (1 - min(exp_cz_prop))
DP_oranzovy_norm
# Grouped bar chart comparing the observed proportions of "oranzovy" and
# "cerny" across the four registers.
library(ggplot2)
# NOTE(review): these labels are in alphabetical order, whereas the English
# data use spoken/fiction/academic/press. Confirm they match the order in
# which the Czech counts were entered.
labs <- c("academic", "fiction", "press", "spoken")
label <- rep(c("oranzovy", "cerny"), each = 4)
barvy <- c(oranzovy_obs, cerny_obs)
# Build the table with data.frame() rather than cbind(): cbind() on mixed
# character/numeric vectors yields a character matrix, which would turn the
# proportions into text and give the plot a discrete y axis.
df <- data.frame(
  labs = rep(labs, times = 2),
  barvy = barvy,
  label = label
)
ggplot(data = df, aes(x = labs, y = barvy, fill = label)) +
  geom_bar(stat = "identity", position = position_dodge())
back to English
# Keep rows 1, 2, 5, 9, 10 and 11 of `colreg`; in the printed table these are
# black, blue, green, red, white and yellow.
primcol <- colreg[c(1, 2, 5, 9, 10, 11), ]
primcol
spoken fiction academic press
black 20335 41118 26892 73080
blue 4693 22093 3605 21210
green 3860 14398 4477 26837
red 7230 25111 5621 34596
white 14474 40745 26336 54883
yellow 1349 10553 1855 10382
# Keep every row of `colreg` except rows 1, 2, 5, 9, 10 and 11, i.e. the
# rows not selected for `primcol`.
seccol <- colreg[setdiff(seq_len(nrow(colreg)), c(1, 2, 5, 9:11)), ]
seccol
spoken fiction academic press
brown 1185 10914 1201 11539
gray 1168 12140 1289 6559
orange 931 3496 474 5766
pink 962 7312 584 6356
purple 613 3366 429 3403
# Column totals of `primcol`: one summed frequency per register.
primcol_sums <- colSums(x = primcol)
primcol_sums
spoken fiction academic press
51941 154018 68786 220988
# Column totals of `seccol`: one summed frequency per register.
seccol_sums <- colSums(x = seccol)
seccol_sums
spoken fiction academic press
4859 37228 3977 33623
# Observed proportions for the `primcol` group: each register total as a
# share of the grand total.
primcol_obs <- primcol_sums / sum(primcol_sums)
primcol_obs
spoken fiction academic press
0.1047762 0.3106874 0.1387561 0.4457803
# Observed proportions for the `seccol` group: each register total as a
# share of the grand total.
seccol_obs <- seccol_sums / sum(seccol_sums)
seccol_obs
spoken fiction academic press
0.06097607 0.46717783 0.04990776 0.42193833
# Deviation of Proportions (DP) for both colour groups: half of the summed
# absolute differences between observed and expected proportions.
DP_primcol <- 0.5 * sum(abs(primcol_obs - exp_prop))
DP_primcol
DP_seccol <- 0.5 * sum(abs(seccol_obs - exp_prop))
DP_seccol
# Normalised DP: DP divided by (1 - smallest expected proportion).
DP_primcol_norm <- DP_primcol / (1 - min(exp_prop))
DP_primcol_norm
DP_seccol_norm <- DP_seccol / (1 - min(exp_prop))
DP_seccol_norm
Domácí úloha
kombinace primárních a sekundárních barev
# Czech colour terms split into two groups.
primarni_barvy <- c(
  "černá", "modrá", "zelená",
  "červená", "bílá", "žlutá"
)
sekundární_barvy <- c(
  "hnědá", "šedá", "oranžová",
  "růžová", "fialová"
)
# Cross the two vectors: one row for every pairing of an element of the
# first with an element of the second (columns Var1 and Var2).
kombinace <- expand.grid(primarni_barvy, sekundární_barvy)
print(kombinace)
Var1 Var2
1 černá hnědá
2 modrá hnědá
3 zelená hnědá
4 červená hnědá
5 bílá hnědá
6 žlutá hnědá
7 černá šedá
8 modrá šedá
9 zelená šedá
10 červená šedá
11 bílá šedá
12 žlutá šedá
13 černá oranžová
14 modrá oranžová
15 zelená oranžová
16 červená oranžová
17 bílá oranžová
18 žlutá oranžová
19 černá růžová
20 modrá růžová
21 zelená růžová
22 červená růžová
23 bílá růžová
24 žlutá růžová
25 černá fialová
26 modrá fialová
27 zelená fialová
28 červená fialová
29 bílá fialová
30 žlutá fialová
# Append a third column called "name", filled with the placeholder string
# "name" (to hold the student's name), then write the table to a CSV file.
kombinace <- cbind(kombinace, name = "name")
write.csv(kombinace, file = "zadani_barev.csv")
References
Berlin, Brent, and Paul Kay. 1991. Basic Color Terms: Their Universality and Evolution. University of California Press.