Barvy

Author

MD

Published

March 20, 2023

# install instructions for RLing
#
# - [ ] install devtools
# - [ ] gunzip Rling_1.0.tar.gz
# - [ ] devtools::install_local("Rling_1.0.tar")

library(Rling)
#library(languageR)

# English 'black'

data(colreg)

colreg
       spoken fiction academic press
black   20335   41118    26892 73080
blue     4693   22093     3605 21210
brown    1185   10914     1201 11539
gray     1168   12140     1289  6559
green    3860   14398     4477 26837
orange    931    3496      474  5766
pink      962    7312      584  6356
purple    613    3366      429  3403
red      7230   25111     5621 34596
white   14474   40745    26336 54883
yellow   1349   10553     1855 10382

Stará debata

Derivační morfologie?

  • Levshina: základní nápad – Deviation of Proportions, DP (Gries 2008; Lijijt & Gries 2012)
  • více lingvistický přístup?
  • intuice:

. . Na obzoru se černaly mraky. Na obzoru se ??oranžovělo slunce.

. . Petr nabílil košili. Petr ??naoranžověl košili.

. . Petr začernil text. Petr ??zaokroval text.

  • DeriNET search: [pos=“A” lemma=“bílý”] [pos=“V”]
  • vs. [pos=“A” lemma=“okrový”] [pos=“V”]
  • DeriNET – v1

Korpusy

freqreg <- c(95385672, 90344134, 91044778, 187245672)

freqreg
[1]  95385672  90344134  91044778 187245672
exp_prop <- prop.table(freqreg)

exp_prop
[1] 0.2055636 0.1946987 0.1962086 0.4035291
mosaicplot(freqreg)

colreg[1,]
      spoken fiction academic press
black  20335   41118    26892 73080
black_obs <- prop.table(colreg[1,])

black_obs
         spoken   fiction  academic    press
black 0.1259718 0.2547189 0.1665913 0.452718
mosaicplot(black_obs)

DP_black <- sum(abs(black_obs - exp_prop))/2

DP_black
[1] 0.1092091
min(exp_prop)
[1] 0.1946987
DP_black_norm <- DP_black/(1 - min(exp_prop))

DP_black_norm
[1] 0.1356127

English “gray”

gray_obs <- prop.table(colreg[4,])

gray_obs
         spoken   fiction   academic     press
gray 0.05520892 0.5738325 0.06092834 0.3100303
DP_gray <- sum(abs(gray_obs - exp_prop))/2

DP_gray
[1] 0.3791338
DP_gray_norm <- DP_gray/(1 - min(exp_prop))

DP_gray_norm
[1] 0.4707974

Czech “černý”

cerny_obs <- c(363,181,175,124)

cerny_obs
[1] 363 181 175 124
freq_cz_reg <- c(1000000,1000000,1000000,1000000)

exp_cz_prop <- prop.table(freq_cz_reg)

exp_cz_prop
[1] 0.25 0.25 0.25 0.25
cerny_obs <- prop.table(cerny_obs)

cerny_obs 
[1] 0.4306050 0.2147094 0.2075919 0.1470937
DP_cerny <- sum(abs(cerny_obs - exp_cz_prop))/2

DP_cerny
[1] 0.180605
DP_cerny_norm <- DP_cerny/(1 - min(exp_cz_prop))

DP_cerny_norm
[1] 0.2408066
DP_black_norm
[1] 0.1356127

Czech “okrová”

okrovy_obs <- c(0,4,2,1)

okrovy_obs
[1] 0 4 2 1
freq_cz_reg <- c(1000000,1000000,1000000,1000000)

exp_cz_prop <- prop.table(freq_cz_reg)

exp_cz_prop
[1] 0.25 0.25 0.25 0.25
okrovy_obs <- prop.table(okrovy_obs)

okrovy_obs 
[1] 0.0000000 0.5714286 0.2857143 0.1428571
DP_okrovy <- sum(abs(okrovy_obs - exp_cz_prop))/2

DP_okrovy
[1] 0.3571429
DP_okrovy_norm <- DP_okrovy/(1 - min(exp_cz_prop))

DP_okrovy_norm
[1] 0.4761905
DP_black_norm
[1] 0.1356127
library(ggplot2)

labs <- c("academic","fiction","press","spoken")

label <- rep(c("okrovy","cerny"), each =4)

barvy <- c(okrovy_obs,cerny_obs)

df <- cbind(labs,barvy,label)
df <- as.data.frame(df) 

ggplot(data=df, aes(x=labs, y=barvy, fill=label)) +
geom_bar(stat="identity", position=position_dodge())

Czech “oranžový”

oranzovy_obs <- c(23,22,16,25)

oranzovy_obs
[1] 23 22 16 25
freq_cz_reg <- c(1000000,1000000,1000000,1000000)

exp_cz_prop <- prop.table(freq_cz_reg)

exp_cz_prop
[1] 0.25 0.25 0.25 0.25
oranzovy_obs <- prop.table(oranzovy_obs)

oranzovy_obs 
[1] 0.2674419 0.2558140 0.1860465 0.2906977
DP_oranzovy <- sum(abs(oranzovy_obs - exp_cz_prop))/2

DP_oranzovy
[1] 0.06395349
DP_oranzovy_norm <- DP_oranzovy/(1 - min(exp_cz_prop))

DP_oranzovy_norm
[1] 0.08527132
DP_black_norm
[1] 0.1356127
library(ggplot2)

labs <- c("academic","fiction","press","spoken")

label <- rep(c("oranzovy","cerny"), each =4)

barvy <- c(oranzovy_obs,cerny_obs)

df <- cbind(labs,barvy,label)
df <- as.data.frame(df) 

ggplot(data=df, aes(x=labs, y=barvy, fill=label)) +
geom_bar(stat="identity", position=position_dodge())

back to English

primcol <- colreg[c(1, 2, 5, 9:11),]

primcol
       spoken fiction academic press
black   20335   41118    26892 73080
blue     4693   22093     3605 21210
green    3860   14398     4477 26837
red      7230   25111     5621 34596
white   14474   40745    26336 54883
yellow   1349   10553     1855 10382
seccol <- colreg[-c(1, 2, 5, 9:11),]

seccol
       spoken fiction academic press
brown    1185   10914     1201 11539
gray     1168   12140     1289  6559
orange    931    3496      474  5766
pink      962    7312      584  6356
purple    613    3366      429  3403
primcol_sums <- colSums(primcol)

primcol_sums
  spoken  fiction academic    press 
   51941   154018    68786   220988 
seccol_sums <- colSums(seccol)

seccol_sums
  spoken  fiction academic    press 
    4859    37228     3977    33623 
primcol_obs <- prop.table(primcol_sums)

primcol_obs
   spoken   fiction  academic     press 
0.1047762 0.3106874 0.1387561 0.4457803 
seccol_obs <- prop.table(seccol_sums)

seccol_obs
    spoken    fiction   academic      press 
0.06097607 0.46717783 0.04990776 0.42193833 
DP_primcol <- sum(abs(primcol_obs - exp_prop))/2

DP_primcol
[1] 0.1582399
DP_seccol <- sum(abs(seccol_obs - exp_prop))/2

DP_seccol
[1] 0.2908884
DP_primcol_norm <- DP_primcol/(1 - min(exp_prop))

DP_primcol_norm
[1] 0.1964978
DP_seccol_norm <- DP_seccol/(1 - min(exp_prop))

DP_seccol_norm
[1] 0.3612168

References

Berlin, Brent, and Paul Kay. 1991. Basic Color Terms: Their Universality and Evolution. Univ of California Press.