#' Install any missing packages, then attach every requested package.
#'
#' @param pkg Character vector of package names.
#' @return Named logical vector with one element per entry of `pkg`: `TRUE`
#'   when the package could be attached, `FALSE` otherwise.
ipak <- function(pkg) {
  # system.file() returns "" for a package that is not installed; this avoids
  # building the full installed.packages() matrix just for a membership test.
  is_installed <- nzchar(
    vapply(pkg, function(p) system.file(package = p), character(1))
  )
  new.pkg <- pkg[!is_installed]
  if (length(new.pkg) > 0) {
    install.packages(new.pkg, dependencies = TRUE)
  }
  # require() is kept on purpose: its TRUE/FALSE result is the return value.
  # vapply() guarantees a logical vector even when `pkg` is empty.
  vapply(pkg, require, logical(1), character.only = TRUE)
}
# Packages the analysis relies on; dplyr provides select().
packages <- c("ggplot2", "dplyr")
ipak(packages)

# Read the raw data and keep only the three variables analysed below.
Hate_Crimes_selected <- select(
  read.csv("hate_crimes.csv"),
  c(share_voters_voted_trump, share_white_poverty, median_household_income)
)
# Histogram of the Trump vote share. The variable is stored as a proportion
# (axis limits 0 to 1), so the axis label states that scale instead of "%".
hist(
  Hate_Crimes_selected$share_voters_voted_trump,
  main = "Histogram podílu voličů Donalda Trumpa",
  xlab = "Podíl (0–1)",
  border = "red",
  col = "darkblue",
  xlim = c(0, 1),
  breaks = 20
)
# Scatterplot: Trump vote share (x) against median household income (y).
plot(
  x = Hate_Crimes_selected[["share_voters_voted_trump"]],
  y = Hate_Crimes_selected[["median_household_income"]],
  pch = 19,
  main = "Scatterplot podílu voličů Donalda Trumpa \n a mediánu ročního příjmu domácnosti",
  xlab = "Podíl voličů Donalda Trumpa",
  ylab = "Medián ročního
příjmu domácnosti (v $)"
)
# Normal Q-Q plot of the Trump vote share.
qqnorm(y = Hate_Crimes_selected[["share_voters_voted_trump"]])
# Correlation matrix of all three selected variables (cor() defaults).
cor(x = Hate_Crimes_selected)
share_voters_voted_trump share_white_poverty
share_voters_voted_trump 1.0000000 0.5528492
share_white_poverty 0.5528492 1.0000000
median_household_income -0.5975281 -0.8180451
median_household_income
share_voters_voted_trump -0.5975281
share_white_poverty -0.8180451
median_household_income 1.0000000
# cor.test() has no `use` argument (that belongs to cor()), so it is not
# passed here; the test is otherwise run with its defaults.
cor.test(
  Hate_Crimes_selected$share_voters_voted_trump,
  Hate_Crimes_selected$share_white_poverty
)
Pearson's product-moment correlation
data: Hate_Crimes_selected$share_voters_voted_trump and Hate_Crimes_selected$share_white_poverty
t = 4.6442, df = 49, p-value = 2.589e-05
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.3271014 0.7189034
sample estimates:
cor
0.5528492
# cor.test() has no `use` argument (that belongs to cor()), so it is not
# passed here; the test is otherwise run with its defaults.
cor.test(
  Hate_Crimes_selected$share_white_poverty,
  Hate_Crimes_selected$median_household_income
)
Pearson's product-moment correlation
data: Hate_Crimes_selected$share_white_poverty and Hate_Crimes_selected$median_household_income
t = -9.9561, df = 49, p-value = 2.329e-13
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.8924377 -0.7003477
sample estimates:
cor
-0.8180451
# cor.test() has no `use` argument (that belongs to cor()), so it is not
# passed here; the test is otherwise run with its defaults.
cor.test(
  Hate_Crimes_selected$share_voters_voted_trump,
  Hate_Crimes_selected$median_household_income
)
Pearson's product-moment correlation
data: Hate_Crimes_selected$share_voters_voted_trump and Hate_Crimes_selected$median_household_income
t = -5.2163, df = 49, p-value = 3.669e-06
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.7496651 -0.3854093
sample estimates:
cor
-0.5975281
# Correlation matrix with pairwise and Holm-adjusted p-values.
# Rcmdr itself is deliberately not attached: the function is reached through
# the RcmdrMisc namespace, and attaching Rcmdr would start the R Commander GUI
# in interactive sessions.
RcmdrMisc::rcorr.adjust(Hate_Crimes_selected)
Pearson correlations:
share_voters_voted_trump share_white_poverty
share_voters_voted_trump 1.0000 0.5528
share_white_poverty 0.5528 1.0000
median_household_income -0.5975 -0.8180
median_household_income
share_voters_voted_trump -0.5975
share_white_poverty -0.8180
median_household_income 1.0000
Number of observations: 51
Pairwise two-sided p-values:
share_voters_voted_trump share_white_poverty
share_voters_voted_trump <.0001
share_white_poverty <.0001
median_household_income <.0001 <.0001
median_household_income
share_voters_voted_trump <.0001
share_white_poverty <.0001
median_household_income
Adjusted p-values (Holm's method)
share_voters_voted_trump share_white_poverty
share_voters_voted_trump <.0001
share_white_poverty <.0001
median_household_income <.0001 <.0001
median_household_income
share_voters_voted_trump <.0001
share_white_poverty <.0001
median_household_income
“It appeared from these experiments that the offspring did not tend to resemble their parent seeds in size, but to be always more mediocre than they – to be smaller than the parents, if the parents were large; to be larger than the parents, if the parents were very small.”
“The point of convergence was considerably below the average size of the seeds contained in the large bagful I bought at a nursery garden, out of which I selected those that were sown, and I had some reason to believe that the size of the seed towards which the produce converged was similar to that of an average seed taken out of beds of self-planted specimens.”
Galton, 1886, s. 246
\(Y' = a + bX\)
\(Y' = b_{0} + b_{1}X_{1}\)
\(Y' = b_{0} + b_{n}X_{n}\)
\(Y = b_{0} + b_{1}X_{1} + b_{2}X_{2} + \dots + b_{n}X_{n} + e\)
Přímka (model) je proložena daty tak, aby jim co nejlépe odpovídala.
\(SS_{T} = SS_{M} + SS_{R}\)
\(R^{2} = \frac{SS_{M}}{SS_{T}}\)
\(SS_{M}\)
\(SS_{R}\)
\(SS_{T}\)
\(R^{2}\)
Metoda nejmenších čtverců graficky
# Simple linear regression: Trump vote share modelled on white poverty share.
ModelHateCrime <- lm(
  share_voters_voted_trump ~ share_white_poverty,
  data = Hate_Crimes_selected
)
# summary() is a generic function: what it prints depends on the class of the
# object it receives.
summary(ModelHateCrime)
Call:
lm(formula = share_voters_voted_trump ~ share_white_poverty,
data = Hate_Crimes_selected)
Residuals:
Min 1Q Median 3Q Max
-0.312542 -0.052037 0.007795 0.061240 0.214686
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.2463 0.0543 4.536 3.72e-05 ***
share_white_poverty 2.6554 0.5718 4.644 2.59e-05 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.09992 on 49 degrees of freedom
Multiple R-squared: 0.3056, Adjusted R-squared: 0.2915
F-statistic: 21.57 on 1 and 49 DF, p-value: 2.589e-05
# Analysis-of-variance table for the fitted regression model.
anova(object = ModelHateCrime)
Analysis of Variance Table
Response: share_voters_voted_trump
Df Sum Sq Mean Sq F value Pr(>F)
share_white_poverty 1 0.21536 0.215355 21.569 2.589e-05 ***
Residuals 49 0.48924 0.009985
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Model predictions for the observations the model was fitted on
# (no new data supplied).
predict(object = ModelHateCrime)
1 2 3 4 5 6 7 8 9
0.5649769 0.4056510 0.4853139 0.5649769 0.4853139 0.4322053 0.4056510 0.4587596 0.3525424
10 11 12 13 14 15 16 17 18
0.5384226 0.4853139 0.4322053 0.5384226 0.4322053 0.5649769 0.4853139 0.5384226 0.6977485
19 20 21 22 23 24 25 26 27
0.5649769 0.5649769 0.4056510 0.4587596 0.4853139 0.3790967 0.6180855 0.4322053 0.5118683
28 29 30 31 32 33 34 35 36
0.4322053 0.4587596 0.4056510 0.4322053 0.5118683 0.5118683 0.5118683 0.4853139 0.5118683
37 38 39 40 41 42 43 44 45
0.5118683 0.5118683 0.4853139 0.4587596 0.4853139 0.4587596 0.5915312 0.4587596 0.4587596
46 47 48 49 50 51
0.5118683 0.4322053 0.4853139 0.6180855 0.4853139 0.4853139
\(b_{i}\)
\(β_{i}\)
\(b_{0}\)
# Standardized regression coefficient(s) of the fitted model, via QuantPsyc.
QuantPsyc::lm.beta(ModelHateCrime)
share_white_poverty
0.5528492
American Psychological Association. (2001). Publication manual of the American Psychological Association (6th ed.). Washington, DC: APA.
Field, A. (2009). Discovering statistics using SPSS, 3rd Ed. Los Angeles: Sage.
Fox, J. (2016). Applied Regression Analysis and Generalized Linear Models, 3rd Ed. Los Angeles: Sage.
Galton, F. (1886). Regression towards mediocrity in hereditary stature. Journal of the Anthropological Institute, 15, pp. 246-63. Dostupné online z “http://galton.org/essays/1880-1889/galton-1886-jaigiregression-stature.pdf”
Robotková, A., & Ježek, S. (2012). Vícenásobná lineární regrese. Prezentace ke kurzu PSY252.
A work by Vit Gabrhel