library(AER)
## Loading required package: car
## Loading required package: lmtest
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
## Loading required package: survival
library(ggplot2)
Načtení data:
data("mtcars")
Vizualizace dat:
ggplot(mtcars, aes(wt, mpg, color = factor(cyl), size = hp)) +
geom_point()
Modely:
mod1 <- mpg ~ wt
mod2 <- update(mod1, . ~ . + hp + cyl)
mod3 <- update(mod1, . ~ . + hp * cyl)
mod4 <- update(mod1, . ~ . + hp * factor(cyl))
mod5 <- update(mod4, . ~ . - wt + log(wt))
mod5
## mpg ~ hp + factor(cyl) + log(wt) + hp:factor(cyl)
Odhad modelu:
m1 <- lm(mod1, data = mtcars)
summary(m1)
##
## Call:
## lm(formula = mod1, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.5432 -2.3647 -0.1252 1.4096 6.8727
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 37.2851 1.8776 19.858 < 2e-16 ***
## wt -5.3445 0.5591 -9.559 1.29e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.046 on 30 degrees of freedom
## Multiple R-squared: 0.7528, Adjusted R-squared: 0.7446
## F-statistic: 91.38 on 1 and 30 DF, p-value: 1.294e-10
Odhad více modelů:
models <- list(mod1, mod2, mod3, mod4, mod5)
estims <- lapply(models, function(x) lm(x, data = mtcars))
prezentace více modelů:
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2015). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2. http://CRAN.R-project.org/package=stargazer
stargazer(estims, type = "text")
##
## ======================================================================================================================================
## Dependent variable:
## ------------------------------------------------------------------------------------------------------------------
## mpg
## (1) (2) (3) (4) (5)
## --------------------------------------------------------------------------------------------------------------------------------------
## wt -5.344*** -3.167*** -3.120*** -3.060***
## (0.559) (0.741) (0.661) (0.683)
##
## hp -0.018 -0.164*** -0.099*** -0.092***
## (0.012) (0.052) (0.035) (0.032)
##
## cyl -0.942* -2.742***
## (0.551) (0.800)
##
## hp:cyl 0.019***
## (0.007)
##
## factor(cyl)6 -9.982* -8.102
## (5.769) (5.381)
##
## factor(cyl)8 -11.728** -10.051**
## (4.225) (3.970)
##
## log(wt) -10.794***
## (2.039)
##
## hp:factor(cyl)6 0.078 0.069
## (0.052) (0.048)
##
## hp:factor(cyl)8 0.086** 0.078**
## (0.037) (0.034)
##
## Constant 37.285*** 38.752*** 52.018*** 41.877*** 42.907***
## (1.878) (1.787) (4.917) (3.233) (3.021)
##
## --------------------------------------------------------------------------------------------------------------------------------------
## Observations 32 32 32 32 32
## R2 0.753 0.843 0.879 0.883 0.900
## Adjusted R2 0.745 0.826 0.862 0.854 0.876
## Residual Std. Error 3.046 (df = 30) 2.512 (df = 28) 2.242 (df = 27) 2.300 (df = 25) 2.121 (df = 25)
## F Statistic 91.375*** (df = 1; 30) 50.171*** (df = 3; 28) 49.251*** (df = 4; 27) 31.317*** (df = 6; 25) 37.569*** (df = 6; 25)
## ======================================================================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
library(broom)
m5 <- estims[[5]]
tidy(m5)
## term estimate std.error statistic p.value
## 1 (Intercept) 42.90666182 3.02086997 14.203412 1.787500e-13
## 2 hp -0.09225948 0.03226566 -2.859370 8.442745e-03
## 3 factor(cyl)6 -8.10229605 5.38072716 -1.505799 1.446503e-01
## 4 factor(cyl)8 -10.05130192 3.97017464 -2.531703 1.801087e-02
## 5 log(wt) -10.79406355 2.03877336 -5.294391 1.743246e-05
## 6 hp:factor(cyl)6 0.06893945 0.04843774 1.423259 1.670218e-01
## 7 hp:factor(cyl)8 0.07812729 0.03426396 2.280160 3.139175e-02
glance(m5)
## r.squared adj.r.squared sigma statistic p.value df logLik
## 1 0.9001654 0.8762051 2.120552 37.56903 2.497248e-11 7 -65.50991
## AIC BIC deviance df.residual
## 1 147.0198 158.7457 112.4185 25
augment(m5)
## .rownames mpg hp factor.cyl. log.wt. .fitted .se.fit
## 1 Mazda RX4 21.0 110 6 0.9631743 21.84260 0.9890285
## 2 Mazda RX4 Wag 21.0 110 6 1.0560527 20.84006 0.9330202
## 3 Datsun 710 22.8 93 4 0.8415672 25.24260 0.7219892
## 4 Hornet 4 Drive 21.4 110 6 1.1678274 19.63356 0.9143613
## 5 Hornet Sportabout 18.7 175 8 1.2354715 17.04647 0.7436189
## 6 Valiant 18.1 105 6 1.2412686 18.95743 1.0260913
## 7 Duster 360 14.3 245 8 1.2725656 15.65682 0.7295476
## 8 Merc 240D 24.4 62 4 1.1600209 24.66523 1.2301590
## 9 Merc 230 22.8 95 4 1.1474025 21.75688 1.0028817
## 10 Merc 280 19.2 123 6 1.2354715 18.60024 0.8301657
## 11 Merc 280C 17.8 123 6 1.2354715 18.60024 0.8301657
## 12 Merc 450SE 16.4 180 8 1.4036430 15.16055 0.6628058
## 13 Merc 450SL 17.3 180 8 1.3164082 16.10217 0.6686205
## 14 Merc 450SLC 15.2 180 8 1.3297240 15.95844 0.6646656
## 15 Cadillac Fleetwood 10.4 205 8 1.6582281 12.05924 0.8164395
## 16 Lincoln Continental 10.4 215 8 1.6908336 11.56598 0.8664144
## 17 Chrysler Imperial 14.7 230 8 1.6761615 11.51236 0.8747715
## 18 Fiat 128 32.4 66 4 0.7884574 28.30688 0.8334929
## 19 Honda Civic 30.4 52 4 0.4793350 32.93520 1.2864065
## 20 Toyota Corolla 33.9 65 4 0.6070445 30.35732 0.9119299
## 21 Toyota Corona 21.5 97 4 0.9021918 24.21918 0.8029580
## 22 Dodge Challenger 15.5 150 8 1.2584610 17.15162 0.9165122
## 23 AMC Javelin 15.2 150 8 1.2340169 17.41547 0.9301737
## 24 Camaro Z28 13.3 245 8 1.3454724 14.86986 0.7031688
## 25 Pontiac Firebird 19.2 175 8 1.3467736 15.84507 0.6923763
## 26 Fiat X1-9 27.3 66 4 0.6601073 29.69230 0.8603249
## 27 Porsche 914-2 26.0 91 4 0.7608058 26.29886 0.7017866
## 28 Lotus Europa 30.4 113 4 0.4140944 28.01158 1.4721561
## 29 Ford Pantera L 15.8 264 8 1.1537316 16.67101 0.9581067
## 30 Ferrari Dino 19.7 175 6 1.0188473 19.72586 2.0459805
## 31 Maserati Bora 15.0 335 8 1.2725656 14.38492 1.5711799
## 32 Volvo 142E 21.4 109 4 1.0224509 21.81398 1.1169443
## .resid .hat .sigma .cooksd .std.resid
## 1 -0.84259798 0.21753037 2.155527 0.0080136432 -0.44919801
## 2 0.15993690 0.19359067 2.163974 0.0002419218 0.08398893
## 3 -2.44260086 0.11592141 2.098312 0.0281119652 -1.22506244
## 4 1.76643994 0.18592510 2.127063 0.0278106612 0.92324688
## 5 1.65353020 0.12297113 2.134059 0.0138868553 0.83263822
## 6 -0.85743088 0.23413931 2.155019 0.0093234637 -0.46203554
## 7 -1.35682045 0.11836125 2.144085 0.0089058886 -0.68144091
## 8 -0.26523481 0.33653082 2.163258 0.0017086324 -0.15355759
## 9 1.04312340 0.22366691 2.150745 0.0128286572 0.55829379
## 10 0.59975515 0.15326107 2.160186 0.0024427840 0.30736215
## 11 -0.80024485 0.15326107 2.156986 0.0043489344 -0.41010898
## 12 1.23944529 0.09769557 2.147828 0.0058563589 0.61532134
## 13 1.19782769 0.09941723 2.148888 0.0055873688 0.59522846
## 14 -0.75844098 0.09824459 2.158130 0.0022078979 -0.37664185
## 15 -1.65924256 0.14823487 2.132939 0.0178704165 -0.84781502
## 16 -1.16597514 0.16693741 2.148513 0.0103891802 -0.60242312
## 17 3.18763708 0.17017335 2.043014 0.0797735287 1.65016107
## 18 4.09312246 0.15449205 1.964302 0.1150229480 2.09916727
## 19 -2.53519708 0.36800928 2.064064 0.1881330460 -1.50386043
## 20 3.54268084 0.18493761 2.010599 0.1109971604 1.85049433
## 21 -2.71917694 0.14337981 2.079533 0.0458976636 -1.38546153
## 22 -1.65162412 0.18680082 2.131744 0.0244800116 -0.86370196
## 23 -2.21547490 0.19241122 2.104962 0.0460031727 -1.16258001
## 24 -1.56986013 0.10995665 2.137459 0.0108673927 -0.78470521
## 25 3.35493249 0.10660723 2.039401 0.0477610271 1.67383917
## 26 -2.39229597 0.16459903 2.097298 0.0428816439 -1.23429389
## 27 -0.29886303 0.10952478 2.163313 0.0003919380 -0.14935248
## 28 2.38842062 0.48195894 2.055548 0.3254674606 1.56487581
## 29 -0.87101075 0.20414093 2.155083 0.0077679973 -0.46042255
## 30 -0.02585828 0.93090487 2.164186 0.0041420382 -0.04639029
## 31 0.61507628 0.54897701 2.156189 0.0324353831 0.43189761
## 32 -0.41397864 0.27743762 2.161995 0.0028931757 -0.22966320
modelu m5
rozdělení náhodné složky (imho zbytečné):
qqPlot(m5)
linearita modelu:
raintest(m5)
##
## Rainbow test
##
## data: m5
## Rain = 0.51395, df1 = 16, df2 = 9, p-value = 0.8824
heteroskedasticita:
bptest(m5)
##
## studentized Breusch-Pagan test
##
## data: m5
## BP = 9.6483, df = 6, p-value = 0.1403
ncvTest(m5)
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 2.943605 Df = 1 p = 0.08621809
autokorelace (tady zrovna nemá smysl):
durbinWatsonTest(m5)
## lag Autocorrelation D-W Statistic p-value
## 1 -0.2029913 2.398143 0.36
## Alternative hypothesis: rho != 0
multikolinearita (problém, pokud VIF > 4):
vif(m5)
## GVIF Df GVIF^(1/(2*Df))
## hp 33.738297 1 5.808468
## factor(cyl) 612.000904 2 4.973797
## log(wt) 2.888584 1 1.699583
## hp:factor(cyl) 2129.740990 2 6.793314
coeftest(m5, vcov. = vcovHC) # vcovHAC pro heteroskedasticitu a autokorelaci (mnoho variant)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 42.906662 4.910080 8.7385 4.528e-09 ***
## hp -0.092259 0.053273 -1.7318 0.0956261 .
## factor(cyl)6 -8.102296 5.244885 -1.5448 0.1349605
## factor(cyl)8 -10.051302 5.599233 -1.7951 0.0847346 .
## log(wt) -10.794064 2.685770 -4.0190 0.0004719 ***
## hp:factor(cyl)6 0.068939 0.055434 1.2436 0.2251696
## hp:factor(cyl)8 0.078127 0.054332 1.4379 0.1628496
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
in-sample:
pred <- predict(m5)
ggplot(data.frame(skutecne = mtcars$mpg, predpoved = pred), aes(skutecne, predpoved)) +
geom_point() +
geom_abline(intercept = 0, slope = 1) +
geom_smooth()
## `geom_smooth()` using method = 'loess'
out-of-sample:
set.seed(12345)
idx <- sample(nrow(mtcars), round(nrow(mtcars) / 2))
estim_sample <- mtcars[idx, ]
test_sample <- mtcars[-idx, ]
m6 <- lm(mod5, estim_sample)
predex <- predict(m6, newdata = test_sample)
ggplot(data.frame(skutecne = test_sample$mpg, predpoved = predex), aes(skutecne, predpoved)) +
geom_point() +
geom_abline(intercept = 0, slope = 1) +
geom_smooth()
## `geom_smooth()` using method = 'loess'
data:
data(iris)
vizualizace:
ggplot(iris, aes(Sepal.Length, Sepal.Width)) + geom_point() + geom_smooth(method = "lm")
ggplot(iris, aes(Sepal.Length, Sepal.Width, color = Species)) + geom_point() + geom_smooth(method = "lm")
iris1 <- Sepal.Width ~ Sepal.Length
est_iris1 <- lm(iris1, iris)
est_iris2 <- update(est_iris1, . ~ . * Species)
stargazer(est_iris1, est_iris2, type = "text")
##
## ==========================================================================
## Dependent variable:
## -------------------------------------------
## Sepal.Width
## (1) (2)
## --------------------------------------------------------------------------
## Sepal.Length -0.062 0.799***
## (0.043) (0.110)
##
## Speciesversicolor 1.442**
## (0.713)
##
## Speciesvirginica 2.016***
## (0.686)
##
## Sepal.Length:Speciesversicolor -0.479***
## (0.134)
##
## Sepal.Length:Speciesvirginica -0.567***
## (0.126)
##
## Constant 3.419*** -0.569
## (0.254) (0.554)
##
## --------------------------------------------------------------------------
## Observations 150 150
## R2 0.014 0.623
## Adjusted R2 0.007 0.610
## Residual Std. Error 0.434 (df = 148) 0.272 (df = 144)
## F Statistic 2.074 (df = 1; 148) 47.534*** (df = 5; 144)
## ==========================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
jako výše
jako výše
hypotézy o parametrech:
linearHypothesis(est_iris2, "Sepal.Length:Speciesversicolor = Sepal.Length:Speciesvirginica")
## Linear hypothesis test
##
## Hypothesis:
## Sepal.Length:Speciesversicolor - Sepal.Length:Speciesvirginica = 0
##
## Model 1: restricted model
## Model 2: Sepal.Width ~ Sepal.Length + Species + Sepal.Length:Species
##
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 145 10.741
## 2 144 10.680 1 0.060706 0.8185 0.3671
hypotézy o nested modelech:
anova(est_iris1, est_iris2)
## Analysis of Variance Table
##
## Model 1: Sepal.Width ~ Sepal.Length
## Model 2: Sepal.Width ~ Sepal.Length + Species + Sepal.Length:Species
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 148 27.916
## 2 144 10.680 4 17.236 58.098 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1