automobile <- read.table("automobile.txt", header=TRUE,sep=",") ## 1978 Automobile Data - no missings ## make Make and Model ## price Price ## mpg Mileage (mpg - mile per gallon) ## rep78 Repair Record 1978 ## headroom Headroom (in.) ## trunk Trunk space (cu. ft.) ## weight Weight (lbs.) ## length Length (in.) ## turn Turn Circle (ft.) ## displacement Displacement (cu. in.) ## gear_ratio Gear Ratio ## foreign Car type #transformace, aby to bylo srozumitelnejsi... # litru na 100 km <- mili na galon automobile$spotreba <- 1/automobile$mpg * 3.78541178/1.609344 * 100 attach(automobile) #neumi americani delat motory, nebo maji jen radi velka auta? pairs(automobile) stripchart(spotreba~foreign,vertical = TRUE,method = "jitter") stripchart(weight~foreign,vertical = TRUE,method = "jitter") ########## plot(spotreba ~ weight, xlab = "Hmotnost [lbs]", ylab = "Spotreba [l/100km]") model1 <- lm(spotreba ~ weight) summary(model1) #jednoducha cara abline(model1) #slozitejsi zpusob, potreba u slozitejsich modelu newweight <- data.frame(weight = seq(min(weight), max(weight), length.out = 100)) lines(predict(model1, newweight) ~ newweight$weight) ################################################################# #odhad modelu model1 <- lm(spotreba ~ weight,model = TRUE, x = TRUE, y = TRUE) class(model1) model1 summary(model1) #z objektu lze vyndat jeho jednotlive komponenty model1$coefficients coef(model1) model1$residuals resid(model1) model1$fitted.values fitted(model1) #matice planu model1$x #matice vysledku model1$y #neco malo maticove algebry #indexace model1$x[1,] #transpozice t(model1$x) #nasobeni t(model1$x) %*% model1$x #inverze solve(t(model1$x) %*% model1$x) #OLS odhad ( solve(t(model1$x) %*% model1$x) ) %*% ( t(model1$x) %*% model1$y ) #srovnani model1$coefficients #rezidualni soucet ctvercu sum(resid(model1)^2) #pocet stupnu volnosti model1$df.residual #odhad rozptylu modelu sum(resid(model1)^2) / model1$df.residual #odhad smerodatne odchylky modelu - "residual standard error" sqrt( sum(resid(model1)^2) / model1$df.residual ) ################################################################# # model s kategorialnim prediktorem model2 <- lm(spotreba ~ foreign,model = TRUE, x = TRUE, y = TRUE) summary(model2) #coz odpovida prumerum ve skupinach tapply(spotreba,foreign,mean) #nez. promenna foreign #matice planu model2$x ################################################################# # model s obema prediktory model3 <- lm(spotreba ~ weight+foreign,model = TRUE, x = TRUE, y = TRUE) summary(model3) model3$x plot(spotreba ~ weight, xlab = "Hmotnost [lbs]", ylab = "Spotreba [l/100km]", type="n") points(weight[foreign=="Domestic"],spotreba[foreign=="Domestic"],pch=1) points(weight[foreign=="Foreign"],spotreba[foreign=="Foreign"],pch=2) #slozitejsi zpusob, potreba u slozitejsich modelu newweightDom <- data.frame(weight = seq(min(weight), max(weight), length.out = 100), foreign = rep("Domestic",100)) newweightFor <- data.frame(weight = seq(min(weight), max(weight), length.out = 100), foreign = rep("Foreign",100)) lines(predict(model3, newweightDom) ~ newweightDom$weight) lines(predict(model3, newweightFor) ~ newweightFor$weight,lty=2) #F test XX <- t(model3$x) %*% model3$x XX_inv = solve(XX) V <- XX_inv[2:3,2:3] V_inv <- solve(V) beta <- as.matrix(model3$coefficient[2:3]) s2 <- sum(resid(model3)^2) / model3$df.residual Fstat <- 1/(s2*2) * ( t(beta) %*% V_inv %*% beta ) summary(model3) #p-hodnota pro vyznamnost modelu 1-pf(Fstat,2,66) ################################################################# # problem multikolinearity pairs(data.frame(weight,length,foreign,spotreba)) cor(weight,length) model4 <- lm(spotreba ~ weight+length+foreign,model = TRUE, x = TRUE, y = TRUE) summary(model4) summary(model3) #korelace odhadu parametru summary(model4,correlation=TRUE) #variance inflation faktor - nad 10 velky problem #library(faraway) vif(model4)