# Load the data.
tabulka <- read.csv2("../data/data01.csv")

# Inspect the structure of the loaded table.
str(tabulka)
# Note that both variables are treated as quantitative.

# Scatter plot of Y against x.
# The two ranges are printed first to help judge the axis limits used below.
range(tabulka$x)
range(tabulka$Y)
plot(
  x = tabulka$x, y = tabulka$Y,
  type = "p", pch = 20,
  xlab = "x", ylab = "Y",
  xlim = c(0, 11), ylim = c(0, 200)
)

# Linear models: polynomial regressions of Y on x of degree 3, 2 and 1.
# Each fit is summarised right after it is created.
model3 <- lm(Y ~ 1 + x + I(x^2) + I(x^3), data = tabulka)
summary(model3)

model2 <- lm(Y ~ 1 + x + I(x^2), data = tabulka)
summary(model2)

model1 <- lm(Y ~ 1 + x, data = tabulka)
summary(model1)

# Compare the nested models with ANOVA (F-tests).
anova(model3, model2)
# => going from model 3 to model 2 does not make the linear model
#    statistically significantly worse
anova(model2, model1)
# => going from model 2 to model 1 makes the linear model
#    statistically significantly worse
# => out of these three models we should choose model 2

# Draw the fitted curve of every model into the current plot.
# Predictions are evaluated on a fine grid of x values from 0 to 11.
xx <- seq(from = 0, to = 11, by = 0.1)
new_x <- data.frame(x = xx)
curve_cols <- c("red", "green", "blue")

Y3 <- predict(model3, newdata = new_x)
Y2 <- predict(model2, newdata = new_x)
Y1 <- predict(model1, newdata = new_x)

lines(xx, Y3, col = curve_cols[1])
lines(xx, Y2, col = curve_cols[2])
lines(xx, Y1, col = curve_cols[3])
legend(
  "topleft",
  legend = c("m3", "m2", "m1"),
  col = curve_cols,
  lty = rep(1, 3)
)

# Residual diagnostics for all three models: side-by-side box plots first,
# then one normal Q-Q plot (with reference line) per model.
boxplot(
  model3$residuals, model2$residuals, model1$residuals,
  names = c("m3", "m2", "m1"),
  col = c("red", "green", "blue"),
  ylab = "Rezidua"
)

# par() returns the previous values of the parameters it changes, so the
# layout that was active before this section is restored afterwards instead
# of being forced to a 1 x 1 grid.
old_par <- par(mfrow = c(1, 3))
qqnorm(model3$residuals, col = "red")
qqline(model3$residuals, col = "red")
qqnorm(model2$residuals, col = "green")
qqline(model2$residuals, col = "green")
qqnorm(model1$residuals, col = "blue")
qqline(model1$residuals, col = "blue")
par(old_par)

# For the selected model 2 we additionally check the model assumptions.
# The raw residual component is used (not residuals()) so that its length
# always matches the rows of the model frame used for the plot below.
r2 <- model2$residuals

# Normality of the residuals.
# NOTE: the Kolmogorov-Smirnov p-value is only approximate here, because the
# residuals are standardised with a mean and sd estimated from the same data
# (ks.test expects pre-specified parameters). The Shapiro-Wilk test does not
# have this limitation and is reported alongside it.
ks.test((r2 - mean(r2)) / sd(r2), "pnorm")
shapiro.test(r2)

# Zero mean of the residuals.
# NOTE: for a least-squares fit with an intercept the residuals average to
# zero by construction, so this test serves only as a sanity check.
t.test(r2)

# Residuals against the predictor, to look for remaining structure.
# x is taken from the model frame so that it stays aligned with r2 even if
# lm() dropped rows with missing values.
plot(model.frame(model2)$x, r2, xlab = "x", ylab = "Rezidua")