# LOGISTICKA REGRESE

heartdisease <- read.table("heartdisease.txt", header=TRUE,sep=",")

tobacco.kat <- cut(heartdisease$tobacco,c(-1,0,Inf),right=T,labels=c("Nekurak","Kurak"))

model2 <- glm(chd ~ tobacco.kat,
             family = binomial,
             data = heartdisease)

summary(model2)

## kategorialni + spojity prediktor

model3 <- glm(chd ~ tobacco.kat+sbp,
             family = binomial,
             data = heartdisease)

summary(model3)


## analyza deviance, se statistickym testem

# srovnani modelu a submodelu
# pozor, nemusi byt korektni pri vetsim poctu prediktoru

anova(model2,model3,test="Chisq")


####################################################################

# POISSONOVA REGRESE
c34 <- read.table("c34.csv", header=TRUE,sep=";")

# Incidence zhoubneho nadoru plic
# Rok
# Incidence
# Mortalita 


model1 <- glm(Incidence ~ Rok,
             family = poisson,
             data = c34)

summary(model1)


#Graf
plot(Incidence ~ Rok,
		 type="p",	
     data = c34,
     xlab = "Rok",
     ylab = "Incidence"
     )
    
lines(predict(model1,type = "response") ~ Rok,data=c34)


#Rezidua
plot(residuals(model1, type = "pearson") ~ c34$Rok,
     xlab = "Rok",
     ylab = "Pearson residual"
     )

#Cookovy vzdalenosti
plot(cooks.distance(model1)~c34$Rok,
		xlab = "Rok",
    ylab = "Cookova vzdálenost"
)


#overdisperze

model2 <- glm(Incidence ~ Rok,
             family = quasipoisson,
             data = c34)

summary(model2)


################
#predikce strednich hodnot
             
newRok <- data.frame(Rok = 1977:2015)

#namalovat jednou pro kazdy model
newfit <- predict(model1, newRok, se.fit=TRUE, type = "response")
newfit <- predict(model2, newRok, se.fit=TRUE, type = "response")

newRok$Incidence <- newfit$fit
newRok$IncidenceL <- newfit$fit-1.96*newfit$se.fit
newRok$IncidenceU <- newfit$fit+1.96*newfit$se.fit
##

plot(Incidence ~ Rok,
		 type="l",	
     data = newRok,
     xlab = "Rok",
     ylab = "Incidence"
     )
     
#dolni hranice intervalu spolehlivosti pro odhad stredni hodnoty     
lines(IncidenceL ~ Rok,
		 lty="dashed",
     data = newRok
     )
#horni hranice intervalu spolehlivosti pro odhad stredni hodnoty     
lines(IncidenceU ~ Rok,
		 lty="dashed",	
     data = newRok
     )    
     
points(Incidence ~ Rok,data=c34)



####################################################################
# KATEGORIALNI REGRESE

#nacteni dat
source("http://192.38.117.59/~linearpredictors/datafiles/readFibrosis.R")
fibrosis
library(MASS)


## vypusteni nekolika chybejicich
fibrosis.compl <- na.omit(fibrosis)

## model proporcionalnich sanci
propodds <- polr(stage ~ log2(ha) + log2(p3np) + log2(ykl40),
                 data = fibrosis.compl,
                 Hess = T ## not to recalculate Hessian later
                 )
                 
round(exp(cbind(Estimate = propodds$coef, confint.default(propodds))),
      digits = 2)