# LOGISTICKA REGRESE heartdisease <- read.table("heartdisease.txt", header=TRUE,sep=",") tobacco.kat <- cut(heartdisease$tobacco,c(-1,0,Inf),right=T,labels=c("Nekurak","Kurak")) model2 <- glm(chd ~ tobacco.kat, family = binomial, data = heartdisease) summary(model2) ## kategorialni + spojity prediktor model3 <- glm(chd ~ tobacco.kat+sbp, family = binomial, data = heartdisease) summary(model3) ## analyza deviance, se statistickym testem # srovnani modelu a submodelu # pozor, nemusi byt korektni pri vetsim poctu prediktoru anova(model2,model3,test="Chisq") #################################################################### # POISSONOVA REGRESE c34 <- read.table("c34.csv", header=TRUE,sep=";") # Incidence zhoubneho nadoru plic # Rok # Incidence # Mortalita model1 <- glm(Incidence ~ Rok, family = poisson, data = c34) summary(model1) #Graf plot(Incidence ~ Rok, type="p", data = c34, xlab = "Rok", ylab = "Incidence" ) lines(predict(model1,type = "response") ~ Rok,data=c34) #Rezidua plot(residuals(model1, type = "pearson") ~ c34$Rok, xlab = "Rok", ylab = "Pearson residual" ) #Cookovy vzdalenosti plot(cooks.distance(model1)~c34$Rok, xlab = "Rok", ylab = "Cookova vzdálenost" ) #overdisperze model2 <- glm(Incidence ~ Rok, family = quasipoisson, data = c34) summary(model2) ################ #predikce strednich hodnot newRok <- data.frame(Rok = 1977:2015) #namalovat jednou pro kazdy model newfit <- predict(model1, newRok, se.fit=TRUE, type = "response") newfit <- predict(model2, newRok, se.fit=TRUE, type = "response") newRok$Incidence <- newfit$fit newRok$IncidenceL <- newfit$fit-1.96*newfit$se.fit newRok$IncidenceU <- newfit$fit+1.96*newfit$se.fit ## plot(Incidence ~ Rok, type="l", data = newRok, xlab = "Rok", ylab = "Incidence" ) #dolni hranice intervalu spolehlivosti pro odhad stredni hodnoty lines(IncidenceL ~ Rok, lty="dashed", data = newRok ) #horni hranice intervalu spolehlivosti pro odhad stredni hodnoty lines(IncidenceU ~ Rok, lty="dashed", data = newRok ) points(Incidence ~ Rok,data=c34) #################################################################### # KATEGORIALNI REGRESE #nacteni dat source("http://192.38.117.59/~linearpredictors/datafiles/readFibrosis.R") fibrosis library(MASS) ## vypusteni nekolika chybejicich fibrosis.compl <- na.omit(fibrosis) ## model proporcionalnich sanci propodds <- polr(stage ~ log2(ha) + log2(p3np) + log2(ykl40), data = fibrosis.compl, Hess = T ## not to recalculate Hessian later ) round(exp(cbind(Estimate = propodds$coef, confint.default(propodds))), digits = 2)