# LOGISTICKA REGRESE #nacteni souboru heartdisease <- read.table("heartdisease.txt", header=TRUE,sep=",") ### A retrospective sample of males in a heart-disease high-risk region ### of the Western Cape, South Africa. There are roughly two controls per ### case of CHD. Many of the CHD positive men have undergone blood ### pressure reduction treatment and other programs to reduce their risk ### factors after their CHD event. In some cases the measurements were ### made after these treatments. These data are taken from a larger ### dataset, described in Rousseauw et al, 1983, South African Medical ### Journal. ### ### sbp systolic blood pressure ### tobacco cumulative tobacco (kg) ### ldl low densiity lipoprotein cholesterol ### adiposity ### famhist family history of heart disease (Present, Absent) ### typea type-A behavior ### obesity ### alcohol current alcohol consumption ### age age at onset ### chd response, coronary heart disease # jednoduchy prehled promennych pairs(heartdisease) ## # jednoduchy model se spojitou promennou model1 <- glm(chd ~ tobacco, family = binomial, data = heartdisease) summary(model1) #odhad ODDS RATIO exp(coef(model1)[2]) ## znazorneni modelu newTob <- data.frame(tobacco=seq(0,40,.1)) predicted.probability <- predict(model1,newTob,type="resp") plot(predicted.probability ~ newTob$tobacco,type="l", xlab="Tobacco", ylab="Prob(CHD)", ylim=c(0,1)) points(heartdisease$tobacco,heartdisease$chd) # princip Goodness of Fit #kategorizace prediktoru Tob.group <- cut(heartdisease$tobacco,c(-1,2,4,6,8,10,15,20,100)) tb <- table(Tob.group, heartdisease$chd) rel.freq <- prop.table(tb,1)[,2] #relativni frekvence vysledku v kategoriich points(rel.freq~c(1,3,5,7,9,12.5,17.5,25),pch=5) ## Goodness of Fit statistic #library(Design) # v knihovne Franka Harrella library(rms) # v knihovne Franka Harrella des.model1 <- lrm(chd ~ tobacco, x=TRUE, y=TRUE, data=heartdisease) resid(des.model1, 'gof') ## OVERENI PREDPOKLADU MODELU #zakladni rezidua ve zobecnenem linearnim modelu plot(residuals(model1, type = "pearson") ~ heartdisease$tobacco, xlab = "Tobacco", ylab = "Pearson residual" ) #radeji s vyhlazenim scatter.smooth(residuals(model1, type = "pearson") ~ heartdisease$tobacco, xlab = "Tobacco", ylab = "Pearson residual" ) scatter.smooth(residuals(model1, type = "deviance") ~ heartdisease$tobacco, xlab = "Tobacco", ylab = "Deviance residual" ) #pakove body plot(hatvalues(model1)~fitted(model1), xlab = "Predicted probability", ylab = "Leverage" ) points(fitted(model1)[heartdisease$chd==1],hatvalues(model1)[heartdisease$chd==1], col="red") #bez ohledu na skutečný výsledek, váha pozorování s predikovanou vysokou nebo nízkou pravděpodobností je nízká #Cookovy vzdalenosti plot(cooks.distance(model1)~fitted(model1), xlab = "Predicted probability", ylab = "Cookova vzdálenost" ) points(fitted(model1)[heartdisease$chd==1],cooks.distance(model1)[heartdisease$chd==1], col="red") heartdisease[cooks.distance(model1) > 0.05,] boxplot(heartdisease$tobacco~heartdisease$chd) ## kategorialni prediktor tobacco.kat <- cut(heartdisease$tobacco,c(-1,0,Inf),right=T,labels=c("Nekurak","Kurak")) model2 <- glm(chd ~ tobacco.kat, family = binomial, data = heartdisease) summary(model2) ## kategorialni + spojity prediktor model3 <- glm(chd ~ tobacco.kat+sbp, family = binomial, data = heartdisease) summary(model3) ## analyza deviance, se statistickym testem # srovnani modelu a submodelu anova(model2,model3,test="Chisq")