#Prevzato z knihy: Andersen, P. K., Skovgaard, L. T. (2010) Regression with Linear Predictors, Springer.

##   Variable list:
##   
##   age: Age of the patient
##   
##   surgtype: Type of surgery
##           1: Orthopedic
##           2: Gynecological
##           3: Abdominal
##   
##   blocking: Neuromuscular blocking agent
##           1: Pancuronium
##           2: Vecuronium
##           3: Atracurium
##   
##   longact: Is the blocking agent long-acting?
##           0: No
##           1: Yes
##   
##   duration: Duration of the operation in minutes
##   
##   tofratio: The ratio of T4/T1 in a train-of-four stimulation
##   
##   complication: Does the patient experience a pulmonary complication?
##           0: No
##           1: Yes


surgery <- read.csv2("http://staff.pubhealth.ku.dk/~linearpredictors/datafiles/Surgery.csv",               
                     sep = ";",                                                                            
                     dec = ".",                                                                            
                     header = TRUE,                                                                        
                     colClasses =                                                                          
                     ## c("age", "surgtype", "blocking", "longact", "duration", "tofratio", "complication")
                     c("numeric", "factor", "factor","factor", "numeric","numeric","factor"),              
                     na.strings = "."                                                                      
                     )       
levels(surgery$blocking) <- c("Pancuronium", "Vecuronium", "Atracurium")
levels(surgery$surgtype) <- c("Orthopedic", "Gynecological", "Abdominal")


## number of complications in each NBA group
t <- table(surgery$blocking, surgery$complication, dnn = c("NBA", "Complication"))
t <- addmargins(t)

## percent complications
t[,"1"]/t[,"Sum"]*100

## number of the different surgeries in each NBA group
cts <- table(surgery$surgtype, surgery$blocking, dnn = c("Surgery", "NBA"))
addmargins(cts)

## percents within surgery type
ps <- apply(cts, 1, function(x)x/sum(x)*100)
t(addmargins(ps))





## Logistic regression models, setting reference level
surgery$blocking <- relevel(surgery$blocking, ref = 2)

#pokus o zjisteni nelinearity - linearni splajn
model1 <- glm(complication ~ I(age/10) + ifelse(age > 60, (age-60)/10,0) + surgtype + blocking,
                   family = binomial,
                   data = surgery)

summary(model1)


#bez nelinearity
model2 <- glm(complication ~ I(age/10) + surgtype + blocking,
             family = binomial,
             data = surgery)                                                                              
             
summary(model2)

#ODHADY ODDS RATIOS, VCETNE INTERVALU SPOLEHLIVOSTI
round(
	exp(cbind(coef(model2),confint(model2))),
	2)


#ANALYZA REZIDUI, HLEDANI MOZNYCH INTERAKCI


surgery$res <- residuals(model2, type = "pearson")
plot(res ~ age,
     data = surgery,
     xlab = "Age",
     ylab = "Pearson residual",
     col = "grey"
     )

## smoother for each surgery type
s <- split(surgery,surgery$surgtype)
smoother <- function(d){
  newage <- seq(min(d$age), max(d$age), length.out = 200)
  lines(predict(loess(res ~ age, data = d), data.frame(age = newage)) ~ newage)
}
lapply(s, smoother)

#to nevypada na interakci mezi vekem a typem operace, zkusime i model

model3 <- glm(complication ~ I(age/10):surgtype + surgtype + blocking - 1,
             family = binomial,
             data = surgery)
             

## the summary includes p-values for significance of terms
summary(model3) 
anova(model3,model2,test="Chisq")
#efekt je skutecne nevyznamny



#INTERPRETACE

## linear effect of age
model1 <- glm(complication ~ surgtype + blocking + age, family = binomial, data = surgery)
exp(cbind(model1$coef, confint(model1)))

model1a <- glm(complication ~ surgtype + relevel(blocking, ref=3) + age, family = binomial,
               data = surgery)
exp(cbind(model1a$coef, confint(model1a)))

## linear effects differing before and after age 60
model2 <- glm(complication ~ surgtype + blocking + age + I(ifelse(age > 60, age - 60, 0)),
              family = binomial, data = surgery)
exp(cbind(model2$coef, confint(model2)))

model2a <- glm(complication ~ surgtype + relevel(blocking, ref = 3) + age + I(ifelse(age > 60, age - 60, 0)),
               family = binomial, data = surgery)
exp(cbind(model2a$coef, confint(model2a)))

## linear effect of age, according to surgery
model3 <- glm(complication ~ age:surgtype + surgtype + blocking - 1,
             family = binomial,
             data = surgery)

exp(cbind(model3$coef, confint(model3)))

model3a <- glm(complication ~ age:surgtype + surgtype + relevel(blocking, ref=3) - 1,
             family = binomial,
             data = surgery)

exp(cbind(model3a$coef, confint(model3a)))



#BEZ OHLEDU NA MODEL VYCHÁZÍ RIZIKO SPOJENÉ S JEDNOTLIVÝMI MOLEKULAMI PODOBNĚ





##############INTERMEDIATY

#longact - Pancuronium
model4 <- glm(complication ~ surgtype + I((age-60)/10) + I(duration/60-1) +
             longact:I((tofratio-0.7)*10) + longact,
             family = binomial,
             data = surgery)

summary(model4)

round(
	exp(cbind(coef(model4),confint(model4))),
	2)

#chybejici hodnoty pro jednoduchost vypustime
used.data <- na.omit(surgery)

par(mfrow = c(1,2))

used.data$res <- residuals(model4, type = "pearson")
plot(res ~ tofratio,
     data = used.data,
     xlab = "TOF-ratio",
     ylab = "Pearson residual",
     col = "grey"
     )

  ## solid = long acting, dashed = short acting
linetypes <- c("43", "solid")
s <<- split(used.data, used.data$longact)
smoother <- function(d){
  newtof <- seq(min(d$tofratio), max(d$tofratio), length.out = 200)
  lines(predict(loess(res ~ tofratio, data = d, degree = 1),
                data.frame(tofratio = newtof)) ~ newtof,
        lty = linetypes[unique(unclass(d$longact))])
}
lapply(s, smoother)

plot(res ~ duration,
     data = used.data,
     xlab = "Duration of anesthesia",
     xlim = c(0,500),
     ylab = "Pearson residual",
     col = "grey"
     )

smoother <- function(d){
  newdur <- seq(min(d$duration), max(d$duration), length.out = 200)
  lines(predict(loess(res ~ duration, data = d, degree = 1),
                data.frame(duration = newdur)) ~ newdur,
        lty = linetypes[unique(unclass(d$longact))])
}
lapply(s, smoother)






#test KALIBRACE - hosmer lemeshow gof test
#decily rizika

used.data$predicted <- model4$fitted



## choosing a type of quantiles which gives the same grouping as in SAS
used.data$decile <- cut(used.data$predicted,
                        breaks = quantile(used.data$predicted, seq(0,1,0.1), type = 6),
                        include.lowest = T, labels = 1:10)
#zobrazeni decilu
used.data[order(used.data$predicted),]



cross <- xtabs(~ complication + decile, data = used.data)
print(addmargins(cross))

O <- cross[2,]
E <- tapply(used.data$predicted, used.data$decile, sum)


HLtab <- cbind(colSums(cross[1:2,]), O, E, (O-E)/sqrt(E))
#chi kvadrat statistika
sum(HLtab[,4]^2)

1-pchisq(3.877446,8)
#8 d.f. -> P=0.87






#diagnostika

## cooks distance
plot(cooks.distance(model4) ~ tofratio,
     data = used.data,
     xlab = "tofratio",
     ylab = "Cook's distance",
     pch = ifelse(used.data$complication == 1, 4, 21)
     ## cross = complication, o = no complication.
     )
#jeden pacient s vysokym rizikem bez komplikace
used.data[cooks.distance(model4)>0.1,]  
     
     
     
     
     
     
     
#vysledky finalniho modelu
#nemame efekt TOF pro kratce pusobici

surgery$age60 <- (surgery$age-60)/10
surgery$dur60 <- surgery$duration/60-1
surgery$tof07 <- (surgery$tofratio-0.7)*10

model <- glm(complication ~ surgtype + age60 + dur60 +
             longact + I((as.numeric(longact)-1)*tof07),
             family = binomial,
             data = surgery)

used.data <- surgery[rownames(model$model),]



plot(0,0, type = "n",
     ylim = c(0,.8),
     xlim = range(used.data$tofratio) - c(0.02,0),
     xlab = "TOF-ratio",
     ylab = "Estimated probability")

plot.fitted <- function(d){
  n <- 100
  newtof07 <- seq(min(d$tof07), max(d$tof07), length.out = n)
  newdata <- data.frame(age60 = rep(0,n), dur60 = rep(1,n), tof07 = newtof07,
                        surgtype = rep(unique(d$surgtype),n), longact = rep(unique(d$longact),n))
  newtof <- newtof07/10+0.7
  predicted <- predict(model, newdata = newdata, type = "response")
  
  lines(newtof, predicted,
        lty = ifelse(unique(d$longact) == 0, "solid", "32"))
  
  ## surgtypes
  ## 1='orthopaedic'
  ## 2='gynaecological'
  ## 3='abdominal'
  
  text(newtof[1], predicted[1], labels = c("o", "g", "a")[unique(d$surgtype)],
       adj = c(1.7,0.5), 
       cex = 0.7
       )
}

## plot fitted curces for longact*surgerytype groups
lapply(split(used.data, list(used.data$surgtype, used.data$longact)), plot.fitted)



#finalni odhady
summary(model)
exp(cbind(model$coef, confint(model)))







#JEN POKUSY O ALTERNATIVNI VIZUALIZACI


#logodds

plot(0,0, type = "n",
     xlim = range(used.data$tofratio) - c(0.02,0),
     ylim=c(-6,2),
     xlab = "TOF-ratio",
     ylab = "Estimated log-odds")

plot.fitted <- function(d){
  n <- 100
  newtof07 <- seq(min(d$tof07), max(d$tof07), length.out = n)
  newdata <- data.frame(age60 = rep(0,n), dur60 = rep(1,n), tof07 = newtof07,
                        surgtype = rep(unique(d$surgtype),n), longact = rep(unique(d$longact),n))
  newtof <- newtof07/10+0.7
  predicted <- predict(model, newdata = newdata)
  
  lines(newtof, predicted,
        lty = ifelse(unique(d$longact) == 0, "solid", "32"))
  
  ## surgtypes
  ## 1='orthopaedic'
  ## 2='gynaecological'
  ## 3='abdominal'
  
  text(newtof[1], predicted[1], labels = c("o", "g", "a")[unique(d$surgtype)],
       adj = c(1.7,0.5), 
       cex = 0.7
       )
}

## plot fitted curces for longact*surgerytype groups
lapply(split(used.data, list(used.data$surgtype, used.data$longact)), plot.fitted)



#odds

plot(0,0, type = "n",
     xlim = range(used.data$tofratio) - c(0.02,0),
     ylim=c(0,3),
     xlab = "TOF-ratio",
     ylab = "Estimated odds")

plot.fitted <- function(d){
  n <- 100
  newtof07 <- seq(min(d$tof07), max(d$tof07), length.out = n)
  newdata <- data.frame(age60 = rep(0,n), dur60 = rep(1,n), tof07 = newtof07,
                        surgtype = rep(unique(d$surgtype),n), longact = rep(unique(d$longact),n))
  newtof <- newtof07/10+0.7
  predicted <- exp(predict(model, newdata = newdata))
  
  lines(newtof, predicted,
        lty = ifelse(unique(d$longact) == 0, "solid", "32"))
  
  ## surgtypes
  ## 1='orthopaedic'
  ## 2='gynaecological'
  ## 3='abdominal'
  
  text(newtof[1], predicted[1], labels = c("o", "g", "a")[unique(d$surgtype)],
       adj = c(1.7,0.5), 
       cex = 0.7
       )
}

## plot fitted curces for longact*surgerytype groups
lapply(split(used.data, list(used.data$surgtype, used.data$longact)), plot.fitted)