# Dr. Pablo Emilio Verde
# July 2015
# Barcelona Summer School

# Lecture 1
# Practical 1:
# Deterministic analysis ..................................................
theta <- c(0.25, 0.5, 0.75)
prior <- rep(1/3, 3)
#prior <- prior/sum(prior)

plot(theta, prior, type = "h", ylim = c(0, 0.6), ylab = "Probability",
     xlab = expression(theta))

# Bernoulli likelihood for a single observation x
lik <- function(theta, x) { theta^x * (1 - theta)^(1 - x) }
lik(theta, 1)

# rescaled likelihood for one success (x = 1)
curve(lik(x, 1)/sum(lik(theta, 1)), from = 0, to = 1, add = TRUE,
      col = "red", lwd = 2, lty = 2)

product <- prior * lik(theta, 1)
product
posterior <- product / sum(product)
points(theta, posterior, col = "blue", cex = 2, pch = 19)
legend(0.3, 0.5, col = c("black", "red", "blue"),
       legend = c("prior", "likelihood", "posterior"), lty = c(1, 2, 1))
posterior

# Extra material ..........................................................
# Three coins simulation example ..........................................
set.seed(1111234)
coin.pri <- rep(1/3, 3)       # prior for coins 1, 2, 3
theta <- c(0.25, 0.5, 0.75)   # pr. of heads under coins 1, 2, 3
sim <- 100000                 # number of simulations
y <- rep(0, 3*sim)
dim(y) <- c(sim, 3)
for(i in 1:sim)
{
  ind <- sample(c(1, 2, 3), size = 1, prob = coin.pri, replace = TRUE)
  y.new <- rbinom(1, 1, prob = theta[ind])
  y[i, ind] <- y.new
}
# average each class and normalize
apply(y, 2, mean) / sum(apply(y, 2, mean))

# Exercise 2: ..............................................................
# Beta flexibility
par(mfrow = c(2, 2))
curve(dbeta(x, 1, 1), from = 0, to = 1, lty = 1, lwd = 2,
      main = "Beta(1, 1)", xlab = expression(theta))
curve(dbeta(x, 1/2, 1/2), from = 0, to = 1, lty = 2, lwd = 2, col = "red",
      main = "Beta(1/2, 1/2)", xlab = expression(theta))
curve(dbeta(x, 2, 5), from = 0, to = 1, lty = 2, lwd = 2, col = "blue",
      main = "Beta(2, 5)", xlab = expression(theta))
curve(dbeta(x, 5, 2), from = 0, to = 1, lty = 2, lwd = 2, col = "green",
      main = "Beta(5, 2)", xlab = expression(theta))
par(mfrow = c(1, 1))

# Lecture 2: R script ........................................................
# Consider an early investigation of a new drug.
# Experience with similar compounds has suggested that response rates between
# 0.2 and 0.6 could be feasible.
# Interpret this as a distribution with mean = 0.4 and standard deviation 0.1.
# A Beta(9.2, 13.8) distribution has these properties.
# Suppose we treat n = 20 volunteers with the compound and observe r = 15
# positive responses.
# Then we update the prior, and the posterior is Beta(15 + 9.2, 5 + 13.8).
##################################################################################
par(mfrow = c(3, 1))   # graphical output: 3 by 1 panels .....................
# draw a curve for the beta prior density
curve(dbeta(x, 9.2, 13.8), from = 0, to = 1,
      xlab = "prob of success", main = "Prior")
# draw a curve for the binomial likelihood
curve(dbinom(15, 20, x), from = 0, to = 1, col = "blue",
      xlab = "prob of success", main = "Likelihood")
# draw the posterior
curve(dbeta(x, 24.2, 18.8), from = 0, to = 1, col = "red",
      xlab = "prob of success", main = "Posterior")
par(mfrow = c(1, 1))

# ..........................................................................
theta.star <- rbeta(20000, 24.2, 18.8)
odds.star <- theta.star/(1 - theta.star)
quantile(odds.star, prob = c(0.05, 0.5, 0.75, 0.95))
hist(odds.star, breaks = 100, xlab = "odds ratio", freq = FALSE, xlim = c(0, 4))
lines(density(odds.star), lwd = 2, lty = 1, col = "red")
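# Sanity check (a sketch, not in the original practical): theta/(1 - theta)
# is monotone in theta, so the simulated odds quantiles should match the
# transformed Beta quantiles almost exactly.
q.theta <- qbeta(c(0.05, 0.5, 0.75, 0.95), 24.2, 18.8)
q.theta/(1 - q.theta)   # compare with quantile(odds.star, ...) above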
# Practical 2: conjugate analysis ............................................
# 1) Posterior mean and median
# The posterior mean is (r + a) / (n + a + b):
(15 + 9.2) / (20 + 9.2 + 13.8)
# The posterior median is:
qbeta(0.5, 24.2, 18.8)

# 2) Posterior percentiles:
qbeta(0.025, 24.2, 18.8)   #  2.5%
qbeta(0.975, 24.2, 18.8)   # 97.5%

# Lecture 3: R script .........................................................
# Priors
## predictive prior distribution for sample size and power
set.seed(123)
theta <- rnorm(10000, 0.5, 0.1)
sigma <- rnorm(10000, 1, 0.3)
sigma <- ifelse(sigma < 0, -1*sigma, sigma)    # reflect negative draws
# sample size for 80% power (z = 0.84) and a 5% two-sided test (z = 1.96)
n <- 2*sigma^2 / (theta^2) * (0.84 + 1.96)^2
# power for a fixed total sample size of 63
pow <- pnorm(sqrt(63/2) * theta/sigma - 1.96)
par(mfrow = c(1, 2))
hist(n, xlim = c(0, 400), breaks = 50)
hist(pow)
par(mfrow = c(1, 1))
round(quantile(n), 2)
round(quantile(pow), 2)
sum(pow < 0.7)/10000

# mixture of betas
# betabin() gives the beta-binomial marginal likelihood of r successes in n
# trials under a Beta(a, b) prior; it is used by mixbeta() below but was
# missing from the script:
betabin <- function(r, a, b, n)
{
  choose(n, r) * beta(a + r, b + n - r) / beta(a, b)
}
mixbeta <- function(x, r, a1, b1, a2, b2, q, n)
{
  # posterior weight of the first mixture component
  qstar <- q*betabin(r, a1, b1, n) /
           (q*betabin(r, a1, b1, n) + (1 - q)*betabin(r, a2, b2, n))
  p1 <- dbeta(x, a1 + r, n - r + b1)
  p2 <- dbeta(x, a2 + r, n - r + b2)
  posterior <- qstar*p1 + (1 - qstar)*p2
  posterior
}

par(mfrow = c(2, 1))
# informative beta prior
curve(dbeta(x, 9.2, 13.8), from = 0, to = 1, col = "red",
      xlab = "probability of response", main = "priors")
# mixture beta prior with 80% informative and 20% flat
curve(0.8*dbeta(x, 9.2, 13.8) + 0.2*dbeta(x, 1, 1), from = 0, to = 1,
      col = "blue", add = TRUE)
# beta posterior
curve(dbeta(x, 24.2, 18.8), from = 0, to = 1, col = "red",
      xlab = "probability of response", main = "posteriors")
# posterior from a mixture prior
curve(mixbeta(x, r = 15, a1 = 9.2, b1 = 13.8, a2 = 1, b2 = 1, q = 0.8, n = 20),
      from = 0, to = 1, col = "blue", add = TRUE)
par(mfrow = c(1, 1))

# Lecture 4: WinBUGS ..........................................................
# load R2WinBUGS package
library(R2WinBUGS)

# Write the model in BUGS code ...............................................
cat("
model
{
 # WinBUGS code: binary problem, non-conjugate analysis
  y ~ dbin(theta, n)
  logit(theta) <- phi
  phi ~ dnorm(0, 0.001)
  y.pred ~ dbin(theta, n)   # making prediction
}
", file = "model1.bug")

# define your data nodes
n <- 20
y <- 15
data1 <- list("n", "y")

# define parameters of interest
par1 <- c("theta", "y.pred")

# initial values
my.inits <- function(){
  list(phi = rnorm(1, 0, 10), y.pred = rbinom(1, 20, 15/20))
}

# setup OpenBUGS directory
bugsdir <- "C:/OpenBUGS/OpenBUGS323"

# run the bugs() function:
m1 <- bugs(data1, inits = NULL, parameters.to.save = par1,
           program = "OpenBUGS", model.file = "model1.bug",
           n.chains = 2, n.iter = 2000, n.thin = 1, useWINE = TRUE,
           bugs.directory = bugsdir, debug = TRUE, clearWD = TRUE)

# setup WinBUGS directory
bugsdir <- "C:/WinBUGS14"
m1 <- bugs(data1, inits = NULL, par1, "model1.bug",
           n.chains = 5, n.iter = 2000, n.thin = 1,
           bugs.directory = bugsdir, working.directory = getwd(),
           clearWD = TRUE, debug = TRUE)

print(m1, digits.summary = 3)
class(m1)
names(m1)
m1$pD
m1$n.chains
m1$sims.array[1:10, , "theta"]
m1$sims.array[1:10, , "y.pred"]

theta <- m1$sims.array[1:1000, , "theta"]
hist(theta, breaks = 50, prob = TRUE)
lines(density(theta), col = "blue", lwd = 2)

# Using the attach.bugs() function ...
attach.bugs(m1)   # makes the saved nodes (theta, y.pred) available

# posterior predictive ...
post.pred <- table(y.pred)
plot(post.pred)
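# Aside (a sketch, not in the original script): what does the vague normal
# prior phi ~ dnorm(0, 0.001) imply on the probability scale? Pushing prior
# draws through the inverse logit shows the induced prior on theta.
phi.prior <- rnorm(10000, 0, sqrt(1/0.001))   # precision 0.001 -> sd = sqrt(1000)
theta.prior <- plogis(phi.prior)              # inverse-logit transform
hist(theta.prior, breaks = 50, main = "Implied prior on theta",
     xlab = expression(theta))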
# Lecture 5: Multiple parameters ..............................................
# Example: Normal distribution with unknown mean and variance
# Marathon times with R, non-informative analysis .............................
library(LearnBayes)
data(marathontimes)
marathontimes$time
attach(marathontimes)
mycontour(normchi2post, c(220, 330, 500, 9000), time,
          xlab = "mean", ylab = "variance")
SS <- sum((time - mean(time))^2)
n <- length(time)
sigma2 <- SS/rchisq(1000, n - 1)
mu <- rnorm(1000, mean = mean(time), sd = sqrt(sigma2)/sqrt(n))
points(mu, sigma2, col = "blue")
quantile(mu, c(0.025, 0.975))
quantile(sqrt(sigma2), c(0.025, 0.975))

# posterior of the coefficient of variation
cv.star <- sqrt(sigma2)/mu
hist(cv.star*100, breaks = 50, xlab = "coefficient of variation in %",
     freq = FALSE)
lines(density(cv.star*100), col = "blue", lwd = 2, lty = 2)
abline(v = quantile(cv.star*100, prob = c(0.025, 0.5, 0.975)),
       lty = 3, lwd = 2, col = "red")
quantile(cv.star*100, prob = c(0.025, 0.5, 0.975))

# Model checking ..............................................................
y.star <- rnorm(1000, mean = mu, sd = sqrt(sigma2))
par(mfrow = c(3, 4))
hist(time, breaks = 10, xlim = c(150, 400), main = "Original Data", col = "green")
for(i in 1:11){
  y.sim <- sample(y.star, 20)
  hist(y.sim, breaks = 10, xlim = c(150, 400), main = "Simulated Data", col = "blue")
}

# Analysis of the minimum, maximum, variability and asymmetry
min.y <- max.y <- asy1 <- asy2 <- inter.q <- rep(0, 1000)
time <- sort(time)
for (b in 1:1000){
  y.sim <- sample(y.star, 20)
  mu.star <- sample(mu, 20)
  min.y[b] <- min(y.sim)
  max.y[b] <- max(y.sim)
  y.sim <- sort(y.sim)
  # predicted vs. observed asymmetry measures
  asy1[b] <- abs(y.sim[18] - mean(mu.star)) - abs(y.sim[2] - mean(mu.star))
  asy2[b] <- abs(time[18] - mean(mu.star)) - abs(time[2] - mean(mu.star))
  inter.q[b] <- quantile(y.sim, prob = 0.75) - quantile(y.sim, prob = 0.25)
}
par(mfrow = c(2, 2))
hist(min.y, breaks = 50, col = "blue", main = "Minimum y*")
abline(v = min(time), lwd = 3, lty = 2)
hist(max.y, breaks = 50, col = "red", main = "Maximum y*")
abline(v = max(time), lwd = 3, lty = 2)
hist(inter.q, breaks = 50, col = "magenta", main = "Variability y*")
abline(v = quantile(time, prob = 0.75) - quantile(time, prob = 0.25),
       lwd = 3, lty = 2)
plot(asy1, asy2, main = "Asymmetry", xlab = "Asymmetry predicted data",
     ylab = "Asymmetry original data")
abline(a = 0, b = 1, lwd = 2)
par(mfrow = c(1, 1))

# Marathon times with R2WinBUGS ...............................................
# The IAAF world record for men is 2:02:57
# setup WinBUGS directory
bugsdir <- "C:/WinBUGS14"
library(R2WinBUGS)
library(LearnBayes)
y <- marathontimes$time
n <- length(y)
dat.marathon <- list("y", "n")               # Data
par.marathon <- c("mu", "sigma", "y.pred")   # Nodes

# model in BUGS
cat("
model
{
 # Non-informative priors
  mu ~ dnorm(0, 0.0001)
  tau ~ dgamma(0.0001, 0.0001)
  sigma <- pow(tau, -0.5)
 # Data model
  for(i in 1:n)
  {
  # y[i] ~ dnorm(mu, tau)            # Normal likelihood
    y[i] ~ dnorm(mu, tau) I(115, )   # Truncated normal likelihood
  # y[i] ~ dt(mu, tau, nu)           # t likelihood with nu df
  }
  y.pred ~ dnorm(mu, tau) I(115, )
}", file = "marathon.bug")

inits.marathon <- list(
  list(mu = 250, tau = 10, y.pred = 200),
  list(mu = 150, tau = 20, y.pred = 300))

m.marathon <- bugs(dat.marathon, inits = inits.marathon, par.marathon,
                   model = "marathon.bug", n.chains = 2, n.iter = 10000,
                   n.thin = 2, bugs.directory = bugsdir,
                   debug = TRUE, DIC = FALSE, clearWD = TRUE)
print(m.marathon)
attach.bugs(m.marathon)
index <- sample(1:5000, size = 1000)
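# A predictive use of the record quoted above (a sketch, not in the original;
# it assumes the times are in minutes, so 2:02:57 is about 122.95 minutes):
# the posterior predictive probability that a future runner from this
# population beats the world record.
sum(y.pred < 122.95)/length(y.pred)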
# Model checking ..............................................................
#y.star <- rnorm(1000, mean = mu[index], sd = sigma[index])
y.star <- y.pred[index]
par(mfrow = c(3, 4))
hist(time, breaks = 10, xlim = c(150, 400), main = "Original Data", col = "green")
for(i in 1:11){
  y.sim <- sample(y.star, 20)
  hist(y.sim, breaks = 10, xlim = c(150, 400), main = "Simulated Data", col = "blue")
}

# Analysis of the minimum, maximum, variability and asymmetry
min.y <- max.y <- asy1 <- asy2 <- inter.q <- rep(0, 1000)
time <- sort(time)
for (b in 1:1000){
  y.sim <- sample(y.star, 20)
  mu.star <- sample(mu, 20)
  min.y[b] <- min(y.sim)
  max.y[b] <- max(y.sim)
  y.sim <- sort(y.sim)
  # predicted vs. observed asymmetry measures
  asy1[b] <- abs(y.sim[18] - mean(mu.star)) - abs(y.sim[2] - mean(mu.star))
  asy2[b] <- abs(time[18] - mean(mu.star)) - abs(time[2] - mean(mu.star))
  inter.q[b] <- quantile(y.sim, prob = 0.75) - quantile(y.sim, prob = 0.25)
}
par(mfrow = c(2, 2))
hist(min.y, breaks = 50, col = "blue", main = "Minimum y*")
abline(v = min(time), lwd = 3, lty = 2)
hist(max.y, breaks = 50, col = "red", main = "Maximum y*")
abline(v = max(time), lwd = 3, lty = 2)
hist(inter.q, breaks = 50, col = "magenta", main = "Variability y*")
abline(v = quantile(time, prob = 0.75) - quantile(time, prob = 0.25),
       lwd = 3, lty = 2)
plot(asy1, asy2, main = "Asymmetry", xlab = "Asymmetry predicted data",
     ylab = "Asymmetry original data", cex = 0.1)
abline(a = 0, b = 1, lwd = 2)
par(mfrow = c(1, 1))

# Dirichlet priors for multinomial models .....................................
# Example of a 2x2 table and odds ratio: GREAT trial ...
library(LearnBayes)
# posterior Dirichlet: cell counts plus prior a[i, j] = 1
draws <- rdirichlet(10000, c(13, 23, 150, 125))
odds <- draws[, 1]*draws[, 4]/(draws[, 2]*draws[, 3])
hist(odds, breaks = 100, xlab = "Odds Ratio", freq = FALSE)
lines(density(odds), lty = 2, lwd = 2, col = "blue")
abline(v = quantile(odds, prob = c(0.025, 0.5, 0.975)), lty = 3, col = "red")
tab.p <- rdirichlet(1000, c(1, 1, 1, 1))
odds.p <- tab.p[, 1]*tab.p[, 4]/(tab.p[, 2]*tab.p[, 3])
lines(density(odds.p), lty = 1, lwd = 2)
legend(1, 1.5, legend = c("prior odds", "posterior odds"),
       lty = c(1, 2), col = c("black", "blue"))
quantile(odds, prob = c(0.025, 0.5, 0.975))
sum(odds > 1)/10000
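# For reference (not in the original): the crude sample odds ratio from the
# 2x2 counts, which should be close to the posterior median computed above.
(13*125)/(23*150)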
# Fisher exact test ...........................................................
set.seed(123)
param <- c(2, 1, 1, 2)   # parameters of the Dirichlet prior
draws <- rdirichlet(10000, c(13, 23, 150, 125) + param)   # counts + prior
odds <- draws[, 1]*draws[, 4]/(draws[, 2]*draws[, 3])
tab.p2 <- rdirichlet(1000, param)
odds.p2 <- tab.p2[, 1]*tab.p2[, 4]/(tab.p2[, 2]*tab.p2[, 3])
lines(density(odds.p2), lty = 1, lwd = 2)
legend(1, 1.5, legend = c("prior odds", "posterior odds"),
       lty = c(1, 2), col = c("black", "blue"))
quantile(odds, prob = c(0.025, 0.5, 0.975))
sum(odds > 1)/10000
fisher.test(matrix(c(13, 23, 150, 125), nrow = 2, byrow = TRUE),
            alternative = "less")$p.value

# Visualizing Dirichlet densities .............................................
a <- c(1, 1)
x <- c(0.5, 0.5)
# log-density of a Dirichlet; ddirichlet() is in MCMCpack (or gtools)
logdirich <- function(a, data){
  d <- ddirichlet(data, a)
  log(d)
}
a <- c(2, 2)
# log of the Dirichlet kernel: theta1^(a1 - 1) * theta2^(a2 - 1)
mydirich <- function(theta, a){
  f <- theta[1]^(a[1] - 1) * theta[2]^(a[2] - 1)
  log(f)
}
theta <- expand.grid(x = seq(0.01, 0.99, 0.1), y = seq(0.01, 0.99, 0.1))
m <- length(seq(0.01, 0.99, 0.1))
n <- dim(theta)[1]
z <- rep(0, n)
for(i in 1:n) z[i] <- mydirich(as.numeric(theta[i, ]), c(2, 2))
z <- matrix(z, byrow = TRUE, nrow = m)
contour(seq(0.01, 0.99, 0.1), seq(0.01, 0.99, 0.1), z,
        main = "Dirichlet kernel with a = (2, 2)", col = "blue", lwd = 2,
        xlab = expression(theta[11]), ylab = expression(theta[22]))

## bivariate Dirichlet kernel
f <- function(x, y)
{
  #if (is.vector(x))
  #  x <- matrix(x, nrow = 1)
  r <- x^1 * y^1   # kernel with a11 = a22 = 2
  as.vector(r)
}
x <- seq(0.1, 0.2, length = 50)
y <- seq(0.1, 0.2, length = 50)
z <- outer(x, y, f)
persp(x, y, z, theta = 100, phi = 30, expand = 0.5, col = "lightblue",
      shade = 0.75, xlab = "theta11", ylab = "theta22",
      zlab = "f(theta11, theta22)")
title("Dirichlet with a11=2, a12=1, a21=1, a22=2")
image(x, y, z, col = terrain.colors(100),
      xlab = expression(theta[11]), ylab = expression(theta[22]))
contour(x, y, z, add = TRUE, col = rainbow(10), lwd = 2)
title("Dirichlet with a11=2, a12=1, a21=1, a22=2")

# Multivariate models with WinBUGS ############################################
# Example: multivariate normal toy example
# Tanner's data
Y <- matrix(c( 1,  1,
               1, -1,
              -1,  1,
              -1, -1,
               2, NA,
               2, NA,
              -2, NA,
              -2, NA,
              NA,  2,
              NA,  2,
              NA, -2,
              NA, -2), ncol = 2, byrow = TRUE)

# save BUGS model into the "cammel.bug" file
cat("model
{
  for (i in 1:12){
    Y[i, 1:2] ~ dmnorm(mu[], tau[ , ])
  }
  mu[1] <- 0; mu[2] <- 0
  tau[1:2, 1:2] ~ dwish(R[ , ], 2)
  R[1, 1] <- 0.001; R[1, 2] <- 0; R[2, 1] <- 0; R[2, 2] <- 0.001
  Sigma2[1:2, 1:2] <- inverse(tau[ , ])
  rho <- Sigma2[1, 2] / sqrt(Sigma2[1, 1] * Sigma2[2, 2])
}", file = "cammel.bug")

dat <- list("Y")
par1 <- c("rho")
m.cammel <- bugs(dat, inits = NULL, par1, model = "cammel.bug",
                 n.chains = 2, n.iter = 10000, n.thin = 1,
                 bugs.directory = bugsdir, working.directory = getwd(),
                 clearWD = TRUE, debug = TRUE, DIC = FALSE)
rho <- m.cammel$sims.array[, , "rho"]
hist(rho, breaks = 80, xlab = expression(rho))
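# For comparison (a sketch, not in the original): the complete-case sample
# correlation uses only the first four rows of Y and equals zero, while the
# partially observed rows pull the posterior of rho towards -1 and +1, which
# is why the histogram above is typically bimodal.
cor(Y[1:4, 1], Y[1:4, 2])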
###############################################################################
# Example: complex multinomial with non-conjugate analysis
# save BUGS model into the "popgen.bug" file
cat("model
{
  XAA[1] <- (1 - sigma)*p + sigma
  XAA[2] <- (1 - sigma)*q
  XAA[3] <- 0
  XAB[1] <- (1 - sigma)*p/2 + sigma/4
  XAB[2] <- 0.5
  XAB[3] <- (1 - sigma)*q/2 + sigma/4
  XBB[1] <- 0
  XBB[2] <- (1 - sigma)*p
  XBB[3] <- (1 - sigma)*q + sigma
  KAA <- sum(NAA[]); KAB <- sum(NAB[]); KBB <- sum(NBB[])
  NAA[1:3] ~ dmulti(XAA[], KAA)
  NAB[1:3] ~ dmulti(XAB[], KAB)
  NBB[1:3] ~ dmulti(XBB[], KBB)
  p ~ dunif(0, 1)   # uniform prior
  sigma ~ dunif(0, 1)
  q <- 1 - p
}", file = "popgen.bug")

dat <- list(NAA = c(427, 95, 0), NAB = c(108, 161, 71), NBB = c(0, 64, 74))
par1 <- c("p", "sigma")
m.popgen <- bugs(dat, inits = NULL, par1, model = "popgen.bug",
                 n.chains = 2, n.iter = 10000, n.thin = 1,
                 bugs.directory = bugsdir, working.directory = getwd(),
                 clearWD = TRUE, debug = TRUE)
print(m.popgen, digits = 3)
sigma <- m.popgen$sims.array[1:1000, 1, "sigma"]
p <- m.popgen$sims.array[1:1000, 1, "p"]
library(car)
scatterplotMatrix(~ p + sigma, diagonal = "histogram", smoother = FALSE,
                  reg.line = FALSE)

# Remove objects generated in this lecture ...
tm <- ls()
rm(list = tm[-which(tm %in% c("bugsdir", "working.directory"))])
rm(tm)

# Practical: ..................................................................
# Exercise 1:
# Normal model for the observations and predictive model checking ...
library(R2WinBUGS)
library(LearnBayes)
# Change the data for unusual runners ...
marathontimes$time[1:5] <- rnorm(5, mean = 130, sd = 3)
y <- marathontimes$time
n <- length(y)
dat.marathon <- list("y", "n")               # Data
par.marathon <- c("mu", "sigma", "y.pred")   # Nodes

# model in BUGS
cat("
model
{
 # Non-informative priors
  mu ~ dnorm(0, 0.0001)
  tau ~ dgamma(0.0001, 0.0001)
  sigma <- pow(tau, -0.5)
 # Data model
  for(i in 1:n)
  {
    y[i] ~ dnorm(mu, tau)              # Normal likelihood
  # y[i] ~ dnorm(mu, tau) I(115, )     # Truncated normal likelihood
  # y[i] ~ dt(mu, tau, nu)             # t likelihood with nu df
  }
  y.pred ~ dnorm(mu, tau) I(115, )
}", file = "marathon.bug")

inits.marathon <- list(
  list(mu = 250, tau = 10, y.pred = 200),
  list(mu = 150, tau = 20, y.pred = 300))

m.marathon <- bugs(dat.marathon, inits = inits.marathon, par.marathon,
                   model = "marathon.bug", n.chains = 2, n.iter = 10000,
                   n.thin = 2,
                   bugs.directory = "/home/pablik/.wine/drive_c/WinBUGS14",
                   working.directory = "/home/pablik/Documents/2015/BHM/R_WinBUGS",
                   debug = TRUE, DIC = FALSE, clearWD = TRUE)
print(m.marathon)
attach.bugs(m.marathon)
index <- sample(1:5000, size = 1000)

# Model checking ..............................................................
#y.star <- rnorm(1000, mean = mu[index], sd = sigma[index])
y.star <- y.pred[index]
par(mfrow = c(3, 4))
hist(y, breaks = 10, xlim = c(150, 400), main = "Original Data", col = "green")
for(i in 1:11){
  y.sim <- sample(y.star, 20)
  hist(y.sim, breaks = 10, xlim = c(150, 400), main = "Simulated Data", col = "blue")
}

# Analysis of the minimum, maximum, variability and asymmetry
time <- y
min.y <- max.y <- asy1 <- asy2 <- inter.q <- rep(0, 1000)
time <- sort(time)
for (b in 1:1000){
  y.sim <- sample(y.star, 20)
  mu.star <- sample(mu, 20)
  min.y[b] <- min(y.sim)
  max.y[b] <- max(y.sim)
  y.sim <- sort(y.sim)
  # predicted vs. observed asymmetry measures
  asy1[b] <- abs(y.sim[18] - mean(mu.star)) - abs(y.sim[2] - mean(mu.star))
  asy2[b] <- abs(time[18] - mean(mu.star)) - abs(time[2] - mean(mu.star))
  inter.q[b] <- quantile(y.sim, prob = 0.75) - quantile(y.sim, prob = 0.25)
}
par(mfrow = c(2, 2))
hist(min.y, breaks = 50, col = "blue", main = "Minimum y*")
abline(v = min(time), lwd = 3, lty = 2)
hist(max.y, breaks = 50, col = "red", main = "Maximum y*")
abline(v = max(time), lwd = 3, lty = 2)
hist(inter.q, breaks = 50, col = "magenta", main = "Variability y*")
abline(v = quantile(time, prob = 0.75) - quantile(time, prob = 0.25),
       lwd = 3, lty = 2)
plot(asy1, asy2, main = "Asymmetry", xlab = "Asymmetry predicted data",
     ylab = "Asymmetry original data", cex = 0.1)
abline(a = 0, b = 1, lwd = 2)
par(mfrow = c(1, 1))
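# Numerical summaries of the checks above (a sketch, not in the original):
# posterior predictive p-values for the minimum and the maximum. Values
# close to 0 or 1 flag a conflict between the model and the data.
sum(min.y <= min(time))/length(min.y)
sum(max.y >= max(time))/length(max.y)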
# Use the t-distribution with nu = 4 df for outliers and repeat the
# predictive model checking ...
# model in BUGS
cat("
model
{
 # Non-informative priors
  mu ~ dnorm(0, 0.0001)
  tau ~ dgamma(0.0001, 0.0001)
  sigma <- pow(tau, -0.5)
  nu <- 4
 # Data model
  for(i in 1:n)
  {
  # y[i] ~ dnorm(mu, tau)             # Normal likelihood
  # y[i] ~ dnorm(mu, tau) I(115, )    # Truncated normal likelihood
    y[i] ~ dt(mu, tau, nu)            # t likelihood with nu df
  }
  y.pred ~ dnorm(mu, tau) I(115, )
}", file = "marathon.bug")

inits.marathon <- list(
  list(mu = 250, tau = 10, y.pred = 200),
  list(mu = 150, tau = 20, y.pred = 300))

m.marathon <- bugs(dat.marathon, inits = inits.marathon, par.marathon,
                   model = "marathon.bug", n.chains = 2, n.iter = 10000,
                   n.thin = 2,
                   bugs.directory = "/home/pablik/.wine/drive_c/WinBUGS14",
                   working.directory = "/home/pablik/Documents/2015/BHM/R_WinBUGS",
                   debug = TRUE, DIC = FALSE, clearWD = TRUE)
print(m.marathon)
attach.bugs(m.marathon)
index <- sample(1:5000, size = 1000)

# Model checking ..............................................................
#y.star <- rnorm(1000, mean = mu[index], sd = sigma[index])
y.star <- y.pred[index]
par(mfrow = c(3, 4))
hist(y, breaks = 10, xlim = c(150, 400), main = "Original Data", col = "green")
for(i in 1:11){
  y.sim <- sample(y.star, 20)
  hist(y.sim, breaks = 10, xlim = c(150, 400), main = "Simulated Data", col = "blue")
}

# Analysis of the minimum, maximum, variability and asymmetry
time <- y
min.y <- max.y <- asy1 <- asy2 <- inter.q <- rep(0, 1000)
time <- sort(time)
for (b in 1:1000){
  y.sim <- sample(y.star, 20)
  mu.star <- sample(mu, 20)
  min.y[b] <- min(y.sim)
  max.y[b] <- max(y.sim)
  y.sim <- sort(y.sim)
  # predicted vs. observed asymmetry measures
  asy1[b] <- abs(y.sim[18] - mean(mu.star)) - abs(y.sim[2] - mean(mu.star))
  asy2[b] <- abs(time[18] - mean(mu.star)) - abs(time[2] - mean(mu.star))
  inter.q[b] <- quantile(y.sim, prob = 0.75) - quantile(y.sim, prob = 0.25)
}
par(mfrow = c(2, 2))
hist(min.y, breaks = 50, col = "blue", main = "Minimum y*")
abline(v = min(time), lwd = 3, lty = 2)
hist(max.y, breaks = 50, col = "red", main = "Maximum y*")
abline(v = max(time), lwd = 3, lty = 2)
hist(inter.q, breaks = 50, col = "magenta", main = "Variability y*")
abline(v = quantile(time, prob = 0.75) - quantile(time, prob = 0.25),
       lwd = 3, lty = 2)
plot(asy1, asy2, main = "Asymmetry", xlab = "Asymmetry predicted data",
     ylab = "Asymmetry original data", cex = 0.1)
abline(a = 0, b = 1, lwd = 2)
par(mfrow = c(1, 1))

# Exercise 2: (completed in the sketch below)
diff <- draws[, 1] - draws[, 2]
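# A minimal completion of Exercise 2 (a sketch): 'draws' was removed by the
# clean-up above, so re-create the Dirichlet posterior for the GREAT trial
# table before summarizing the difference of the first two cell probabilities.
draws <- rdirichlet(10000, c(13, 23, 150, 125))
diff <- draws[, 1] - draws[, 2]
hist(diff, breaks = 50, freq = FALSE, xlab = "difference in cell probabilities")
quantile(diff, prob = c(0.025, 0.5, 0.975))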
# Lecture 6 ...................................................................
# Regression models
# BUGS code in the Lecture

# Lecture 7 ...................................................................
# Hierarchical models
# BUGS code in the Lecture

# Lecture 8 ...................................................................
# Longitudinal data analysis
library(lattice)

# HIV data
hiv <- read.csv2("hiv.csv")
hiv <- na.omit(hiv)

# Data for WinBUGS
y <- hiv$y
time <- hiv$time
tr <- as.numeric(hiv$tr)
n <- length(y)

# Set up new patient id numbers from 1 to J
id <- hiv$person              # child ID
unique.pid <- unique(id)
J <- length(unique.pid)       # J = number of children
person <- rep(NA, n)
for (j in 1:J){
  person[id == unique.pid[j]] <- j
}
# treatment per child: first row of each child, in order of appearance
tr.group <- tr[!duplicated(person)]

# Plot the data
mykey <- list(space = "top", title = "HIV positive children",
              text = list(c("Control diet", "Zinc supplement")),
              lines = list(col = 1:2, lwd = 3, lty = 1:2))
plot1 <- xyplot(y ~ time | tr, group = person, subscripts = TRUE,
                panel = function(x, y, subscripts, groups, ...){
                  panel.superpose(x, y, subscripts, groups, type = c("l"), lwd = 1)
                  panel.grid()
                },
                data = hiv, main = "HIV positive children (1 to 5 years old)",
                xlab = "time (years)", ylab = "sqrt(cd4%)")
plot2 <- xyplot(y ~ time, group = tr, type = c("g", "smooth"), key = mykey,
                col = 1:2, lwd = 3, lty = 1:2, ylim = c(4.2, 5.5), data = hiv)
plot(plot1, split = c(1, 1, 1, 2))
plot(plot2, split = c(1, 2, 1, 2), newpage = FALSE)

# Pooled model ................................................................
# HIV example
# Set up the BUGS model
dat.hiv <- list("y", "time", "tr", "n")
par.hiv <- c("alpha", "beta", "sigma.y")

cat("model
{
 # Priors ...........................................
  alpha ~ dnorm(0, 0.01)
  beta[1] ~ dnorm(0, 0.01)   # slope, control group
  beta[2] ~ dnorm(0, 0.01)   # slope, zinc supplement group
  prec.y <- pow(sigma.y, -2)
  sigma.y ~ dunif(0, 10)
 # Data model .......................................
  for(i in 1:n)
  {
    y[i] ~ dnorm(mu[i], prec.y)
    mu[i] <- alpha + beta[tr[i]] * time[i]
  }
}
", file = "hiv_pool.bug")

m.hiv.pool <- bugs(dat.hiv, inits = NULL, parameters.to.save = par.hiv,
                   model = "hiv_pool.bug", n.chains = 2, n.iter = 20000,
                   n.thin = 2,
                   bugs.directory = "/home/pablik/.wine/drive_c/WinBUGS14",
                   working.directory = "/home/pablik/Documents/2015/BHM/R_WinBUGS",
                   debug = TRUE, DIC = TRUE, clearWD = TRUE)
m.hiv.pool
attach.bugs(m.hiv.pool)
delta.pool <- beta[, 1] - beta[, 2]
hist(delta.pool, breaks = 50, freq = FALSE, col = "lightblue")
# p-value: posterior probability that the control slope exceeds the zinc slope
sum(delta.pool > 0)/length(delta.pool)
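# For reference (a sketch, not in the original): the maximum-likelihood fit of
# the same pooled model, a common intercept with treatment-specific slopes;
# the estimates should be close to the BUGS posterior means.
summary(lm(y ~ time:factor(tr)))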
# HIV exchangeable (random intercept) model ...................................
# Set up the BUGS model
dat.hiv.re <- list("y", "time", "tr", "tr.group", "n", "person", "J")
par.hiv.re <- c("mu.alpha", "sigma.alpha", "mu.beta", "sigma.beta",
                "gamma", "sigma.y", "alpha", "beta")

cat("model
{
 # Priors ...........................................
  for(j in 1:J){
  # alpha[j] ~ dnorm(mu.alpha, prec.alpha)
  # beta[j] ~ dnorm(mu.beta[tr.group[j]], prec.beta)
    alpha[j] ~ dt(mu.alpha, prec.alpha, 4)
    beta[j] ~ dt(mu.beta[tr.group[j]], prec.beta, 4)
  }
  mu.alpha ~ dnorm(0, 0.01)
  prec.alpha <- pow(sigma.alpha, -2)
  sigma.alpha ~ dunif(0, 10)
  mu.beta[1] ~ dnorm(0, 0.01)   # slope, control group
  mu.beta[2] ~ dnorm(0, 0.01)   # slope, zinc supplement group
  prec.beta <- pow(sigma.beta, -2)
  sigma.beta ~ dunif(0, 10)
  prec.y <- pow(sigma.y, -2)
  sigma.y ~ dunif(0, 10)
  gamma[1] <- 0                 # control group
  gamma[2] ~ dnorm(0, 0.01)
 # Data model .......................................
  for(i in 1:n)
  {
  # Observations at child level
  # y[i] ~ dnorm(mu[i], prec.y)
    y[i] ~ dt(mu[i], prec.y, 4)
  # mu[i] <- alpha[person[i]] + beta[tr[i]] * time[i]       # random intercept
  # mu[i] <- alpha[person[i]] + beta[person[i]] * time[i]   # random intercept and slope
    mu[i] <- alpha[person[i]] + beta[person[i]] * time[i] + gamma[tr[i]]*pow(time[i], 2)
  }
}
", file = "hiv_re.bug")

m.hiv.re <- bugs(dat.hiv.re, inits = NULL, parameters.to.save = par.hiv.re,
                 model = "hiv_re.bug", n.chains = 2, n.iter = 5000, n.thin = 2,
                 bugs.directory = "/home/pablik/.wine/drive_c/WinBUGS14",
                 working.directory = "/home/pablik/Documents/2015/BHM/R_WinBUGS",
                 debug = TRUE, DIC = TRUE, clearWD = TRUE)
print(m.hiv.re)
attach.bugs(m.hiv.re)
delta.re.nor <- mu.beta[, 1] - mu.beta[, 2]
hist(delta.pool, breaks = 50, freq = FALSE, col = "lightblue", xlim = c(-1.2, 2))
lines(density(delta.re.nor), col = "red", lwd = 3)

# Run the model again with the t-distribution version and overlay the result
attach.bugs(m.hiv.re)
delta.re.t <- mu.beta[, 1] - mu.beta[, 2]
lines(density(delta.re.t), col = "blue", lwd = 3)
legend(0.4, 2, legend = c("Identical par.", "Exchang. Normal", "Exchang. t"),
       col = c("lightblue", "red", "blue"), lwd = c(2, 2, 2), lty = c(1, 1, 1))
abline(v = 0, lty = 2)

# posterior for the quadratic term (gamma[, 1] is fixed at 0, so plot gamma[, 2])
hist(gamma[, 2], breaks = 50, freq = FALSE, col = "green", xlim = c(-0.2, 0.5),
     main = "Posterior for the Quadratic Term:\n Zinc Supplement Group")
abline(v = 0, lty = 2)

# p-value
sum(delta.re.nor > 0)/length(delta.re.nor)

# Plot random slopes and intercepts (coefplot() is in the 'arm' package)
library(arm)
coefplot(m.hiv.re, var.idx = 8:168, col = "red", xlim = c(-2.5, 8),
         main = "Posteriors: Intercepts", cex.var = 0.4)
coefplot(m.hiv.re, var.idx = 169:(169 + 168), col = "blue", xlim = c(-3, 3),
         main = "Posteriors: Slopes", cex.var = 0.4)

# Variability explained
tot.var <- sigma.y^2 + sigma.alpha^2 + sigma.beta^2
mean(sigma.y^2/tot.var) * 100
mean(sigma.alpha^2/tot.var) * 100
mean(sigma.beta^2/tot.var) * 100

# Model validation: predictive checks ...
attach.bugs(m.hiv.re)
# Number of observations per person
with(hiv, table(table(person)))

## Simulating the predictive data. The original sketch used unset placeholders
## (g.0.true, g.1.true, sigma.b.true, ...); here the hyperparameters default
## to the posterior means made available by attach.bugs(m.hiv.re).
y.predictive <- function(J, K = 7,
                         mu.a = mean(mu.alpha),
                         mu.b = apply(mu.beta, 2, mean),   # slope means per group
                         sigma.y.true = mean(sigma.y),
                         sigma.a = mean(sigma.alpha),
                         sigma.b = mean(sigma.beta))
{
  time <- rep(seq(0, 1, length = K), J)   # K measurements during the year
  person <- rep(1:J, each = K)            # person IDs
  treatment <- sample(rep(0:1, J/2))      # randomize treatment (assumes J even)
  treatment1 <- treatment[person]
  # person-level parameters
  a.true <- rnorm(J, mu.a, sigma.a)
  b.true <- rnorm(J, mu.b[treatment + 1], sigma.b)   # group-specific slope mean
  # data
  y <- rnorm(J*K, a.true[person] + b.true[person]*time, sigma.y.true)
  return(data.frame(y, time, person, treatment1))
}
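# Example use of the simulator above (a sketch): generate one replicated data
# set and compare its trajectories with the observed-data plot; J = 150 is an
# illustrative (even) number of children, not taken from the original script.
hiv.rep <- y.predictive(J = 150)
xyplot(y ~ time | factor(treatment1), group = person, type = "l",
       data = hiv.rep, xlab = "time (years)", ylab = "simulated sqrt(cd4%)")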
# Further Lectures ............................................................

# Lecture: Introduction to MCMC ###############################################
# Example: Gibbs sampler
# Normal distribution, unknown mean and variance (variance = 1/tau)
gibbsNormal <- function(y, mu1, tau1, N, mu0, phi0, a, b)
{
  mu <- numeric(N + 1)   # N = number of iterations
  mu[1] <- mu1           # the initial value for mu
  tau <- numeric(N + 1)
  tau[1] <- tau1         # the initial value for tau
  n <- length(y); ybar <- mean(y)
  for(i in 2:(N + 1)) {
    # generate samples from the full conditionals
    mu[i] <- rnorm(1, (mu0*phi0 + n*ybar*tau[i-1])/(phi0 + n*tau[i-1]),
                   1/sqrt(phi0 + n*tau[i-1]))
    tau[i] <- rgamma(1, (n + 2*a)/2, (sum((y - mu[i])^2) + 2*b)/2)
  }
  output <- cbind(mu, tau)
  output
}

y <- c(160.2, 155.1, 135.0, 138.8, 147.0, 115.0, 113.4, 97.5, 110.3, 115.8)
res <- gibbsNormal(y, 150, 1, 500, 150, 10, 0.1, 0.1)
par(mfrow = c(2, 2))
plot(res[50:500, 1], col = "red", type = "l", ylab = "mu", xlab = "Iterations")
hist(res[50:500, 1], main = "Posterior for mu", col = "red", xlab = "mu")
# the sampler returns the precision tau; 1/tau is the variance
plot(1/res[50:500, 2], col = "blue", type = "l", ylab = "variance (1/tau)",
     xlab = "Iterations")
hist(1/res[50:500, 2], col = "blue", xlab = "variance (1/tau)",
     main = "Posterior for the variance")
par(mfrow = c(1, 1))

##############################################################################
# Example: mixture of normals
# Metropolis-Hastings sampler for a mixture of normals using the MCMCpack package
library(MCMCpack)
dmix <- function(x, m1, s1, m2, s2, q1)
  q1*dnorm(x, mean = m1, sd = s1) + (1 - q1)*dnorm(x, mean = m2, sd = s2)
post.mix <- MCMCmetrop1R(dmix, m1 = 0, m2 = 5, s1 = 1, s2 = 1, q1 = 0.3,
                         theta.init = 1, thin = 1, mcmc = 10000,
                         burnin = 1000, logfun = FALSE)
plot(post.mix)

###############################################################################
# Example: Metropolis toy example
# Gelman and Meng (1991) kernel function
# plot the function
f <- function(x, y, A = 1, B = 0, C1 = 3, C2 = 3, log = FALSE)
{
  if (is.vector(x))
    x <- matrix(x, nrow = 1)
  r <- -0.5 * (A * x^2 * y^2 + x^2 + y^2 - 2*B*x*y - 2*C1*x - 2*C2*y)
  if (!log) r <- exp(r)
  as.vector(r)
}
x <- seq(-1, 6, length = 50)
y <- seq(-1, 6, length = 50)
z <- outer(x, y, f)

# Gelman-Meng kernel
persp(x, y, z, theta = 100, phi = 30, expand = 0.5, col = "lightblue",
      shade = 0.75)
image(x, y, log(z), col = gray(seq(0, 1, len = 256)))
contour(x, y, log(z), add = TRUE, levels = seq(5, -35, -2), col = rainbow(10))

## Metropolis sampling with MCMCpack
library(MCMCpack)
## Gelman and Meng (1991) kernel function:
# write the log-kernel with the first argument as a vector
f.g.m <- function(xy, A = 1, B = 0, C1 = 3, C2 = 3)
{
  x <- xy[1]; y <- xy[2]
  r <- -0.5 * (A * x^2 * y^2 + x^2 + y^2 - 2*B*x*y - 2*C1*x - 2*C2*y)
  as.vector(r)
}
# f.g.m returns the kernel on the log scale, so logfun must be TRUE
res <- MCMCmetrop1R(f.g.m, theta.init = c(0, 1), mcmc = 10000, burnin = 5000,
                    logfun = TRUE)
points(res[, 1], res[, 2])
par(mfrow = c(2, 2))
plot(res[, 1], type = "l", col = "blue", xlab = "iteration", ylab = "x",
     main = "Trace plot: Metropolis iterations for x")
plot(res[, 2], type = "l", col = "red", xlab = "iteration", ylab = "y",
     main = "Trace plot: Metropolis iterations for y")
par(mfrow = c(1, 1))

# Remove objects generated in this lecture ...
tm <- ls()
rm(list = tm[-which(tm %in% c("bugsdir", "working.directory"))])
rm(tm)

# End .........................................................................
# Good luck!