# Bayesian Analysis with R and WinBUGS
# 24 April 2013
# R script for Lectures
# Dr. Pablo E. Verde

# Set up working directory for R:
# setup WinBUGS directory
bugsdir <- "C:/Archivos de Programa/WinBUGS14"

# Lecture 1 ###################################################################

# Example: surgical ............................................................

# Beta(3, 27)
x <- seq(0, 0.5, by = 0.01)
y <- dbeta(x, 3, 27)
plot(x*100, y, xlab = "Mortality rate (in %)",
     ylab = "Density(mortality rate)", type = "l", lwd = 2, col = "red")

# Example: Three coins example .................................................

options(digits = 4)
set.seed(12345)

coin.pri <- rep(1/3, 3)      # prior for coins 1, 2, 3
theta <- c(0.25, 0.5, 0.75)  # probability of heads under coins 1, 2, 3
sim <- 100000                # number of simulations

y <- rep(0, 3*sim)
dim(y) <- c(sim, 3)

for(i in 1:sim)
{
  ind <- sample(c(1, 2, 3), size = 1, prob = coin.pri, replace = TRUE)
  y.new <- rbinom(1, 1, prob = theta[ind])
  y[i, ind] <- y.new
}

# average each class and normalize
th.post <- apply(y, 2, mean) / sum(apply(y, 2, mean))
th.post

# Monte Carlo error
V <- th.post * (1 - th.post) / sim
sqrt(V)

# posterior predictive probability that the next toss is a head:
sum(theta * apply(y, 2, mean) / sum(apply(y, 2, mean)))

# Remove objects generated in this lecture ...
tm <- ls()
rm(list = tm[-which(tm %in% c("bugsdir", "working.directory"))])
rm(tm)

# Lecture 2: Simple statistical models ########################################

# Example: flexibility of the Beta
par(mfrow = c(2, 2))
curve(dbeta(x, 1, 1), from = 0, to = 1, lty = 1, lwd = 2,
      main = "Beta(1, 1)", xlab = expression(theta))
curve(dbeta(x, 1/2, 1/2), from = 0, to = 1, lty = 2, lwd = 2, col = "red",
      main = "Beta(1/2, 1/2)", xlab = expression(theta))
curve(dbeta(x, 2, 5), from = 0, to = 1, lty = 2, lwd = 2, col = "blue",
      main = "Beta(2, 5)", xlab = expression(theta))
curve(dbeta(x, 2, 2), from = 0, to = 1, lty = 2, lwd = 2, col = "green",
      main = "Beta(2, 2)", xlab = expression(theta))
par(mfrow = c(1, 1))

# Example: Drug investigation ...

par(mfrow = c(3, 1))  # graphical output: 3 by 1 panels
# draw a curve for the Beta(9.2, 13.8) prior density
curve(dbeta(x, 9.2, 13.8), from = 0, to = 1,
      xlab = "prob of success", main = "Prior")
# draw a curve for the binomial likelihood (15 successes out of 20)
curve(dbinom(15, 20, x), from = 0, to = 1, col = "blue",
      xlab = "prob of success", main = "Likelihood")
# draw the Beta(24.2, 18.8) posterior
curve(dbeta(x, 24.2, 18.8), from = 0, to = 1, col = "red",
      xlab = "prob of success", main = "Posterior")
par(mfrow = c(1, 1))

# Direct Monte Carlo simulation ...
qbeta(c(0.05, 0.5, 0.75, 0.95), 24.2, 18.8)

theta.star <- rbeta(20000, 24.2, 18.8)
quantile(theta.star, prob = c(0.05, 0.5, 0.75, 0.95))

# Posterior of the odds ...
odds.star <- theta.star/(1 - theta.star)
quantile(odds.star, prob = c(0.05, 0.5, 0.75, 0.95))
hist(odds.star, breaks = 100, xlab = "odds ratio", freq = FALSE,
     xlim = c(0, 4))
lines(density(odds.star), lwd = 2, lty = 1, col = "red")

# Making predictions for binomial data ...
# Beta-binomial density
betabin <- function(r, a, b, m)
{
  gamma(a+b)/(gamma(a)*gamma(b)) * choose(m, r) *
    gamma(a+r)*gamma(b+m-r)/gamma(a+b+m)
}

# check: the probabilities sum to one
sum(betabin(0:10, 0.4, 0.1, 10))

x <- 0:40
px <- betabin(0:40, a = 24.2, b = 18.8, m = 40)
plot(x, px, type = "s")
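# A quick numerical check (a sketch): the mean and standard deviation of
# this predictive distribution, which should agree with the values 22.5
# and 4.3 quoted below
m.pred <- sum(x * px)
sd.pred <- sqrt(sum((x - m.pred)^2 * px))
round(c(mean = m.pred, sd = sd.pred), 1)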
# Comparison between the posterior for the rate and the predictive posterior
par(mfrow = c(1, 2))
# Beta posterior after having observed 15 successes in 20 trials
curve(dbeta(x, 24.2, 18.8), from = 0, to = 1, col = "red",
      xlab = "prob of success", main = "Posterior")
# Beta-binomial distribution of the number of successes x in the next
# 40 trials, with mean 22.5 and standard deviation 4.3
x <- 0:40
px <- betabin(0:40, a = 24.2, b = 18.8, m = 40)
plot(x, px, type = "h", xlab = "number of successes out of 40",
     main = "Predictive Posterior")
par(mfrow = c(1, 1))

# probability of at least 25 successes out of 40 further trials
sum(betabin(25:40, 24.2, 18.8, m = 40))   # 0.3290134

# Simulation of future data with a Beta-Binomial model ...
theta.star <- rbeta(10000, 24.2, 18.8)
y <- rbinom(10000, 40, theta.star)
freq.y <- table(y)
ys <- as.integer(names(freq.y))
predprob <- freq.y/sum(freq.y)
plot(ys, predprob, type = "h", xlab = "y", ylab = "Predictive Probability")
sum(predprob[ys > 24])

# Example: SBP - Bayesian analysis for Normal data
# This is an example of a Normal distribution with unknown mean
# and known variance. It is not in the lecture.
par(mfrow = c(3, 1))
# prior: Normal(120, sd = 10)
curve(dnorm(x, 120, 10), from = 100, to = 140, col = "blue",
      xlab = "Long-term SBP of 60-year old women", main = "Prior")
# likelihood: Normal(130, sd = 3.5)
curve(dnorm(x, 130, 3.5), from = 100, to = 140, col = "black",
      xlab = "Long-term SBP of 60-year old women", main = "Likelihood")
# posterior: Normal(128.9, sd = sqrt(11.1) = 3.3)
curve(dnorm(x, 128.9, sqrt(11.1)), from = 100, to = 140, col = "red",
      xlab = "Long-term SBP of 60-year old women", main = "Posterior")
par(mfrow = c(1, 1))

# Example: Annual number of cases of haemolytic uraemic syndrome,
# Henderson and Matthews (1993)
# Prior to posterior ...
# Prior: Gamma(0.679, 0.295)
# Posterior: Gamma(16.679, 10.295)
curve(dgamma(x, 16.679, 10.295), lty = 1, col = "blue", xlim = c(0, 5))
curve(dgamma(x, 0.679, 0.295), lty = 2, add = TRUE)
legend(3, 0.4, legend = c("prior", "posterior"), lty = c(2, 1),
       col = c("black", "blue"))

# Predictions ...
sum(dnbinom(6:17, prob = 10.295/(1 + 10.295), size = 16.679))   # 0.009594483
plot(dnbinom(0:10, prob = 10.295/(1 + 10.295), size = 16.679),
     type = "h", lwd = 2, col = "red", xlab = "counts")

# Lecture 3: Priors ###########################################################

## Predictive prior distribution for sample size and power
set.seed(123)
theta <- rnorm(10000, 0.5, 0.1)
sigma <- rnorm(10000, 1, 0.3)
sigma <- ifelse(sigma < 0, -1*sigma, sigma)

n <- 2*sigma^2/(theta^2) * (0.84 + 1.96)^2
pow <- pnorm(sqrt(63/2) * theta/sigma - 1.96)

par(mfrow = c(1, 2))
hist(n, prob = TRUE, xlim = c(0, 400), ylim = c(0, 0.01), breaks = 50)
lines(density(n), lwd = 2, col = "red")
hist(pow, prob = TRUE)
lines(density(pow), lwd = 2, col = "blue")
par(mfrow = c(1, 1))

round(quantile(n), 2)
round(quantile(pow), 2)
sum(pow < 0.7)/10000

# n = 100
set.seed(123)
theta <- rnorm(10000, 0.5, 0.1/0.5)
theta <- abs(theta)
sigma <- rnorm(10000, 1, 0.3/0.5)
sigma <- ifelse(sigma < 0, -1*sigma, sigma)
pow <- pnorm(sqrt(100/2) * theta/sigma - 1.96)
sum(pow < 0.7)/10000

# Mixture of Betas ...
mixbeta <- function(x, r, a1, b1, a2, b2, q, n)
{
  qstar <- q*betabin(r, a1, b1, n) /
           (q*betabin(r, a1, b1, n) + (1-q)*betabin(r, a2, b2, n))
  p1 <- dbeta(x, a1 + r, n - r + b1)
  p2 <- dbeta(x, a2 + r, n - r + b2)
  qstar*p1 + (1 - qstar)*p2
}
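# Sanity check (a sketch): the mixture posterior is a proper density and
# should integrate to one
integrate(mixbeta, lower = 0, upper = 1,
          r = 15, a1 = 9.2, b1 = 13.8, a2 = 1, b2 = 1, q = 0.8, n = 20)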
# Mixture between 80% informative and 20% non-informative priors ...
par(mfrow = c(2, 1))
curve(dbeta(x, 9.2, 13.8), from = 0, to = 1, col = "red",
      xlab = "probability of response", main = "priors")
curve(0.8*dbeta(x, 9.2, 13.8) + 0.2*dbeta(x, 1, 1),
      from = 0, to = 1, col = "blue", add = TRUE)
curve(dbeta(x, 24.2, 18.8), from = 0, to = 1, col = "red",
      xlab = "probability of response", main = "posteriors")
curve(mixbeta(x, r = 15, a1 = 9.2, b1 = 13.8, a2 = 1, b2 = 1, q = 0.8, n = 20),
      from = 0, to = 1, col = "blue", add = TRUE)
par(mfrow = c(1, 1))

# Same analysis using simulations: this is not presented in the lecture.
theta.star.mixbeta <- function(B, r, a1, b1, a2, b2, q, n)
{
  qstar <- q*betabin(r, a1, b1, n) /
           (q*betabin(r, a1, b1, n) + (1-q)*betabin(r, a2, b2, n))
  Ind <- rbinom(B, 1, qstar)
  p1 <- rbeta(B, a1 + r, n - r + b1)
  p2 <- rbeta(B, a2 + r, n - r + b2)
  Ind*p1 + (1 - Ind)*p2
}

sim.theta <- theta.star.mixbeta(B = 10000, r = 15, a1 = 9.2, b1 = 13.8,
                                a2 = 1, b2 = 1, q = 0.8, n = 20)

# posterior of the mixture by simulation
hist(sim.theta, breaks = 100, xlab = "probability of response",
     xlim = c(0, 1), freq = FALSE)
lines(density(sim.theta), lwd = 2, lty = 1, col = "blue")

# posterior of the odds from the mixture
sim.odds <- sim.theta/(1 - sim.theta)
hist(sim.odds, breaks = 100, xlab = "odds ratio", freq = FALSE,
     xlim = c(0, 10))
lines(density(sim.odds), lwd = 2, lty = 1, col = "red")

# predictive outcomes
# Simulation of future data
sim.theta <- theta.star.mixbeta(B = 10000, r = 15, a1 = 9.2, b1 = 13.8,
                                a2 = 1, b2 = 1, q = 0.8, n = 20)
y <- rbinom(10000, 40, sim.theta)   # number of successes out of n = 40
freq.y <- table(y)
ys <- as.integer(names(freq.y))
predprob <- freq.y/sum(freq.y)
plot(ys, predprob, type = "h", xlab = "y", ylab = "Predictive Probability")
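# Cross-check (a sketch): the exact predictive distribution is itself a
# mixture of two Beta-binomials, with the same posterior weight qstar used
# inside theta.star.mixbeta; overlay it on the simulated version above
qstar <- 0.8*betabin(15, 9.2, 13.8, 20) /
         (0.8*betabin(15, 9.2, 13.8, 20) + 0.2*betabin(15, 1, 1, 20))
px.mix <- qstar*betabin(0:40, 24.2, 18.8, 40) +
          (1 - qstar)*betabin(0:40, 16, 6, 40)
points(0:40, px.mix, col = "blue", pch = 16, cex = 0.6)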
# Remove objects generated in this lecture ...
tm <- ls()
rm(list = tm[-which(tm %in% c("bugsdir", "working.directory"))])
rm(tm)

# Lecture 4: Multiparameter models with R #####################################

# Introduction: Multi-parameter models
# Example: Normal distribution with unknown mean and variance ...
library(LearnBayes)

data(marathontimes)
marathontimes$times
attach(marathontimes)

mycontour(normchi2post, c(220, 330, 500, 9000), time,
          xlab = "mean", ylab = "variance")

SS <- sum((time - mean(time))^2)
n <- length(time)
sigma2 <- SS/rchisq(1000, n - 1)
mu <- rnorm(1000, mean = mean(time), sd = sqrt(sigma2)/sqrt(n))
points(mu, sigma2, col = "blue")

quantile(mu, c(0.025, 0.975))
quantile(sqrt(sigma2), c(0.025, 0.975))

# posterior of the coefficient of variation
cv.star <- sqrt(sigma2)/mu
hist(cv.star*100, breaks = 50, xlab = "coefficient of variation in %",
     freq = FALSE)
lines(density(cv.star*100), col = "blue", lwd = 2, lty = 2)
abline(v = quantile(cv.star*100, prob = c(0.025, 0.5, 0.975)),
       lty = 3, lwd = 2, col = "red")
quantile(cv.star*100, prob = c(0.025, 0.5, 0.975))

###############################################################################
# model checking
y.star <- rnorm(1000, mean = mu, sd = sqrt(sigma2))

par(mfrow = c(3, 4))
hist(time, breaks = 10, xlim = c(150, 400), main = "Original Data",
     col = "green")
for(i in 1:11){
  y.sim <- sample(y.star, 20)
  hist(y.sim, breaks = 10, xlim = c(150, 400), main = "Simulated Data",
       col = "blue")
}

# Analysis of the minimum, maximum, variability and asymmetry
min.y <- max.y <- asy1 <- asy2 <- inter.q <- rep(0, 1000)
time <- sort(time)
for (b in 1:1000){
  y.sim <- sample(y.star, 20)
  mu.star <- sample(mu, 20)
  min.y[b] <- min(y.sim)
  max.y[b] <- max(y.sim)
  y.sim <- sort(y.sim)
  # asymmetry measure for the predicted and the original data
  asy1[b] <- abs(y.sim[18] - mean(mu.star)) - abs(y.sim[2] - mean(mu.star))
  asy2[b] <- abs(time[18] - mean(mu.star)) - abs(time[2] - mean(mu.star))
  inter.q[b] <- quantile(y.sim, prob = 0.75) - quantile(y.sim, prob = 0.25)
}

par(mfrow = c(2, 2))
hist(min.y, breaks = 50, col = "blue", main = "Minimum y*")
abline(v = min(time), lwd = 3, lty = 2)
hist(max.y, breaks = 50, col = "red", main = "Maximum y*")
abline(v = max(time), lwd = 3, lty = 2)
hist(inter.q, breaks = 50, col = "magenta", main = "Variability y*")
abline(v = quantile(time, prob = 0.75) - quantile(time, prob = 0.25),
       lwd = 3, lty = 2)
plot(asy1, asy2, main = "Asymmetry",
     xlab = "Asymmetry predicted data", ylab = "Asymmetry original data")
abline(a = 0, b = 1, lwd = 2)
par(mfrow = c(1, 1))
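# Posterior predictive p-values (a rough sketch) for the checking
# statistics above; values near 0 or 1 would flag model misfit
mean(min.y <= min(time))                        # minimum
mean(max.y >= max(time))                        # maximum
mean(inter.q >= quantile(time, prob = 0.75) -
               quantile(time, prob = 0.25))     # interquartile range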
# Example of a 2x2 table and odds ratio: GREAT trial ...
library(LearnBayes)

draws <- rdirichlet(10000, c(13, 23, 150, 125))   # here a[i,j] = 1
odds <- draws[,1]*draws[,4]/(draws[,2]*draws[,3])
hist(odds, breaks = 100, xlab = "Odds Ratio", freq = FALSE)
lines(density(odds), lty = 2, lwd = 2, col = "blue")
abline(v = quantile(odds, prob = c(0.025, 0.5, 0.975)), lty = 3, col = "red")

tab.p <- rdirichlet(1000, c(1, 1, 1, 1))
odds.p <- tab.p[,1]*tab.p[,4]/(tab.p[,2]*tab.p[,3])
lines(density(odds.p), lty = 1, lwd = 2)
legend(1, 1.5, legend = c("prior odds", "posterior odds"),
       lty = c(1, 2), col = c("black", "blue"))

quantile(odds, prob = c(0.025, 0.5, 0.975))
sum(odds > 1)/10000

# Comparison with Fisher's exact test
set.seed(123)
param <- c(2, 1, 1, 2)   # parameters of the Dirichlet prior
draws <- rdirichlet(10000, c(13, 23, 150, 125) + param)
odds <- draws[,1]*draws[,4]/(draws[,2]*draws[,3])

tab.p2 <- rdirichlet(1000, param)
odds.p2 <- tab.p2[,1]*tab.p2[,4]/(tab.p2[,2]*tab.p2[,3])
lines(density(odds.p2), lty = 1, lwd = 2)
legend(1, 1.5, legend = c("prior odds", "posterior odds"),
       lty = c(1, 2), col = c("black", "blue"))

quantile(odds, prob = c(0.025, 0.5, 0.975))
sum(odds > 1)/10000
fisher.test(matrix(c(13, 23, 150, 125), nrow = 2, byrow = TRUE),
            alternative = "less")$p.value

a <- c(1, 1)
x <- c(0.5, 0.5)
# log-density of a Dirichlet (not used below)
logdirich <- function(a, data){
  d <- ddirichlet(data, a)
  log(d)
}

a <- c(2, 2)
# un-normalized log-density over two free cell probabilities
mydirich <- function(theta, data){
  a1 <- data[1]
  a2 <- data[2]
  f <- theta[1]^(1 - a1) + theta[2]^(1 - a2)
  log(f)
}

theta <- expand.grid(x = seq(0.01, 0.99, 0.1), y = seq(0.01, 0.99, 0.1))
m <- length(seq(0.01, 0.99, 0.1))
n <- dim(theta)[1]
z <- rep(0, n)
for(i in 1:n) z[i] <- mydirich(as.numeric(theta[i, ]), c(2, 2))
z <- matrix(z, byrow = TRUE, nrow = m)
contour(seq(0.01, 0.99, 0.1), seq(0.01, 0.99, 0.1), z,
        main = "Dirichlet with a=(0,-0.3,-1.4,0)", col = "blue", lwd = 2,
        xlab = expression(theta[11]), ylab = expression(theta[22]))

## bivariate Dirichlet
f <- function(x, y)
{
  r <- x^1 * y^1
  as.vector(r)
}

x <- seq(0.1, 0.2, length = 50)
y <- seq(0.1, 0.2, length = 50)
z <- outer(x, y, f)

persp(x, y, z, theta = 100, phi = 30, expand = 0.5, col = "lightblue",
      shade = 0.75, xlab = "theta11", ylab = "theta22",
      zlab = "f(theta11, theta22)")
title("Dirichlet with a11=2, a12=1, a21=1, a22=2")

image(x, y, z, col = terrain.colors(100),
      xlab = expression(theta[11]), ylab = expression(theta[22]))
contour(x, y, z, add = TRUE, col = rainbow(10), lwd = 2)
title("Dirichlet with a11=2, a12=1, a21=1, a22=2")

# Remove objects generated in this lecture ...
tm <- ls()
rm(list = tm[-which(tm %in% c("bugsdir", "working.directory"))])
rm(tm)

# Lecture 5: Introduction to WinBUGS ##########################################

# Using the R2WinBUGS package
library(R2WinBUGS)

# For Windows XP
bugsdir <- "C:/Archivos de programa/WinBUGS14"

n <- 20
y <- 15
data1 <- list("n", "y")
par1 <- c("theta", "y.pred")

cat("model
{
  y ~ dbin(theta, n)
  logit(theta) <- phi
  phi ~ dnorm(0, 0.001)
  y.pred ~ dbin(theta, n)   # making prediction
}", file = "model1.bug")

m1 <- bugs(data1, inits = NULL, par1, "model1.bug",
           n.chains = 5, n.iter = 2000, n.thin = 1,
           bugs.directory = bugsdir, working.directory = getwd(),
           clearWD = TRUE, debug = TRUE)
print(m1, digits.summary = 3)

### obtaining results from the object "m1"
class(m1)
names(m1)
m1$pD
m1$n.chains
m1$sims.array[1:10]

theta <- m1$sims.array[1:1000]
length(theta)
hist(theta, breaks = 50, prob = TRUE)
lines(density(theta), col = "blue", lwd = 2)
# End ...
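# Optional check (a sketch): grid approximation of the same posterior in
# plain R; the logit-normal prior is mapped to the theta scale with the
# change-of-variable Jacobian 1/(theta*(1-theta))
grid <- seq(0.001, 0.999, length = 1000)
prior <- dnorm(log(grid/(1 - grid)), 0, sqrt(1/0.001)) / (grid*(1 - grid))
post <- prior * dbinom(15, 20, grid)
post <- post/sum(post * (grid[2] - grid[1]))   # normalize to a density
lines(grid, post, col = "red", lwd = 2, lty = 2)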
# Remove objects generated in this lecture ...
tm <- ls()
rm(list = tm[-which(tm %in% c("bugsdir", "working.directory"))])
rm(tm)

# Lecture 6: Multivariate models with WinBUGS #################################

library(R2WinBUGS)
library(car)

# Example: Multivariate Normal toy example
# Tanner's data
Y <- matrix(c( 1,  1,
               1, -1,
              -1,  1,
              -1, -1,
               2, NA,
               2, NA,
              -2, NA,
              -2, NA,
              NA,  2,
              NA,  2,
              NA, -2,
              NA, -2), ncol = 2, byrow = TRUE)

# save the BUGS model into the "cammel.bug" file
cat("model
{
  for (i in 1:12){
    Y[i, 1:2] ~ dmnorm(mu[], tau[,])
  }
  mu[1] <- 0; mu[2] <- 0
  tau[1:2, 1:2] ~ dwish(R[,], 2)
  R[1,1] <- 0.001; R[1,2] <- 0; R[2,1] <- 0; R[2,2] <- 0.001
  Sigma2[1:2, 1:2] <- inverse(tau[,])
  rho <- Sigma2[1,2]/sqrt(Sigma2[1,1]*Sigma2[2,2])
}", file = "cammel.bug")

dat <- list("Y")
par1 <- c("rho")

m.cammel <- bugs(dat, inits = NULL, par1, model = "cammel.bug",
                 n.chains = 2, n.iter = 10000, n.thin = 1,
                 bugs.directory = bugsdir, working.directory = getwd(),
                 clearWD = TRUE, debug = TRUE, DIC = FALSE)

rho <- m.cammel$sims.array[, , "rho"]
hist(rho, breaks = 80, xlab = expression(rho))

###############################################################################
# Example: Multinomial with a non-conjugate prior

# save the BUGS model into the "popgen.bug" file
cat("model
{
  XAA[1] <- (1 - sigma)*p + sigma
  XAA[2] <- (1 - sigma)*q
  XAA[3] <- 0
  XAB[1] <- (1 - sigma)*p/2 + sigma/4
  XAB[2] <- 0.5
  XAB[3] <- (1 - sigma)*q/2 + sigma/4
  XBB[1] <- 0
  XBB[2] <- (1 - sigma)*p
  XBB[3] <- (1 - sigma)*q + sigma

  KAA <- sum(NAA[]); KAB <- sum(NAB[]); KBB <- sum(NBB[])
  NAA[1:3] ~ dmulti(XAA[], KAA)
  NAB[1:3] ~ dmulti(XAB[], KAB)
  NBB[1:3] ~ dmulti(XBB[], KBB)

  p ~ dunif(0, 1)      # uniform prior
  sigma ~ dunif(0, 1)
  q <- 1 - p
}", file = "popgen.bug")

dat <- list(NAA = c(427, 95, 0), NAB = c(108, 161, 71), NBB = c(0, 64, 74))
par1 <- c("p", "sigma")

m.popgen <- bugs(dat, inits = NULL, par1, model = "popgen.bug",
                 n.chains = 2, n.iter = 10000, n.thin = 1,
                 bugs.directory = bugsdir, working.directory = getwd(),
                 clearWD = TRUE, debug = TRUE)
print(m.popgen, digits = 3)

sigma <- m.popgen$sims.array[1:1000, 1, "sigma"]
p <- m.popgen$sims.array[1:1000, 1, "p"]

library(car)
scatterplotMatrix(~ p + sigma, diagonal = "histogram",
                  smoother = FALSE, reg.line = FALSE)
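# Quick convergence check (a sketch), assuming the coda package is
# available alongside R2WinBUGS
library(coda)
effectiveSize(p)       # effective sample size of the retained p draws
effectiveSize(sigma)
cor(p, sigma)          # posterior correlation visible in the pairs plot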
# Remove objects generated in this lecture ...
tm <- ls()
rm(list = tm[-which(tm %in% c("bugsdir", "working.directory"))])
rm(tm)

# Lecture 7: Introduction to MCMC #############################################

# Example: Gibbs sampler
# Normal distribution with unknown mean and variance (variance = 1/tau)
gibbsNormal <- function(y, mu1, tau1, N, mu0, phi0, a, b)
{
  mu <- numeric(N + 1)   # N = number of iterations
  mu[1] <- mu1           # the initial value for mu
  tau <- numeric(N + 1)
  tau[1] <- tau1         # the initial value for tau
  n <- length(y); ybar <- mean(y)
  for(i in 2:(N + 1))
  {
    # generate samples from the full conditionals
    mu[i] <- rnorm(1, (mu0*phi0 + n*ybar*tau[i-1])/(phi0 + n*tau[i-1]),
                   1/sqrt(phi0 + n*tau[i-1]))
    tau[i] <- rgamma(1, (n + 2*a)/2, (sum((y - mu[i])^2) + 2*b)/2)
  }
  cbind(mu, tau)
}

y <- c(160.2, 155.1, 135.0, 138.8, 147.0, 115.0, 113.4, 97.5, 110.3, 115.8)
res <- gibbsNormal(y, 150, 1, 500, 150, 10, 0.1, 0.1)

par(mfrow = c(2, 2))
plot(res[50:500, 1], col = "red", type = "l", ylab = "mu",
     xlab = "Iterations")
hist(res[50:500, 1], main = "Posterior for mu")
plot(1/res[50:500, 2], col = "blue", type = "l", ylab = "sigma2",
     xlab = "Iterations")
hist(1/res[50:500, 2], main = "Posterior for sigma2")
par(mfrow = c(1, 1))

###############################################################################
# Example: Mixture of Normals
# Metropolis-Hastings sampler for a mixture of normals using
# the MCMCpack package
library(MCMCpack)

dmix <- function(x, m1, s1, m2, s2, q1)
  q1*dnorm(x, mean = m1, sd = s1) + (1 - q1)*dnorm(x, mean = m2, sd = s2)

post.mix <- MCMCmetrop1R(dmix, m1 = 0, m2 = 5, s1 = 1, s2 = 1, q1 = 0.3,
                         theta.init = 1, thin = 1, mcmc = 10000,
                         burnin = 1000, logfun = FALSE)
plot(post.mix)

###############################################################################
# Example: Metropolis toy example
# Gelman and Meng (1991) kernel function

# plot the function
f <- function(x, y, A = 1, B = 0, C1 = 3, C2 = 3, log = FALSE)
{
  if (is.vector(x))
    x <- matrix(x, nrow = 1)
  r <- -0.5 * (A*x^2*y^2 + x^2 + y^2 - 2*B*x*y - 2*C1*x - 2*C2*y)
  if (!log) r <- exp(r)
  as.vector(r)
}

x <- seq(-1, 6, length = 50)
y <- seq(-1, 6, length = 50)
z <- outer(x, y, f)

# Gelman-Meng kernel
persp(x, y, z, theta = 100, phi = 30, expand = 0.5, col = "lightblue",
      shade = 0.75)
image(x, y, log(z), col = gray(seq(0, 1, len = 256)))
contour(x, y, log(z), add = TRUE, levels = seq(5, -35, -2), col = rainbow(10))

## Metropolis sampling with MCMCpack
library(MCMCpack)

## Gelman and Meng (1991) kernel function:
# write the log-kernel with the first argument as a vector
f.g.m <- function(xy, A = 1, B = 0, C1 = 3, C2 = 3)
{
  x <- xy[1]; y <- xy[2]
  r <- -0.5 * (A*x^2*y^2 + x^2 + y^2 - 2*B*x*y - 2*C1*x - 2*C2*y)
  as.vector(r)
}

# f.g.m returns the log-kernel, hence logfun = TRUE
res <- MCMCmetrop1R(f.g.m, theta.init = c(0, 1), mcmc = 10000,
                    burnin = 5000, logfun = TRUE)
points(res[,1], res[,2])

par(mfrow = c(2, 1), oma = c(0, 0, 2, 0))
plot(res[,1], type = "l", col = "blue", xlab = "iteration", ylab = "x")
plot(res[,2], type = "l", col = "red", xlab = "iteration", ylab = "y")
mtext(side = 3, outer = TRUE, cex = 1.2,
      "Trace plots for the Metropolis iterations")
par(mfrow = c(1, 1))

###############################################################################
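# For illustration (a sketch): the same sampler written by hand, a
# random-walk Metropolis with a symmetric Normal proposal; the step size
# 0.5 is an arbitrary tuning choice
set.seed(42)
B <- 5000
xy <- matrix(0, nrow = B, ncol = 2)
xy[1, ] <- c(0, 1)                               # starting value
for (b in 2:B) {
  prop <- xy[b-1, ] + rnorm(2, 0, 0.5)           # random-walk proposal
  log.alpha <- f.g.m(prop) - f.g.m(xy[b-1, ])    # log acceptance ratio
  xy[b, ] <- if (log(runif(1)) < log.alpha) prop else xy[b-1, ]
}
plot(xy[, 1], xy[, 2], pch = ".", xlab = "x", ylab = "y",
     main = "Hand-rolled random-walk Metropolis")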
# Remove objects generated in this lecture ...
tm <- ls()
rm(list = tm[-which(tm %in% c("bugsdir", "working.directory"))])
rm(tm)

# Lecture 8 ###################################################################
# Bayesian regression
# Part of this lecture is done with WinBUGS

###############################################################################
# Example: stacks (stack loss data)
Y <- c(42, 37, 37, 28, 18, 18, 19, 20, 15, 14, 14, 13, 11, 12, 8, 7, 8, 8,
       9, 15, 15)

# 21 observations on 3 predictors, listed row by row
x <- matrix(c(80, 27, 89,
              80, 27, 88,
              75, 25, 90,
              62, 24, 87,
              62, 22, 87,
              62, 23, 87,
              62, 24, 93,
              62, 24, 93,
              58, 23, 87,
              58, 18, 80,
              58, 18, 89,
              58, 17, 88,
              58, 18, 82,
              58, 19, 93,
              50, 18, 89,
              50, 18, 86,
              50, 19, 72,
              50, 19, 79,
              50, 20, 80,
              56, 20, 82,
              70, 20, 91), ncol = 3, byrow = TRUE)

pairs(cbind(Y, x))   # pairs plot

library(car)
scatterplotMatrix(cbind(Y, x), diagonal = "qqplot", lwd = 3,
                  reg.line = FALSE, smoother = FALSE)

# Bayesian analysis using WinBUGS

###############################################################################
# ANOVA
library(MASS)
data(michelson)
attach(michelson)
plot(Expt, Speed, main = "Speed of Light Data",
     xlab = "Experiment No.", ylab = "Speed")

# Remove objects generated in this lecture ...
tm <- ls()
rm(list = tm[-which(tm %in% c("bugsdir", "working.directory"))])
rm(tm)

# Lecture: Non-linear models ##################################################
# Not in the current lecture

x <- c(1.0, 1.5, 1.5, 1.5, 2.5, 4.0, 5.0, 5.0, 7.0, 8.0, 8.5, 9.0, 9.5,
       9.5, 10.0, 12.0, 12.0, 13.0, 13.0, 14.5, 15.5, 15.5, 16.5, 17.0,
       22.5, 29.0, 31.5)
y <- c(1.80, 1.85, 1.87, 1.77, 2.02, 2.27, 2.15, 2.26, 2.47, 2.19, 2.26,
       2.40, 2.39, 2.41, 2.50, 2.32, 2.32, 2.43, 2.47, 2.56, 2.65, 2.47,
       2.64, 2.56, 2.70, 2.72, 2.57)

plot(x, y, ylim = c(0, 3))

# classical non-linear least squares fit
ini.par <- c(alpha = max(y), beta = max(y) - min(y), gamma = 0.5)
ini.par
summary(fit <- nls(y ~ alpha - beta*gamma^x, start = ini.par))

theta <- coef(fit)
mu <- function(x, alpha = theta[1], beta = theta[2], gamma = theta[3])
  alpha - beta*gamma^x
curve(mu(x), from = 1, to = max(x), col = "red", lwd = 3, add = TRUE)

# Using the R2WinBUGS package
library(R2WinBUGS)

N <- 27
data1 <- list("N", "y", "x")
par1 <- c("alpha", "beta", "gamma", "sigma")

cat("model
{
  for(i in 1:N)
  {
    y[i] ~ dnorm(mu[i], tau)
    mu[i] <- alpha - beta * pow(gamma, x[i])
    res[i] <- (y[i] - mu[i])/sigma
    y.pred[i] ~ dnorm(mu[i], tau)
  }
  alpha ~ dunif(0, 5)
  beta ~ dunif(0, 5)
  gamma ~ dunif(0.01, 0.99)
  tau <- 1/(sigma*sigma)
  sigma ~ dunif(0.01, 100)   # Gelman's prior
}", file = "nonlin.bug")

nlm1 <- bugs(data1, inits = NULL, par1, "nonlin.bug",
             n.chains = 3, n.iter = 20000, n.thin = 1,
             bugs.directory = bugsdir, working.directory = getwd(),
             clearWD = TRUE, debug = TRUE)
print(nlm1, digits.summary = 3)

index <- sample(1:500)
alpha <- nlm1$sims.array[index, 1, "alpha"]
beta <- nlm1$sims.array[index, 1, "beta"]
gamma <- nlm1$sims.array[index, 1, "gamma"]
sigma <- nlm1$sims.array[index, 1, "sigma"]

library(car)
scatterplotMatrix(cbind(alpha, beta, gamma, sigma), diagonal = "histogram",
                  lwd = 3, reg.line = FALSE, smoother = FALSE)
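# Posterior uncertainty of the fitted curve (a sketch): overlay a few
# posterior draws of mu(x) on the data, next to the classical nls fit
plot(x, y, ylim = c(0, 3))
for (b in 1:50) {
  curve(alpha[b] - beta[b]*gamma[b]^x, from = 1, to = max(x),
        col = "grey", add = TRUE)
}
curve(mu(x), from = 1, to = max(x), col = "red", lwd = 3, add = TRUE)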
# Remove objects generated in this lecture ...
tm <- ls()
rm(list = tm[-which(tm %in% c("bugsdir", "working.directory"))])
rm(tm)

# Not in the current lecture: logistic regression #############################

# beetles data
x <- c(1.6907, 1.7242, 1.7552, 1.7842, 1.8113, 1.8369, 1.8610, 1.8839)
n <- c(59, 60, 62, 56, 63, 59, 62, 60)
r <- c(6, 13, 18, 28, 52, 53, 61, 60)
N <- 8

# classical fits
x.c <- x - mean(x)
summary(f.b <- glm(cbind(r, n - r) ~ x.c, family = binomial))
update(f.b, family = binomial(link = cloglog))

data.b <- c("x", "n", "r", "N")
par.b <- c("alpha", "beta")

# Using a logistic link function
cat("model
{
  for(i in 1:N)
  {
    r[i] ~ dbin(p[i], n[i])
    logit(p[i]) <- alpha + beta * (x[i] - mean(x[]))
    rhat[i] <- n[i] * p[i]
    phat[i] <- r[i]/n[i]
  }
  beta ~ dnorm(0.0, 0.001)
  alpha ~ dnorm(0.0, 0.001)
}", file = "insects1.bug")

bin.reg <- bugs(data.b, inits = NULL, par.b, "insects1.bug",
                n.chains = 1, n.iter = 20000, n.thin = 1,
                bugs.directory = bugsdir, working.directory = getwd(),
                clearWD = TRUE, debug = TRUE)
print(bin.reg)

# Using a cloglog link function
cat("model
{
  for(i in 1:N)
  {
    r[i] ~ dbin(p[i], n[i])
    cloglog(p[i]) <- alpha + beta * (x[i] - mean(x[]))
    rhat[i] <- n[i] * p[i]
    phat[i] <- r[i]/n[i]
  }
  beta ~ dnorm(0.0, 0.1)
  alpha ~ dnorm(0.0, 0.1)
}", file = "insects2.bug")

bin.reg2 <- bugs(data.b, inits = NULL, par.b, "insects2.bug",
                 n.chains = 3, n.iter = 20000, n.thin = 1,
                 bugs.directory = bugsdir, working.directory = getwd(),
                 clearWD = TRUE, debug = TRUE)

# model fit: posterior draws under the logistic link
alpha <- bin.reg$sims.array[, 1, "alpha"]
beta <- bin.reg$sims.array[, 1, "beta"]

# model fit: posterior draws under the cloglog link
alpha2 <- bin.reg2$sims.array[, 1, "alpha"]
beta2 <- bin.reg2$sims.array[, 1, "beta"]

invlogit <- function(x) 1/(1 + exp(-x))
invcloglog <- function(x) 1 - exp(-exp(x))

par(mfrow = c(1, 2))
plot(x, r/n, cex = 2, col = "red", pch = "o", type = "b",
     main = "logistic link")
for(b in 1:50){
  points(x, invlogit((alpha[b] - beta[b]*mean(x)) + beta[b]*x),
         cex = 0.5, type = "l", col = "grey", lty = 3)
}
plot(x, r/n, cex = 2, col = "red", pch = "o", type = "b",
     main = "complementary log-log link")
for(b in 1:50){
  points(x, invcloglog((alpha2[b] - beta2[b]*mean(x)) + beta2[b]*x),
         cex = 0.5, type = "l", col = "lightblue", lty = 3)
}
par(mfrow = c(1, 1))

# LD50 under the logistic link
LD50 <- -1*(alpha - beta*mean(x))/beta
mean(LD50)
sd(LD50)
quantile(LD50, prob = c(0.025, 0.5, 0.975))
hist(LD50, breaks = 80, main = "LD50")

#..............................................................................
# mixture of link functions
data.b <- c("x", "n", "r", "N")
par.b <- c("alpha", "beta", "w")

cat("model
{
  for(i in 1:N)
  {
    r[i] ~ dbin(p[i], n[i])
    p[i] <- w*q[i,1] + (1 - w)*q[i,2]
    logit(q[i,1]) <- alpha + beta * (x[i] - mean(x[]))
    cloglog(q[i,2]) <- alpha + beta * (x[i] - mean(x[]))
    rhat[i] <- n[i] * p[i]
    phat[i] <- r[i]/n[i]
  }
  beta ~ dnorm(0.0, 0.001)
  alpha ~ dnorm(0.0, 0.001)
  w ~ dbeta(1, 1)
}", file = "insect-mix.bug")

bin.reg.mix <- bugs(data.b, inits = NULL, par.b, "insect-mix.bug",
                    n.chains = 1, n.iter = 20000, n.thin = 1,
                    bugs.directory = bugsdir, working.directory = getwd(),
                    clearWD = TRUE, debug = TRUE)
print(bin.reg.mix)

#..............................................................................
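# Posterior of the link-mixture weight w (a quick look; values near 1
# favour the logistic link, values near 0 the cloglog link)
w <- bin.reg.mix$sims.array[, 1, "w"]
hist(w, breaks = 50, prob = TRUE, xlab = "w",
     main = "Posterior of the mixture weight")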
# O-rings
# distress: yes = 1, no = 0
y <- c(1,1,1, 1,0,0,0, 0,0,0,0, 0,1,1,0, 0,0,1,0, 0,0,0,0)
# temperature at launch time (in F)
x <- c(53,57,58, 63,66,67,67, 67,68,69,70, 70,70,70,72, 73,75,75,76,
       76,78,79,81)
# number of previous flights
N <- 23

# classical fits
summary(f.b1 <- glm(y ~ x, family = binomial))
summary(f.b2 <- update(f.b1, family = binomial(link = cloglog)))

jitter.binary <- function(a, jitt = 0.15){
  ifelse(a == 0, runif(length(a), 0, jitt), runif(length(a), 1 - jitt, 1))
}

y.jitter <- jitter.binary(y)
plot(x, y.jitter, xlab = "Temperature in F",
     ylab = "Prob of at least one o-ring incident")
curve(invlogit(coef(f.b1)[1] + coef(f.b1)[2]*x), lty = 1, lwd = 2,
      col = "red", add = TRUE)
curve(invcloglog(coef(f.b2)[1] + coef(f.b2)[2]*x), lty = 2, lwd = 2,
      col = "blue", add = TRUE)

#library(arm)
#sim.1 <- sim(f.b)
#for(j in 1:40){
#  curve(invlogit(sim.1$coef[j,1] + sim.1$coef[j,2]*I(x>65)),
#        col="gray", lwd=.5, add=T)
#}

# change-point model
data.b <- c("x", "y", "N")
par.b <- c("beta0", "beta1", "K")

cat("model
{
  for(i in 1:N)
  {
    y[i] ~ dbern(p[i])
    logit(p[i]) <- beta0 + beta1 * z[i]
  }
  for(i in 1:N){ z[i] <- step(x[i] - K) }

  # priors
  K ~ dunif(53, 81)
  beta0 ~ dnorm(0.0, 0.01)
  beta1 ~ dnorm(0.0, 0.01)

  # prob(a): failure under low temperatures
  p.a <- 1/(1 + exp(-1*beta0))
  # prob(b)
  y.b ~ dbin(p.b, 7)
  # prob(c)
  y.c ~ dbin(p.c, 2)
  # prob(d)
  y.d ~ dbern(p.d)

  # prob of catastrophic failure in one field joint
  p.f <- p.a*p.b*p.c*p.d
  # prob of catastrophic failure in at least one out of 6 field joints
  p.cat <- 1 - pow(1 - p.f, 6)

  # risk at K or more degrees
  p.aK <- 1/(1 + exp(-1*(beta0 + beta1)))
  p.fK <- p.aK*p.b*p.c*p.d
  p.catK <- 1 - pow(1 - p.fK, 6)

  # priors
  p.b ~ dbeta(0.5, 0.5)
  p.c ~ dbeta(0.5, 0.5)
  p.d ~ dbeta(0.5, 0.5)
}", file = "risk1.bug")

chall.1 <- bugs(data.b, inits = NULL, par.b, "risk1.bug",
                n.chains = 1, n.iter = 20000, n.thin = 1,
                bugs.directory = bugsdir, working.directory = getwd(),
                clearWD = TRUE, debug = TRUE)
print(chall.1)

par(mfrow = c(1, 2))
K <- chall.1$sims.array[, 1, "K"]
hist(K, breaks = 180, col = "lightblue")

# model fit
beta0 <- chall.1$sims.array[, 1, "beta0"]
beta1 <- chall.1$sims.array[, 1, "beta1"]
plot(x, y.jitter, xlab = "Temperature in F",
     ylab = "Prob of at least one o-ring incident", cex = 1.5)
for(i in 1:100){
  curve(invlogit(beta0[i] + beta1[i]*I(x > K[i])), col = "gray",
        lwd = 0.5, add = TRUE)
}
par(mfrow = c(1, 1))

# Risk analysis
y.b <- 2
y.c <- 1
y.d <- 0
data.r <- c("x", "y", "N", "y.b", "y.c", "y.d")
par.r <- c("beta0", "beta1", "K", "p.cat", "p.catK")

chall.2 <- bugs(data.r, inits = NULL, par.r, "risk1.bug",
                n.chains = 1, n.iter = 20000, n.thin = 1,
                bugs.directory = bugsdir, working.directory = getwd(),
                clearWD = TRUE, debug = TRUE)
print(chall.2, 3)

p.cat <- chall.2$sims.array[, 1, "p.cat"]
p.catK <- chall.2$sims.array[, 1, "p.catK"]

# odds ratio of catastrophic failure at 31°F relative to K degrees
odds.31 <- (p.cat/(1 - p.cat)) / (p.catK/(1 - p.catK))
odds.31 <- odds.31[odds.31 < 100]

par(mfrow = c(1, 2))
hist(p.cat, col = "blue", xlim = c(0, 0.8), breaks = 80,
     xlab = "Risk at 31°F")
abline(v = quantile(p.cat, prob = c(0.05, 0.5, 0.95)), lty = 2, lwd = 2)
hist(odds.31, col = "red", xlim = c(0, 60), breaks = 80,
     xlab = "Odds ratio")
abline(v = quantile(odds.31, prob = c(0.05, 0.5, 0.95)), lty = 2, lwd = 2)
par(mfrow = c(1, 1))
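# Posterior summaries of the catastrophic-failure risk (a quick numerical
# companion to the histograms above)
round(quantile(p.cat, prob = c(0.05, 0.5, 0.95)), 3)    # risk at 31°F
round(quantile(p.catK, prob = c(0.05, 0.5, 0.95)), 3)   # risk at K or more degrees
round(quantile(odds.31, prob = c(0.05, 0.5, 0.95)), 2)  # odds ratio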
# Remove objects generated in this lecture ...
tm <- ls()
rm(list = tm[-which(tm %in% c("bugsdir", "working.directory"))])
rm(tm)

# Lecture: Modelling the data collection process ##############################
# From the paper: Verde (2010)

# Example: non-informative missing process
# Sequence 1
y.true <- c(0,0,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,
            0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,
            0,0,1,1,0,0,0,1,0,1,0,1,1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,
            1,1,1,1,0,0,1)

# missing flips
set.seed(123)
index <- sample(1:100, 40, replace = FALSE)
index
y.miss <- y.true
y.miss[index] <- NA

n <- length(y.true)
mu.phi <- 0
pre.phi <- 0.1

data1 <- list("n", "y.miss", "mu.phi", "pre.phi")
par1 <- c("theta")

cat("model
{
  for(i in 1:n){
    y.miss[i] ~ dbern(theta)       # model for y
  }
  logit(theta) <- phi
  phi ~ dt(mu.phi, pre.phi, 4)     # prior for theta
}", file = "missing-flips1.bug")

m.miss <- bugs(data1, inits = NULL, par1, "missing-flips1.bug",
               n.chains = 3, n.iter = 4000, n.thin = 5,
               bugs.directory = bugsdir, working.directory = getwd(),
               clearWD = TRUE, debug = TRUE)
print(m.miss, digits.summary = 3)

# Example: non-ignorable missing process ...
# Informative missing process: the missingness probability is tied to
# theta via gamma = theta/(1 + theta)
mean(y.true)      # 0.38
0.38/(1 + 0.38)   # 0.2753623

# missing flips
set.seed(123)
index <- sample(1:100, 28, replace = FALSE)
y.miss <- y.true
y.miss[index] <- NA

I.mis <- rep(1, 100)
I.mis[!is.na(y.miss)] <- 0

data1 <- list("n", "y.miss", "I.mis", "mu.phi", "pre.phi")
par1 <- c("theta")

cat("model
{
  for(i in 1:n){
    y.miss[i] ~ dbern(theta)     # model for y
    I.mis[i] ~ dbern(gamma)      # model for the missing indicator
  }
  gamma <- theta/(1 + theta)
  logit(theta) <- phi
  phi ~ dt(mu.phi, pre.phi, 4)   # prior for theta
}", file = "missing-flips2.bug")

m.miss2 <- bugs(data1, inits = NULL, par1, "missing-flips2.bug",
                n.chains = 3, n.iter = 4000, n.thin = 5,
                bugs.directory = bugsdir, working.directory = getwd(),
                clearWD = TRUE, debug = TRUE)
print(m.miss2, digits.summary = 3)

theta.mis2 <- m.miss2$sims.array[,,"theta"]

# Repeat the analysis without the informative missingness assumption ...
data1 <- list("n", "y.miss", "mu.phi", "pre.phi")
par1 <- c("theta")

m.miss3 <- bugs(data1, inits = NULL, par1, "missing-flips1.bug",
                n.chains = 3, n.iter = 4000, n.thin = 5,
                bugs.directory = bugsdir, working.directory = getwd(),
                clearWD = TRUE, debug = TRUE)
print(m.miss3, digits.summary = 3)

theta.mis3 <- m.miss3$sims.array[,,"theta"]

# Lecture 9 ###################################################################

library(arm)

I <- 34
r <- c(2,3,1,3,2,3,2,7,3,8,7,0,15,4,0,6,0,33,4,5,2,0,8,41,24,7,46,9,23,
       53,8,3,1,23)
n <- c(4,20,5,10,2,5,8,19,6,10,24,1,30,22,1,11,1,54,9,18,12,1,11,77,51,
       16,82,13,43,75,13,10,6,37)
rain <- c(1735,1936,2000,1973,1750,1800,1750,2077,1920,1800,2050,1830,
          1650,2200,2000,1770,1920,1770,2240,1620,1756,1650,2250,1796,
          1890,1871,2063,2100,1918,1834,1780,1900,1976,2292)

rate <- r/n
rate.sd <- sqrt(rate*(1 - rate)/n)

coefplot(rate, rate.sd, CI = 1, xlim = c(0, 1), col = "blue",
         main = "Toxoplasmosis Rates: El Salvador 34 cities")

par(mfrow = c(1, 2), oma = c(0, 0, 4, 0))
plot(rain, rate, col = "red", xlab = "Amount of Rain (mm)",
     ylab = "Toxoplasmosis Rate")
for(i in 1:I){
  segments(rain[i], rate[i] - rate.sd[i], rain[i], rate[i] + rate.sd[i],
           col = "red", lwd = 2)
}
plot(n, rate, col = "red", xlab = "Sample Size",
     ylab = "Toxoplasmosis Rate")
for(i in 1:I){
  segments(n[i], rate[i] - rate.sd[i], n[i], rate[i] + rate.sd[i],
           col = "blue", lwd = 2)
}
par(mfrow = c(1, 1))
mtext(side = 3, line = 0, cex = 1.5, outer = TRUE,
      "Toxoplasmosis Rates: El Salvador 34 cities")

# using R2WinBUGS ...
library(R2WinBUGS)

N <- 34
data1 <- c("r", "n", "N")
inits1 <- NULL
par1 <- c("p")

cat("model
{
  for(i in 1:N)
  {
    a[i] ~ dnorm(mu, tau)
    logit(p[i]) <- a[i]
    r[i] ~ dbin(p[i], n[i])
  }
  # Priors
  mu ~ dnorm(0.0, 1.0E-6)
  # Choice of prior on the random-effects variance
  # Prior: uniform on the SD
  sigma ~ dunif(0, 10)
  tau <- 1/(sigma*sigma)
}", file = "toxo1.bug")

m1 <- bugs(data1, inits = inits1, parameters = par1,
           model.file = "toxo1.bug", n.chains = 1, n.iter = 2000,
           DIC = TRUE, working.directory = getwd(),
           bugs.directory = bugsdir, clearWD = TRUE, debug = TRUE)
print(m1, digits.summary = 3)

attach.bugs(m1)
post.mean <- apply(p, 2, mean)
post.sd <- apply(p, 2, sd)

coefplot(rate, rate.sd, CI = 1, xlim = c(0, 1), col = "blue",
         main = "Toxoplasmosis Rates: El Salvador 34 cities")
coefplot(post.mean, post.sd, CI = 1, xlim = c(0, 1), col = "red",
         add = TRUE)

# Shrinkage
x <- gl(2, 34, labels = c("ML", "Bayes"))
y <- c(rate, post.mean)
plot(x, y, type = "n", main = "Shrinkage effect")
for(i in 1:34) segments(1, rate[i], 2, post.mean[i], lty = 3,
                        col = "blue", lwd = 2)
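# The Bayes estimates are pulled toward the overall mean, so they should
# be less dispersed than the raw ML rates (a quick check)
round(c(sd.raw = sd(rate), sd.bayes = sd(post.mean)), 3)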
# New script 2 ................................................................
N <- 34
data2 <- c("r", "n", "N", "rain")
inits2 <- NULL
par2 <- c("beta1", "beta2")

cat("model
{
  # Model
  # log(n)
  for(i in 1:N){ log.n[i] <- log(n[i]) }
  for(i in 1:N)
  {
    a[i] ~ dnorm(alpha, tau)
    logit(p[i]) <- a[i] + beta1 * (rain[i] - mean(rain[])) +
                   beta2 * (log.n[i] - mean(log.n[]))
    r[i] ~ dbin(p[i], n[i])
  }
  # Priors
  # precision
  tau <- 1/(sigma*sigma)
  sigma ~ dunif(0.01, 5)
  # average logit rate across cities, adjusted by rain
  alpha ~ dnorm(0, 0.01)
  # rain slope
  beta1 ~ dnorm(0, 0.01)
  # sample-size slope
  beta2 ~ dnorm(0, 0.01)
}", file = "toxo2.bug")

m2 <- bugs(data2, inits = inits2, parameters = par2,
           model.file = "toxo2.bug", n.chains = 3, n.iter = 20000,
           DIC = TRUE, working.directory = getwd(),
           bugs.directory = bugsdir, clearWD = TRUE, debug = TRUE)
print(m2, digits.summary = 3)

attach.bugs(m2)

par(mfrow = c(1, 2))
hist(beta1, breaks = 40, prob = TRUE, main = "Posterior for Beta_1")
lines(density(beta1), lwd = 2, col = "blue")
abline(v = 0, lwd = 2, lty = 2)
hist(beta2, breaks = 40, prob = TRUE, main = "Posterior for Beta_2")
lines(density(beta2), lwd = 2, col = "red")
abline(v = 0, lwd = 2, lty = 2)
par(mfrow = c(1, 1))

# Remove objects generated in this lecture ...
tm <- ls()
rm(list = tm[-which(tm %in% c("bugsdir", "working.directory"))])
rm(tm)

#..............................................................................
# The End!
# Good luck!