# Bayesian Analysis with R and WinBUGS
# 24 April 2013
# R script for Lectures
# Dr. Pablo E. Verde

# Set up working directory for R:
# setup WinBUGS directory
bugsdir <- "C:/Archivos de Programa/WinBUGS14"

# Lecture 1 ###################################################################

# Example: surgical ............................................................

# Beta(3, 27)
x <- seq(0, 0.5, by = 0.01)
y <- dbeta(x, 3, 27)
plot(x*100, y, xlab = "Mortality rate (in %)",
     ylab = "Density(mortality rate)", type = "l", lwd = 2, col = "red")

# Example: Three coins example .................................................

options(digits = 4)
set.seed(12345)

coin.pri <- rep(1/3, 3)      # prior for coins 1, 2, 3
theta <- c(0.25, 0.5, 0.75)  # probability of heads under coins 1, 2, 3
sim <- 100000                # number of simulations

y <- rep(0, 3*sim)
dim(y) <- c(sim, 3)

for(i in 1:sim)
{
  ind <- sample(c(1, 2, 3), size = 1, prob = coin.pri, replace = TRUE)
  y.new <- rbinom(1, 1, prob = theta[ind])
  y[i, ind] <- y.new
}

# average each class and normalize
th.post <- apply(y, 2, mean) / sum(apply(y, 2, mean))
th.post

# Monte Carlo error
V <- th.post * (1 - th.post) / sim
sqrt(V)

# posterior predictive probability that the next toss is a head:
sum(theta * apply(y, 2, mean) / sum(apply(y, 2, mean)))

# Remove objects generated in this lecture ...
tm <- ls()
rm(list = tm[-which(tm %in% c("bugsdir", "working.directory"))])
rm(tm)

# Lecture 2: Simple statistical models ########################################

# Example: flexibility of the Beta
par(mfrow = c(2, 2))
curve(dbeta(x, 1, 1), from = 0, to = 1, lty = 1, lwd = 2,
      main = "Beta(1, 1)", xlab = expression(theta))
curve(dbeta(x, 1/2, 1/2), from = 0, to = 1, lty = 2, lwd = 2, col = "red",
      main = "Beta(1/2, 1/2)", xlab = expression(theta))
curve(dbeta(x, 2, 5), from = 0, to = 1, lty = 2, lwd = 2, col = "blue",
      main = "Beta(2, 5)", xlab = expression(theta))
curve(dbeta(x, 2, 2), from = 0, to = 1, lty = 2, lwd = 2, col = "green",
      main = "Beta(2, 2)", xlab = expression(theta))
par(mfrow = c(1, 1))

# Example: Drug investigation ...

par(mfrow = c(3, 1))  # graphical output: 3 by 1 panels
# draw a curve for the Beta(9.2, 13.8) prior density
curve(dbeta(x, 9.2, 13.8), from = 0, to = 1,
      xlab = "prob of success", main = "Prior")
# draw a curve for the binomial likelihood (15 successes out of 20)
curve(dbinom(15, 20, x), from = 0, to = 1, col = "blue",
      xlab = "prob of success", main = "Likelihood")
# draw the Beta(24.2, 18.8) posterior
curve(dbeta(x, 24.2, 18.8), from = 0, to = 1, col = "red",
      xlab = "prob of success", main = "Posterior")
par(mfrow = c(1, 1))

# Direct Monte Carlo simulation ...
qbeta(c(0.05, 0.5, 0.75, 0.95), 24.2, 18.8)

theta.star <- rbeta(20000, 24.2, 18.8)
quantile(theta.star, prob = c(0.05, 0.5, 0.75, 0.95))

# Posterior of the odds ...
odds.star <- theta.star/(1 - theta.star)
quantile(odds.star, prob = c(0.05, 0.5, 0.75, 0.95))
hist(odds.star, breaks = 100, xlab = "odds ratio", freq = FALSE,
     xlim = c(0, 4))
lines(density(odds.star), lwd = 2, lty = 1, col = "red")

# Making predictions for binomial data ...
# Beta-binomial density
betabin <- function(r, a, b, m)
{
  gamma(a+b)/(gamma(a)*gamma(b)) * choose(m, r) *
    gamma(a+r)*gamma(b+m-r)/gamma(a+b+m)
}

# check: the probabilities sum to one
sum(betabin(0:10, 0.4, 0.1, 10))

x <- 0:40
px <- betabin(0:40, a = 24.2, b = 18.8, m = 40)
plot(x, px, type = "s")
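# A quick numerical check (a sketch): the mean and standard deviation of
# this predictive distribution, which should agree with the values 22.5
# and 4.3 quoted below
m.pred <- sum(x * px)
sd.pred <- sqrt(sum((x - m.pred)^2 * px))
round(c(mean = m.pred, sd = sd.pred), 1)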
# Comparison between the posterior for the rate and the predictive posterior
par(mfrow = c(1, 2))
# Beta posterior after having observed 15 successes in 20 trials
curve(dbeta(x, 24.2, 18.8), from = 0, to = 1, col = "red",
      xlab = "prob of success", main = "Posterior")
# Beta-binomial distribution of the number of successes x in the next
# 40 trials, with mean 22.5 and standard deviation 4.3
x <- 0:40
px <- betabin(0:40, a = 24.2, b = 18.8, m = 40)
plot(x, px, type = "h", xlab = "number of successes out of 40",
     main = "Predictive Posterior")
par(mfrow = c(1, 1))

# probability of at least 25 successes out of 40 further trials
sum(betabin(25:40, 24.2, 18.8, m = 40))   # 0.3290134

# Simulation of future data with a Beta-Binomial model ...
theta.star <- rbeta(10000, 24.2, 18.8)
y <- rbinom(10000, 40, theta.star)
freq.y <- table(y)
ys <- as.integer(names(freq.y))
predprob <- freq.y/sum(freq.y)
plot(ys, predprob, type = "h", xlab = "y", ylab = "Predictive Probability")
sum(predprob[ys > 24])

# Example: SBP - Bayesian analysis for Normal data
# This is an example of a Normal distribution with unknown mean
# and known variance. It is not in the lecture.
par(mfrow = c(3, 1))
# prior: Normal(120, sd = 10)
curve(dnorm(x, 120, 10), from = 100, to = 140, col = "blue",
      xlab = "Long-term SBP of 60-year old women", main = "Prior")
# likelihood: Normal(130, sd = 3.5)
curve(dnorm(x, 130, 3.5), from = 100, to = 140, col = "black",
      xlab = "Long-term SBP of 60-year old women", main = "Likelihood")
# posterior: Normal(128.9, sd = sqrt(11.1) = 3.3)
curve(dnorm(x, 128.9, sqrt(11.1)), from = 100, to = 140, col = "red",
      xlab = "Long-term SBP of 60-year old women", main = "Posterior")
par(mfrow = c(1, 1))

# Example: Annual number of cases of haemolytic uraemic syndrome,
# Henderson and Matthews (1993)
# Prior to posterior ...
# Prior: Gamma(0.679, 0.295)
# Posterior: Gamma(16.679, 10.295)
curve(dgamma(x, 16.679, 10.295), lty = 1, col = "blue", xlim = c(0, 5))
curve(dgamma(x, 0.679, 0.295), lty = 2, add = TRUE)
legend(3, 0.4, legend = c("prior", "posterior"), lty = c(2, 1),
       col = c("black", "blue"))

# Predictions ...
sum(dnbinom(6:17, prob = 10.295/(1 + 10.295), size = 16.679))   # 0.009594483
plot(dnbinom(0:10, prob = 10.295/(1 + 10.295), size = 16.679),
     type = "h", lwd = 2, col = "red", xlab = "counts")

# Lecture 3: Priors ###########################################################

## Predictive prior distribution for sample size and power
set.seed(123)
theta <- rnorm(10000, 0.5, 0.1)
sigma <- rnorm(10000, 1, 0.3)
sigma <- ifelse(sigma < 0, -1*sigma, sigma)

n <- 2*sigma^2/(theta^2) * (0.84 + 1.96)^2
pow <- pnorm(sqrt(63/2) * theta/sigma - 1.96)

par(mfrow = c(1, 2))
hist(n, prob = TRUE, xlim = c(0, 400), ylim = c(0, 0.01), breaks = 50)
lines(density(n), lwd = 2, col = "red")
hist(pow, prob = TRUE)
lines(density(pow), lwd = 2, col = "blue")
par(mfrow = c(1, 1))

round(quantile(n), 2)
round(quantile(pow), 2)
sum(pow < 0.7)/10000

# n = 100
set.seed(123)
theta <- rnorm(10000, 0.5, 0.1/0.5)
theta <- abs(theta)
sigma <- rnorm(10000, 1, 0.3/0.5)
sigma <- ifelse(sigma < 0, -1*sigma, sigma)
pow <- pnorm(sqrt(100/2) * theta/sigma - 1.96)
sum(pow < 0.7)/10000

# Mixture of Betas ...
mixbeta <- function(x, r, a1, b1, a2, b2, q, n)
{
  qstar <- q*betabin(r, a1, b1, n) /
           (q*betabin(r, a1, b1, n) + (1-q)*betabin(r, a2, b2, n))
  p1 <- dbeta(x, a1 + r, n - r + b1)
  p2 <- dbeta(x, a2 + r, n - r + b2)
  qstar*p1 + (1 - qstar)*p2
}
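# Sanity check (a sketch): the mixture posterior is a proper density and
# should integrate to one
integrate(mixbeta, lower = 0, upper = 1,
          r = 15, a1 = 9.2, b1 = 13.8, a2 = 1, b2 = 1, q = 0.8, n = 20)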
# Mixture between 80% informative and 20% non-informative priors ...
par(mfrow = c(2, 1))
curve(dbeta(x, 9.2, 13.8), from = 0, to = 1, col = "red",
      xlab = "probability of response", main = "priors")
curve(0.8*dbeta(x, 9.2, 13.8) + 0.2*dbeta(x, 1, 1),
      from = 0, to = 1, col = "blue", add = TRUE)
curve(dbeta(x, 24.2, 18.8), from = 0, to = 1, col = "red",
      xlab = "probability of response", main = "posteriors")
curve(mixbeta(x, r = 15, a1 = 9.2, b1 = 13.8, a2 = 1, b2 = 1, q = 0.8, n = 20),
      from = 0, to = 1, col = "blue", add = TRUE)
par(mfrow = c(1, 1))

# Same analysis using simulations: this is not presented in the lecture.
theta.star.mixbeta <- function(B, r, a1, b1, a2, b2, q, n)
{
  qstar <- q*betabin(r, a1, b1, n) /
           (q*betabin(r, a1, b1, n) + (1-q)*betabin(r, a2, b2, n))
  Ind <- rbinom(B, 1, qstar)
  p1 <- rbeta(B, a1 + r, n - r + b1)
  p2 <- rbeta(B, a2 + r, n - r + b2)
  Ind*p1 + (1 - Ind)*p2
}

sim.theta <- theta.star.mixbeta(B = 10000, r = 15, a1 = 9.2, b1 = 13.8,
                                a2 = 1, b2 = 1, q = 0.8, n = 20)

# posterior of the mixture by simulation
hist(sim.theta, breaks = 100, xlab = "probability of response",
     xlim = c(0, 1), freq = FALSE)
lines(density(sim.theta), lwd = 2, lty = 1, col = "blue")

# posterior of the odds from the mixture
sim.odds <- sim.theta/(1 - sim.theta)
hist(sim.odds, breaks = 100, xlab = "odds ratio", freq = FALSE,
     xlim = c(0, 10))
lines(density(sim.odds), lwd = 2, lty = 1, col = "red")

# predictive outcomes
# Simulation of future data
sim.theta <- theta.star.mixbeta(B = 10000, r = 15, a1 = 9.2, b1 = 13.8,
                                a2 = 1, b2 = 1, q = 0.8, n = 20)
y <- rbinom(10000, 40, sim.theta)   # number of successes out of n = 40
freq.y <- table(y)
ys <- as.integer(names(freq.y))
predprob <- freq.y/sum(freq.y)
plot(ys, predprob, type = "h", xlab = "y", ylab = "Predictive Probability")
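# Cross-check (a sketch): the exact predictive distribution is itself a
# mixture of two Beta-binomials, with the same posterior weight qstar used
# inside theta.star.mixbeta; overlay it on the simulated version above
qstar <- 0.8*betabin(15, 9.2, 13.8, 20) /
         (0.8*betabin(15, 9.2, 13.8, 20) + 0.2*betabin(15, 1, 1, 20))
px.mix <- qstar*betabin(0:40, 24.2, 18.8, 40) +
          (1 - qstar)*betabin(0:40, 16, 6, 40)
points(0:40, px.mix, col = "blue", pch = 16, cex = 0.6)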
# Remove objects generated in this lecture ...
tm <- ls()
rm(list = tm[-which(tm %in% c("bugsdir", "working.directory"))])
rm(tm)

# Lecture 4: Multiparameter models with R #####################################

# Introduction: Multi-parameter models
# Example: Normal distribution with unknown mean and variance ...
library(LearnBayes)

data(marathontimes)
marathontimes$times
attach(marathontimes)

mycontour(normchi2post, c(220, 330, 500, 9000), time,
          xlab = "mean", ylab = "variance")

SS <- sum((time - mean(time))^2)
n <- length(time)
sigma2 <- SS/rchisq(1000, n - 1)
mu <- rnorm(1000, mean = mean(time), sd = sqrt(sigma2)/sqrt(n))
points(mu, sigma2, col = "blue")

quantile(mu, c(0.025, 0.975))
quantile(sqrt(sigma2), c(0.025, 0.975))

# posterior of the coefficient of variation
cv.star <- sqrt(sigma2)/mu
hist(cv.star*100, breaks = 50, xlab = "coefficient of variation in %",
     freq = FALSE)
lines(density(cv.star*100), col = "blue", lwd = 2, lty = 2)
abline(v = quantile(cv.star*100, prob = c(0.025, 0.5, 0.975)),
       lty = 3, lwd = 2, col = "red")
quantile(cv.star*100, prob = c(0.025, 0.5, 0.975))

###############################################################################
# model checking
y.star <- rnorm(1000, mean = mu, sd = sqrt(sigma2))

par(mfrow = c(3, 4))
hist(time, breaks = 10, xlim = c(150, 400), main = "Original Data",
     col = "green")
for(i in 1:11){
  y.sim <- sample(y.star, 20)
  hist(y.sim, breaks = 10, xlim = c(150, 400), main = "Simulated Data",
       col = "blue")
}

# Analysis of the minimum, maximum, variability and asymmetry
min.y <- max.y <- asy1 <- asy2 <- inter.q <- rep(0, 1000)
time <- sort(time)
for (b in 1:1000){
  y.sim <- sample(y.star, 20)
  mu.star <- sample(mu, 20)
  min.y[b] <- min(y.sim)
  max.y[b] <- max(y.sim)
  y.sim <- sort(y.sim)
  # asymmetry measure for the predicted and the original data
  asy1[b] <- abs(y.sim[18] - mean(mu.star)) - abs(y.sim[2] - mean(mu.star))
  asy2[b] <- abs(time[18] - mean(mu.star)) - abs(time[2] - mean(mu.star))
  inter.q[b] <- quantile(y.sim, prob = 0.75) - quantile(y.sim, prob = 0.25)
}

par(mfrow = c(2, 2))
hist(min.y, breaks = 50, col = "blue", main = "Minimum y*")
abline(v = min(time), lwd = 3, lty = 2)
hist(max.y, breaks = 50, col = "red", main = "Maximum y*")
abline(v = max(time), lwd = 3, lty = 2)
hist(inter.q, breaks = 50, col = "magenta", main = "Variability y*")
abline(v = quantile(time, prob = 0.75) - quantile(time, prob = 0.25),
       lwd = 3, lty = 2)
plot(asy1, asy2, main = "Asymmetry",
     xlab = "Asymmetry predicted data", ylab = "Asymmetry original data")
abline(a = 0, b = 1, lwd = 2)
par(mfrow = c(1, 1))
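# Posterior predictive p-values (a rough sketch) for the checking
# statistics above; values near 0 or 1 would flag model misfit
mean(min.y <= min(time))                        # minimum
mean(max.y >= max(time))                        # maximum
mean(inter.q >= quantile(time, prob = 0.75) -
               quantile(time, prob = 0.25))     # interquartile range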
# Example of a 2x2 table and odds ratio: GREAT trial ...
library(LearnBayes)

draws <- rdirichlet(10000, c(13, 23, 150, 125))   # here a[i,j] = 1
odds <- draws[,1]*draws[,4]/(draws[,2]*draws[,3])
hist(odds, breaks = 100, xlab = "Odds Ratio", freq = FALSE)
lines(density(odds), lty = 2, lwd = 2, col = "blue")
abline(v = quantile(odds, prob = c(0.025, 0.5, 0.975)), lty = 3, col = "red")

tab.p <- rdirichlet(1000, c(1, 1, 1, 1))
odds.p <- tab.p[,1]*tab.p[,4]/(tab.p[,2]*tab.p[,3])
lines(density(odds.p), lty = 1, lwd = 2)
legend(1, 1.5, legend = c("prior odds", "posterior odds"),
       lty = c(1, 2), col = c("black", "blue"))

quantile(odds, prob = c(0.025, 0.5, 0.975))
sum(odds > 1)/10000

# Comparison with Fisher's exact test
set.seed(123)
param <- c(2, 1, 1, 2)   # parameters of the Dirichlet prior
draws <- rdirichlet(10000, c(13, 23, 150, 125) + param)
odds <- draws[,1]*draws[,4]/(draws[,2]*draws[,3])

tab.p2 <- rdirichlet(1000, param)
odds.p2 <- tab.p2[,1]*tab.p2[,4]/(tab.p2[,2]*tab.p2[,3])
lines(density(odds.p2), lty = 1, lwd = 2)
legend(1, 1.5, legend = c("prior odds", "posterior odds"),
       lty = c(1, 2), col = c("black", "blue"))

quantile(odds, prob = c(0.025, 0.5, 0.975))
sum(odds > 1)/10000
fisher.test(matrix(c(13, 23, 150, 125), nrow = 2, byrow = TRUE),
            alternative = "less")$p.value

a <- c(1, 1)
x <- c(0.5, 0.5)
# log-density of a Dirichlet (not used below)
logdirich <- function(a, data){
  d <- ddirichlet(data, a)
  log(d)
}

a <- c(2, 2)
# un-normalized log-density over two free cell probabilities
mydirich <- function(theta, data){
  a1 <- data[1]
  a2 <- data[2]
  f <- theta[1]^(1 - a1) + theta[2]^(1 - a2)
  log(f)
}

theta <- expand.grid(x = seq(0.01, 0.99, 0.1), y = seq(0.01, 0.99, 0.1))
m <- length(seq(0.01, 0.99, 0.1))
n <- dim(theta)[1]
z <- rep(0, n)
for(i in 1:n) z[i] <- mydirich(as.numeric(theta[i, ]), c(2, 2))
z <- matrix(z, byrow = TRUE, nrow = m)
contour(seq(0.01, 0.99, 0.1), seq(0.01, 0.99, 0.1), z,
        main = "Dirichlet with a=(0,-0.3,-1.4,0)", col = "blue", lwd = 2,
        xlab = expression(theta[11]), ylab = expression(theta[22]))

## bivariate Dirichlet
f <- function(x, y)
{
  r <- x^1 * y^1
  as.vector(r)
}

x <- seq(0.1, 0.2, length = 50)
y <- seq(0.1, 0.2, length = 50)
z <- outer(x, y, f)

persp(x, y, z, theta = 100, phi = 30, expand = 0.5, col = "lightblue",
      shade = 0.75, xlab = "theta11", ylab = "theta22",
      zlab = "f(theta11, theta22)")
title("Dirichlet with a11=2, a12=1, a21=1, a22=2")

image(x, y, z, col = terrain.colors(100),
      xlab = expression(theta[11]), ylab = expression(theta[22]))
contour(x, y, z, add = TRUE, col = rainbow(10), lwd = 2)
title("Dirichlet with a11=2, a12=1, a21=1, a22=2")

# Remove objects generated in this lecture ...
tm <- ls()
rm(list = tm[-which(tm %in% c("bugsdir", "working.directory"))])
rm(tm)

# Lecture 5: Introduction to WinBUGS ##########################################

# Using the R2WinBUGS package
library(R2WinBUGS)

# For Windows XP
bugsdir <- "C:/Archivos de programa/WinBUGS14"

n <- 20
y <- 15
data1 <- list("n", "y")
par1 <- c("theta", "y.pred")

cat("model
{
  y ~ dbin(theta, n)
  logit(theta) <- phi
  phi ~ dnorm(0, 0.001)
  y.pred ~ dbin(theta, n)   # making prediction
}", file = "model1.bug")

m1 <- bugs(data1, inits = NULL, par1, "model1.bug",
           n.chains = 5, n.iter = 2000, n.thin = 1,
           bugs.directory = bugsdir, working.directory = getwd(),
           clearWD = TRUE, debug = TRUE)
print(m1, digits.summary = 3)

### obtaining results from the object "m1"
class(m1)
names(m1)
m1$pD
m1$n.chains
m1$sims.array[1:10]

theta <- m1$sims.array[1:1000]
length(theta)
hist(theta, breaks = 50, prob = TRUE)
lines(density(theta), col = "blue", lwd = 2)
# End ...
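# Optional check (a sketch): grid approximation of the same posterior in
# plain R; the logit-normal prior is mapped to the theta scale with the
# change-of-variable Jacobian 1/(theta*(1-theta))
grid <- seq(0.001, 0.999, length = 1000)
prior <- dnorm(log(grid/(1 - grid)), 0, sqrt(1/0.001)) / (grid*(1 - grid))
post <- prior * dbinom(15, 20, grid)
post <- post/sum(post * (grid[2] - grid[1]))   # normalize to a density
lines(grid, post, col = "red", lwd = 2, lty = 2)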
# Remove objects generated in this lecture ...
tm <- ls()
rm(list = tm[-which(tm %in% c("bugsdir", "working.directory"))])
rm(tm)

# Lecture 6: Multivariate models with WinBUGS #################################

library(R2WinBUGS)
library(car)

# Example: Multivariate Normal toy example
# Tanner's data
Y <- matrix(c( 1,  1,
               1, -1,
              -1,  1,
              -1, -1,
               2, NA,
               2, NA,
              -2, NA,
              -2, NA,
              NA,  2,
              NA,  2,
              NA, -2,
              NA, -2), ncol = 2, byrow = TRUE)

# save the BUGS model into the "cammel.bug" file
cat("model
{
  for (i in 1:12){
    Y[i, 1:2] ~ dmnorm(mu[], tau[,])
  }
  mu[1] <- 0; mu[2] <- 0
  tau[1:2, 1:2] ~ dwish(R[,], 2)
  R[1,1] <- 0.001; R[1,2] <- 0; R[2,1] <- 0; R[2,2] <- 0.001
  Sigma2[1:2, 1:2] <- inverse(tau[,])
  rho <- Sigma2[1,2]/sqrt(Sigma2[1,1]*Sigma2[2,2])
}", file = "cammel.bug")

dat <- list("Y")
par1 <- c("rho")

m.cammel <- bugs(dat, inits = NULL, par1, model = "cammel.bug",
                 n.chains = 2, n.iter = 10000, n.thin = 1,
                 bugs.directory = bugsdir, working.directory = getwd(),
                 clearWD = TRUE, debug = TRUE, DIC = FALSE)

rho <- m.cammel$sims.array[, , "rho"]
hist(rho, breaks = 80, xlab = expression(rho))

###############################################################################
# Example: Multinomial with a non-conjugate prior

# save the BUGS model into the "popgen.bug" file
cat("model
{
  XAA[1] <- (1 - sigma)*p + sigma
  XAA[2] <- (1 - sigma)*q
  XAA[3] <- 0
  XAB[1] <- (1 - sigma)*p/2 + sigma/4
  XAB[2] <- 0.5
  XAB[3] <- (1 - sigma)*q/2 + sigma/4
  XBB[1] <- 0
  XBB[2] <- (1 - sigma)*p
  XBB[3] <- (1 - sigma)*q + sigma

  KAA <- sum(NAA[]); KAB <- sum(NAB[]); KBB <- sum(NBB[])
  NAA[1:3] ~ dmulti(XAA[], KAA)
  NAB[1:3] ~ dmulti(XAB[], KAB)
  NBB[1:3] ~ dmulti(XBB[], KBB)

  p ~ dunif(0, 1)      # uniform prior
  sigma ~ dunif(0, 1)
  q <- 1 - p
}", file = "popgen.bug")

dat <- list(NAA = c(427, 95, 0), NAB = c(108, 161, 71), NBB = c(0, 64, 74))
par1 <- c("p", "sigma")

m.popgen <- bugs(dat, inits = NULL, par1, model = "popgen.bug",
                 n.chains = 2, n.iter = 10000, n.thin = 1,
                 bugs.directory = bugsdir, working.directory = getwd(),
                 clearWD = TRUE, debug = TRUE)
print(m.popgen, digits = 3)

sigma <- m.popgen$sims.array[1:1000, 1, "sigma"]
p <- m.popgen$sims.array[1:1000, 1, "p"]

library(car)
scatterplotMatrix(~ p + sigma, diagonal = "histogram",
                  smoother = FALSE, reg.line = FALSE)
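# Quick convergence check (a sketch), assuming the coda package is
# available alongside R2WinBUGS
library(coda)
effectiveSize(p)       # effective sample size of the retained p draws
effectiveSize(sigma)
cor(p, sigma)          # posterior correlation visible in the pairs plot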
# Remove objects generated in this lecture ...
tm <- ls()
rm(list = tm[-which(tm %in% c("bugsdir", "working.directory"))])
rm(tm)

# Lecture 7: Introduction to MCMC #############################################

# Example: Gibbs sampler
# Normal distribution with unknown mean and variance (variance = 1/tau)
gibbsNormal <- function(y, mu1, tau1, N, mu0, phi0, a, b)
{
  mu <- numeric(N + 1)   # N = number of iterations
  mu[1] <- mu1           # the initial value for mu
  tau <- numeric(N + 1)
  tau[1] <- tau1         # the initial value for tau
  n <- length(y); ybar <- mean(y)
  for(i in 2:(N + 1))
  {
    # generate samples from the full conditionals
    mu[i] <- rnorm(1, (mu0*phi0 + n*ybar*tau[i-1])/(phi0 + n*tau[i-1]),
                   1/sqrt(phi0 + n*tau[i-1]))
    tau[i] <- rgamma(1, (n + 2*a)/2, (sum((y - mu[i])^2) + 2*b)/2)
  }
  cbind(mu, tau)
}

y <- c(160.2, 155.1, 135.0, 138.8, 147.0, 115.0, 113.4, 97.5, 110.3, 115.8)
res <- gibbsNormal(y, 150, 1, 500, 150, 10, 0.1, 0.1)

par(mfrow = c(2, 2))
plot(res[50:500, 1], col = "red", type = "l", ylab = "mu",
     xlab = "Iterations")
hist(res[50:500, 1], main = "Posterior for mu")
plot(1/res[50:500, 2], col = "blue", type = "l", ylab = "sigma2",
     xlab = "Iterations")
hist(1/res[50:500, 2], main = "Posterior for sigma2")
par(mfrow = c(1, 1))

###############################################################################
# Example: Mixture of Normals
# Metropolis-Hastings sampler for a mixture of normals using
# the MCMCpack package
library(MCMCpack)

dmix <- function(x, m1, s1, m2, s2, q1)
  q1*dnorm(x, mean = m1, sd = s1) + (1 - q1)*dnorm(x, mean = m2, sd = s2)

post.mix <- MCMCmetrop1R(dmix, m1 = 0, m2 = 5, s1 = 1, s2 = 1, q1 = 0.3,
                         theta.init = 1, thin = 1, mcmc = 10000,
                         burnin = 1000, logfun = FALSE)
plot(post.mix)

###############################################################################
# Example: Metropolis toy example
# Gelman and Meng (1991) kernel function

# plot the function
f <- function(x, y, A = 1, B = 0, C1 = 3, C2 = 3, log = FALSE)
{
  if (is.vector(x))
    x <- matrix(x, nrow = 1)
  r <- -0.5 * (A*x^2*y^2 + x^2 + y^2 - 2*B*x*y - 2*C1*x - 2*C2*y)
  if (!log) r <- exp(r)
  as.vector(r)
}

x <- seq(-1, 6, length = 50)
y <- seq(-1, 6, length = 50)
z <- outer(x, y, f)

# Gelman-Meng kernel
persp(x, y, z, theta = 100, phi = 30, expand = 0.5, col = "lightblue",
      shade = 0.75)
image(x, y, log(z), col = gray(seq(0, 1, len = 256)))
contour(x, y, log(z), add = TRUE, levels = seq(5, -35, -2), col = rainbow(10))

## Metropolis sampling with MCMCpack
library(MCMCpack)

## Gelman and Meng (1991) kernel function:
# write the log-kernel with the first argument as a vector
f.g.m <- function(xy, A = 1, B = 0, C1 = 3, C2 = 3)
{
  x <- xy[1]; y <- xy[2]
  r <- -0.5 * (A*x^2*y^2 + x^2 + y^2 - 2*B*x*y - 2*C1*x - 2*C2*y)
  as.vector(r)
}

# f.g.m returns the log-kernel, hence logfun = TRUE
res <- MCMCmetrop1R(f.g.m, theta.init = c(0, 1), mcmc = 10000,
                    burnin = 5000, logfun = TRUE)
points(res[,1], res[,2])

par(mfrow = c(2, 1), oma = c(0, 0, 2, 0))
plot(res[,1], type = "l", col = "blue", xlab = "iteration", ylab = "x")
plot(res[,2], type = "l", col = "red", xlab = "iteration", ylab = "y")
mtext(side = 3, outer = TRUE, cex = 1.2,
      "Trace plots for the Metropolis iterations")
par(mfrow = c(1, 1))

###############################################################################
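# For illustration (a sketch): the same sampler written by hand, a
# random-walk Metropolis with a symmetric Normal proposal; the step size
# 0.5 is an arbitrary tuning choice
set.seed(42)
B <- 5000
xy <- matrix(0, nrow = B, ncol = 2)
xy[1, ] <- c(0, 1)                               # starting value
for (b in 2:B) {
  prop <- xy[b-1, ] + rnorm(2, 0, 0.5)           # random-walk proposal
  log.alpha <- f.g.m(prop) - f.g.m(xy[b-1, ])    # log acceptance ratio
  xy[b, ] <- if (log(runif(1)) < log.alpha) prop else xy[b-1, ]
}
plot(xy[, 1], xy[, 2], pch = ".", xlab = "x", ylab = "y",
     main = "Hand-rolled random-walk Metropolis")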
# Remove objects generated in this lecture ...
tm <- ls()
rm(list = tm[-which(tm %in% c("bugsdir", "working.directory"))])
rm(tm)

# Lecture 8 ###################################################################
# Bayesian regression
# Part of this lecture is done with WinBUGS

###############################################################################
# Example: stacks (stack loss data)
Y <- c(42, 37, 37, 28, 18, 18, 19, 20, 15, 14, 14, 13, 11, 12, 8, 7, 8, 8,
       9, 15, 15)

# 21 observations on 3 predictors, listed row by row
x <- matrix(c(80, 27, 89,
              80, 27, 88,
              75, 25, 90,
              62, 24, 87,
              62, 22, 87,
              62, 23, 87,
              62, 24, 93,
              62, 24, 93,
              58, 23, 87,
              58, 18, 80,
              58, 18, 89,
              58, 17, 88,
              58, 18, 82,
              58, 19, 93,
              50, 18, 89,
              50, 18, 86,
              50, 19, 72,
              50, 19, 79,
              50, 20, 80,
              56, 20, 82,
              70, 20, 91), ncol = 3, byrow = TRUE)

pairs(cbind(Y, x))   # pairs plot

library(car)
scatterplotMatrix(cbind(Y, x), diagonal = "qqplot", lwd = 3,
                  reg.line = FALSE, smoother = FALSE)

# Bayesian analysis using WinBUGS

###############################################################################
# ANOVA
library(MASS)
data(michelson)
attach(michelson)
plot(Expt, Speed, main = "Speed of Light Data",
     xlab = "Experiment No.", ylab = "Speed")

# Remove objects generated in this lecture ...
tm <- ls()
rm(list = tm[-which(tm %in% c("bugsdir", "working.directory"))])
rm(tm)

# Lecture: Non-linear models ##################################################
# Not in the current lecture

x <- c(1.0, 1.5, 1.5, 1.5, 2.5, 4.0, 5.0, 5.0, 7.0, 8.0, 8.5, 9.0, 9.5,
       9.5, 10.0, 12.0, 12.0, 13.0, 13.0, 14.5, 15.5, 15.5, 16.5, 17.0,
       22.5, 29.0, 31.5)
y <- c(1.80, 1.85, 1.87, 1.77, 2.02, 2.27, 2.15, 2.26, 2.47, 2.19, 2.26,
       2.40, 2.39, 2.41, 2.50, 2.32, 2.32, 2.43, 2.47, 2.56, 2.65, 2.47,
       2.64, 2.56, 2.70, 2.72, 2.57)

plot(x, y, ylim = c(0, 3))

# classical non-linear least squares fit
ini.par <- c(alpha = max(y), beta = max(y) - min(y), gamma = 0.5)
ini.par
summary(fit <- nls(y ~ alpha - beta*gamma^x, start = ini.par))

theta <- coef(fit)
mu <- function(x, alpha = theta[1], beta = theta[2], gamma = theta[3])
  alpha - beta*gamma^x
curve(mu(x), from = 1, to = max(x), col = "red", lwd = 3, add = TRUE)

# Using the R2WinBUGS package
library(R2WinBUGS)

N <- 27
data1 <- list("N", "y", "x")
par1 <- c("alpha", "beta", "gamma", "sigma")

cat("model
{
  for(i in 1:N)
  {
    y[i] ~ dnorm(mu[i], tau)
    mu[i] <- alpha - beta * pow(gamma, x[i])
    res[i] <- (y[i] - mu[i])/sigma
    y.pred[i] ~ dnorm(mu[i], tau)
  }
  alpha ~ dunif(0, 5)
  beta ~ dunif(0, 5)
  gamma ~ dunif(0.01, 0.99)
  tau <- 1/(sigma*sigma)
  sigma ~ dunif(0.01, 100)   # Gelman's prior
}", file = "nonlin.bug")

nlm1 <- bugs(data1, inits = NULL, par1, "nonlin.bug",
             n.chains = 3, n.iter = 20000, n.thin = 1,
             bugs.directory = bugsdir, working.directory = getwd(),
             clearWD = TRUE, debug = TRUE)
print(nlm1, digits.summary = 3)

index <- sample(1:500)
alpha <- nlm1$sims.array[index, 1, "alpha"]
beta <- nlm1$sims.array[index, 1, "beta"]
gamma <- nlm1$sims.array[index, 1, "gamma"]
sigma <- nlm1$sims.array[index, 1, "sigma"]

library(car)
scatterplotMatrix(cbind(alpha, beta, gamma, sigma), diagonal = "histogram",
                  lwd = 3, reg.line = FALSE, smoother = FALSE)
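# Posterior uncertainty of the fitted curve (a sketch): overlay a few
# posterior draws of mu(x) on the data, next to the classical nls fit
plot(x, y, ylim = c(0, 3))
for (b in 1:50) {
  curve(alpha[b] - beta[b]*gamma[b]^x, from = 1, to = max(x),
        col = "grey", add = TRUE)
}
curve(mu(x), from = 1, to = max(x), col = "red", lwd = 3, add = TRUE)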
# Remove objects generated in this lecture ...
tm <- ls()
rm(list = tm[-which(tm %in% c("bugsdir", "working.directory"))])
rm(tm)

# Not in the current lecture: logistic regression #############################

# beetles data
x <- c(1.6907, 1.7242, 1.7552, 1.7842, 1.8113, 1.8369, 1.8610, 1.8839)
n <- c(59, 60, 62, 56, 63, 59, 62, 60)
r <- c(6, 13, 18, 28, 52, 53, 61, 60)
N <- 8

# classical fits
x.c <- x - mean(x)
summary(f.b <- glm(cbind(r, n - r) ~ x.c, family = binomial))
update(f.b, family = binomial(link = cloglog))

data.b <- c("x", "n", "r", "N")
par.b <- c("alpha", "beta")

# Using a logistic link function
cat("model
{
  for(i in 1:N)
  {
    r[i] ~ dbin(p[i], n[i])
    logit(p[i]) <- alpha + beta * (x[i] - mean(x[]))
    rhat[i] <- n[i] * p[i]
    phat[i] <- r[i]/n[i]
  }
  beta ~ dnorm(0.0, 0.001)
  alpha ~ dnorm(0.0, 0.001)
}", file = "insects1.bug")

bin.reg <- bugs(data.b, inits = NULL, par.b, "insects1.bug",
                n.chains = 1, n.iter = 20000, n.thin = 1,
                bugs.directory = bugsdir, working.directory = getwd(),
                clearWD = TRUE, debug = TRUE)
print(bin.reg)

# Using a cloglog link function
cat("model
{
  for(i in 1:N)
  {
    r[i] ~ dbin(p[i], n[i])
    cloglog(p[i]) <- alpha + beta * (x[i] - mean(x[]))
    rhat[i] <- n[i] * p[i]
    phat[i] <- r[i]/n[i]
  }
  beta ~ dnorm(0.0, 0.1)
  alpha ~ dnorm(0.0, 0.1)
}", file = "insects2.bug")

bin.reg2 <- bugs(data.b, inits = NULL, par.b, "insects2.bug",
                 n.chains = 3, n.iter = 20000, n.thin = 1,
                 bugs.directory = bugsdir, working.directory = getwd(),
                 clearWD = TRUE, debug = TRUE)

# model fit: posterior draws under the logistic link
alpha <- bin.reg$sims.array[, 1, "alpha"]
beta <- bin.reg$sims.array[, 1, "beta"]

# model fit: posterior draws under the cloglog link
alpha2 <- bin.reg2$sims.array[, 1, "alpha"]
beta2 <- bin.reg2$sims.array[, 1, "beta"]

invlogit <- function(x) 1/(1 + exp(-x))
invcloglog <- function(x) 1 - exp(-exp(x))

par(mfrow = c(1, 2))
plot(x, r/n, cex = 2, col = "red", pch = "o", type = "b",
     main = "logistic link")
for(b in 1:50){
  points(x, invlogit((alpha[b] - beta[b]*mean(x)) + beta[b]*x),
         cex = 0.5, type = "l", col = "grey", lty = 3)
}
plot(x, r/n, cex = 2, col = "red", pch = "o", type = "b",
     main = "complementary log-log link")
for(b in 1:50){
  points(x, invcloglog((alpha2[b] - beta2[b]*mean(x)) + beta2[b]*x),
         cex = 0.5, type = "l", col = "lightblue", lty = 3)
}
par(mfrow = c(1, 1))

# LD50 under the logistic link
LD50 <- -1*(alpha - beta*mean(x))/beta
mean(LD50)
sd(LD50)
quantile(LD50, prob = c(0.025, 0.5, 0.975))
hist(LD50, breaks = 80, main = "LD50")

#..............................................................................
# mixture of link functions
data.b <- c("x", "n", "r", "N")
par.b <- c("alpha", "beta", "w")

cat("model
{
  for(i in 1:N)
  {
    r[i] ~ dbin(p[i], n[i])
    p[i] <- w*q[i,1] + (1 - w)*q[i,2]
    logit(q[i,1]) <- alpha + beta * (x[i] - mean(x[]))
    cloglog(q[i,2]) <- alpha + beta * (x[i] - mean(x[]))
    rhat[i] <- n[i] * p[i]
    phat[i] <- r[i]/n[i]
  }
  beta ~ dnorm(0.0, 0.001)
  alpha ~ dnorm(0.0, 0.001)
  w ~ dbeta(1, 1)
}", file = "insect-mix.bug")

bin.reg.mix <- bugs(data.b, inits = NULL, par.b, "insect-mix.bug",
                    n.chains = 1, n.iter = 20000, n.thin = 1,
                    bugs.directory = bugsdir, working.directory = getwd(),
                    clearWD = TRUE, debug = TRUE)
print(bin.reg.mix)

#..............................................................................
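# Posterior of the link-mixture weight w (a quick look; values near 1
# favour the logistic link, values near 0 the cloglog link)
w <- bin.reg.mix$sims.array[, 1, "w"]
hist(w, breaks = 50, prob = TRUE, xlab = "w",
     main = "Posterior of the mixture weight")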
# O-rings
# distress: yes = 1, no = 0
y <- c(1,1,1, 1,0,0,0, 0,0,0,0, 0,1,1,0, 0,0,1,0, 0,0,0,0)
# temperature at launch time (in F)
x <- c(53,57,58, 63,66,67,67, 67,68,69,70, 70,70,70,72, 73,75,75,76,
       76,78,79,81)
# number of previous flights
N <- 23

# classical fits
summary(f.b1 <- glm(y ~ x, family = binomial))
summary(f.b2 <- update(f.b1, family = binomial(link = cloglog)))

jitter.binary <- function(a, jitt = 0.15){
  ifelse(a == 0, runif(length(a), 0, jitt), runif(length(a), 1 - jitt, 1))
}

y.jitter <- jitter.binary(y)
plot(x, y.jitter, xlab = "Temperature in F",
     ylab = "Prob of at least one o-ring incident")
curve(invlogit(coef(f.b1)[1] + coef(f.b1)[2]*x), lty = 1, lwd = 2,
      col = "red", add = TRUE)
curve(invcloglog(coef(f.b2)[1] + coef(f.b2)[2]*x), lty = 2, lwd = 2,
      col = "blue", add = TRUE)

#library(arm)
#sim.1 <- sim(f.b)
#for(j in 1:40){
#  curve(invlogit(sim.1$coef[j,1] + sim.1$coef[j,2]*I(x>65)),
#        col="gray", lwd=.5, add=T)
#}

# change-point model
data.b <- c("x", "y", "N")
par.b <- c("beta0", "beta1", "K")

cat("model
{
  for(i in 1:N)
  {
    y[i] ~ dbern(p[i])
    logit(p[i]) <- beta0 + beta1 * z[i]
  }
  for(i in 1:N){ z[i] <- step(x[i] - K) }

  # priors
  K ~ dunif(53, 81)
  beta0 ~ dnorm(0.0, 0.01)
  beta1 ~ dnorm(0.0, 0.01)

  # prob(a): failure under low temperatures
  p.a <- 1/(1 + exp(-1*beta0))
  # prob(b)
  y.b ~ dbin(p.b, 7)
  # prob(c)
  y.c ~ dbin(p.c, 2)
  # prob(d)
  y.d ~ dbern(p.d)

  # prob of catastrophic failure in one field joint
  p.f <- p.a*p.b*p.c*p.d
  # prob of catastrophic failure in at least one out of 6 field joints
  p.cat <- 1 - pow(1 - p.f, 6)

  # risk at K or more degrees
  p.aK <- 1/(1 + exp(-1*(beta0 + beta1)))
  p.fK <- p.aK*p.b*p.c*p.d
  p.catK <- 1 - pow(1 - p.fK, 6)

  # priors
  p.b ~ dbeta(0.5, 0.5)
  p.c ~ dbeta(0.5, 0.5)
  p.d ~ dbeta(0.5, 0.5)
}", file = "risk1.bug")

chall.1 <- bugs(data.b, inits = NULL, par.b, "risk1.bug",
                n.chains = 1, n.iter = 20000, n.thin = 1,
                bugs.directory = bugsdir, working.directory = getwd(),
                clearWD = TRUE, debug = TRUE)
print(chall.1)

par(mfrow = c(1, 2))
K <- chall.1$sims.array[, 1, "K"]
hist(K, breaks = 180, col = "lightblue")

# model fit
beta0 <- chall.1$sims.array[, 1, "beta0"]
beta1 <- chall.1$sims.array[, 1, "beta1"]
plot(x, y.jitter, xlab = "Temperature in F",
     ylab = "Prob of at least one o-ring incident", cex = 1.5)
for(i in 1:100){
  curve(invlogit(beta0[i] + beta1[i]*I(x > K[i])), col = "gray",
        lwd = 0.5, add = TRUE)
}
par(mfrow = c(1, 1))

# Risk analysis
y.b <- 2
y.c <- 1
y.d <- 0
data.r <- c("x", "y", "N", "y.b", "y.c", "y.d")
par.r <- c("beta0", "beta1", "K", "p.cat", "p.catK")

chall.2 <- bugs(data.r, inits = NULL, par.r, "risk1.bug",
                n.chains = 1, n.iter = 20000, n.thin = 1,
                bugs.directory = bugsdir, working.directory = getwd(),
                clearWD = TRUE, debug = TRUE)
print(chall.2, 3)

p.cat <- chall.2$sims.array[, 1, "p.cat"]
p.catK <- chall.2$sims.array[, 1, "p.catK"]

# odds ratio of catastrophic failure at 31°F relative to K degrees
odds.31 <- (p.cat/(1 - p.cat)) / (p.catK/(1 - p.catK))
odds.31 <- odds.31[odds.31 < 100]

par(mfrow = c(1, 2))
hist(p.cat, col = "blue", xlim = c(0, 0.8), breaks = 80,
     xlab = "Risk at 31°F")
abline(v = quantile(p.cat, prob = c(0.05, 0.5, 0.95)), lty = 2, lwd = 2)
hist(odds.31, col = "red", xlim = c(0, 60), breaks = 80,
     xlab = "Odds ratio")
abline(v = quantile(odds.31, prob = c(0.05, 0.5, 0.95)), lty = 2, lwd = 2)
par(mfrow = c(1, 1))
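# Posterior summaries of the catastrophic-failure risk (a quick numerical
# companion to the histograms above)
round(quantile(p.cat, prob = c(0.05, 0.5, 0.95)), 3)    # risk at 31°F
round(quantile(p.catK, prob = c(0.05, 0.5, 0.95)), 3)   # risk at K or more degrees
round(quantile(odds.31, prob = c(0.05, 0.5, 0.95)), 2)  # odds ratio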
# Remove objects generated in this lecture ...
tm <- ls()
rm(list = tm[-which(tm %in% c("bugsdir", "working.directory"))])
rm(tm)

# Lecture: Modelling the data collection process ##############################
# From the paper: Verde (2010)

# Example: non-informative missing process
# Sequence 1
y.true <- c(0,0,1,1,1,0,0,0,1,1,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,
            0,0,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,1,0,1,1,0,0,0,0,1,1,1,1,1,1,
            0,0,1,1,0,0,0,1,0,1,0,1,1,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,
            1,1,1,1,0,0,1)

# missing flips
set.seed(123)
index <- sample(1:100, 40, replace = FALSE)
index
y.miss <- y.true
y.miss[index] <- NA

n <- length(y.true)
mu.phi <- 0
pre.phi <- 0.1

data1 <- list("n", "y.miss", "mu.phi", "pre.phi")
par1 <- c("theta")

cat("model
{
  for(i in 1:n){
    y.miss[i] ~ dbern(theta)       # model for y
  }
  logit(theta) <- phi
  phi ~ dt(mu.phi, pre.phi, 4)     # prior for theta
}", file = "missing-flips1.bug")

m.miss <- bugs(data1, inits = NULL, par1, "missing-flips1.bug",
               n.chains = 3, n.iter = 4000, n.thin = 5,
               bugs.directory = bugsdir, working.directory = getwd(),
               clearWD = TRUE, debug = TRUE)
print(m.miss, digits.summary = 3)

# Example: non-ignorable missing process ...
# Informative missing process: the missingness probability is tied to
# theta via gamma = theta/(1 + theta)
mean(y.true)      # 0.38
0.38/(1 + 0.38)   # 0.2753623

# missing flips
set.seed(123)
index <- sample(1:100, 28, replace = FALSE)
y.miss <- y.true
y.miss[index] <- NA

I.mis <- rep(1, 100)
I.mis[!is.na(y.miss)] <- 0

data1 <- list("n", "y.miss", "I.mis", "mu.phi", "pre.phi")
par1 <- c("theta")

cat("model
{
  for(i in 1:n){
    y.miss[i] ~ dbern(theta)     # model for y
    I.mis[i] ~ dbern(gamma)      # model for the missing indicator
  }
  gamma <- theta/(1 + theta)
  logit(theta) <- phi
  phi ~ dt(mu.phi, pre.phi, 4)   # prior for theta
}", file = "missing-flips2.bug")

m.miss2 <- bugs(data1, inits = NULL, par1, "missing-flips2.bug",
                n.chains = 3, n.iter = 4000, n.thin = 5,
                bugs.directory = bugsdir, working.directory = getwd(),
                clearWD = TRUE, debug = TRUE)
print(m.miss2, digits.summary = 3)

theta.mis2 <- m.miss2$sims.array[,,"theta"]

# Repeat the analysis without the informative missingness assumption ...
data1 <- list("n", "y.miss", "mu.phi", "pre.phi")
par1 <- c("theta")

m.miss3 <- bugs(data1, inits = NULL, par1, "missing-flips1.bug",
                n.chains = 3, n.iter = 4000, n.thin = 5,
                bugs.directory = bugsdir, working.directory = getwd(),
                clearWD = TRUE, debug = TRUE)
print(m.miss3, digits.summary = 3)

theta.mis3 <- m.miss3$sims.array[,,"theta"]

# Lecture 9 ###################################################################

library(arm)

I <- 34
r <- c(2,3,1,3,2,3,2,7,3,8,7,0,15,4,0,6,0,33,4,5,2,0,8,41,24,7,46,9,23,
       53,8,3,1,23)
n <- c(4,20,5,10,2,5,8,19,6,10,24,1,30,22,1,11,1,54,9,18,12,1,11,77,51,
       16,82,13,43,75,13,10,6,37)
rain <- c(1735,1936,2000,1973,1750,1800,1750,2077,1920,1800,2050,1830,
          1650,2200,2000,1770,1920,1770,2240,1620,1756,1650,2250,1796,
          1890,1871,2063,2100,1918,1834,1780,1900,1976,2292)

rate <- r/n
rate.sd <- sqrt(rate*(1 - rate)/n)

coefplot(rate, rate.sd, CI = 1, xlim = c(0, 1), col = "blue",
         main = "Toxoplasmosis Rates: El Salvador 34 cities")

par(mfrow = c(1, 2), oma = c(0, 0, 4, 0))
plot(rain, rate, col = "red", xlab = "Amount of Rain (mm)",
     ylab = "Toxoplasmosis Rate")
for(i in 1:I){
  segments(rain[i], rate[i] - rate.sd[i], rain[i], rate[i] + rate.sd[i],
           col = "red", lwd = 2)
}
plot(n, rate, col = "red", xlab = "Sample Size",
     ylab = "Toxoplasmosis Rate")
for(i in 1:I){
  segments(n[i], rate[i] - rate.sd[i], n[i], rate[i] + rate.sd[i],
           col = "blue", lwd = 2)
}
par(mfrow = c(1, 1))
mtext(side = 3, line = 0, cex = 1.5, outer = TRUE,
      "Toxoplasmosis Rates: El Salvador 34 cities")

# using R2WinBUGS ...
library(R2WinBUGS)

N <- 34
data1 <- c("r", "n", "N")
inits1 <- NULL
par1 <- c("p")

cat("model
{
  for(i in 1:N)
  {
    a[i] ~ dnorm(mu, tau)
    logit(p[i]) <- a[i]
    r[i] ~ dbin(p[i], n[i])
  }
  # Priors
  mu ~ dnorm(0.0, 1.0E-6)
  # Choice of prior on the random-effects variance
  # Prior: uniform on the SD
  sigma ~ dunif(0, 10)
  tau <- 1/(sigma*sigma)
}", file = "toxo1.bug")

m1 <- bugs(data1, inits = inits1, parameters = par1,
           model.file = "toxo1.bug", n.chains = 1, n.iter = 2000,
           DIC = TRUE, working.directory = getwd(),
           bugs.directory = bugsdir, clearWD = TRUE, debug = TRUE)
print(m1, digits.summary = 3)

attach.bugs(m1)
post.mean <- apply(p, 2, mean)
post.sd <- apply(p, 2, sd)

coefplot(rate, rate.sd, CI = 1, xlim = c(0, 1), col = "blue",
         main = "Toxoplasmosis Rates: El Salvador 34 cities")
coefplot(post.mean, post.sd, CI = 1, xlim = c(0, 1), col = "red",
         add = TRUE)

# Shrinkage
x <- gl(2, 34, labels = c("ML", "Bayes"))
y <- c(rate, post.mean)
plot(x, y, type = "n", main = "Shrinkage effect")
for(i in 1:34) segments(1, rate[i], 2, post.mean[i], lty = 3,
                        col = "blue", lwd = 2)
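# The Bayes estimates are pulled toward the overall mean, so they should
# be less dispersed than the raw ML rates (a quick check)
round(c(sd.raw = sd(rate), sd.bayes = sd(post.mean)), 3)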
# New script 2 ................................................................
N <- 34
data2 <- c("r", "n", "N", "rain")
inits2 <- NULL
par2 <- c("beta1", "beta2")

cat("model
{
  # Model
  # log(n)
  for(i in 1:N){ log.n[i] <- log(n[i]) }
  for(i in 1:N)
  {
    a[i] ~ dnorm(alpha, tau)
    logit(p[i]) <- a[i] + beta1 * (rain[i] - mean(rain[])) +
                   beta2 * (log.n[i] - mean(log.n[]))
    r[i] ~ dbin(p[i], n[i])
  }
  # Priors
  # precision
  tau <- 1/(sigma*sigma)
  sigma ~ dunif(0.01, 5)
  # average logit rate across cities, adjusted by rain
  alpha ~ dnorm(0, 0.01)
  # rain slope
  beta1 ~ dnorm(0, 0.01)
  # sample-size slope
  beta2 ~ dnorm(0, 0.01)
}", file = "toxo2.bug")

m2 <- bugs(data2, inits = inits2, parameters = par2,
           model.file = "toxo2.bug", n.chains = 3, n.iter = 20000,
           DIC = TRUE, working.directory = getwd(),
           bugs.directory = bugsdir, clearWD = TRUE, debug = TRUE)
print(m2, digits.summary = 3)

attach.bugs(m2)

par(mfrow = c(1, 2))
hist(beta1, breaks = 40, prob = TRUE, main = "Posterior for Beta_1")
lines(density(beta1), lwd = 2, col = "blue")
abline(v = 0, lwd = 2, lty = 2)
hist(beta2, breaks = 40, prob = TRUE, main = "Posterior for Beta_2")
lines(density(beta2), lwd = 2, col = "red")
abline(v = 0, lwd = 2, lty = 2)
par(mfrow = c(1, 1))

# Remove objects generated in this lecture ...
tm <- ls()
rm(list = tm[-which(tm %in% c("bugsdir", "working.directory"))])
rm(tm)

#..............................................................................
# The End!
# Good luck!