# Dr. Pablo Emilio Verde
# July 2015
# Barcelona Summer School

# Lecture 1
# Practical 1:
# Deterministic analysis ..................................................
theta <- c(0.25, 0.5, 0.75)
prior <- rep(1/3, 3)
#prior <- prior/sum(prior)

plot(theta, prior, type = "h", ylim = c(0, 0.6), ylab = "Probability",
     xlab = expression(theta))

# Bernoulli likelihood for a single observation x
lik <- function(theta, x) { theta^x * (1 - theta)^(1 - x) }
lik(theta, 1)

# rescaled likelihood for one success (x = 1)
curve(lik(x, 1)/sum(lik(theta, 1)), from = 0, to = 1, add = TRUE,
      col = "red", lwd = 2, lty = 2)

product <- prior * lik(theta, 1)
product
posterior <- product / sum(product)
points(theta, posterior, col = "blue", cex = 2, pch = 19)
legend(0.3, 0.5, col = c("black", "red", "blue"),
       legend = c("prior", "likelihood", "posterior"), lty = c(1, 2, 1))
posterior

# Extra material ..........................................................
# Three coins simulation example ..........................................
set.seed(1111234)
coin.pri <- rep(1/3, 3)       # prior for coins 1, 2, 3
theta <- c(0.25, 0.5, 0.75)   # pr. of heads under coins 1, 2, 3
sim <- 100000                 # number of simulations
y <- rep(0, 3*sim)
dim(y) <- c(sim, 3)
for(i in 1:sim)
{
  ind <- sample(c(1, 2, 3), size = 1, prob = coin.pri, replace = TRUE)
  y.new <- rbinom(1, 1, prob = theta[ind])
  y[i, ind] <- y.new
}
# average each class and normalize
apply(y, 2, mean) / sum(apply(y, 2, mean))

# Exercise 2: ..............................................................
# Beta flexibility
par(mfrow = c(2, 2))
curve(dbeta(x, 1, 1), from = 0, to = 1, lty = 1, lwd = 2,
      main = "Beta(1, 1)", xlab = expression(theta))
curve(dbeta(x, 1/2, 1/2), from = 0, to = 1, lty = 2, lwd = 2, col = "red",
      main = "Beta(1/2, 1/2)", xlab = expression(theta))
curve(dbeta(x, 2, 5), from = 0, to = 1, lty = 2, lwd = 2, col = "blue",
      main = "Beta(2, 5)", xlab = expression(theta))
curve(dbeta(x, 5, 2), from = 0, to = 1, lty = 2, lwd = 2, col = "green",
      main = "Beta(5, 2)", xlab = expression(theta))
par(mfrow = c(1, 1))

# Lecture 2: R script ........................................................
# Consider an early investigation of a new drug.
# Experience with similar compounds has suggested that response rates between
# 0.2 and 0.6 could be feasible.
# Interpret this as a distribution with mean = 0.4 and standard deviation 0.1.
# A Beta(9.2, 13.8) distribution has these properties.
# Suppose we treat n = 20 volunteers with the compound and observe r = 15
# positive responses.
# Then we update the prior, and the posterior is Beta(15 + 9.2, 5 + 13.8).
##################################################################################
par(mfrow = c(3, 1))   # graphical output: 3 by 1 panels .....................
# draw a curve for the beta prior density
curve(dbeta(x, 9.2, 13.8), from = 0, to = 1,
      xlab = "prob of success", main = "Prior")
# draw a curve for the binomial likelihood
curve(dbinom(15, 20, x), from = 0, to = 1, col = "blue",
      xlab = "prob of success", main = "Likelihood")
# draw the posterior
curve(dbeta(x, 24.2, 18.8), from = 0, to = 1, col = "red",
      xlab = "prob of success", main = "Posterior")
par(mfrow = c(1, 1))

# ..........................................................................
theta.star <- rbeta(20000, 24.2, 18.8)
odds.star <- theta.star/(1 - theta.star)
quantile(odds.star, prob = c(0.05, 0.5, 0.75, 0.95))
hist(odds.star, breaks = 100, xlab = "odds ratio", freq = FALSE, xlim = c(0, 4))
lines(density(odds.star), lwd = 2, lty = 1, col = "red")
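# Sanity check (a sketch, not in the original practical): theta/(1 - theta)
# is monotone in theta, so the simulated odds quantiles should match the
# transformed Beta quantiles almost exactly.
q.theta <- qbeta(c(0.05, 0.5, 0.75, 0.95), 24.2, 18.8)
q.theta/(1 - q.theta)   # compare with quantile(odds.star, ...) above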
# Practical 2: conjugate analysis ............................................
# 1) Posterior mean and median
# The posterior mean is (r + a) / (n + a + b):
(15 + 9.2) / (20 + 9.2 + 13.8)
# The posterior median is:
qbeta(0.5, 24.2, 18.8)

# 2) Posterior percentiles:
qbeta(0.025, 24.2, 18.8)   #  2.5%
qbeta(0.975, 24.2, 18.8)   # 97.5%

# Lecture 3: R script .........................................................
# Priors
## predictive prior distribution for sample size and power
set.seed(123)
theta <- rnorm(10000, 0.5, 0.1)
sigma <- rnorm(10000, 1, 0.3)
sigma <- ifelse(sigma < 0, -1*sigma, sigma)    # reflect negative draws
# sample size for 80% power (z = 0.84) and a 5% two-sided test (z = 1.96)
n <- 2*sigma^2 / (theta^2) * (0.84 + 1.96)^2
# power for a fixed total sample size of 63
pow <- pnorm(sqrt(63/2) * theta/sigma - 1.96)
par(mfrow = c(1, 2))
hist(n, xlim = c(0, 400), breaks = 50)
hist(pow)
par(mfrow = c(1, 1))
round(quantile(n), 2)
round(quantile(pow), 2)
sum(pow < 0.7)/10000

# mixture of betas
# betabin() gives the beta-binomial marginal likelihood of r successes in n
# trials under a Beta(a, b) prior; it is used by mixbeta() below but was
# missing from the script:
betabin <- function(r, a, b, n)
{
  choose(n, r) * beta(a + r, b + n - r) / beta(a, b)
}
mixbeta <- function(x, r, a1, b1, a2, b2, q, n)
{
  # posterior weight of the first mixture component
  qstar <- q*betabin(r, a1, b1, n) /
           (q*betabin(r, a1, b1, n) + (1 - q)*betabin(r, a2, b2, n))
  p1 <- dbeta(x, a1 + r, n - r + b1)
  p2 <- dbeta(x, a2 + r, n - r + b2)
  posterior <- qstar*p1 + (1 - qstar)*p2
  posterior
}

par(mfrow = c(2, 1))
# informative beta prior
curve(dbeta(x, 9.2, 13.8), from = 0, to = 1, col = "red",
      xlab = "probability of response", main = "priors")
# mixture beta prior with 80% informative and 20% flat
curve(0.8*dbeta(x, 9.2, 13.8) + 0.2*dbeta(x, 1, 1), from = 0, to = 1,
      col = "blue", add = TRUE)
# beta posterior
curve(dbeta(x, 24.2, 18.8), from = 0, to = 1, col = "red",
      xlab = "probability of response", main = "posteriors")
# posterior from a mixture prior
curve(mixbeta(x, r = 15, a1 = 9.2, b1 = 13.8, a2 = 1, b2 = 1, q = 0.8, n = 20),
      from = 0, to = 1, col = "blue", add = TRUE)
par(mfrow = c(1, 1))

# Lecture 4: WinBUGS ..........................................................
# load R2WinBUGS package
library(R2WinBUGS)

# Write the model in BUGS code ...............................................
cat("
model
{
 # WinBUGS code: binary problem, non-conjugate analysis
  y ~ dbin(theta, n)
  logit(theta) <- phi
  phi ~ dnorm(0, 0.001)
  y.pred ~ dbin(theta, n)   # making prediction
}
", file = "model1.bug")

# define your data nodes
n <- 20
y <- 15
data1 <- list("n", "y")

# define parameters of interest
par1 <- c("theta", "y.pred")

# initial values
my.inits <- function(){
  list(phi = rnorm(1, 0, 10), y.pred = rbinom(1, 20, 15/20))
}

# setup OpenBUGS directory
bugsdir <- "C:/OpenBUGS/OpenBUGS323"

# run the bugs() function:
m1 <- bugs(data1, inits = NULL, parameters.to.save = par1,
           program = "OpenBUGS", model.file = "model1.bug",
           n.chains = 2, n.iter = 2000, n.thin = 1, useWINE = TRUE,
           bugs.directory = bugsdir, debug = TRUE, clearWD = TRUE)

# setup WinBUGS directory
bugsdir <- "C:/WinBUGS14"
m1 <- bugs(data1, inits = NULL, par1, "model1.bug",
           n.chains = 5, n.iter = 2000, n.thin = 1,
           bugs.directory = bugsdir, working.directory = getwd(),
           clearWD = TRUE, debug = TRUE)

print(m1, digits.summary = 3)
class(m1)
names(m1)
m1$pD
m1$n.chains
m1$sims.array[1:10, , "theta"]
m1$sims.array[1:10, , "y.pred"]

theta <- m1$sims.array[1:1000, , "theta"]
hist(theta, breaks = 50, prob = TRUE)
lines(density(theta), col = "blue", lwd = 2)

# Using the attach.bugs() function ...
attach.bugs(m1)   # makes the saved nodes (theta, y.pred) available

# posterior predictive ...
post.pred <- table(y.pred)
plot(post.pred)
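# Aside (a sketch, not in the original script): what does the vague normal
# prior phi ~ dnorm(0, 0.001) imply on the probability scale? Pushing prior
# draws through the inverse logit shows the induced prior on theta.
phi.prior <- rnorm(10000, 0, sqrt(1/0.001))   # precision 0.001 -> sd = sqrt(1000)
theta.prior <- plogis(phi.prior)              # inverse-logit transform
hist(theta.prior, breaks = 50, main = "Implied prior on theta",
     xlab = expression(theta))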
# Lecture 5: Multiple parameters ..............................................
# Example: Normal distribution with unknown mean and variance
# Marathon times with R, non-informative analysis .............................
library(LearnBayes)
data(marathontimes)
marathontimes$time
attach(marathontimes)
mycontour(normchi2post, c(220, 330, 500, 9000), time,
          xlab = "mean", ylab = "variance")
SS <- sum((time - mean(time))^2)
n <- length(time)
sigma2 <- SS/rchisq(1000, n - 1)
mu <- rnorm(1000, mean = mean(time), sd = sqrt(sigma2)/sqrt(n))
points(mu, sigma2, col = "blue")
quantile(mu, c(0.025, 0.975))
quantile(sqrt(sigma2), c(0.025, 0.975))

# posterior of the coefficient of variation
cv.star <- sqrt(sigma2)/mu
hist(cv.star*100, breaks = 50, xlab = "coefficient of variation in %",
     freq = FALSE)
lines(density(cv.star*100), col = "blue", lwd = 2, lty = 2)
abline(v = quantile(cv.star*100, prob = c(0.025, 0.5, 0.975)),
       lty = 3, lwd = 2, col = "red")
quantile(cv.star*100, prob = c(0.025, 0.5, 0.975))

# Model checking ..............................................................
y.star <- rnorm(1000, mean = mu, sd = sqrt(sigma2))
par(mfrow = c(3, 4))
hist(time, breaks = 10, xlim = c(150, 400), main = "Original Data", col = "green")
for(i in 1:11){
  y.sim <- sample(y.star, 20)
  hist(y.sim, breaks = 10, xlim = c(150, 400), main = "Simulated Data", col = "blue")
}

# Analysis of the minimum, maximum, variability and asymmetry
min.y <- max.y <- asy1 <- asy2 <- inter.q <- rep(0, 1000)
time <- sort(time)
for (b in 1:1000){
  y.sim <- sample(y.star, 20)
  mu.star <- sample(mu, 20)
  min.y[b] <- min(y.sim)
  max.y[b] <- max(y.sim)
  y.sim <- sort(y.sim)
  # predicted vs. observed asymmetry measures
  asy1[b] <- abs(y.sim[18] - mean(mu.star)) - abs(y.sim[2] - mean(mu.star))
  asy2[b] <- abs(time[18] - mean(mu.star)) - abs(time[2] - mean(mu.star))
  inter.q[b] <- quantile(y.sim, prob = 0.75) - quantile(y.sim, prob = 0.25)
}
par(mfrow = c(2, 2))
hist(min.y, breaks = 50, col = "blue", main = "Minimum y*")
abline(v = min(time), lwd = 3, lty = 2)
hist(max.y, breaks = 50, col = "red", main = "Maximum y*")
abline(v = max(time), lwd = 3, lty = 2)
hist(inter.q, breaks = 50, col = "magenta", main = "Variability y*")
abline(v = quantile(time, prob = 0.75) - quantile(time, prob = 0.25),
       lwd = 3, lty = 2)
plot(asy1, asy2, main = "Asymmetry", xlab = "Asymmetry predicted data",
     ylab = "Asymmetry original data")
abline(a = 0, b = 1, lwd = 2)
par(mfrow = c(1, 1))

# Marathon times with R2WinBUGS ...............................................
# The IAAF world record for men is 2:02:57
# setup WinBUGS directory
bugsdir <- "C:/WinBUGS14"
library(R2WinBUGS)
library(LearnBayes)
y <- marathontimes$time
n <- length(y)
dat.marathon <- list("y", "n")               # Data
par.marathon <- c("mu", "sigma", "y.pred")   # Nodes

# model in BUGS
cat("
model
{
 # Non-informative priors
  mu ~ dnorm(0, 0.0001)
  tau ~ dgamma(0.0001, 0.0001)
  sigma <- pow(tau, -0.5)
 # Data model
  for(i in 1:n)
  {
  # y[i] ~ dnorm(mu, tau)            # Normal likelihood
    y[i] ~ dnorm(mu, tau) I(115, )   # Truncated normal likelihood
  # y[i] ~ dt(mu, tau, nu)           # t likelihood with nu df
  }
  y.pred ~ dnorm(mu, tau) I(115, )
}", file = "marathon.bug")

inits.marathon <- list(
  list(mu = 250, tau = 10, y.pred = 200),
  list(mu = 150, tau = 20, y.pred = 300))

m.marathon <- bugs(dat.marathon, inits = inits.marathon, par.marathon,
                   model = "marathon.bug", n.chains = 2, n.iter = 10000,
                   n.thin = 2, bugs.directory = bugsdir,
                   debug = TRUE, DIC = FALSE, clearWD = TRUE)
print(m.marathon)
attach.bugs(m.marathon)
index <- sample(1:5000, size = 1000)
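# A predictive use of the record quoted above (a sketch, not in the original;
# it assumes the times are in minutes, so 2:02:57 is about 122.95 minutes):
# the posterior predictive probability that a future runner from this
# population beats the world record.
sum(y.pred < 122.95)/length(y.pred)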
# Model checking ..............................................................
#y.star <- rnorm(1000, mean = mu[index], sd = sigma[index])
y.star <- y.pred[index]
par(mfrow = c(3, 4))
hist(time, breaks = 10, xlim = c(150, 400), main = "Original Data", col = "green")
for(i in 1:11){
  y.sim <- sample(y.star, 20)
  hist(y.sim, breaks = 10, xlim = c(150, 400), main = "Simulated Data", col = "blue")
}

# Analysis of the minimum, maximum, variability and asymmetry
min.y <- max.y <- asy1 <- asy2 <- inter.q <- rep(0, 1000)
time <- sort(time)
for (b in 1:1000){
  y.sim <- sample(y.star, 20)
  mu.star <- sample(mu, 20)
  min.y[b] <- min(y.sim)
  max.y[b] <- max(y.sim)
  y.sim <- sort(y.sim)
  # predicted vs. observed asymmetry measures
  asy1[b] <- abs(y.sim[18] - mean(mu.star)) - abs(y.sim[2] - mean(mu.star))
  asy2[b] <- abs(time[18] - mean(mu.star)) - abs(time[2] - mean(mu.star))
  inter.q[b] <- quantile(y.sim, prob = 0.75) - quantile(y.sim, prob = 0.25)
}
par(mfrow = c(2, 2))
hist(min.y, breaks = 50, col = "blue", main = "Minimum y*")
abline(v = min(time), lwd = 3, lty = 2)
hist(max.y, breaks = 50, col = "red", main = "Maximum y*")
abline(v = max(time), lwd = 3, lty = 2)
hist(inter.q, breaks = 50, col = "magenta", main = "Variability y*")
abline(v = quantile(time, prob = 0.75) - quantile(time, prob = 0.25),
       lwd = 3, lty = 2)
plot(asy1, asy2, main = "Asymmetry", xlab = "Asymmetry predicted data",
     ylab = "Asymmetry original data", cex = 0.1)
abline(a = 0, b = 1, lwd = 2)
par(mfrow = c(1, 1))

# Dirichlet priors for multinomial models .....................................
# Example of a 2x2 table and odds ratio: GREAT trial ...
library(LearnBayes)
# posterior Dirichlet: cell counts plus prior a[i, j] = 1
draws <- rdirichlet(10000, c(13, 23, 150, 125))
odds <- draws[, 1]*draws[, 4]/(draws[, 2]*draws[, 3])
hist(odds, breaks = 100, xlab = "Odds Ratio", freq = FALSE)
lines(density(odds), lty = 2, lwd = 2, col = "blue")
abline(v = quantile(odds, prob = c(0.025, 0.5, 0.975)), lty = 3, col = "red")
tab.p <- rdirichlet(1000, c(1, 1, 1, 1))
odds.p <- tab.p[, 1]*tab.p[, 4]/(tab.p[, 2]*tab.p[, 3])
lines(density(odds.p), lty = 1, lwd = 2)
legend(1, 1.5, legend = c("prior odds", "posterior odds"),
       lty = c(1, 2), col = c("black", "blue"))
quantile(odds, prob = c(0.025, 0.5, 0.975))
sum(odds > 1)/10000
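# For reference (not in the original): the crude sample odds ratio from the
# 2x2 counts, which should be close to the posterior median computed above.
(13*125)/(23*150)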
# Fisher exact test ...........................................................
set.seed(123)
param <- c(2, 1, 1, 2)   # parameters of the Dirichlet prior
draws <- rdirichlet(10000, c(13, 23, 150, 125) + param)   # counts + prior
odds <- draws[, 1]*draws[, 4]/(draws[, 2]*draws[, 3])
tab.p2 <- rdirichlet(1000, param)
odds.p2 <- tab.p2[, 1]*tab.p2[, 4]/(tab.p2[, 2]*tab.p2[, 3])
lines(density(odds.p2), lty = 1, lwd = 2)
legend(1, 1.5, legend = c("prior odds", "posterior odds"),
       lty = c(1, 2), col = c("black", "blue"))
quantile(odds, prob = c(0.025, 0.5, 0.975))
sum(odds > 1)/10000
fisher.test(matrix(c(13, 23, 150, 125), nrow = 2, byrow = TRUE),
            alternative = "less")$p.value

# Visualizing Dirichlet densities .............................................
a <- c(1, 1)
x <- c(0.5, 0.5)
# log-density of a Dirichlet; ddirichlet() is in MCMCpack (or gtools)
logdirich <- function(a, data){
  d <- ddirichlet(data, a)
  log(d)
}
a <- c(2, 2)
# log of the Dirichlet kernel: theta1^(a1 - 1) * theta2^(a2 - 1)
mydirich <- function(theta, a){
  f <- theta[1]^(a[1] - 1) * theta[2]^(a[2] - 1)
  log(f)
}
theta <- expand.grid(x = seq(0.01, 0.99, 0.1), y = seq(0.01, 0.99, 0.1))
m <- length(seq(0.01, 0.99, 0.1))
n <- dim(theta)[1]
z <- rep(0, n)
for(i in 1:n) z[i] <- mydirich(as.numeric(theta[i, ]), c(2, 2))
z <- matrix(z, byrow = TRUE, nrow = m)
contour(seq(0.01, 0.99, 0.1), seq(0.01, 0.99, 0.1), z,
        main = "Dirichlet kernel with a = (2, 2)", col = "blue", lwd = 2,
        xlab = expression(theta[11]), ylab = expression(theta[22]))

## bivariate Dirichlet kernel
f <- function(x, y)
{
  #if (is.vector(x))
  #  x <- matrix(x, nrow = 1)
  r <- x^1 * y^1   # kernel with a11 = a22 = 2
  as.vector(r)
}
x <- seq(0.1, 0.2, length = 50)
y <- seq(0.1, 0.2, length = 50)
z <- outer(x, y, f)
persp(x, y, z, theta = 100, phi = 30, expand = 0.5, col = "lightblue",
      shade = 0.75, xlab = "theta11", ylab = "theta22",
      zlab = "f(theta11, theta22)")
title("Dirichlet with a11=2, a12=1, a21=1, a22=2")
image(x, y, z, col = terrain.colors(100),
      xlab = expression(theta[11]), ylab = expression(theta[22]))
contour(x, y, z, add = TRUE, col = rainbow(10), lwd = 2)
title("Dirichlet with a11=2, a12=1, a21=1, a22=2")

# Multivariate models with WinBUGS ############################################
# Example: multivariate normal toy example
# Tanner's data
Y <- matrix(c( 1,  1,
               1, -1,
              -1,  1,
              -1, -1,
               2, NA,
               2, NA,
              -2, NA,
              -2, NA,
              NA,  2,
              NA,  2,
              NA, -2,
              NA, -2), ncol = 2, byrow = TRUE)

# save BUGS model into the "cammel.bug" file
cat("model
{
  for (i in 1:12){
    Y[i, 1:2] ~ dmnorm(mu[], tau[ , ])
  }
  mu[1] <- 0; mu[2] <- 0
  tau[1:2, 1:2] ~ dwish(R[ , ], 2)
  R[1, 1] <- 0.001; R[1, 2] <- 0; R[2, 1] <- 0; R[2, 2] <- 0.001
  Sigma2[1:2, 1:2] <- inverse(tau[ , ])
  rho <- Sigma2[1, 2] / sqrt(Sigma2[1, 1] * Sigma2[2, 2])
}", file = "cammel.bug")

dat <- list("Y")
par1 <- c("rho")
m.cammel <- bugs(dat, inits = NULL, par1, model = "cammel.bug",
                 n.chains = 2, n.iter = 10000, n.thin = 1,
                 bugs.directory = bugsdir, working.directory = getwd(),
                 clearWD = TRUE, debug = TRUE, DIC = FALSE)
rho <- m.cammel$sims.array[, , "rho"]
hist(rho, breaks = 80, xlab = expression(rho))
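# For comparison (a sketch, not in the original): the complete-case sample
# correlation uses only the first four rows of Y and equals zero, while the
# partially observed rows pull the posterior of rho towards -1 and +1, which
# is why the histogram above is typically bimodal.
cor(Y[1:4, 1], Y[1:4, 2])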
###############################################################################
# Example: complex multinomial with non-conjugate analysis
# save BUGS model into the "popgen.bug" file
cat("model
{
  XAA[1] <- (1 - sigma)*p + sigma
  XAA[2] <- (1 - sigma)*q
  XAA[3] <- 0
  XAB[1] <- (1 - sigma)*p/2 + sigma/4
  XAB[2] <- 0.5
  XAB[3] <- (1 - sigma)*q/2 + sigma/4
  XBB[1] <- 0
  XBB[2] <- (1 - sigma)*p
  XBB[3] <- (1 - sigma)*q + sigma
  KAA <- sum(NAA[]); KAB <- sum(NAB[]); KBB <- sum(NBB[])
  NAA[1:3] ~ dmulti(XAA[], KAA)
  NAB[1:3] ~ dmulti(XAB[], KAB)
  NBB[1:3] ~ dmulti(XBB[], KBB)
  p ~ dunif(0, 1)   # uniform prior
  sigma ~ dunif(0, 1)
  q <- 1 - p
}", file = "popgen.bug")

dat <- list(NAA = c(427, 95, 0), NAB = c(108, 161, 71), NBB = c(0, 64, 74))
par1 <- c("p", "sigma")
m.popgen <- bugs(dat, inits = NULL, par1, model = "popgen.bug",
                 n.chains = 2, n.iter = 10000, n.thin = 1,
                 bugs.directory = bugsdir, working.directory = getwd(),
                 clearWD = TRUE, debug = TRUE)
print(m.popgen, digits = 3)
sigma <- m.popgen$sims.array[1:1000, 1, "sigma"]
p <- m.popgen$sims.array[1:1000, 1, "p"]
library(car)
scatterplotMatrix(~ p + sigma, diagonal = "histogram", smoother = FALSE,
                  reg.line = FALSE)

# Remove objects generated in this lecture ...
tm <- ls()
rm(list = tm[-which(tm %in% c("bugsdir", "working.directory"))])
rm(tm)

# Practical: ..................................................................
# Exercise 1:
# Normal model for the observations and predictive model checking ...
library(R2WinBUGS)
library(LearnBayes)
# Change the data for unusual runners ...
marathontimes$time[1:5] <- rnorm(5, mean = 130, sd = 3)
y <- marathontimes$time
n <- length(y)
dat.marathon <- list("y", "n")               # Data
par.marathon <- c("mu", "sigma", "y.pred")   # Nodes

# model in BUGS
cat("
model
{
 # Non-informative priors
  mu ~ dnorm(0, 0.0001)
  tau ~ dgamma(0.0001, 0.0001)
  sigma <- pow(tau, -0.5)
 # Data model
  for(i in 1:n)
  {
    y[i] ~ dnorm(mu, tau)              # Normal likelihood
  # y[i] ~ dnorm(mu, tau) I(115, )     # Truncated normal likelihood
  # y[i] ~ dt(mu, tau, nu)             # t likelihood with nu df
  }
  y.pred ~ dnorm(mu, tau) I(115, )
}", file = "marathon.bug")

inits.marathon <- list(
  list(mu = 250, tau = 10, y.pred = 200),
  list(mu = 150, tau = 20, y.pred = 300))

m.marathon <- bugs(dat.marathon, inits = inits.marathon, par.marathon,
                   model = "marathon.bug", n.chains = 2, n.iter = 10000,
                   n.thin = 2,
                   bugs.directory = "/home/pablik/.wine/drive_c/WinBUGS14",
                   working.directory = "/home/pablik/Documents/2015/BHM/R_WinBUGS",
                   debug = TRUE, DIC = FALSE, clearWD = TRUE)
print(m.marathon)
attach.bugs(m.marathon)
index <- sample(1:5000, size = 1000)

# Model checking ..............................................................
#y.star <- rnorm(1000, mean = mu[index], sd = sigma[index])
y.star <- y.pred[index]
par(mfrow = c(3, 4))
hist(y, breaks = 10, xlim = c(150, 400), main = "Original Data", col = "green")
for(i in 1:11){
  y.sim <- sample(y.star, 20)
  hist(y.sim, breaks = 10, xlim = c(150, 400), main = "Simulated Data", col = "blue")
}

# Analysis of the minimum, maximum, variability and asymmetry
time <- y
min.y <- max.y <- asy1 <- asy2 <- inter.q <- rep(0, 1000)
time <- sort(time)
for (b in 1:1000){
  y.sim <- sample(y.star, 20)
  mu.star <- sample(mu, 20)
  min.y[b] <- min(y.sim)
  max.y[b] <- max(y.sim)
  y.sim <- sort(y.sim)
  # predicted vs. observed asymmetry measures
  asy1[b] <- abs(y.sim[18] - mean(mu.star)) - abs(y.sim[2] - mean(mu.star))
  asy2[b] <- abs(time[18] - mean(mu.star)) - abs(time[2] - mean(mu.star))
  inter.q[b] <- quantile(y.sim, prob = 0.75) - quantile(y.sim, prob = 0.25)
}
par(mfrow = c(2, 2))
hist(min.y, breaks = 50, col = "blue", main = "Minimum y*")
abline(v = min(time), lwd = 3, lty = 2)
hist(max.y, breaks = 50, col = "red", main = "Maximum y*")
abline(v = max(time), lwd = 3, lty = 2)
hist(inter.q, breaks = 50, col = "magenta", main = "Variability y*")
abline(v = quantile(time, prob = 0.75) - quantile(time, prob = 0.25),
       lwd = 3, lty = 2)
plot(asy1, asy2, main = "Asymmetry", xlab = "Asymmetry predicted data",
     ylab = "Asymmetry original data", cex = 0.1)
abline(a = 0, b = 1, lwd = 2)
par(mfrow = c(1, 1))
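# Numerical summaries of the checks above (a sketch, not in the original):
# posterior predictive p-values for the minimum and the maximum. Values
# close to 0 or 1 flag a conflict between the model and the data.
sum(min.y <= min(time))/length(min.y)
sum(max.y >= max(time))/length(max.y)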
# Use the t-distribution with nu = 4 df for outliers and repeat the
# predictive model checking ...
# model in BUGS
cat("
model
{
 # Non-informative priors
  mu ~ dnorm(0, 0.0001)
  tau ~ dgamma(0.0001, 0.0001)
  sigma <- pow(tau, -0.5)
  nu <- 4
 # Data model
  for(i in 1:n)
  {
  # y[i] ~ dnorm(mu, tau)             # Normal likelihood
  # y[i] ~ dnorm(mu, tau) I(115, )    # Truncated normal likelihood
    y[i] ~ dt(mu, tau, nu)            # t likelihood with nu df
  }
  y.pred ~ dnorm(mu, tau) I(115, )
}", file = "marathon.bug")

inits.marathon <- list(
  list(mu = 250, tau = 10, y.pred = 200),
  list(mu = 150, tau = 20, y.pred = 300))

m.marathon <- bugs(dat.marathon, inits = inits.marathon, par.marathon,
                   model = "marathon.bug", n.chains = 2, n.iter = 10000,
                   n.thin = 2,
                   bugs.directory = "/home/pablik/.wine/drive_c/WinBUGS14",
                   working.directory = "/home/pablik/Documents/2015/BHM/R_WinBUGS",
                   debug = TRUE, DIC = FALSE, clearWD = TRUE)
print(m.marathon)
attach.bugs(m.marathon)
index <- sample(1:5000, size = 1000)

# Model checking ..............................................................
#y.star <- rnorm(1000, mean = mu[index], sd = sigma[index])
y.star <- y.pred[index]
par(mfrow = c(3, 4))
hist(y, breaks = 10, xlim = c(150, 400), main = "Original Data", col = "green")
for(i in 1:11){
  y.sim <- sample(y.star, 20)
  hist(y.sim, breaks = 10, xlim = c(150, 400), main = "Simulated Data", col = "blue")
}

# Analysis of the minimum, maximum, variability and asymmetry
time <- y
min.y <- max.y <- asy1 <- asy2 <- inter.q <- rep(0, 1000)
time <- sort(time)
for (b in 1:1000){
  y.sim <- sample(y.star, 20)
  mu.star <- sample(mu, 20)
  min.y[b] <- min(y.sim)
  max.y[b] <- max(y.sim)
  y.sim <- sort(y.sim)
  # predicted vs. observed asymmetry measures
  asy1[b] <- abs(y.sim[18] - mean(mu.star)) - abs(y.sim[2] - mean(mu.star))
  asy2[b] <- abs(time[18] - mean(mu.star)) - abs(time[2] - mean(mu.star))
  inter.q[b] <- quantile(y.sim, prob = 0.75) - quantile(y.sim, prob = 0.25)
}
par(mfrow = c(2, 2))
hist(min.y, breaks = 50, col = "blue", main = "Minimum y*")
abline(v = min(time), lwd = 3, lty = 2)
hist(max.y, breaks = 50, col = "red", main = "Maximum y*")
abline(v = max(time), lwd = 3, lty = 2)
hist(inter.q, breaks = 50, col = "magenta", main = "Variability y*")
abline(v = quantile(time, prob = 0.75) - quantile(time, prob = 0.25),
       lwd = 3, lty = 2)
plot(asy1, asy2, main = "Asymmetry", xlab = "Asymmetry predicted data",
     ylab = "Asymmetry original data", cex = 0.1)
abline(a = 0, b = 1, lwd = 2)
par(mfrow = c(1, 1))

# Exercise 2: (completed in the sketch below)
diff <- draws[, 1] - draws[, 2]
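# A minimal completion of Exercise 2 (a sketch): 'draws' was removed by the
# clean-up above, so re-create the Dirichlet posterior for the GREAT trial
# table before summarizing the difference of the first two cell probabilities.
draws <- rdirichlet(10000, c(13, 23, 150, 125))
diff <- draws[, 1] - draws[, 2]
hist(diff, breaks = 50, freq = FALSE, xlab = "difference in cell probabilities")
quantile(diff, prob = c(0.025, 0.5, 0.975))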
# Lecture 6 ...................................................................
# Regression models
# BUGS code in the Lecture

# Lecture 7 ...................................................................
# Hierarchical models
# BUGS code in the Lecture

# Lecture 8 ...................................................................
# Longitudinal data analysis
library(lattice)

# HIV data
hiv <- read.csv2("hiv.csv")
hiv <- na.omit(hiv)

# Data for WinBUGS
y <- hiv$y
time <- hiv$time
tr <- as.numeric(hiv$tr)
n <- length(y)

# Set up new patient id numbers from 1 to J
id <- hiv$person              # child ID
unique.pid <- unique(id)
J <- length(unique.pid)       # J = number of children
person <- rep(NA, n)
for (j in 1:J){
  person[id == unique.pid[j]] <- j
}
# treatment per child: first row of each child, in order of appearance
tr.group <- tr[!duplicated(person)]

# Plot the data
mykey <- list(space = "top", title = "HIV positive children",
              text = list(c("Control diet", "Zinc supplement")),
              lines = list(col = 1:2, lwd = 3, lty = 1:2))
plot1 <- xyplot(y ~ time | tr, group = person, subscripts = TRUE,
                panel = function(x, y, subscripts, groups, ...){
                  panel.superpose(x, y, subscripts, groups, type = c("l"), lwd = 1)
                  panel.grid()
                },
                data = hiv, main = "HIV positive children (1 to 5 years old)",
                xlab = "time (years)", ylab = "sqrt(cd4%)")
plot2 <- xyplot(y ~ time, group = tr, type = c("g", "smooth"), key = mykey,
                col = 1:2, lwd = 3, lty = 1:2, ylim = c(4.2, 5.5), data = hiv)
plot(plot1, split = c(1, 1, 1, 2))
plot(plot2, split = c(1, 2, 1, 2), newpage = FALSE)

# Pooled model ................................................................
# HIV example
# Set up the BUGS model
dat.hiv <- list("y", "time", "tr", "n")
par.hiv <- c("alpha", "beta", "sigma.y")

cat("model
{
 # Priors ...........................................
  alpha ~ dnorm(0, 0.01)
  beta[1] ~ dnorm(0, 0.01)   # slope, control group
  beta[2] ~ dnorm(0, 0.01)   # slope, zinc supplement group
  prec.y <- pow(sigma.y, -2)
  sigma.y ~ dunif(0, 10)
 # Data model .......................................
  for(i in 1:n)
  {
    y[i] ~ dnorm(mu[i], prec.y)
    mu[i] <- alpha + beta[tr[i]] * time[i]
  }
}
", file = "hiv_pool.bug")

m.hiv.pool <- bugs(dat.hiv, inits = NULL, parameters.to.save = par.hiv,
                   model = "hiv_pool.bug", n.chains = 2, n.iter = 20000,
                   n.thin = 2,
                   bugs.directory = "/home/pablik/.wine/drive_c/WinBUGS14",
                   working.directory = "/home/pablik/Documents/2015/BHM/R_WinBUGS",
                   debug = TRUE, DIC = TRUE, clearWD = TRUE)
m.hiv.pool
attach.bugs(m.hiv.pool)
delta.pool <- beta[, 1] - beta[, 2]
hist(delta.pool, breaks = 50, freq = FALSE, col = "lightblue")
# p-value: posterior probability that the control slope exceeds the zinc slope
sum(delta.pool > 0)/length(delta.pool)
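# For reference (a sketch, not in the original): the maximum-likelihood fit of
# the same pooled model, a common intercept with treatment-specific slopes;
# the estimates should be close to the BUGS posterior means.
summary(lm(y ~ time:factor(tr)))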
# HIV exchangeable (random intercept) model ...................................
# Set up the BUGS model
dat.hiv.re <- list("y", "time", "tr", "tr.group", "n", "person", "J")
par.hiv.re <- c("mu.alpha", "sigma.alpha", "mu.beta", "sigma.beta",
                "gamma", "sigma.y", "alpha", "beta")

cat("model
{
 # Priors ...........................................
  for(j in 1:J){
  # alpha[j] ~ dnorm(mu.alpha, prec.alpha)
  # beta[j] ~ dnorm(mu.beta[tr.group[j]], prec.beta)
    alpha[j] ~ dt(mu.alpha, prec.alpha, 4)
    beta[j] ~ dt(mu.beta[tr.group[j]], prec.beta, 4)
  }
  mu.alpha ~ dnorm(0, 0.01)
  prec.alpha <- pow(sigma.alpha, -2)
  sigma.alpha ~ dunif(0, 10)
  mu.beta[1] ~ dnorm(0, 0.01)   # slope, control group
  mu.beta[2] ~ dnorm(0, 0.01)   # slope, zinc supplement group
  prec.beta <- pow(sigma.beta, -2)
  sigma.beta ~ dunif(0, 10)
  prec.y <- pow(sigma.y, -2)
  sigma.y ~ dunif(0, 10)
  gamma[1] <- 0                 # control group
  gamma[2] ~ dnorm(0, 0.01)
 # Data model .......................................
  for(i in 1:n)
  {
  # Observations at child level
  # y[i] ~ dnorm(mu[i], prec.y)
    y[i] ~ dt(mu[i], prec.y, 4)
  # mu[i] <- alpha[person[i]] + beta[tr[i]] * time[i]       # random intercept
  # mu[i] <- alpha[person[i]] + beta[person[i]] * time[i]   # random intercept and slope
    mu[i] <- alpha[person[i]] + beta[person[i]] * time[i] + gamma[tr[i]]*pow(time[i], 2)
  }
}
", file = "hiv_re.bug")

m.hiv.re <- bugs(dat.hiv.re, inits = NULL, parameters.to.save = par.hiv.re,
                 model = "hiv_re.bug", n.chains = 2, n.iter = 5000, n.thin = 2,
                 bugs.directory = "/home/pablik/.wine/drive_c/WinBUGS14",
                 working.directory = "/home/pablik/Documents/2015/BHM/R_WinBUGS",
                 debug = TRUE, DIC = TRUE, clearWD = TRUE)
print(m.hiv.re)
attach.bugs(m.hiv.re)
delta.re.nor <- mu.beta[, 1] - mu.beta[, 2]
hist(delta.pool, breaks = 50, freq = FALSE, col = "lightblue", xlim = c(-1.2, 2))
lines(density(delta.re.nor), col = "red", lwd = 3)

# Run the model again with the t-distribution version and overlay the result
attach.bugs(m.hiv.re)
delta.re.t <- mu.beta[, 1] - mu.beta[, 2]
lines(density(delta.re.t), col = "blue", lwd = 3)
legend(0.4, 2, legend = c("Identical par.", "Exchang. Normal", "Exchang. t"),
       col = c("lightblue", "red", "blue"), lwd = c(2, 2, 2), lty = c(1, 1, 1))
abline(v = 0, lty = 2)

# posterior for the quadratic term (gamma[, 1] is fixed at 0, so plot gamma[, 2])
hist(gamma[, 2], breaks = 50, freq = FALSE, col = "green", xlim = c(-0.2, 0.5),
     main = "Posterior for the Quadratic Term:\n Zinc Supplement Group")
abline(v = 0, lty = 2)

# p-value
sum(delta.re.nor > 0)/length(delta.re.nor)

# Plot random slopes and intercepts (coefplot() is in the 'arm' package)
library(arm)
coefplot(m.hiv.re, var.idx = 8:168, col = "red", xlim = c(-2.5, 8),
         main = "Posteriors: Intercepts", cex.var = 0.4)
coefplot(m.hiv.re, var.idx = 169:(169 + 168), col = "blue", xlim = c(-3, 3),
         main = "Posteriors: Slopes", cex.var = 0.4)

# Variability explained
tot.var <- sigma.y^2 + sigma.alpha^2 + sigma.beta^2
mean(sigma.y^2/tot.var) * 100
mean(sigma.alpha^2/tot.var) * 100
mean(sigma.beta^2/tot.var) * 100

# Model validation: predictive checks ...
attach.bugs(m.hiv.re)
# Number of observations per person
with(hiv, table(table(person)))

## Simulating the predictive data. The original sketch used unset placeholders
## (g.0.true, g.1.true, sigma.b.true, ...); here the hyperparameters default
## to the posterior means made available by attach.bugs(m.hiv.re).
y.predictive <- function(J, K = 7,
                         mu.a = mean(mu.alpha),
                         mu.b = apply(mu.beta, 2, mean),   # slope means per group
                         sigma.y.true = mean(sigma.y),
                         sigma.a = mean(sigma.alpha),
                         sigma.b = mean(sigma.beta))
{
  time <- rep(seq(0, 1, length = K), J)   # K measurements during the year
  person <- rep(1:J, each = K)            # person IDs
  treatment <- sample(rep(0:1, J/2))      # randomize treatment (assumes J even)
  treatment1 <- treatment[person]
  # person-level parameters
  a.true <- rnorm(J, mu.a, sigma.a)
  b.true <- rnorm(J, mu.b[treatment + 1], sigma.b)   # group-specific slope mean
  # data
  y <- rnorm(J*K, a.true[person] + b.true[person]*time, sigma.y.true)
  return(data.frame(y, time, person, treatment1))
}
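# Example use of the simulator above (a sketch): generate one replicated data
# set and compare its trajectories with the observed-data plot; J = 150 is an
# illustrative (even) number of children, not taken from the original script.
hiv.rep <- y.predictive(J = 150)
xyplot(y ~ time | factor(treatment1), group = person, type = "l",
       data = hiv.rep, xlab = "time (years)", ylab = "simulated sqrt(cd4%)")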
# Further Lectures ............................................................

# Lecture: Introduction to MCMC ###############################################
# Example: Gibbs sampler
# Normal distribution, unknown mean and variance (variance = 1/tau)
gibbsNormal <- function(y, mu1, tau1, N, mu0, phi0, a, b)
{
  mu <- numeric(N + 1)   # N = number of iterations
  mu[1] <- mu1           # the initial value for mu
  tau <- numeric(N + 1)
  tau[1] <- tau1         # the initial value for tau
  n <- length(y); ybar <- mean(y)
  for(i in 2:(N + 1)) {
    # generate samples from the full conditionals
    mu[i] <- rnorm(1, (mu0*phi0 + n*ybar*tau[i-1])/(phi0 + n*tau[i-1]),
                   1/sqrt(phi0 + n*tau[i-1]))
    tau[i] <- rgamma(1, (n + 2*a)/2, (sum((y - mu[i])^2) + 2*b)/2)
  }
  output <- cbind(mu, tau)
  output
}

y <- c(160.2, 155.1, 135.0, 138.8, 147.0, 115.0, 113.4, 97.5, 110.3, 115.8)
res <- gibbsNormal(y, 150, 1, 500, 150, 10, 0.1, 0.1)
par(mfrow = c(2, 2))
plot(res[50:500, 1], col = "red", type = "l", ylab = "mu", xlab = "Iterations")
hist(res[50:500, 1], main = "Posterior for mu", col = "red", xlab = "mu")
# the sampler returns the precision tau; 1/tau is the variance
plot(1/res[50:500, 2], col = "blue", type = "l", ylab = "variance (1/tau)",
     xlab = "Iterations")
hist(1/res[50:500, 2], col = "blue", xlab = "variance (1/tau)",
     main = "Posterior for the variance")
par(mfrow = c(1, 1))

##############################################################################
# Example: mixture of normals
# Metropolis-Hastings sampler for a mixture of normals using the MCMCpack package
library(MCMCpack)
dmix <- function(x, m1, s1, m2, s2, q1)
  q1*dnorm(x, mean = m1, sd = s1) + (1 - q1)*dnorm(x, mean = m2, sd = s2)
post.mix <- MCMCmetrop1R(dmix, m1 = 0, m2 = 5, s1 = 1, s2 = 1, q1 = 0.3,
                         theta.init = 1, thin = 1, mcmc = 10000,
                         burnin = 1000, logfun = FALSE)
plot(post.mix)

###############################################################################
# Example: Metropolis toy example
# Gelman and Meng (1991) kernel function
# plot the function
f <- function(x, y, A = 1, B = 0, C1 = 3, C2 = 3, log = FALSE)
{
  if (is.vector(x))
    x <- matrix(x, nrow = 1)
  r <- -0.5 * (A * x^2 * y^2 + x^2 + y^2 - 2*B*x*y - 2*C1*x - 2*C2*y)
  if (!log) r <- exp(r)
  as.vector(r)
}
x <- seq(-1, 6, length = 50)
y <- seq(-1, 6, length = 50)
z <- outer(x, y, f)

# Gelman-Meng kernel
persp(x, y, z, theta = 100, phi = 30, expand = 0.5, col = "lightblue",
      shade = 0.75)
image(x, y, log(z), col = gray(seq(0, 1, len = 256)))
contour(x, y, log(z), add = TRUE, levels = seq(5, -35, -2), col = rainbow(10))

## Metropolis sampling with MCMCpack
library(MCMCpack)
## Gelman and Meng (1991) kernel function:
# write the log-kernel with the first argument as a vector
f.g.m <- function(xy, A = 1, B = 0, C1 = 3, C2 = 3)
{
  x <- xy[1]; y <- xy[2]
  r <- -0.5 * (A * x^2 * y^2 + x^2 + y^2 - 2*B*x*y - 2*C1*x - 2*C2*y)
  as.vector(r)
}
# f.g.m returns the kernel on the log scale, so logfun must be TRUE
res <- MCMCmetrop1R(f.g.m, theta.init = c(0, 1), mcmc = 10000, burnin = 5000,
                    logfun = TRUE)
points(res[, 1], res[, 2])
par(mfrow = c(2, 2))
plot(res[, 1], type = "l", col = "blue", xlab = "iteration", ylab = "x",
     main = "Trace plot: Metropolis iterations for x")
plot(res[, 2], type = "l", col = "red", xlab = "iteration", ylab = "y",
     main = "Trace plot: Metropolis iterations for y")
par(mfrow = c(1, 1))

# Remove objects generated in this lecture ...
tm <- ls()
rm(list = tm[-which(tm %in% c("bugsdir", "working.directory"))])
rm(tm)

# End .........................................................................
# Good luck!