# STATISTICAL INFERENCE
library(faraway)

# MLE tossing a coin #####
# (inspired by Adams (2021), section 5.4.2)
set.seed(9403)
coinFlips <- rbinom(500, size = 1, prob = 0.2)
sum(coinFlips)  # 97 heads in our sample

# if the coin were fair, how likely would 97 heads in 500 flips be?
dbinom(97, size = 500, prob = 0.5)
# the true value of p is 0.2
choose(500, 97) * 0.2^97 * 0.8^403
dbinom(97, size = 500, prob = 0.2)
# but we don't know the true p, we only know our sample

# define the (negative) log-likelihood; optimize() minimizes by default,
# so minimizing this maximizes the log-likelihood
logLikelihood <- function(p){
  -dbinom(97, size = 500, prob = p, log = TRUE)
}
optimize(f = logLikelihood, interval = c(0, 1))
# we get 0.1939823, essentially the sample proportion 97/500 = 0.194
pVector <- seq(0, 1, by = 0.01)
plot(pVector, -logLikelihood(pVector), type = "l",
     xlab = "p", ylab = "log-likelihood")
abline(v = 0.1939823)
abline(v = 0.2, col = "red")

# MLE motivation - Challenger crash investigation #####
library(ggplot2)
library(MASS)

# load the data
data(orings, package = "faraway")
# convert the temperature into units we understand
orings$tempC <- (orings$temp - 32) * (5 / 9)

# plot the data
ggplot(data = orings, aes(x = tempC, y = damage)) +
  geom_point(size = 5) +
  labs(x = "Temperature (°C)", y = "# failed O-rings",
       title = "Challenger crash investigation")

# try a linear probability model
ggplot(data = orings, aes(x = tempC, y = damage)) +
  geom_point(size = 5) +
  geom_smooth(method = "lm", formula = y ~ x, se = TRUE) +
  labs(x = "Temperature (°C)", y = "# failed O-rings",
       title = "Challenger crash investigation")

# we may try a quadratic fit
ggplot(data = orings, aes(x = tempC, y = damage)) +
  geom_point(size = 5) +
  geom_smooth(method = "lm", formula = y ~ poly(x, 2)) +
  labs(x = "Temperature (°C)", y = "# failed O-rings",
       title = "Challenger crash investigation")

# if the model is too complex, it may produce nonsensical predictions
# (an overfitted model)
ggplot(data = orings, aes(x = tempC, y = damage)) +
  geom_point(size = 5) +
  geom_smooth(method = "lm", formula = y ~ poly(x, 7)) +
  labs(x = "Temperature (°C)", y = "# failed O-rings",
       title = "Challenger crash investigation")

# GLM: model the proportion of failed O-rings (out of 6) with a binomial fit
ggplot(data = orings, aes(x = tempC, y = damage)) +
  geom_point(size = 5, aes(x = tempC, y = damage / 6)) +
  geom_smooth(method = "glm", formula = cbind(y, 6 - y) ~ x, se = TRUE,
              method.args = list(family = "binomial"), fullrange = FALSE) +
  labs(x = "Temperature (°C)", y = "Prob. of failure",
       title = "Challenger crash investigation")

# now visualize the temperature during the Challenger launch
# (about 31 °F, i.e. roughly -0.5 °C)
ggplot(data = orings, aes(x = tempC, y = damage)) +
  geom_point(size = 5, aes(x = tempC, y = damage / 6)) +
  geom_smooth(method = "glm", formula = cbind(y, 6 - y) ~ x, se = TRUE,
              method.args = list(family = "binomial"), fullrange = TRUE) +
  scale_x_continuous(limits = c(-2, NA)) +
  labs(x = "Temperature (°C)", y = "Prob. of failure",
       title = "Challenger crash investigation") +
  geom_vline(aes(xintercept = -0.5), linetype = "dashed")
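# --- added sketch (not in the original script): the same prediction can be
# read off a fitted glm object with predict(); launchMod is a name introduced
# here, and the launch temperature of 31 °F is about -0.5 °C
launchMod <- glm(cbind(damage, 6 - damage) ~ tempC, family = binomial, data = orings)
predict(launchMod, newdata = data.frame(tempC = -0.5), type = "response")
# type = "response" returns the estimated per-O-ring failure probability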
of failure", main = "Challenger crash investigation") x <- seq(-5,30,1) lines(x,ilogit(2.5-0.2*x)) lines(x,ilogit(-0.5-0.2*x)) lines(x,ilogit(1.5-0.5*x)) lines(x,ilogit(1.5-0.1*x)) lines(x,ilogit(1.6-0.8*x)) lines(x,ilogit(-0.1-7*x)) lines(x,ilogit(6.5-0.8*x)) lines(x,ilogit(7.5-0.3*x)) lines(x,ilogit(-3.9+0.8*x)) lines(x,ilogit(-10+0.7*x)) lines(x,ilogit(-5.5+0.2*x)) lines(x,ilogit(3.5+0.5*x)) #Among these lines, which one will we pick? # we pick the one that is most compatible with the data (in a likelihood sense) lines(x,ilogit(sumlogit$coeff[1,1]+sumlogit$coeff[2,1]*x),col="red",lwd=4) # we may try a different link function - probit instead of logit probitmod <- glm(cbind(damage,6-damage) ~ tempC, family=binomial(link=probit), orings) sumprobit <- summary(probitmod) lines(x,ilogit(sumprobit$coeff[1,1]+sumprobit$coeff[2,1]*x),col="blue",lwd=4) #prediction ilogit(sumlogit$coeff[1,1]+sumlogit$coeff[2,1]*(-0.5)) pnorm(sumprobit$coeff[1,1]+sumprobit$coeff[2,1]*(-0.5)) # MLE Exponential distribution ##### set.seed(849) lambda <- 6 n <- 100 X <- round(rexp(n,rate=1/lambda),2) #check parametrization ?rexp #define likelihood function likFunction <- function(lambda){ prod(dexp(X,rate=1/lambda)) #or -log(lambda) - X/lambda } #define log-likelihood function logLikFunction <- function(lambda){ sum(log(dexp(X,rate=1/lambda))) #or sum(dexp(X,rate=1/lambda,log=TRUE))) #or -n*log(lambda) - n*mean(X)/lambda } #look at the values at some grid lambdaVec <- seq(4,8,1/10) # evaluate the likelihood likValues <- sapply(lambdaVec,likFunction) # evaluate the log-likelihood logLikValues <- sapply(lambdaVec,logLikFunction) #plot it plot(lambdaVec,likValues) plot(lambdaVec,logLikValues) #look at the maximizer (on the grid of values of lambda) lambdaVec[which.max(likValues)] #or use "optimize" function to find the maximum optimize(f = logLikFunction, interval = c(0,10), maximum=TRUE) #or define the 1st derivative of the likelihood function S_n scoreFunction <- function(lambda){ -n/lambda + n*mean(X)/(lambda^2) } #we may check it against the numerical derivative # (to see if we haven't made any mistake) numScoreFunction <- function(lambda, d=0.00001){ (logLikFunction(lambda+d)-logLikFunction(lambda-d))/(2*d) } numScoreValues <- sapply(lambdaVec,numScoreFunction) scoreValues <- sapply(lambdaVec,scoreFunction) max(abs(scoreValues - numScoreValues)) #max differences are small, it seems we have not miscoded it plot(lambdaVec, scoreValues) lines(lambdaVec, numScoreValues, col="red") abline(h=0) # Find a solution to S_n(x) = 0 rootFind <- uniroot(scoreFunction, interval = c(1,10)) # we get the same number as from the optimization (lambdaMLE <- rootFind$root) #Look at the Likelihood Hessian likelihoodHessian <- function(lambda){ -n/(lambda^2) + 2*n*mean(X)/(lambda^3) } likelihoodHessian(lambdaMLE) # this is a positive number # and hence the log-lik. 
likHessianValues <- sapply(lambdaVec, likelihoodHessian)
plot(lambdaVec, likHessianValues, ylim = c(-1, max(likHessianValues)))
abline(h = 0)
# minus the second derivative is positive over the whole grid; in fact, from
# the functional form, the log-likelihood is concave for all lambda < 2*mean(X)
# (though not globally)

# plot the estimated density of X against several candidate exponentials
plot(density(X), xlim = c(0, 20))
xValues <- seq(0, 20, 0.1)
lines(xValues, dexp(xValues, 1/4), col = "red")
lines(xValues, dexp(xValues, 1/6), col = "blue")
lines(xValues, dexp(xValues, 1/8), col = "green")

# Bootstrap - rolling dice #####
throws <- c(1,1,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,5,5,5,6,6,6,6)
length(throws)
mean(throws)
# the same mean as a frequency-weighted average
freq <- c(2,1,8,7,3,4)
numbers <- c(1,2,3,4,5,6)
t(numbers) %*% freq / sum(freq)
hist(throws, breaks = 0.5 + 0:6)

# the data sample is all we have
set.seed(35)
# a "fake" dataset
throws2 <- sample(throws, 25, replace = TRUE)
hist(throws2, breaks = 0.5 + 0:6, border = "red")
# another "fake" dataset
throws3 <- sample(throws, 25, replace = TRUE)
hist(throws3, breaks = 0.5 + 0:6, border = "green")
# yet another "fake" dataset
throws4 <- sample(throws, 25, replace = TRUE)
hist(throws4, breaks = 0.5 + 0:6, border = "blue")

# let's do it 5000 times
nb <- 5000
n <- length(throws)
bet <- numeric(nb)
ptm <- proc.time()  # have a look at the time it takes
for (i in 1:nb){
  unifnum <- sample(1:n, n, replace = TRUE)  # pick random indices
  bet[i] <- mean(throws[unifnum])
  # bet[i] <- mean(sample(throws, 25, replace = TRUE))  # equivalent formulation
}
proc.time() - ptm  # how long did it take?

hist(bet, breaks = 0.5 + 0:5)  # histogram not very helpful
# look at a density estimate instead
plot(density(bet))
plot(density(bet, bw = .03, kernel = "gaussian"))
# how often is the bootstrapped mean below 3 (or below 3.5)?
mean(bet < 3)
mean(bet < 3.5)

# or use the boot library (Canty & Ripley)
library(boot)
samplemean <- function(x, d){
  mean(x[d])
}
bootbet <- boot(data = throws, statistic = samplemean, R = 5000)
plot(bootbet)

# Bootstrap - linear regression #####
set.seed(8322)
n <- 20
x <- rexp(n)
e <- rexp(n)
y <- 2 + 3*x + e
lmod <- lm(y ~ x)
# we observe intercept = 2.041 and slope = 2.806

# run many bootstrap samples (resampling (x, y) pairs)
nB <- 10000
resultsB <- numeric(nB)
for (b in 1:nB){
  id <- sample(1:n, replace = TRUE)
  lmodb <- lm(y[id] ~ x[id])
  resultsB[b] <- lmodb$coeff[2]
}
# plot the bootstrapped betas
plot(density(resultsB))
# 95% non-parametric bootstrap CI
quantile(resultsB, probs = c(0.025, 0.975))

# parametric bootstrap:
# assume normality, use the bootstrap only to estimate the standard error
se <- sd(resultsB)
c(2.806 - 1.96*se, 2.806 + 1.96*se)

# *Bootstrap - non-parametric estimator #####
# stamp data, p. 227 of "An Introduction to the Bootstrap", Efron & Tibshirani
# the dataset is in the bootstrap library
library(bootstrap)
data(stamp)
summary(stamp)

kde1 <- density(stamp$Thickness, bw = .003, kernel = "gaussian")
kde2 <- density(stamp$Thickness, bw = .008, kernel = "gaussian")
kde3 <- density(stamp$Thickness, bw = 0.006713547, kernel = "gaussian")
kde4 <- density(stamp$Thickness, bw = .001, kernel = "gaussian")
# (0.006713547 is the critical bandwidth found below)
plot(kde3$x, kde3$y, type = "l")
plot(kde4$x, kde4$y, type = "l", col = "blue",
     main = "Bandwidth = 0.001", ylab = "Density", xlab = "Thickness")

# "reproduce" the graph on p. 228
par(mfrow = c(2, 2))
hist(stamp$Thickness, breaks = 0.06 + 0.005*0:15,
     main = "Histogram", xlab = "Thickness")
plot(kde1$x, kde1$y, type = "l",
     main = "Bandwidth = 0.003", ylab = "Density", xlab = "Thickness")
plot(kde2$x, kde2$y, type = "l", col = "red",
     main = "Bandwidth = 0.008", ylab = "Density", xlab = "Thickness")
plot(kde4$x, kde4$y, type = "l", col = "blue",
     main = "Bandwidth = 0.001", ylab = "Density", xlab = "Thickness")
par(mfrow = c(1, 1))
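# --- added sketch (not in the original script): a quick way to count the
# local maxima of a KDE, via sign changes of the first differences of the
# estimated density; countModes is a helper name introduced here
countModes <- function(x, bw){
  den <- density(x, bw = bw, kernel = "gaussian")
  sum(diff(sign(diff(den$y))) == -2)  # a peak is a +1 -> -1 sign change
}
# smaller bandwidths yield more modes, motivating the critical-bandwidth
# analysis below
sapply(c(0.001, 0.003, 0.008), function(b) countModes(stamp$Thickness, b))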
# count the number of modes of a density estimate via a smoothing spline
# adapted from Stack Overflow (it works after some modification):
# http://stackoverflow.com/questions/16255622/peak-of-the-kernel-density-estimation
n.modes <- function(x, bww){
  den <- density(x, bw = bww, kernel = "gaussian")
  # smooth the KDE and count sign changes of its first derivative
  den.s <- smooth.spline(den$x, den$y, all.knots = TRUE, spar = 0.8)
  s.1 <- predict(den.s, den.s$x, deriv = 1)
  nmodes <- length(rle(sign(s.1$y))$values) / 2
  if (nmodes > 10) nmodes <- 10
  if (is.na(nmodes)) nmodes <- 0
  return(nmodes)
}

# bisection search for the critical bandwidth: the smallest bandwidth
# for which the kernel density estimate is unimodal
myBisection <- function(x){
  a <- 0.0001
  b <- 1
  nSteps <- 20
  for (i in 1:nSteps){
    ab <- (a + b) / 2
    if (n.modes(x, ab) > 1){
      a <- ab  # still multimodal: the critical bandwidth is larger
    } else {
      b <- ab  # unimodal: the critical bandwidth is smaller
    }
  }
  return((a + b) / 2)
}

hath1 <- myBisection(stamp$Thickness)  # 0.006713547
var(stamp$Thickness)                   # 0.0002239209

# statistic for the smoothed bootstrap: sampling from a Gaussian KDE adds the
# kernel variance h^2 to the data variance, so we rescale the draws to keep
# the variance of our sample
# (0.0068 is hath1 rounded; 0.0002239209 is var(stamp$Thickness))
smallestWindow <- function(d, i){
  y <- d[i]
  x <- mean(y) + (1 + 0.0068^2/0.0002239209)^(-0.5) *
       (y - mean(y) + 0.0068*rnorm(length(y)))
  h <- myBisection(x)
  return(h)
}
bootbet <- boot(data = stamp$Thickness, statistic = smallestWindow, R = 5000)
# (slow: each replication runs a full bisection search)
summary(bootbet)
# plot(bootbet)

# achieved significance level for H0: the density has one mode;
# a small value is evidence against unimodality
mean(bootbet$t > hath1)  # 0.0004
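# --- added sketch (not in the original script): visualize the bootstrap
# distribution of the critical bandwidth against the observed value hath1
plot(density(bootbet$t), main = "Bootstrap critical bandwidths",
     xlab = "Critical bandwidth")
abline(v = hath1, col = "red", lwd = 2)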