# ============================================================================== # --------------------------------- SEMINAR 7 ---------------------------------- # ============================================================================== data <- c(19.92, 21.17, 24.87, 22.90, 20.88, 19.43, 19.39, 23.14, 16.99, 24.85, 20.80, 19.93, 20.28, 27.61, 27.50, 19.63, 20.35, 22.96, 17.57, 22.91, 21.84, 25.34, 20.13, 18.30, 15.40, 22.88, 21.90, 25.30, 21.86, 22.47) # ----------------------------------- TASK 1 ----------------------------------- n <- length(data) alpha <- 0.05 # confidence / significance level # MLE: # from the previous lecture formulae: mu <- mean(data) sigma <- sqrt(sum((data - mu)^2) / (n - 1)) # CHECKING THE NORMALITY: xx <- seq(14, 28, by = 0.1) density.n <- dnorm(xx, mu, sigma) hist(data, freq = F, main = "Histogram of lengths", ylim = c(0, 0.15), col = "gold") lines(xx, density.n) # it seems quite OK (we will see more complex method for checking the normality soon) # ....................................... A .................................... # Estimate parameters mu and sigma of the normal distribution N(mu, sigma) using # the maximum likelihood method. Then construct a two-sided confidence interval # for parameter mu. Create a visualization of all informations you have computed (plot # the histogram together with the density of estimated normal distribution and with CI). # HINT: see slaid 44 from the lecture # MLE: (maximum likelihood estimation from the previous week) mu <- mean(data) sigma <- sqrt(sum((data - mu)^2) / (n - 1)) # QUANTILE: u.q <- qt(1 - alpha/2, n - 1) # LOWER AND UPPER BOUND OF CONFIDENCE INTERVAL: (CI_lower <- mu - u.q * sigma / sqrt(n)) (CI_upper <- mu + u.q * sigma / sqrt(n)) # VISUALIZING THE RESULT: xx <- seq(14, 28, by = 0.1) density.n <- dnorm(xx, mu, sigma) hist(data, freq = F, main = "Two sided CI for mean", col = "antiquewhite") lines(xx, density.n, lty = 2, lwd = 1.5) abline(v = c(CI_upper, CI_lower), col = 'red', lwd = 2) rect(CI_lower, 0, CI_upper, 0.15, col = rgb(red = 1, green = 0, blue = 0, alpha = 0.3)) abline(v = mu, col = 'blue', lwd = 2) legend("topright", legend = c("mu from MLE", "CI limits"), col = c("blue", "red"), lty = c(1, 1), bty = "n") box() # ....................................... B .................................... # Do the same for the LEFT-SIDED confidence interval. # HINT: see slaid 45 from the lecture # MLE: mu <- mean(data) sigma <- sqrt(sum((data - mu)^2) / (n - 1)) # QUANTILE: u.q.R <- qnorm(1 - alpha, mean = 0, sd = 1) # CONFIDENCE INTERVAL : CI_right <- mu + u.q.R * sigma / sqrt(n) # VISUALIZATION: xx <- seq(14, 28, by = 0.1) density.n <- dnorm(xx, mu, sigma) hist(data, freq = F, main = "Left sided CI for mean", col = "antiquewhite") lines(xx, density.n, lty = 2, lwd = 1.5) abline(v = CI_right, col = 'red', lwd = 2) abline(v = mu, col = 'blue', lwd = 2) rect(14, 0, CI_right, 0.15, col = rgb(red = 1, green = 0, blue = 0, alpha = 0.2), lty = 0) # ....................................... C .................................... # Do the same for the RIGHT-SIDED confidence interval. # HINT: see slaid 45 from the lecture # MLE: mu <- mean(data) sigma <- sqrt(sum((data - mu)^2) / (n - 1)) # QUANTILE: u.q.R <- qnorm(1 - alpha) # CONFIDENCE INTERVAL: CI_left <- mu - u.q.R * sigma / sqrt(n) # VISUALIZATION: xx <- seq(14, 28, by = 0.1) density.n <- dnorm(xx, mu, sigma) hist(data, freq = FALSE, main = "Right sided CI for mean", col = "antiquewhite") lines(xx, density.n, lty = 2, lwd = 1.5) abline(v = CI_left, col = 'red', lwd = 2) abline(v = mu, col = 'blue', lwd = 2) rect(CI_left, 0, 28, 0.15, col = rgb(red = 1, green = 0, blue = 0, alpha = 0.2), lty = 0) # ----------------------------------- TASK 2 ----------------------------------- # Work with the same dataset. Test a two sided hypothesis about the expected value # mu. Use critical value approach and p-value approach. Interpret the results. mu0 <- 20 alpha <- 0.05 # ....................................... A .................................... # Test the null hypothesis H0: mu = 20, against the alternative H1: mu != 20 # at the significance level alpha = 0.05. Compute the value of the test statistic # and find the critical region. What is your conclusion? # HINT: use slaid 44 from the lecture # TEST STATISTIC: T0 <- sqrt(n) * (mu - mu0) / sigma # CRITICAL REGION: # (quantiles of the student t-distribution): CR_1 <- - qt(1 - alpha / 2, n - 1) CR_2 <- qt(1 - alpha / 2, n - 1) c(- Inf, CR_1) c(CR_2, Inf) T0 # CONCLUSION: # realization of a test statistic T0 lies inside a critical region W # we REJECT the null hypothesis H0 at the significance level 0.05 # ....................................... B .................................... # VOLUNTARY: # Create a density plot of a theoretical distribution of the test statistic # (student t-distribution), visualize the critical region and the test statistic. xx <- seq(-5, 5, 0.01) density.t <- dt(xx, n - 1) plot(xx, density.t, type = 'l', main = "Critical region", xlab = "x", ylab = "f(x)", sub = paste("mu=", round(mu, 2), "(MLE)")) rect(-5, 0, CR_1, 1, col = rgb(red = 1, green = 0, blue = 0, alpha = 0.2), lty = 0) rect(CR_2, 0, 5, 1, col = rgb(red = 1, green = 0, blue = 0, alpha = 0.2), lty = 0) abline(v = T0, col = 'blue', lwd = 2) # ....................................... C .................................... # Find the p-value of the previous test and use it for decision about the null # hypothesis. # HINT: use slaid 44 from the lecture p.value <- 2 * (1 - pt(T0, n - 1)) # p-value is "THE LOWEST SIGNIFICANCE LEVEL ALPHA, for which we would REJECT H0". # If p-value is LESSER than alpha = 0.05, we REJECT H0. # If p-value is GREATER than alpha = 0.05, we DO NOT REJECT H0. # CONCLUSION: # we DO reject H0: mu = 20 against H1: mu != 20 # ....................................... D .................................... # VOLUNTARY: # Create a density plot of a theoretical distribution of the test statistic (student # t-distribution), visualize the test statistic and p-value. xx <- seq(-5, 5, 0.01) density.t <- dt(xx, n - 1) plot(xx, density.t, type = 'l', main = paste("P-value", round(p.value, 3)), xlab = "x", ylab = "f(x)", sub = paste("mu=", round(mu, 2))) rect(-5, 0, -abs(T), 1, col = rgb(red = 0, green = 0, blue = 1, alpha = 0.2), lty = 0) rect(abs(T0), 0, 5, 1, col = rgb(red = 0, green = 0, blue = 1, alpha = 0.2), lty = 0) abline(v = T0, col = 'blue', lwd = 2)