# ============================================================================== # --------------------------------- SEMINAR 6 ---------------------------------- # ============================================================================== data <- c(19.92, 21.17, 24.87, 22.90, 20.88, 19.43, 19.39, 23.14, 16.99, 24.85, 20.80, 19.93, 20.28, 27.61, 27.50, 19.63, 20.35, 22.96, 17.57, 22.91, 21.84, 25.34, 20.13, 18.30, 15.40, 22.88, 21.90, 25.30, 21.86, 22.47) # ----------------------------------- TASK 1 ----------------------------------- n <- length(data) # ....................................... A .................................... # Estimate parameters mu and sigma of the normal distribution N(mu, sigma) using # the maximum likelihood method. Then construct a two-sided confidence interval # for parameter mu. Create a visualization of all informations you have computed (plot # the histogram together with the density of estimated normal distribution and with CI). # maximum likelihood estimation of parameters mu and sigma # (from the theoretical formulas): mu <- mean(data) sigma <- sqrt(sum((data - mu)^2) / (n - 1)) # MLE sigma <- sd(data) # for the rest of the task we will use this sd definition # Can we assume our data are normal? Plot the histogram together with the density # of the normal distribution with estimated parameters: xx <- seq(14, 28, by = 0.1) density.n <- dnorm(xx, mu, sigma) hist(data, freq = F, main = "Histogram of lengths", ylim = c(0, 0.15), col = "antiquewhite") lines(xx, density.n) # it seems quite OK (we will see more complex method for checking the normality soon) # now compute the quantile we need: alpha <- 0.05 u.q <- qt(1 - alpha/2, n - 1) # compute the lower and upper limits of the CI for parameter mu # (use formulas from the lecture): (CI_lower <- mu - u.q * sigma / sqrt(n)) (CI_upper <- mu + u.q * sigma / sqrt(n)) # plot the histogram of the data again together with the denstiy of normal distribution # with estimated parameters and visualization of the MLE of mu with the CI for mu: # HINT: for visualizating mu and its CI use function abline(), where "v" input # specifies the x-axis coordinate(s) of the vertical line(s). You can use function # rect() plotting the rectangle as well. ?abline ?rect xx <- seq(14, 28, by = 0.1) density.n <- dnorm(xx, mu, sigma) hist(data, freq = F, main = "Two sided CI for mean", col = "antiquewhite") lines(xx, density.n, lty = 2, lwd = 1.5) abline(v = c(CI_upper, CI_lower), col = 'red', lwd = 2) rect(CI_lower, 0, CI_upper, 0.15, col = rgb(red = 1, green = 0, blue = 0, alpha = 0.3)) abline(v = mu, col = 'blue', lwd = 2) legend("topright", legend = c("mu from MLE", "CI limits"), col = c("blue", "red"), lty = c(1, 1), bty = "n") # ....................................... B .................................... # Test the null hypothesis H0: mu = 20, against the alternative H1: mu != 20 # at the significance level alpha = 0.05. Compute the value of the test statistic # and find the critical region. What is your conclusion? # fistly: does the value 20 belong to the 95% CI of mu from the previous subtask? # What will be the result of our test based on this observation? mu0 <- 20 alpha <- 0.05 # test statistic (from the lecture slides): T <- sqrt(n) * (mu - mu0) / sigma # critical region (quantiles of the student t-distribution): CR_1 <- - qt(1 - alpha / 2, n - 1) CR_2 <- qt(1 - alpha / 2, n - 1) # conclusion: we REJECT the null hypothesis H0 at the significance level 0.05 # ....................................... C .................................... # Create a density plot of a theoretical distribution of the test statistic # (student t-distribution), visualize the critical region and the test statistic. xx <- seq(-5, 5, 0.01) density.t <- dt(xx, n - 1) plot(xx, density.t, type = 'l', main = "Critical region", xlab = "x", ylab = "f(x)", sub = paste("mu=", round(mu, 2), "(MLE)")) rect(-5, 0, CR_1, 1, col = rgb(red = 1, green = 0, blue = 0, alpha = 0.2), lty = 0) rect(CR_2, 0, 5, 1, col = rgb(red = 1, green = 0, blue = 0, alpha = 0.2), lty = 0) abline(v = T, col = 'blue', lwd = 2) # ....................................... D .................................... # Find the p-value of the previous test and use it for decision about the null # hypothesis. p.value <- 2 * (1 - pt(T, n - 1)) # volume under the black curve in the blue regions in the following plot # we DO reject H0: mu = 20 against H1: mu != 20 # ....................................... E .................................... # Create a density plot of a theoretical distribution of the test statistic (student # t-distribution), visualize the test statistic and p-value. xx <- seq(-5, 5, 0.01) density.t <- dt(xx, n - 1) plot(xx, density.t, type = 'l', main = paste("P-value", round(p.value, 3)), xlab = "x", ylab = "f(x)", sub = paste("mu=", round(mu, 2))) rect(-5, 0, -abs(T), 1, col = rgb(red = 0, green = 0, blue = 1, alpha = 0.2), lty = 0) rect(abs(T), 0, 5, 1, col = rgb(red = 0, green = 0, blue = 1, alpha = 0.2), lty = 0) abline(v = T, col = 'blue', lwd = 2) # ----------------------------------- TASK 2 ----------------------------------- # Use the data from the previous task. Solve the same tasks as before, but now # for the left-sided confidence interval and the right-sided alternative hypothesis. alpha <- 0.05 n <- length(data) # ....................................... A .................................... # Construct a left-sided conficence interval for parameter mu. Create a # visualization of all informations you computed (plot the histogram together with # the density of estimated normal distribution and with CIs). # maximum likelihood estimation of parameter mu and sigma: mu <- mean(data) sigma <- sqrt(sum((data - mu)^2) / (n - 1)) # MLE sigma <- sd(data) # for the rest of the task we will use this sd definition # compute the quantile we need: u.q.L <- qnorm(1 - alpha) # compute the lower and upper limits of the CI for parameter mu # (use formulas from the lecture): CI_left <- mu - u.q.L * sigma / sqrt(n) xx <- seq(14, 28, by = 0.1) density.n <- dnorm(xx, mu, sigma) hist(data, freq = F, main = "Left sided CI for mean", col = "antiquewhite") lines(xx, density.n, lty = 2, lwd = 1.5) abline(v = CI_left, col = 'red', lwd = 2) abline(v = mu, col = 'blue', lwd = 2) rect(CI_left, 0, 28, 0.15, col = rgb(red = 1, green = 0, blue = 0, alpha = 0.2), lty = 0) # ....................................... B .................................... # Test the null hypothesis about parameter mu H0: mu = 20, against the right-sided # alternative H1: mu > 20 at the significance level alpha = 0.05. Compute the value # of the test statistic and find the critical region. What is your conclusion? mu0 <- 20 alpha <- 0.05 # test statstic: T <- sqrt(n) * (mu - mu0) / sigma # critical region: CR_1 <- qt(1 - alpha, n - 1) # conclusion: we REJECT the null hypothesis H0 at the significance level 0.05 # ....................................... C .................................... # Create a density plot of a theoretical distribution of the test statistic # (student t-distribution), visualize the critical region and the test statistic. xx <- seq(-5, 5, 0.01) density.t <- dt(xx, n - 1) plot(xx, density.t, type = 'l', main = "Critical region", xlab = "x", ylab = "f(x)", sub = paste("mu=", round(mu, 2))) rect(CR_1, 0, 5, 0.4, col = rgb(red = 1, green = 0, blue = 0, alpha = 0.2), lty = 0) abline(v = T, col = 'blue', lwd = 2) # ....................................... D .................................... # Find the p-value of the previous test and use it for decision about the null # hypothesis. p.value <- 1 - pt(T, n - 1) # volume under the black curve in the blue regions # we DO reject H0: mu = 20 against H1: mu > 20 # ....................................... E .................................... # Create a density plot of a theoretical distribution of the test statistic (student # t-distribution), visualize the test statistic and p-value. xx <- seq(-5, 5, 0.01) density.t <- dt(xx, n - 1) plot(xx, density.t, type = 'l', main = paste("P-value", round(p.value, 3)), xlab = "x", ylab = "f(x)", sub = paste("mu=", round(mu, 2))) rect(T, 0, 5, 1, col = rgb(red = 0, green = 0, blue = 1, alpha = 0.2), lty = 0) abline(v = T, col = 'blue', lwd = 2) # ----------------------------------- TASK 3 ----------------------------------- # Use the data from the previous task. Solve the same tasks as before, but now # for the right-sided confidence interval and the left-sided alternative hypothesis. alpha <- 0.05 n <- length(data) # ....................................... A .................................... # Construct a left-sided conficence interval for parameter mu. Create a # visualization of all informations you computed (plot the histogram together with # the density of estimated normal distribution and with CIs). # maximum likelihood estimation of parameter mu and sigma: mu <- mean(data) sigma <- sqrt(sum((data - mu)^2) / (n - 1)) # MLE sigma <- sd(data) # for the rest of the task we will use this sd definition # compute the quantile we need: u.q.R <- qnorm(1 - alpha) # compute the lower and upper limits of the CI for parameter mu # (use formulas from the lecture): CI_right <- mu + u.q.R * sigma / sqrt(n) xx <- seq(14, 28, by = 0.1) density.n <- dnorm(xx, mu, sigma) hist(data, freq = F, main = "Right sided CI for mean", col = "antiquewhite") lines(xx, density.n, lty = 2, lwd = 1.5) abline(v = CI_right, col = 'red', lwd = 2) abline(v = mu, col = 'blue', lwd = 2) rect(14, 0, CI_right, 0.15, col = rgb(red = 1, green = 0, blue = 0, alpha = 0.2), lty = 0) # ....................................... B .................................... # Test the null hypothesis about parameter mu H0: mu = 20, against the left-sided # alternative H1: mu < 20 at the significance level alpha = 0.05. Compute the value # of the test statistic and find the critical region. What is your conclusion? mu0 <- 20 alpha <- 0.05 # test statstic: T <- sqrt(n) * (mu - mu0) / sigma # critical region: CR_2 <- - qt(1 - alpha, n - 1) # conclusion: we DO NOT REJECT the null hypothesis H0 at the significance level 0.05 # ....................................... C .................................... # Create a density plot of a theoretical distribution of the test statistic # (student t-distribution), visualize the critical region and the test statistic. xx <- seq(-5, 5, 0.01) density.t <- dt(xx, n - 1) plot(xx, density.t, type = 'l', main = "Critical region", xlab = "x", ylab = "f(x)", sub = paste("mu=", round(mu, 2))) rect(-6, 0, CR_2, 1, col = rgb(red = 1, green = 0, blue = 0, alpha = 0.2), lty = 0) abline(v = T, col = 'blue', lwd = 2) # ....................................... D .................................... # Find the p-value of the previous test and use it for decision about the null # hypothesis. p.value <- pt(T, n - 1) # volume under the black curve in the blue regions # we DO / DO NOT reject H0: mu = 20 against H1: mu < 20 # ....................................... E .................................... # Create a density plot of a theoretical distribution of the test statistic (student # t-distribution), visualize the test statistic and p-value. xx <- seq(-5, 5, 0.01) density.t <- dt(xx, n - 1) plot(xx, density.t, type = 'l', main = paste("P-value", round(p.value, 3)), xlab = "x", ylab = "f(x)", sub = paste("mu=", round(mu, 2))) rect(-6, 0, T, 1, col = rgb(red = 0, green = 0, blue = 1, alpha = 0.2), lty = 0) abline(v = T, col = 'blue', lwd = 2)