# ==============================================================================
# --------------------------------- SEMINAR 7 ----------------------------------
# ==============================================================================


data <- c(19.92, 21.17, 24.87, 22.90, 20.88, 19.43, 19.39, 23.14, 16.99, 24.85,
          20.80, 19.93, 20.28, 27.61, 27.50, 19.63, 20.35, 22.96, 17.57, 22.91,
          21.84, 25.34, 20.13, 18.30, 15.40, 22.88, 21.90, 25.30, 21.86, 22.47)


# ----------------------------------- TASK 1 -----------------------------------


n <- length(data)
alpha <- 0.05 # confidence / significance level

# MLE:

# from the previous lecture formulae:

mu <- mean(data)
sigma <- sqrt(sum((data - mu)^2) / (n - 1))

# CHECKING THE NORMALITY:

xx <- seq(14, 28, by = 0.1)
density.n <- dnorm(xx, mu, sigma)

hist(data, freq = F, main = "Histogram of lengths", ylim = c(0, 0.15), col = "gold")
lines(xx, density.n)

# it seems quite OK (we will see more complex method for checking the normality soon)


# ....................................... A ....................................


# Estimate parameters mu and sigma of the normal distribution N(mu, sigma) using
# the maximum likelihood method. Then construct a two-sided confidence interval
# for parameter mu. Create a visualization of all informations you have computed (plot
# the histogram together with the density of estimated normal distribution and with CI).

# HINT: see slaid 44 from the lecture


# MLE: (maximum likelihood estimation from the previous week)

mu <- mean(data)
sigma <- sqrt(sum((data - mu)^2) / (n - 1))

# QUANTILE:

u.q <- qt(1 - alpha/2, n - 1)

# LOWER AND UPPER BOUND OF CONFIDENCE INTERVAL:

(CI_lower <- mu - u.q * sigma / sqrt(n))
(CI_upper <- mu + u.q * sigma / sqrt(n))

# VISUALIZING THE RESULT:

xx <- seq(14, 28, by = 0.1)
density.n <- dnorm(xx, mu, sigma)

hist(data, freq = F, main = "Two sided CI for mean", col = "antiquewhite")
lines(xx, density.n, lty = 2, lwd = 1.5)

abline(v = c(CI_upper, CI_lower), col = 'red', lwd = 2)

rect(CI_lower, 0, CI_upper, 0.15, col = rgb(red = 1, green = 0, blue = 0, alpha = 0.3))

abline(v = mu, col = 'blue', lwd = 2)
legend("topright", legend = c("mu from MLE", "CI limits"), col = c("blue", "red"), lty = c(1, 1), bty = "n")
box()


# ....................................... B ....................................


# Do the same for the LEFT-SIDED confidence interval.

# HINT: see slaid 45 from the lecture


# MLE:

mu <- mean(data)
sigma <- sqrt(sum((data - mu)^2) / (n - 1))

# QUANTILE:

u.q.R <- qnorm(1 - alpha, mean = 0, sd = 1)

# CONFIDENCE INTERVAL :

CI_right <- mu + u.q.R * sigma / sqrt(n)

# VISUALIZATION:

xx <- seq(14, 28, by = 0.1)
density.n <- dnorm(xx, mu, sigma)

hist(data, freq = F, main = "Left sided CI for mean", col = "antiquewhite")
lines(xx, density.n, lty = 2, lwd = 1.5)
abline(v = CI_right, col = 'red', lwd = 2)
abline(v = mu, col = 'blue', lwd = 2)
rect(14, 0, CI_right, 0.15, col = rgb(red = 1, green = 0, blue = 0, alpha = 0.2), lty = 0)


# ....................................... C ....................................


# Do the same for the RIGHT-SIDED confidence interval.

# HINT: see slaid 45 from the lecture

# MLE:

mu <- mean(data)
sigma <- sqrt(sum((data - mu)^2) / (n - 1))

# QUANTILE:

u.q.R <- qnorm(1 - alpha)

# CONFIDENCE INTERVAL:

CI_left <- mu - u.q.R * sigma / sqrt(n)

# VISUALIZATION:

xx <- seq(14, 28, by = 0.1)
density.n <- dnorm(xx, mu, sigma)

hist(data, freq = FALSE, main = "Right sided CI for mean", col = "antiquewhite")
lines(xx, density.n, lty = 2, lwd = 1.5)
abline(v = CI_left, col = 'red', lwd = 2)
abline(v = mu, col = 'blue', lwd = 2)
rect(CI_left, 0, 28, 0.15, col = rgb(red = 1, green = 0, blue = 0, alpha = 0.2), lty = 0)


# ----------------------------------- TASK 2 -----------------------------------


# Work with the same dataset. Test a two sided hypothesis about the expected value
# mu. Use critical value approach and p-value approach. Interpret the results.

mu0 <- 20
alpha <- 0.05


# ....................................... A ....................................


# Test the null hypothesis H0: mu = 20, against the alternative H1: mu != 20
# at the significance level alpha = 0.05. Compute the value of the test statistic
# and find the critical region. What is your conclusion?

# HINT: use slaid 44 from the lecture


# TEST STATISTIC:

T0 <- sqrt(n) * (mu - mu0) / sigma

# CRITICAL REGION:
# (quantiles of the student t-distribution):

CR_1 <- - qt(1 - alpha / 2, n - 1)
CR_2 <- qt(1 - alpha / 2, n - 1) 

c(- Inf, CR_1)
c(CR_2, Inf)
T0

# CONCLUSION:

# realization of a test statistic T0 lies inside a critical region W
# we REJECT the null hypothesis H0 at the significance level 0.05


# ....................................... B ....................................


# VOLUNTARY:
# Create a density plot of a theoretical distribution of the test statistic
# (student t-distribution), visualize the critical region and the test statistic.

xx <- seq(-5, 5, 0.01)
density.t <- dt(xx, n - 1)

plot(xx, density.t, type = 'l', main = "Critical region",
     xlab = "x", ylab = "f(x)", sub = paste("mu=", round(mu, 2), "(MLE)"))
rect(-5, 0, CR_1, 1, col = rgb(red = 1, green = 0, blue = 0, alpha = 0.2), lty = 0)
rect(CR_2, 0, 5, 1, col = rgb(red = 1, green = 0, blue = 0, alpha = 0.2), lty = 0)
abline(v = T0, col = 'blue', lwd = 2)


# ....................................... C ....................................


# Find the p-value of the previous test and use it for decision about the null
# hypothesis.

# HINT: use slaid 44 from the lecture


p.value <- 2 * (1 - pt(T0, n - 1))

# p-value is "THE LOWEST SIGNIFICANCE LEVEL ALPHA, for which we would REJECT H0".
# If p-value is LESSER than alpha = 0.05, we REJECT H0.
# If p-value is GREATER than alpha = 0.05, we DO NOT REJECT H0.

# CONCLUSION:

# we DO reject H0: mu = 20 against H1: mu != 20


# ....................................... D ....................................


# VOLUNTARY:
# Create a density plot of a theoretical distribution of the test statistic (student
# t-distribution), visualize the test statistic and p-value.

xx <- seq(-5, 5, 0.01)
density.t <- dt(xx, n - 1)

plot(xx, density.t, type = 'l', main = paste("P-value", round(p.value, 3)),
     xlab = "x", ylab = "f(x)", sub = paste("mu=", round(mu, 2)))
rect(-5, 0, -abs(T), 1, col = rgb(red = 0, green = 0, blue = 1, alpha = 0.2), lty = 0)
rect(abs(T0), 0, 5, 1, col = rgb(red = 0, green = 0, blue = 1, alpha = 0.2), lty = 0)
abline(v = T0, col = 'blue', lwd = 2)