# ==============================================================================
# --------------------------------- SEMINAR 12 ---------------------------------
# ==============================================================================


# ----------------------------------- TASK 1 -----------------------------------


# FILL: set the working directory to the folder that contains "GoldSilver.csv"
# (the file is read below through a relative path):
setwd("")

# load the price data:
prices <- read.csv("GoldSilver.csv")

# convert the time column to the Date class:
prices$time <- as.Date(prices$time)

# extract the columns into standalone vectors used in Tasks 1 and 2:
time <- prices$time
gold <- prices$gold
silver <- prices$silver

# plot the dependence of the gold price and of the silver price on time
# (2 figures):

plot() # FILL
plot() # FILL

# plot the dependence of the logarithm of the gold price and of the logarithm
# of the silver price on time (2 figures):

plot() # FILL
plot() # FILL


# ....................................... A ....................................


# Fit a simple linear regression of the silver price on time.

# HINT: the model is fitted with lm(); open its help page:

help("lm")

m1_silver <- lm(formula = silver ~ time)

# the outer parentheses make the assignment print the summary as well:
(s1_silver <- summary(object = m1_silver))


# ..................................... B ......................................


# DIAGNOSTIC GRAPHS:

# draw the four default lm diagnostic plots in a 2 x 2 grid, then switch the
# device back to a single panel:
par(mfrow = rep(2, 2))
plot(m1_silver, which = c(1, 2, 3, 5))
par(mfrow = rep(1, 2))

# the residuals do not look independent and normally distributed with the
# same variance


# ..................................... C ......................................


# REGRESSION LINE:

# now plot the results: first, access the coefficients beta, then use them to
# compute the fitted silver price for every element of the time vector (write
# the regression line in the form y = b + a * x, where b is the intercept and
# a is the slope):

coeffs <- m1_silver$coefficients
# FILL in the intercept and the slope; as.numeric() turns the dates into plain
# numbers:
silver_line <-  +  * as.numeric(time)

# FILL: plot the data as a line and add the regression line in red:
plot(, , type = 'l')
lines(, , type = 'l', col = 'red')


# ..................................... D ......................................


# QUALITY of the model:

# adjusted R-squared, extracted with $ so that the result is a plain number
# rather than a one-element list:
s1_silver$adj.r.squared

# Akaike information criterion of the fitted model:
AIC(m1_silver)


# ..................................... E ......................................


# PREDICTION:

?as.Date

### 1) manually:

# FILL in the intercept and the slope to evaluate the regression line at
# 2013-04-25:
 +  * as.numeric(as.Date("2013-04-25"))

### 2) built-in function:

# the column of the new data frame must carry the name of the predictor used
# in the model formula (time):
data_new <- data.frame(time = as.Date("2013-04-25"))

predict(m1_silver, data_new)


# ..................................... F ......................................


# Assess the estimates of the parameters beta. Compute (with a built-in
# function) the confidence intervals of the parameters beta. What do they tell
# you?

coef(m1_silver)
confint(m1_silver)

# FOR VOLUNTEERS:

# lower and upper confidence limits of the individual coefficients (kept for
# inspection only, they are not used to draw the band below):
coeffs_L <- confint(m1_silver)[, 1]
coeffs_U <- confint(m1_silver)[, 2]

# Confidence band of the regression line. The band is NOT obtained by plugging
# coeffs_L and coeffs_U into the line equation: the intercept and the slope
# estimates are correlated, so combining their separate limits does not give a
# valid interval for the fitted value. predict() returns the pointwise
# confidence interval of the mean response in the columns fit, lwr and upr.
silver_band <- predict(m1_silver, newdata = data.frame(time = time),
                       interval = "confidence", level = 0.95)

silver_line <- silver_band[, "fit"]
silver_line_L <- silver_band[, "lwr"]
silver_line_U <- silver_band[, "upr"]

# the data, the fitted line (red) and the confidence band (dotted salmon):
plot(time, silver, type = 'l')
lines(time, silver_line, type = 'l', col = 'red')

lines(time, silver_line_L, type = 'l', col = 'salmon', lty = 3)
lines(time, silver_line_U, type = 'l', col = 'salmon', lty = 3)


# ----------------------------------- TASK 2 -----------------------------------


# Create a linear model of the logarithm of the silver price dependent on time.

# natural logarithm of the silver price:
silver_log <- log(silver) 

m2_silver <- lm() # FILL: the model formula

s2_silver <- # FILL: the summary of m2_silver


# DIAGNOSTIC GRAPHS:

par(mfrow = c(2, 2))
plot() # FILL
par(mfrow = c(1, 1))

# the assumptions of the linear regression model seem to be more realistic
# than before


# REGRESSION LINE:

# now plot the results (the logarithm of the silver price and the regression
# line against time):

# HINT: take the coefficients from the model m2_silver:

coeffs <- # FILL

# write the equation of the regression line (intercept + slope * x):

silver_line_2 <-  +  * as.numeric(time)

# FILL: plot the log data as a line and add the regression line in red:
plot(, , type = 'l')
lines(, , type = 'l', col = 'red')

# finally, plot the original silver data together with the final regression
# curve

# HINT: what is the inverse of the logarithmic transformation? Apply it to
# both the logarithm of silver (the result should be the original silver data
# sample) and the regression line

silver_exp <- # FILL

plot(time, silver, type = 'l')
lines(time, silver_exp, type = 'l', col = 'red')


# QUALITY of the model:

s2_silver[] # FILL: select the adjusted R-squared
AIC() # FILL


# PREDICTION:

# HINT: apply the exp transformation to the regression line equation
# evaluated at the date 2013-04-25:




# ----------------------------------- TASK 3 -----------------------------------


# read the data (comma-separated) and discard the first column:
comp <- read.csv("Computers.csv")[-1]

# converting the variables to the appropriate data type
# (columns 4-8 become factors, columns 1-3 and 9 become numeric):

for (i in 4:8) {
  comp[[i]] <- as.factor(comp[[i]])
}

for (i in c(1, 2, 3, 9)) {
  comp[[i]] <- as.numeric(comp[[i]])
}

# better approach (using the lapply() function instead of an explicit loop):

comp[4:8] <- lapply(comp[4:8], as.factor)
comp[c(1, 2, 3, 9)] <- lapply(comp[c(1, 2, 3, 9)], as.numeric)


# ..................................... A ......................................


# Create a linear model of PRICE using the variables SPEED and RAM. Check the
# assumptions and visualize the result.

# MODEL DEFINITION:

model_1 <- lm() # FILL: the model formula and the data

(sum_1 <- summary(model_1))

# DIAGNOSTIC PLOT:

par(mfrow = c(2, 2))
# FILL
par(mfrow = c(1, 1))

# REGRESSION LINE:

xx <- # FILL (presumably the grid of speed values at which the lines are drawn)

# scatter plot of the price against the speed:
plot(comp$speed, comp$price)

# ADD the line for each RAM size separately:

for (i in c(2, 4, 8, 16, 24, 32)) {
  df <- data.frame() # FILL: new data for the RAM size i
  ram_line <- predict() # FILL: model prediction for df
  lines() # FILL: add the predicted line to the plot
}


# ..................................... B ......................................


# Create a full linear regression model of PRICE considering all the variables.

model_2 <- lm(price ~ , data = comp) # FILL: the right-hand side of the formula

(sum_2 <- summary(model_2))

# the diagnostic plots (2 x 2 grid, then back to a single panel):

par(mfrow = c(2, 2))
plot(model_2)
par(mfrow = c(1, 1))


# ..................................... C ......................................


# Select the best model of PRICE based on your data. Use the step() function
# for both the backward stepwise procedure (starting from the full model B)
# and the forward stepwise procedure (starting from model A).


# BACKWARD step-wise procedure:

# without a scope, step() only drops terms; the direction is spelled out here:
model_3 <- step(model_2, direction = "backward")
model_3$anova

(sum_3 <- summary(object = model_3))

par(mfrow = rep(2, 2))
plot(model_3, which = c(1, 2, 3, 5))
par(mfrow = rep(1, 2))


# FORWARD step-wise procedure:

# the scope formula lists every term that may be added to model_1.
# NOTE(review): any predictor of the full model that is missing from this list
# can never enter model_4 - confirm the list is complete.
model_4 <- step(
  model_1,
  scope = ~ speed + hd + ram + screen + cd + multi + premium + trend,
  direction = "forward"
)
model_4$anova

(sum_4 <- summary(object = model_4))

par(mfrow = rep(2, 2))
plot(model_4, which = c(1, 2, 3, 5))
par(mfrow = rep(1, 2))


# ..................................... E ......................................


# Compare the quality of your models using the adjusted R-squared and AIC.

# adjusted R2:

# HINT: pull this out from the summary objects sum_i by $ notation

(results <- c()) # FILL: the adjusted R-squared of each model

# AIC:

# HINT: apply the AIC function to your models: model_i

?AIC

# note: this assignment overwrites the adjusted R-squared values stored in
# results above
(results <- c()) # FILL: the AIC of each model