# ==============================================================================
# --------------------------------- SEMINAR 12 ---------------------------------
# ==============================================================================
#
# Linear regression on two data sets:
#   * GoldSilver.csv - columns `time`, `gold`, `silver` (tasks 1 and 2)
#   * Computers.csv  - computer prices and hardware parameters (task 3)
#
# Run the script from the directory that contains both CSV files (e.g. open
# the project there) instead of calling setwd() inside the script.

# Draw the four standard lm diagnostic plots in a 2 x 2 grid and restore the
# previous graphical parameters afterwards, even if plotting fails.
plot_diagnostics <- function(model) {
  old_par <- par(mfrow = c(2, 2))
  on.exit(par(old_par), add = TRUE)
  plot(model)
  invisible(model)
}

# ----------------------------------- TASK 1 -----------------------------------

prices <- read.csv("GoldSilver.csv")
prices$time <- as.Date(prices$time)

time <- prices$time
gold <- prices$gold
silver <- prices$silver

# plot the dependence of the gold price and the silver price on the time
# (2 figures):
plot(time, gold, type = "l")
plot(time, silver, type = "l")

# plot the dependence of the logarithm of the gold price and the silver price
# on the time (2 figures):
plot(time, log(gold), type = "l")
plot(time, log(silver), type = "l")

# ....................................... A ....................................
# Create a linear model of the silver price dependent on the time.
# HINT: use the lm() function, see ?lm

m1_silver <- lm(silver ~ time)
(s1_silver <- summary(m1_silver))

# ..................................... B ......................................
# DIAGNOSTIC GRAPHS:

plot_diagnostics(m1_silver)
# residuals do not look independent and normally distributed with the same
# variance

# ..................................... C ......................................
# REGRESSION LINE:
# now plot the results: firstly, access the coefficients beta, then compute the
# values of price for the time vector using your model (write the prescription
# for the regression line in the form y = a*x + b) and the coefficients beta:

coeffs <- coef(m1_silver)
# as.numeric() turns a Date into a day count, the scale lm() fitted `time` on
silver_line <- coeffs[1] + coeffs[2] * as.numeric(time)

plot(time, silver, type = "l")
lines(time, silver_line, col = "red")

# ..................................... D ......................................
# QUALITY of the model:

s1_silver$adj.r.squared
AIC(m1_silver)

# ..................................... E ......................................
# PREDICTION (see ?as.Date):

target_date <- as.Date("2013-04-25")

### 1) manually ([[ ]] drops the "(Intercept)" name from the result):
coeffs[[1]] + coeffs[[2]] * as.numeric(target_date)

### 2) built-in function:
data_new <- data.frame(time = target_date)
predict(m1_silver, data_new)

# ..................................... F ......................................
# Assess the estimate of parameters beta. Compute (by built-in function)
# confidence intervals of parameters beta. What does it tell you?

coef(m1_silver)
conf_int <- confint(m1_silver)
conf_int

# FOR VOLUNTEERS:
# Lines built from the lower and from the upper limits of both coefficients.
# NOTE(review): this is an illustration only, not a confidence band for the
# regression line; predict(m1_silver, interval = "confidence") gives that.
coeffs_L <- conf_int[, 1]
coeffs_U <- conf_int[, 2]

silver_line <- coeffs[1] + coeffs[2] * as.numeric(time)
silver_line_L <- coeffs_L[1] + coeffs_L[2] * as.numeric(time)
silver_line_U <- coeffs_U[1] + coeffs_U[2] * as.numeric(time)

plot(time, silver, type = "l")
lines(time, silver_line, col = "red")
lines(time, silver_line_L, col = "salmon", lty = 3)
lines(time, silver_line_U, col = "salmon", lty = 3)

# ----------------------------------- TASK 2 -----------------------------------
# Create a linear model of the logarithm of the silver price dependent on the
# time.

silver_log <- log(silver)
m2_silver <- lm(silver_log ~ time)
(s2_silver <- summary(m2_silver))

# DIAGNOSTIC GRAPHS:
plot_diagnostics(m2_silver)
# the assumptions of the linear regression model seem to be more realistic
# than before

# REGRESSION LINE:
# now plot the results (the logarithm of silver price and the line against the
# time):
# HINT: choose the coefficients from the model m2_silver
# (this overwrites the task 1 coefficients stored in `coeffs`):
coeffs <- coef(m2_silver)

# write the regression line prescription:
silver_line_2 <- coeffs[1] + coeffs[2] * as.numeric(time)

plot(time, silver_log, type = "l")
lines(time, silver_line_2, col = "red")

# finally, plot the original silver data together with the final regression
# curve
# HINT: what is the inverse transformation of the logarithmic one? Apply this
# transformation to both: logarithm of silver (the result should be the original
# silver data sample) and the regression line
silver_exp <- exp(silver_line_2)

plot(time, silver, type = "l")
lines(time, silver_exp, col = "red")

# QUALITY of the model:
# NOTE(review): the response here is log(silver), so these values are on a
# different scale than the ones from m1_silver.
s2_silver$adj.r.squared
AIC(m2_silver)

# PREDICTION:
# HINT: apply the exp transformation to the regression line prescription
# evaluated on the date 2013-04-25:
exp(coeffs[[1]] + coeffs[[2]] * as.numeric(target_date))
exp(predict(m2_silver, data_new))

# ----------------------------------- TASK 3 -----------------------------------

comp <- read.csv("Computers.csv")
# drop the first column (presumably a row index written with the CSV - verify)
comp <- comp[, -1]

# converting the variables to the appropriate data type (using lapply()):
# NOTE(review): assumes columns 4-8 are ram, screen, cd, multi, premium and
# columns 1, 2, 3, 9 are numeric measurements - confirm against the file.
factor_cols <- 4:8
numeric_cols <- c(1, 2, 3, 9)
comp[, factor_cols] <- lapply(comp[, factor_cols], as.factor)
comp[, numeric_cols] <- lapply(comp[, numeric_cols], as.numeric)

# ..................................... A ......................................
# Create a linear model using the variables SPEED and RAM. Check the assumptions
# and visualize the result.

# MODEL DEFINITION:
model_1 <- lm(price ~ speed + ram, data = comp)
(sum_1 <- summary(model_1))

# DIAGNOSTIC PLOT:
plot_diagnostics(model_1)

# REGRESSION LINE:
xx <- seq(min(comp$speed), max(comp$speed), length.out = 100)
plot(comp$speed, comp$price)

# ADD the line for each RAM size separately:
ram_sizes <- c(2, 4, 8, 16, 24, 32)
ram_cols <- seq_along(ram_sizes) + 1
for (k in seq_along(ram_sizes)) {
  # keep the factor levels of the fitted data so predict() accepts the value
  df <- data.frame(
    speed = xx,
    ram = factor(ram_sizes[k], levels = levels(comp$ram))
  )
  ram_line <- predict(model_1, df)
  lines(xx, ram_line, col = ram_cols[k], lwd = 2)
}
legend("topleft", legend = paste("RAM", ram_sizes), col = ram_cols, lwd = 2)

# ..................................... B ......................................
# Create a full linear regression model considering all the variables.

model_2 <- lm(price ~ ., data = comp)
(sum_2 <- summary(model_2))

# the diagnostic plots:
plot_diagnostics(model_2)

# ..................................... C ......................................
# Select the best model of PRICE based on your data. Use step() function for
# both backward stepwise procedure (from the full model B) and forward stepwise
# procedure (from model A).

# BACKWARD step-wise procedure:
model_3 <- step(model_2, direction = "backward")
model_3$anova
(sum_3 <- summary(model_3))

plot_diagnostics(model_3)

# FORWARD step-wise procedure:
# NOTE(review): the scope lists fewer candidate terms than the full model may
# contain - confirm that the omission is intended.
model_4 <- step(
  model_1,
  direction = "forward",
  scope = ~ speed + hd + ram + screen + cd + multi + premium + trend
)
model_4$anova
(sum_4 <- summary(model_4))

plot_diagnostics(model_4)

# ..................................... E ......................................
# Compare the quality of your models using the adjusted R-squared and AIC.

# adjusted R2:
# HINT: pull this out from the summary objects sum_i by $ notation
(results_adj_r2 <- c(
  model_1 = sum_1$adj.r.squared,
  model_2 = sum_2$adj.r.squared,
  model_3 = sum_3$adj.r.squared,
  model_4 = sum_4$adj.r.squared
))

# AIC:
# HINT: apply the AIC function to your models model_i, see ?AIC
(results_aic <- c(
  model_1 = AIC(model_1),
  model_2 = AIC(model_2),
  model_3 = AIC(model_3),
  model_4 = AIC(model_4)
))