# 1. Load the "employment.csv" data set.
employ <- read.csv("employment.csv")

# 2. Plot the relationship between the variables.
#    Outcome (dependent): Employment; predictor (independent): Inflation.
plot(Employment ~ Inflation, data = employ, pch = 16, col = "darkblue")

# 3. Fit a simple linear regression model between the variables and
#    draw the best-fit regression line.
#    Using a formula with `data =` (instead of employ$... terms) keeps the
#    model self-describing and makes predict()/coef() output readable.
model <- lm(Employment ~ Inflation, data = employ)
abline(model, col = "red", lwd = 3)

# 4. Check the main assumptions of the model with the four diagnostic plots:
#    Plot 1. Linearity of the data, independence of residuals
#    Plot 2. Normality of residuals (Q-Q plot)
#    Plot 3. Constant variance of residuals
#    Plot 4. No influential outliers
par(mfrow = c(2, 2))
plot(model)
par(mfrow = c(1, 1))  # restore the default single-panel layout

# 5. Check "normality of residuals" (histogram + normality tests) and
#    "zero mean of residuals"; also consult the Q-Q plot from step 4.
hist(residuals(model), breaks = 20)
mean(residuals(model))
# NOTE(review): ks.test with mean/sd estimated from the same sample is only
# approximate (a Lilliefors correction would be exact); Shapiro-Wilk below
# is the more reliable small-sample normality test.
ks.test(residuals(model), "pnorm",
        mean(residuals(model), na.rm = TRUE),
        sd(residuals(model), na.rm = TRUE))
shapiro.test(residuals(model))

# 6. Parameters of the regression: intercept, slope, 95% confidence intervals.
summary(model)
confint(model)

# 7. Criteria for model evaluation (Adjusted R-squared, AIC).
summary(model)
AIC(model)

# 8. Conclusion: the assumptions are met. The model and the independent
#    variable (inflation) are significant; inflation explains 99.5% of the
#    employment variability. The estimate of the beta1 coefficient equals
#    -1.65 (95% CI [-1.68; -1.63]), the intercept alpha equals 0.03.
#    Y = 0.03 - 1.65 * X (for each one-unit shift of inflation,
#    employment decreases by 1.65).

# 9. Repeat all the steps for the "bpa_age_data.csv" data set.
# 9. BPA concentration vs. age ("bpa_age_data.csv").
#    Outcome: BPA_Concentration; predictor: Age.
bpa <- read.csv("bpa_age_data.csv")
plot(BPA_Concentration ~ Age, data = bpa, pch = 16, col = "darkblue")

# Fit the model via a data-aware formula (clearer than bpa$... terms).
model <- lm(BPA_Concentration ~ Age, data = bpa)
abline(model, col = "red", lwd = 3)

# Four diagnostic plots: linearity/independence, normality (Q-Q),
# constant variance, influential outliers.
par(mfrow = c(2, 2))
plot(model)
par(mfrow = c(1, 1))  # restore the default single-panel layout

# Normality of residuals (histogram + tests) and zero mean of residuals.
hist(residuals(model), breaks = 20)
mean(residuals(model))
# NOTE(review): ks.test with estimated parameters is only approximate;
# Shapiro-Wilk is the more reliable test here.
ks.test(residuals(model), "pnorm",
        mean(residuals(model), na.rm = TRUE),
        sd(residuals(model), na.rm = TRUE))
shapiro.test(residuals(model))

# Parameters, confidence intervals, and evaluation criteria.
summary(model)
confint(model)
AIC(model)

# Conclusion: the assumptions are met — a very good example of that. The
# model and the independent variable (age) are significant; age explains
# 31% of the BPA concentration variability. beta1 = 0.19
# (95% CI [0.14; 0.25]), intercept alpha = 5.10.
# Y = 5.10 + 0.19 * X (for each one-year shift of age, the BPA
# concentration increases by 0.19 ug/L).

# 10. Repeat all the steps for the "employment_1.csv" data set.
employ <- read.csv("employment_1.csv")
plot(Employment ~ Inflation, data = employ, pch = 16, col = "darkblue")

model <- lm(Employment ~ Inflation, data = employ)
abline(model, col = "red", lwd = 3)

par(mfrow = c(2, 2))
plot(model)
par(mfrow = c(1, 1))

hist(residuals(model), breaks = 20)
mean(residuals(model))
ks.test(residuals(model), "pnorm",
        mean(residuals(model), na.rm = TRUE),
        sd(residuals(model), na.rm = TRUE))
shapiro.test(residuals(model))

summary(model)
confint(model)
AIC(model)

# Conclusion: the assumptions are NOT met — we can't use linear regression
# to quantify the relationship between these variables and should use
# other methods.

# 11. Repeat all the steps for the "age.csv" data set.
# 11. Employment vs. age ("age.csv").
#     Outcome: Employment; predictor: Age.
age <- read.csv("age.csv")
plot(Employment ~ Age, data = age, pch = 16, col = "darkblue")

# Fit the model via a data-aware formula (clearer than age$... terms).
model <- lm(Employment ~ Age, data = age)
abline(model, col = "red", lwd = 3)

# Four diagnostic plots: linearity/independence, normality (Q-Q),
# constant variance, influential outliers.
par(mfrow = c(2, 2))
plot(model)
par(mfrow = c(1, 1))  # restore the default single-panel layout

# Normality of residuals (histogram + tests) and zero mean of residuals.
hist(residuals(model), breaks = 20)
mean(residuals(model))
# NOTE(review): ks.test with estimated parameters is only approximate;
# Shapiro-Wilk is the more reliable test here.
ks.test(residuals(model), "pnorm",
        mean(residuals(model), na.rm = TRUE),
        sd(residuals(model), na.rm = TRUE))
shapiro.test(residuals(model))

# Parameters, confidence intervals, and evaluation criteria.
summary(model)
confint(model)
AIC(model)

# Conclusion: the assumptions are met. However, both the model and the
# independent variable (age) are not statistically significant, so we
# can't use this model.