# WOMEN ----
# Simple linear regression of Weight on Height for the data in women_data.csv.
women <- read.csv("women_data.csv")

# Scatter plot of the raw data. The formula + `data =` form labels the axes
# "Height" / "Weight" instead of "women$Height" / "women$Weight".
plot(Weight ~ Height, data = women, pch = 16, col = "darkblue")

# Fit with `data =` rather than `women$...` inside the formula, so the slope
# is named "Height" and predict(model, newdata = ...) works as expected.
model <- lm(Weight ~ Height, data = women)
abline(model, col = "red", lwd = 3)

# Standard lm diagnostic plots in a 2 x 2 grid. par() returns the previous
# settings, which are restored afterwards rather than assuming c(1, 1).
op <- par(mfrow = c(2, 2))
plot(model)
par(op)

# Residual checks: extract the residuals once and reuse them.
res <- residuals(model)
hist(res, breaks = 20, main = "Histogram of residuals")
mean(res) # Interpret the number (OLS with an intercept gives a mean of ~0)
# NOTE: the mean and sd given to ks.test() are estimated from the same
# residuals, so the one-sample KS p-value is only approximate (it tends to be
# too large). Treat it as a rough check and rely on shapiro.test() below.
ks.test(res, "pnorm", mean = mean(res), sd = sd(res))
shapiro.test(res)

# Coefficients, confidence intervals and information criterion of the fit.
summary(model)
confint(model)
AIC(model)

# The model assumptions are met. Both the overall model and the predictor (height) are statistically significant.
# Height explains 59% of the variability in weight (R^2 = 0.59).
# The estimated slope β1 is 0.77 (95% CI [0.65; 0.90]); the estimated intercept α is -50.53.
# Fitted line: Y = -50.53 + 0.77 * X, i.e. for each 1 cm increase in height the expected weight increases by 0.77 kg.


# EMPLOYMENT ----
# Simple linear regression of Employment on Inflation for employment_2.csv.
employ <- read.csv("employment_2.csv")

# Scatter plot of the raw data. The formula + `data =` form labels the axes
# with the plain column names instead of "employ$...".
plot(Employment ~ Inflation, data = employ, pch = 16, col = "darkblue")

# Fit with `data =` rather than `employ$...` inside the formula, so the slope
# is named "Inflation" and predict(model, newdata = ...) works as expected.
model <- lm(Employment ~ Inflation, data = employ)
abline(model, col = "red", lwd = 3)

# Standard lm diagnostic plots in a 2 x 2 grid. par() returns the previous
# settings, which are restored afterwards rather than assuming c(1, 1).
op <- par(mfrow = c(2, 2))
plot(model)
par(op)

# Residual checks: extract the residuals once and reuse them.
res <- residuals(model)
hist(res, breaks = 20, main = "Histogram of residuals")
mean(res) # OLS with an intercept gives a residual mean of ~0
# NOTE: the mean and sd given to ks.test() are estimated from the same
# residuals, so the one-sample KS p-value is only approximate (it tends to be
# too large). Treat it as a rough check and rely on shapiro.test() below.
ks.test(res, "pnorm", mean = mean(res), sd = sd(res))
shapiro.test(res)

# Coefficients, confidence intervals and information criterion of the fit.
summary(model)
confint(model)
AIC(model)


# Work with outliers ----
# Scatter plot before filtering, for comparison with the plot after it.
plot(Employment ~ Inflation, data = employ, pch = 16, col = "darkblue")
library(tidyverse)
# Keep only the rows with Inflation below 4 (filter() also drops rows where
# Inflation is NA). This overwrites `employ`; re-read the CSV to get the
# unfiltered data back.
employ <- employ %>%
  filter(Inflation < 4)
plot(Employment ~ Inflation, data = employ, pch = 16, col = "darkblue")

# Refit on the filtered data (this overwrites the previous `model`). Using
# `data =` instead of `employ$...` in the formula names the slope "Inflation"
# and keeps predict(model, newdata = ...) usable.
model <- lm(Employment ~ Inflation, data = employ)
abline(model, col = "red", lwd = 3)

# Standard lm diagnostic plots in a 2 x 2 grid. par() returns the previous
# settings, which are restored afterwards rather than assuming c(1, 1).
op <- par(mfrow = c(2, 2))
plot(model)
par(op)

# Residual checks: extract the residuals once and reuse them.
res <- residuals(model)
hist(res, breaks = 20, main = "Histogram of residuals")
mean(res) # OLS with an intercept gives a residual mean of ~0
# NOTE: the mean and sd given to ks.test() are estimated from the same
# residuals, so the one-sample KS p-value is only approximate (it tends to be
# too large). Treat it as a rough check and rely on shapiro.test() below.
ks.test(res, "pnorm", mean = mean(res), sd = sd(res))
shapiro.test(res)

# Coefficients, confidence intervals and information criterion of the refit.
summary(model)
confint(model)
AIC(model)