# ==============================================================================
# --------------------------------- HOMEWORK 1 ---------------------------------
# ==============================================================================

library(MASS)
library(corrplot)
library(ggfortify)

# Keep only the rows of `df` for which `keep` is TRUE.
# Rows where `keep` is NA are dropped instead of being turned into all-NA rows,
# which is what plain logical indexing `df[keep, ]` would do.
keep_rows <- function(df, keep) {
  df[!is.na(keep) & keep, , drop = FALSE]
}

# Coerce a column to numeric by value.
# A factor is converted through its labels, because as.numeric() on a factor
# returns the internal level codes rather than the numbers it displays.
# Entries that cannot be parsed become NA (with R's usual coercion warning).
to_numeric <- function(x) {
  if (is.factor(x)) {
    x <- as.character(x)
  }
  as.numeric(x)
}

# ----------------------------------- TASK 1 -----------------------------------
# Load the raw data and remove implausible observations.
# NOTE(review): the .RData file is presumed to define a data frame named `data`
# with columns money_spent, age, web_visits, mail_ads, shop_visits -- confirm.

load("customer_behaviour.RData")

# Coerce the visit counts to numeric BEFORE range-checking them, so that the
# comparisons below are numeric for both columns.
data$web_visits <- to_numeric(data$web_visits)
data$shop_visits <- to_numeric(data$shop_visits)

data <- keep_rows(data, data$money_spent > 0)
data <- keep_rows(data, data$age > 0)
data <- keep_rows(data, data$web_visits >= 0)
data <- keep_rows(data, data$mail_ads >= 0 & data$mail_ads < 1000)
data <- keep_rows(data, data$shop_visits >= 0)

# ----------------------------------- TASK 2 -----------------------------------
# Descriptive plots for money_spent and summary statistics for age.

# Indicator coded as 2 (first column <= 5000) or 3 (first column > 5000); the
# codes are reused as colour values in the PCA plots of Task 4.
# NOTE(review): relies on the first column being money_spent -- confirm.
data$big <- (data[, 1] > 5000) + 2

boxplot(data$money_spent, col = "yellow")
boxplot(
  data$money_spent ~ data$big,
  col = 2:3,
  xlab = "big money indicator",
  ylab = "money spent",
  main = "boxplot"
)

# Density-scaled histogram with a kernel density estimate drawn on top.
d <- density(data$money_spent)
hist(
  data$money_spent,
  col = "red",
  freq = FALSE,
  xlab = "money spent",
  main = "histogram"
)
box()
lines(d, col = "black", lwd = 2)

mean(data$age)
quantile(data$age, probs = c(0.25, 0.5, 0.75))
IQR(data$age)
var(data$age)

# ----------------------------------- TASK 3 -----------------------------------
# Correlation structure of the data.

# NOTE(review): `data` already contains the derived `big` column at this point,
# so it is part of the correlation matrix (and of `sum_diag`) -- confirm that
# this is intended.
est_cor <- cor(data)
sum_diag <- sum(diag(est_cor))
corrplot(est_cor, method = "circle")

cor(data$money_spent, data$age)
cor(data$money_spent, data$web_visits)
cor(data$money_spent, data$shop_visits)
cor(data$money_spent, data$mail_ads)

# ----------------------------------- TASK 4 -----------------------------------
# Principal component analysis on columns 2 to 5, scaled to unit variance.

# The argument is spelled `scale.` in prcomp(); `scale` only worked through
# partial argument matching.
data.pca <- prcomp(data[, 2:5], scale. = TRUE)
(s <- summary(data.pca))
s$rotation

# Score plots coloured by the `big` indicator; the second one adds the
# loading vectors and their labels.
autoplot(data.pca, data = data[, 1:5], colour = data$big)
autoplot(
  data.pca,
  data = data[, 1:5],
  colour = data$big,
  loadings = TRUE,
  loadings.label = TRUE
)