library(odbc)
library(DBI)
library(dplyr)
library(ggplot2)
library(tidyr)
library(corrr)
library(modeldb)
library(tidypredict)

# Database connection ----
# NOTE(review): the connection is never closed in this part of the script;
# call dbDisconnect(con) once all work that needs `con` is finished.
con <- dbConnect(
  odbc(),
  Driver = "ODBC Driver 17 for SQL Server",
  Server = "REX\\SQLEXPRESS",
  Database = "VYUKA_DATABAZE",
  Trusted_Connection = "yes",
  encoding = "CP1250"
)

# Correlation analysis on the "sectors" table ----
sectors <- tbl(con, "sectors")

# Drop rows whose country matches "Euro%", remove the country column and
# correlate the remaining columns.
# Presumably relies on dbplyr passing the unknown infix operator through to
# SQL as NOT LIKE -- verify against the dbplyr translation docs.
my_cor <- sectors %>%
  filter(country %not like% "Euro%") %>%
  select(-country) %>%
  correlate(quiet = TRUE)

my_cor %>% shave()
my_cor %>% rplot()
my_cor %>% network_plot()
my_cor %>%
  focus(Industry) %>%
  arrange(desc(Industry))

# Random 25 % / 75 % split of the "Computers" table ----
computers <- tbl(con, "Computers")

# The driver may hand back the SQL count as a 64-bit integer class; coerce to
# a plain integer so rep() and seq_len() below behave predictably.
n_total <- as.integer(pull(count(computers)))

# Size of the 25 % subset. The product is rounded (the original rounded the
# count first and then truncated the product).
n <- as.integer(round(n_total * 0.25))

# Logical mask with exactly n TRUE values in random positions.
index <- sample(c(rep(TRUE, n), rep(FALSE, n_total - n)))

# Assumes the `id` column holds the values 1..n_total -- TODO confirm.
test_id <- seq_len(n_total)[index]
train_id <- seq_len(n_total)[!index]

# Build the modelling table for the given ids: keep the response and the
# predictors, log-transform the price and encode `cd` as 0/1.
# `price` and `cd` must be selected before they are mutated; the original
# selected only speed, hd and ram and then referenced the dropped columns.
prepare_sample <- function(data, ids) {
  data %>%
    filter(id %in% !!ids) %>%
    select(price, speed, hd, ram, cd) %>%
    mutate(price = log(price), cd = as.integer(cd == "yes"))
}

# Fit the model on the 25 % subset ----
# NOTE(review): the model is fitted on the subset named `test_sample` and
# later scored on `train_sample`; the names are kept as in the original.
test_sample <- prepare_sample(computers, test_id)
count(test_sample)

# `price` is the response; `n` is passed as the second positional argument
# (the sample size, per the modeldb documentation).
model <- test_sample %>%
  linear_regression_db(price, n)
pm <- as_parsed_model(model)
tidypredict_fit(pm)

# Evaluate on the subset used for fitting ----
result <- test_sample %>%
  tidypredict_to_column(pm) %>%
  select(price, fit, everything())

# Residuals (fit - price) against fitted values.
result %>%
  ggplot(data = .) +
  geom_point(aes(x = fit, y = fit - price))

# R^2 as the squared correlation between price and fit.
result %>%
  select(price, fit) %>%
  correlate(quiet = TRUE) %>%
  filter(!is.na(fit)) %>%
  mutate(r2 = fit^2) %>%
  select(r2)

# R^2 as 1 - SSE / SST.
mean_price <- pull(result %>% summarise(mean(price, na.rm = TRUE)))
result %>%
  select(price, fit) %>%
  mutate(e = (fit - price)^2, t = (price - mean_price)^2) %>%
  summarise(s1 = sum(e, na.rm = TRUE), s2 = sum(t, na.rm = TRUE)) %>%
  mutate(r_2 = 1 - s1 / s2)

# Evaluate on the remaining 75 % ----
train_sample <- prepare_sample(computers, train_id)

result <- train_sample %>%
  tidypredict_to_column(pm) %>%
  select(price, fit, everything())

# R^2 as the squared correlation between price and fit.
result %>%
  select(price, fit) %>%
  correlate(quiet = TRUE) %>%
  filter(!is.na(fit)) %>%
  mutate(r2 = fit^2) %>%
  select(r2)

# R^2 as 1 - SSE / SST.
mean_price <- pull(result %>% summarise(mean(price, na.rm = TRUE)))
result %>%
  select(price, fit) %>%
  mutate(e = (fit - price)^2, t = (price - mean_price)^2) %>%
  summarise(s1 = sum(e, na.rm = TRUE), s2 = sum(t, na.rm = TRUE)) %>%
  mutate(r_2 = 1 - s1 / s2)