# Descriptor-based linear models for pKa.
#
# Pipeline:
#   1. Read the descriptor table and the experimental table, merge them.
#   2. Drop incomplete rows and near-constant descriptor columns.
#   3. Inspect correlations.
#   4. Run exhaustive, forward and backward subset selection with
#      leaps::regsubsets() and refit every selected subset with lm().
#
# NOTE: `model` and `models` are overwritten by each selection pass, so after
# the script finishes they hold the results of the backward pass only.

library(leaps) # provides regsubsets(); the script cannot run without it

#' Refit every subset chosen by a regsubsets() search as an ordinary lm().
#'
#' For each subset size the selected terms and the right-hand side of the
#' resulting formula are printed, then the model is fitted.
#'
#' @param selection An object returned by `leaps::regsubsets()`.
#' @param data Data frame containing the response and all candidate columns.
#' @param response Name of the response column.
#' @return A list of `lm` fits, one per row of `summary(selection)$which`.
fit_selected_models <- function(selection, data, response = "pKa") {
  # Hoisted: summary() is loop-invariant. Iterating over the rows that
  # actually exist avoids indexing past the sizes regsubsets() evaluated.
  selected <- summary(selection)$which
  fits <- vector("list", nrow(selected))
  for (i in seq_len(nrow(selected))) {
    chosen <- which(selected[i, ])
    print(chosen)
    # The first selected term is dropped, as in the original script
    # (the "(Intercept)" column when regsubsets() fits an intercept).
    rhs <- paste(names(chosen)[-1], collapse = "+")
    print(rhs)
    fits[[i]] <- lm(as.formula(paste(response, "~", rhs)), data = data)
  }
  fits
}

# ---- Load and merge -------------------------------------------------------

descriptors <- read.csv("ca_desc.csv")
experiment <- read.csv("carboxylic_acids.csv", sep = ";")

# Columns 1 and 3 of `experiment` are presumably NSC and pKa (both names are
# referenced below) -- TODO confirm against the CSV header.
dataset <- merge(
  experiment[, c(1, 3)],
  descriptors[, 1:1445],
  by.x = "NSC",
  by.y = "Name"
)
dataset <- dataset[complete.cases(dataset), ]

if (nrow(dataset) < 2) {
  stop("Need at least two complete rows to compute variances.", call. = FALSE)
}

# ---- Remove near-constant descriptors -------------------------------------

# Screen every column after the key and the response. The original used the
# fixed range 3:1445, which skips trailing columns when the merged frame is
# wider and errors when it is narrower.
candidates <- seq_len(ncol(dataset))[-(1:2)]
variances <- vapply(dataset[candidates], var, numeric(1))
remove <- candidates[which(variances < 0.1)]

for (column_name in names(dataset)[remove]) {
  print(column_name)
}

# Guard: a negative index built from an empty vector selects *no* columns,
# which would silently empty the data frame.
if (length(remove) > 0) {
  dataset <- dataset[, -remove, drop = FALSE]
}

# ---- Correlations ---------------------------------------------------------

# Upper bounds are clamped so the script still runs when fewer columns remain.
print(cor(dataset[, seq(2, min(20, ncol(dataset))), drop = FALSE]))
print(cor(dataset$pKa, dataset[, -c(1, 2)]))

# ---- Exhaustive search on the first block of columns ----------------------

# regsubsets() defaults to nvmax = 8, so at most 8 subset sizes are returned;
# fit_selected_models() iterates over exactly the sizes that exist.
model <- regsubsets(pKa ~ ., data = dataset[seq(2, min(50, ncol(dataset)))])
print(summary(model)$rsq)
print(summary(model)$adjr2)
models <- fit_selected_models(model, dataset)

# ---- Forward selection on all descriptors ---------------------------------

n <- 9
model <- regsubsets(pKa ~ ., data = dataset[-1], method = "forward", nvmax = n)
print(summary(model)$rsq)
print(summary(model)$adjr2)
models <- fit_selected_models(model, dataset)

# ---- Backward selection on all descriptors --------------------------------

# NOTE(review): backward elimination starts from the full model; with more
# descriptors than observations leaps may report linear dependencies.
model <- regsubsets(pKa ~ ., data = dataset[-1], method = "backward", nvmax = n)
print(summary(model)$rsq)
print(summary(model)$adjr2)
models <- fit_selected_models(model, dataset)