# We will create two functions(). One will use names of response (target) variable and names of features and returns an as.formula() object. Second will run the algorithm.
gen.fm = function(dep='price',x=features) {
  # This will create the 'dep~1' string - which is y = beta_0 model
  # paste() is a useful function... it links strings
  spec = paste(dep,'~1',sep='')
  # If there are no features - the vector 'x' has no elements - return
  if (is.null(x)) return(as.formula(spec))
  # Number of features
  NV = length(x)
  # Loop over all features - we add one feature at a time
  for (v in 1:NV) spec = paste(spec,'+',x[v],sep='')
  return(as.formula(spec))
}
# Now the algorithm with backward elimination:
bwelim <- function(dt=ocr,pt=0.1,dep='price',features=features) {
  library(sandwich)
  library(lmtest)
  pval  <- 1
  # We start by the initial full model
  spec  <- gen.fm(dep=dep,x=features)
  model <- lm(spec,data=dt)
  while (pval > pt) {
    estim <- coeftest(model,vcov=vcovHC(model,type='HC1'))
    idx   <- which(estim[,4] > pt)
    if (length(idx)>0) {
      to.remove <- rownames(estim)[which(estim[,4] == max(estim[,4]))]
      features  <- features[-which(features == to.remove)]
      new_spec  <- gen.fm(dep=dep,x=features)
      model     <- lm(new_spec,data=dt)
    } else {
      pval  <- pt - 0.01
    }
    #print(features)
  }
  return(model)
}
# We will create a function() that will run the algorithm.
fwelim <- function(dt=ocr,pt=0.1,dep='price',features=features) {
  # Number of features
  NV <- length(features)
  # We start by the baseline model with only a constant
  spec <- gen.fm(dep=dep,x=NULL)
  # Estimate model
  m <- lm(spec,data=dt)
  # Here we will store variables that 'pass' the test
  good.features <- c()
  # In forward elimination we need to start to add the 'best' variable at a time.
  # We loop over all variables - initially (v=1) we have NV variables to consider. Finally, only one is left.
  for (v in 1:NV) {
    # Remaining number of features
    NF <- length(features)
    # Store pvalues
    mat.pvals <- matrix(NA,nrow=NF,ncol=1)
    # We need to try to add one-at-a-time all remaining features
    for (f in 1:NF) {
      # Generate specification
      spec  <- gen.fm(dep=dep,x=c(good.features,features[f]))
      # Estimate model
      model <- lm(spec,data=dt)
      # Find p-values
      estim <- coeftest(model,vcov=vcovHC(model,type='HC1'))
      # Store p-values
      mat.pvals[f,1] <- estim[dim(estim)[1],4]
    }
    # Check if there is at least one pvalue lower as threshold - if not - we are done - Algo stops
    if (length(which(mat.pvals < pt))==0) break
    # Which variable has lowest p-value?
    add.var       <- features[(which(mat.pvals==min(mat.pvals)))][1]
    # We will add this variable to the set of good features
    good.features <- c(good.features,add.var)
    # We will remove this variable from the set of remaining features
    features      <- features[-which(features==add.var)]
    # Now we repeat the procedure with remaining features until we:
    # - either have only features that have a p-value above threshold
    # - or we are with one left
  }
  spec <- gen.fm(dep=dep,x=good.features)
  return(lm(spec,data=dt))
}
# We will create a function() that will run the algorithm.
bielim <- function(dt=ocr,pt=0.1,dep='price',features=features) {
  # Number of features
  NV <- length(features)
  # We start by the baseline model with only a constant
  spec <- gen.fm(dep=dep,x=NULL)
  # Estimate model
  m <- lm(spec,data=dt)
  # Here we will store variables that 'pass' the test
  good.features <- c()
  # Here we will store variable that should be tested
  test.features <- features
  # In forward elimination we need to start to add the 'best' variable at a time.
  # We loop over all variables - initially (v=1) we have NV variables to consider. Finally, only one is left.
  pvalue <- 1
  while (pvalue > pt) {
    # Remaining number of features
    NF <- length(test.features)
    
    # Forward elimination first
    # Store pvalues
    mat.pvals <- matrix(NA,nrow=NF,ncol=1)
    rownames(mat.pvals) <- test.features
    # We need to try to add one-at-a-time all remaining features
    for (f in 1:NF) {
      # Generate specification
      spec  <- gen.fm(dep=dep,x=c(good.features,test.features[f]))
      # Estimate model
      model <- lm(spec,data=dt)
      # Find p-values
      estim <- coeftest(model,vcov=vcovHC(model,type='HC1'))
      # Store p-values
      mat.pvals[f,1] <- estim[dim(estim)[1],4]
    }
    # Check if there is at least one pvalue lower as threshold - if not - we are done - Algo stops
    if (length(which(mat.pvals < pt))==0) break
    # Which variable has lowest p-value?
    add.var       <- rownames(mat.pvals)[which(mat.pvals==min(mat.pvals))]
    # We will add this variable to the set of good features
    good.features <- c(good.features,add.var)
    # We will remove this variable from the set of remaining features
    test.features <- features[-which(features %in% good.features)]
    
    # Backward elimination second
    tmp <- bwelim(dt=ocr,pt=pt,dep=dep,features=good.features)
    good.features <- names(tmp$coefficients[-1])
    test.features <- features[-which(features%in%good.features)]
    
  }
  spec <- gen.fm(dep=dep,x=good.features)
  return(lm(spec,data=dt))
}
csr.method <- function(spec,train=ocr,test=oct,q=3,cv=10,trim=0.2,nc=14) {
  
  library(foreach)
  library(doParallel)
  
  # Overall number of observations
  TT = dim(train)[1]
  # Number of variables
  NV = length(all.vars(spec)) - 1
  # Check the following condition
  if (q < 1 | q >= TT) return('Complexity parameter need to be higher as 0 and less as number of observations')
  # Models
  models = combn(NV,q)
  # Number of models
  NM = dim(models)[2]
  # Number of training observations
  NT = dim(train)[1]
  
  # 1) We need a place to store predictions for various forecasts
  # 2) We need a place to store forecast evaluations - used to select optimum models
  # 3) We need a place to store actual forecasts of models
  
  # -> 1) Store MSE
  grid.mse = array(NA,dim=c(NM,cv))
  dimnames(grid.mse)[[1]] = paste('Model_',1:NM,sep='')
  dimnames(grid.mse)[[2]] = paste('cv_',1:cv,sep='')
  # -> 2) Initialize multi core
  cl <- makeCluster(nc)
  registerDoParallel(cl)
  
  # We loop over different models
  TMP = foreach (m = 1:NM,.packages=c('glmnet')) %dopar% {
    
    mse.cv = c()
    
    for (v in 1:cv) {
      
      # Select validation sample
      valid.idx = seq(from=v,to=NT,by=cv)
      valid     = train[valid.idx,]
      # Select sub-training dataset
      train_cv  = train[-valid.idx,]
      
      # Estimate & predict model
      mDT = train_cv[,c(all.vars(spec)[1],all.vars(spec)[models[,m]+1])]
      
      # Mean square error
      mse.cv[v] <- mean((predict(lm(mDT),newdata=valid) -  valid[,all.vars(spec)[1]])^2)
    }
    
    return(mse.cv)
    
  }
  stopCluster(cl)
  
  for (m in 1:NM) grid.mse[m,] = TMP[[m]]
  
  # Average forecast error across cross-validation samples
  mean.mse = apply(grid.mse,1,mean)
  
  # Which ONE model led to best forecast?
  top.model = as.numeric(which(mean.mse == min(mean.mse)))
  mDT = train[,c(all.vars(spec)[1],all.vars(spec)[models[,top.model]+1])]
  pred.top.1 <- predict(lm(mDT),newdata=test)
  
  # Which 10 models led to best forecats?
  top.10 = as.numeric(which(rank(mean.mse)<=10))
  pred   = c()
  for (m in 1:10) {
    mDT = train[,c(all.vars(spec)[1],all.vars(spec)[models[,top.10[m]]+1])]
    pred <- cbind(pred,predict(lm(mDT),newdata=test))
  }
  pred.top.10 <- apply(pred,1,mean)
  
  # Which 100 models led to best forecats?
  top.100 = as.numeric(which(rank(mean.mse)<=100))
  pred   = c()
  for (m in 1:100) {
    mDT = train[,c(all.vars(spec)[1],all.vars(spec)[models[,top.100[m]]+1])]
    pred <- cbind(pred,predict(lm(mDT),newdata=test))
  }
  pred.top.100 <- apply(pred,1,mean,trim=0.2)
  
  # Combine into one output
  results = cbind(pred.top.1,pred.top.10,pred.top.100)
  return(results)
}