1 Chapter 1: Motivation

2 Chapter 2: Mathematics and Physics Foundations

3 Chapter 3: Time Complexity

The function fftshift() is useful for visualizing the Fourier transform with the zero-frequency component in the middle of the spectrum. Its inverse counterpart, ifftshift(), is needed to rearrange again the indices appropriately after the IFT is employed, so that the image is correctly reconstructed in spacetime. The FT only computes half of the frequency spectrum corresponding to the non-negative (positive and zero if the length(f) is odd) frequencies in order to save computation time. To preserve the dimensions of the output \(\hat{f}=FT(f)\), the second half of the frequency spectrum (the complex conjugate of the first half) is just added at the end of this vector. In a 1D setting, the result of fft() is:

\(0\ 1\ 2\ 3\ ...\ (freq\ bins > 0)\ ... {\frac{Fs}{2}}\) and \(-{\frac{Fs}{2}}\ ... \ (freq\ bins < 0)\ ...\ -3\ -2\ -1.\)

where \(F_s\) is the frequency sample. The fftshift method sets the zero-frequency component in the center of the array, i.e., it just shifts (offsets) the second part with the negative frequency bins to the beginning and the first part to the end of the resulting FT vector, or matrix. Thus, the shifted discrete FT can be nicely plotted in the center covering the frequency spectrum from \(-{\frac{Fs}{2}}\) on the left to \({\frac{Fs}{2}}\) on the right. This is not necessary, but is used for better visualization aesthetics. To synthesize back the correct image, after using fftshift on the FT signal, we always have to undo that re-indexing by using ifftshift() on the inverse-FT.

3.0.1 fftshift()

# FFT SHIFT
#' This function is useful for visualizing the Fourier transform with the zero-frequency 
#' component in the middle of the spectrum.
#' 
#' @param img_ff A Fourier transform of a 1D signal, 2D image, or 3D volume.
#' @param dim Number of dimensions (-1, 1, 2, 3).
#' @return A properly shifted FT of the array.
#' 
fftshift <- function(img_ff, dim = -1) {

  rows <- dim(img_ff)[1]    
  cols <- dim(img_ff)[2]
  # planes <- dim(img_ff)[3]

  swap_up_down <- function(img_ff) {
    rows_half <- ceiling(rows/2)
    return(rbind(img_ff[((rows_half+1):rows), (1:cols)], img_ff[(1:rows_half), (1:cols)]))
  }

  swap_left_right <- function(img_ff) {
    cols_half <- ceiling(cols/2)
    return(cbind(img_ff[1:rows, ((cols_half+1):cols)], img_ff[1:rows, 1:cols_half]))
  }
  
  #swap_side2side <- function(img_ff) {
  #  planes_half <- ceiling(planes/2)
  #  return(cbind(img_ff[1:rows, 1:cols, ((planes_half+1):planes)], img_ff[1:rows, 1:cols, 1:planes_half]))
  #}

  if (dim == -1) {
    img_ff <- swap_up_down(img_ff)
    return(swap_left_right(img_ff))
  }
  else if (dim == 1) {
    return(swap_up_down(img_ff))
  }
  else if (dim == 2) {
    return(swap_left_right(img_ff))
  }
  else if (dim == 3) {
    # Use the `abind` package to bind along any dimension a pair of multi-dimensional arrays
    # install.packages("abind")
    library(abind)
    
    planes <- dim(img_ff)[3]
    rows_half <- ceiling(rows/2)
    cols_half <- ceiling(cols/2)
    planes_half <- ceiling(planes/2)
    
    img_ff <- abind(img_ff[((rows_half+1):rows), (1:cols), (1:planes)], 
                    img_ff[(1:rows_half), (1:cols), (1:planes)], along=1)
    img_ff <- abind(img_ff[1:rows, ((cols_half+1):cols), (1:planes)], 
                    img_ff[1:rows, 1:cols_half, (1:planes)], along=2)
    img_ff <- abind(img_ff[1:rows, 1:cols, ((planes_half+1):planes)], 
                    img_ff[1:rows, 1:cols, 1:planes_half], along=3)
    return(img_ff)
  }
  else {
    stop("Invalid dimension parameter")
  }
}

3.0.2 ifftshift()

# IFFT SHIFT
#' This function is useful for moving back the zero-frequency component in the middle of the spectrum
#' back to (0,0,0).  It rearranges in reverse (relative to fftshift()) the indices appropriately,
#' so that the image can be correctly reconstructed by the IFT in spacetime
#' 
#' @param img_ff An Inverse Fourier transform of a 1D signal, 2D image, or 3D volume.
#' @param dim Number of dimensions (-1, 1, 2, 3).
#' @return A properly shifted IFT of the input array.
#' 
ifftshift <- function(img_ff, dim = -1) {
  rows <- dim(img_ff)[1]    
  cols <- dim(img_ff)[2]    

  swap_up_down <- function(img_ff) {
    rows_half <- floor(rows/2)
    return(rbind(img_ff[((rows_half+1):rows), (1:cols)], img_ff[(1:rows_half), (1:cols)]))
  }

  swap_left_right <- function(img_ff) {
    cols_half <- floor(cols/2)
    return(cbind(img_ff[1:rows, ((cols_half+1):cols)], img_ff[1:rows, 1:cols_half]))
  }

  if (dim == -1) {
    img_ff <- swap_left_right(img_ff)
    return(swap_up_down(img_ff))
  }
  else if (dim == 1) {
    return(swap_up_down(img_ff))
  }
  else if (dim == 2) {
    return(swap_left_right(img_ff))
  }
  else if (dim == 3) {
    # Use the `abind` package to bind along any dimension a pair of multi-dimensional arrays
    # install.packages("abind")
    library(abind)
    
    planes <- dim(img_ff)[3]
    rows_half <- floor(rows/2)
    cols_half <- floor(cols/2)
    planes_half <- floor(planes/2)
    
    img_ff <- abind(img_ff[1:rows, 1:cols, ((planes_half+1):planes)], 
                    img_ff[1:rows, 1:cols, 1:planes_half], along=3)
    img_ff <- abind(img_ff[1:rows, ((cols_half+1):cols), (1:planes)], 
                    img_ff[1:rows, 1:cols_half, (1:planes)], along=2)
    img_ff <- abind(img_ff[((rows_half+1):rows), (1:cols), (1:planes)], 
                    img_ff[(1:rows_half), (1:cols), (1:planes)], along=1)
    return(img_ff)
  }
  else {
    stop("Invalid dimension parameter")
  }
}

3.0.3 fftinv()

# Implicitly Invert the FT (IFT)
#' This function does the IFT and scales appropriately the  result to ensure IFT(FT()) = I()
#' 
#' @param x FT of a dataset.
#' @return The IFT of the input array.
#'
fftinv <- function( x ) { 
  fft( x, inverse=TRUE ) / length( x ) 
  }

4 Chapter 4: Inferential Uncertainty

5 Chapter 5: Applications

5.0.1 ff_harmonics()

# Compute the spectral decomposition of an array (harmonics)
#' This function computes the FT of the singlal and plots the first few harmonics
#' 
#' @param x Original signal (1D, 2D, or 3D array).
#' @param n Number of first harmonics to report (integer).
#' @param up Upsamping rate (default=10).
#' @param plot Boolean indicating whether to print the harmonics plot(default==TRUE).
#' @param add whether to overplot the harmonics on an existing graph (default=FALSE), 
#' @param main Title for the plot.
#' @return A plot and a dataframe with the sampled harmonics and their corresponding FT magnitudes/amplitudes.
#'
ff_harmonics = function(x=NULL, n=NULL, up=10L, plot=TRUE, add=F, main=NULL, ...) {
  # The discrete Fourier transformation
  dff = fft(x)
  # time
  t = seq(from = 1, to = length(x))
  
  # Upsampled time
  nt = seq(from = 1, to = length(x)+1-1/up, by = 1/up)
 
  #New spectrum
  ndff = array(data = 0, dim = c(length(nt), 1L))
  ndff[1] = dff[1] # mean, DC component
  if(n != 0){
    ndff[2:(n+1)] = dff[2:(n+1)] # positive frequencies come first
    ndff[length(ndff):(length(ndff) - n + 1)] = dff[length(x):(length(x) - n + 1)] # negative frequencies
  }
  
  # Invert the FT
  indff = fft(ndff/length(y), inverse = TRUE)
  idff = fft(dff/length(y), inverse = TRUE)
  if(plot){
    if(!add){
      plot(x = t, y = x, pch = 16L, xlab = "Time", ylab = "Measurement", 
          col = rgb(red = 0.5, green = 0.5, blue = 0.5, alpha = 0.5),
          main = ifelse(is.null(main), paste(n, "harmonics"), main))
      lines(y = Mod(idff), x = t, col = adjustcolor(1L, alpha = 0.5))
    }
    lines(y = Mod(indff), x = nt, ...)
  }
  ret = data.frame(time = nt, y = Mod(indff))
  return(ret)
}

5.0.2 fftshift1D()

# A special case of imlementaiton of `fftshift` for 1D arrays
#' This function is useful for visualizing the 1D Fourier transform with the zero-frequency 
#' component in the middle of the spectrum.
#' 
#' @param img_ff A Fourier transform of a 1D signal.
#' @return A properly shifted FT of the 1D array.
#' 
fftshift1D <- function(img_ff) {
  rows <- length(img_ff)   
  rows_half <- ceiling(rows/2)
  return(append(img_ff[(rows_half+1):rows], img_ff[1:rows_half]))
}

5.0.3 trainIndex()

#' Drawing the indices of traing data 
#' 
#' @param mat A matrix.
#' @param proportion A number represents proportion of training set to the whole
#'                   datset. By default, proportion = 0.6.
#' @return The 60% of randomly selected row indices of \code{mat}.
trainIndex <- function(mat, proportion = 0.6) {
  len <- nrow(mat)
  seq <- seq(1:len)
  return( sample( seq, as.integer( len * proportion ) )  )
}

5.0.4 tuning.lambda()

#' Tune lambda for LASSO regression.
#' 
#' @param cv.LASSO A cv.glmnet object.
#' @param option A number.
#' @param k A number.
#' @return A list of \code{lambda}.
tuning.lambda <- function(cv.LASSO,option,k = NULL) {
  lambmat <- cbind(cv.LASSO$glmnet.fit$df,cv.LASSO$glmnet.fit$lambda)
  result <- tapply(lambmat[,2],lambmat[,1],max)
  kresult <- result[which(names(result)==as.factor(k))]
  if(option==1) { return(result) }
  else if (option==2) {return(kresult)}
  else (return("Not a valid option"))
}

5.0.5 showfeatures()

#' Show the features.
#' 
#' @param object A number.
#' @param lambda A number.
#' @param k A number.
#' @return The sum of \code{x} and \code{y}.
#' @examples
showfeatures <- function(object,lambda,k ,...) {
  lam<-lambda[which(names(lambda)==as.factor(k))]
  beta <- predict(object, s = lam, type = "coef")
  if(is.list(beta)) {
    out <- 
      do.call("cbind", lapply(beta, function(x) x[,1])) %>%
      as.data.frame()
    s <- rowSums(out)
    out <- out[which(s)!=0,,drop=FALSE]
  } else  {
    out<-data.frame(Overall = beta[,1])
    out<-out[which(out!=0),,drop=FALSE]
  }
  out <- abs(out[rownames(out) != "(Intercept)",,drop = FALSE])
  return( out )
}

5.0.6 ML_comparison()

#' Compare prediction performance of different machine learning algorithms
#' 
#' @param fitlas A glmnet LASSO object. 
#' @param cvlas A cv.glmnet LASSO object.
#' @param train A training dataset.
#' @param test A test dataset.
#' @param option A number.
#' @return A table of prediction accuracy of Bagging, Random Forest, Adaboost,
#'         Logistic Regression and Support Vector Machine with diffrent number 
#'         of features.
ML_comparison <- function(fitlas,cvlas,train,test,option=1) {
  allfeat <- as.numeric(names(tuning.lambda(cv.LASSO = cvlas,option = 1)))
  allfeat <- allfeat[which(allfeat>4)]
  # Build an empty data.frame to store the results
  resultframe <- data.frame( origin = rep(NA,length(allfeat) ) )
  rownames(resultframe) <- allfeat
  resultframe %>%
    mutate(Bagging =  rep(NA,length(allfeat) ),
           RandomForest = rep(NA,length(allfeat)),
           Adaboost = rep(NA,length(allfeat)),
           GLM = rep(NA,length(allfeat)),
           SVM = rep(NA,length(allfeat))       )
  trainlist <- as.list(NULL)
  # Fit all the ML models for each number of features and store in the dataframe
  for (i in 1:length(allfeat)) {
    # Set up training data in this step
    index = 
      which(colnames(train) %in% 
              c( row.names(
                showfeatures(fitlas,
                             tuning.lambda(cv.LASSO = cvlas,1),
                             allfeat[i])),"Rank") )
    trainlist[[i]] <- train[, index]
    # Switch between different ML algorithms.
    for (j in 1:5){
      aaa = i;bbb = j
      set.seed(1234)
      # An optional step for SVM
      if (j == 5){
        svmtune <- tune.svm(Rank~.,data = trainlist[[i]],
                            gamma = 10^(-6:1),cost = 10^(-10:10))   }
      model =
        switch(j, # j = 1: bagging; 2: Random Forest; etc.
               ipred::bagging(Rank~.,data = trainlist[[i]],nbagg=100),
               randomForest(Rank~.,data=trainlist[[i]]),
               ada(Rank~.,data = trainlist[[i]],iter=50),
               glm(Rank~.,data = trainlist[[i]],family = "binomial"),
               svm(Rank~.,data=trainlist[[i]],
                   gamma=svmtune$best.parameters[1],
                   cost=svmtune$best.parameters[2]) )
      test_result = predict(model,eutest)
      # An optional step for GLM model
      if (j == 4){ # Logistic requires an extra step after prediction
        test_result = predict(model,eutest)
        test_result <- ifelse(test_result < 0, 0, 1 ) 
      } else {
          test_result = predict(model,eutest)
          }
      modelagg = (test_result == eutest$Rank)
      accuracy = prop.table(table(modelagg))[c("TRUE")]
      switch(j,
             { resultframe$Bagging[i] <- accuracy },
             { resultframe$RandomForest[i] <- accuracy },
             { resultframe$Adaboost[i] <- accuracy },
             { resultframe$GLM[i]<-accuracy },
             { resultframe$SVM[i]<-accuracy } )
    }
  }
  resultframe$origin<-NULL # Drop placeholder column origin
  if(option==1){ return(resultframe) }
}

5.0.7 cluster.feat()

#' Generate results to be used for visualization of clustering
#' 
#' @param N_features An integer. 
#' @return A list of results to be used to visualize with different number of
#'         features.
cluster.feat = function(N_features = 5){
  features.show = 
    showfeatures(fit.LASSO,tuning.lambda(cv.LASSO,1),N_features)
  feat_N =  
    predict(fit.LASSO, s = tuning.lambda(cv.LASSO,2,N_features), 
            newx = data.matrix(X) ) 
  table.feat_N = 
    as.data.frame(rbind(as.numeric(feat_N),Y),row.names = c("predict","original")) 
  r_feat_N <-
    rbind(
      cbind(as.character(country.Name),"original",as.numeric(feat_N)),
      cbind(as.character(country.Name),"predict",Y) ) %>%
    as.data.frame() %>%
    transmute("country" = V1, "res" = V2, "rank" = as.numeric(Y))
  
  LASSOdiff = 
    data.frame(country = country.Name, 
               difference = as.numeric( as.numeric(feat_N)-Y ) )
  feat_N.MSE = mean((feat_N - Y)^2)
  cluster_N <-
    X[,which(colnames(X) %in% 
               row.names(
                 showfeatures(fit.LASSO,tuning.lambda(cv.LASSO,1),N_features)))]
  rownames(cluster_N) <- country.Name
  
  # Hierarchical Clustering
  scaled_cluster_N <- scale(cluster_N)
  ##deal with NAN values
  #scaled_country<-scaled_country[,which(is.nan(scaled_country[1,])==FALSE)]
  dis_SC_N <- dist(scaled_cluster_N)
  H_clust_SC_N <- hclust(dis_SC_N)
  H_clust_SC_N <- eclust(scaled_cluster_N,k=5,"hclust")
  H_clusters_SC_N <- H_clust_SC_N$cluster
  
  # create plot based on hierachical clustering
  Matrix_Lasso_N <-
    data.frame( 
      predict = as.numeric(feat_N),original = Y,
      groups = as.factor(H_clusters_SC_N)  )
  table.N <- as.data.table(Matrix_Lasso_N)
  hullsN <- table.N[, .SD[chull(predict, original)], by = "groups"]
  pamN <- pam(cluster_N,k = 5,stand = TRUE)
  
  #K-Means Clustering
  NAN_scaled_cluster_N<-
    scaled_cluster_N[,which(is.nan(scaled_cluster_N[1,])==FALSE)]
  gap_statN = 
    clusGap(NAN_scaled_cluster_N, 
            FUN = kmeans, nstart = 25, K.max = 10, B = 500)
  gap_statN.fviz_gap_stat = fviz_gap_stat(gap_statN)
  # Take K=8 as default
  KM_cluster <- kmeans(NAN_scaled_cluster_N,8,nstart = 25)
  result_KM<-
    data.frame( "Overall" = Y,
                "predictive groups" = KM_cluster$cluster )
  table.KMeans = 
    as.data.frame(result_KM)[order(as.data.frame(result_KM)$Overall),]
  
  return(
    list(feat_N.MSE = feat_N.MSE,features.show = features.show, 
         table.feat_N = table.feat_N, table.KMeans = table.KMeans, 
         pamN = pamN, NAN_scaled_cluster_N = NAN_scaled_cluster_N,
         gap_statN.fviz_gap_stat = gap_statN.fviz_gap_stat,
         r_feat_N = r_feat_N, LASSOdiff = LASSOdiff, 
         KM_cluster = KM_cluster, cluster_N = cluster_N,
         H_clust_SC_N = H_clust_SC_N, Matrix_Lasso_N = Matrix_Lasso_N,
         hullsN = hullsN, gap_statN = gap_statN ) )
}

5.0.8 wplot()

#' This function plots the within cluster sum of squares
#' 
#' @param data A data matrix.
#' @param nc An integer (number of clusters).
#' @param seed An integer.
#' @return A plot of Within Group Sum of Squares against  number of clusters 
wplot <- function(data, nc=15, seed=319){
    wss <-c(NA, (nrow(data)-1)*sum(apply(data,2,var)))
    for (i in 2:nc){
      set.seed(seed)
      wss[i] <- sum(kmeans(data, centers=i)$withinss)} 
    plot(1:nc, wss, type="b", 
         xlab="Number of Clusters",
         ylab="Within groups sum of squares")
    }

5.0.9 coef_plot()

#' This function plots the coefficients
#' 
#' @param betahat A vector that stores estimated coefficients.
#' @param varn A vector that stores names for the coefficients.
#' @param plotname A string that to be used as the plot name.
#' @return A plot of coefficients.
coef_plot <- function(betahat,varn,plotname) {
  betahat <- betahat[-1]
  P <-
    coefplot(betahat[which(betahat!=0)],
             sd = rep(0, length(betahat[which(betahat!=0)])),
             pch=0, cex.pts = 3, col="red", main = plotname,
             varnames = varn[which(betahat!=0)] )
  return(P)
}

5.0.10 findfeatures()

#' This function combines the features selected by LASSO and the 10 most import features   
#' suggested by RIDGE regression.
#' 
#' @param beta.lasso A vector of coefficients by LASSO regression.
#' @param beta.ridge A vector of coefficients by RIDGE regression.
#' @param k A number indicating number of important features to be selected by Ridge.
#' @return The union the features selected by LASSO and the k most import features   
#'         suggested by RIDGE regression.
findfeatures <- function(beta.lasso, beta.ridge, k = 10) {
  if (k >= length(beta.ridge)){ 
    stop("k should be less than the length of beta.ridge!")
    }
  beta.lasso <- beta.lasso[-1]
  feat1 <- which(beta.lasso!=0)
  beta.ridge <- beta.ridge[-1]
  feat2 <- order( abs(beta.ridge), decreasing = TRUE )[1:k]
  features <- union(feat1,feat2)
  return(features)
}

5.0.11 varImp()

#' This function takes a fitted model and return the coefficients.
#' 
#' @param object A fiited model, could be LASSO.
#' @param lambda A number, tuning parameter if object is LASSO.
#' @return A dataframe consits of coefficients for features selected by LASSO.
varImp <- function(object, lambda = NULL, ...) {
  ## skipping a few lines
  beta <- predict(object, s = lambda, type = "coef")
  # Extract beta's for LASSO regression
  if(is.list(beta)) {
    out <- do.call("cbind", lapply(beta, function(x) x[,1]))
    out <- as.data.frame(out)
    s <- rowSums(out)
    out <- out[which(s)!=0,,drop=FALSE]
  } else  {
    out <- data.frame(Overall = beta[,1])
    out <- out[which(out!=0),,drop=FALSE]
  }
  out <- abs(out[rownames(out) != "(Intercept)",,drop = FALSE])
  return(out)
}

5.0.12 countrydata()

#' This function extracts data associated with a certain country from the dataset
#' 
#' @param country A integer that represents a country.
#' @return Time and feature data related to the country specified.
countrydata <- function(country) {
  return( t(as.matrix(eu_3D_array[country,,])) )
}

5.0.13 select.response()

#' This function selets a column as response variable in analysis.
#' 
#' @param countrydata A data matrix.
#' @param k A number represents a column index.
#' @return A vector, i.e.,a column of data where NA's are filled with mean 
#'         of the rest values.
select.response <- function(countrydata,k) {
  data <- countrydata[,k]
  data[is.na(data)] <- mean(data,na.rm = T)
  return(data)
}

5.0.14 select.predictors()

#' This function excludes the specified column and select the rest as 
#' the predictors from a data matrix for analysis.
#' 
#' @param countrydata A data matrix.
#' @param k A number represents a column index.
#'                   datset. By default, proportion = 0.6.
#' @return A predictor matrix where all NA's are filled
select.predictors <- function(countrydata,k) {
  matrix <- countrydata[,-k]
  for (i in 1:ncol(matrix)) {
    matrix[is.na(matrix[,i]),i] <- mean(matrix[,i],na.rm = T)
  }
  colnames(matrix)<-c(1:ncol(countrydata))[-k]
  # remove features with only NA/NAN value
  matrix = matrix[,which(is.nan(matrix[1,])==FALSE)]
  return(matrix)
}

5.0.15 extract.PC()

#' This function extracts PCs that explaines more than a1 proportion of  the variance. 
#' After PCA, reweight each PC according to proportion of variance explained and then
#' get the weights of remaining weights(importance of) within the selected PCs. Last, 
#' choose the features with the highest importance (sum greater than a2).
#' 
#' @param mat A data matrix.
#' @param a1 A number, which reprents a threshold of variance explained to conduct PCA.
#' @param a2 A number that reprents a threshold to select features from weights of features.
#' @return Names of features that satisfy the requirement sepecified.

extract.PC <- function(mat,a1,a2) {
  result.PCA <- prcomp(mat)
  imporance.PCA <- summary(result.PCA)$importance
  # Set a threshold and use it to select certain number of principal components 
  Num.PC <- min( which(imporance.PCA[3,] > a1) )
  # Extract the loadings of selected PCs
  loadings.PCA <-result.PCA$rotation[,1:Num.PC]
  # Reweight each PC according to proportion of variance explained
  weights.PC <- imporance.PCA[2,1:Num.PC]/sum(imporance.PCA[2,1:Num.PC])
  importance.features <- crossprod( t(loadings.PCA)^2,weights.PC )
  colnames(importance.features) <- c("importance")
  # Order remaining features in descending order
  importance.features <- 
    importance.features[rev(order(importance.features[,1])),]
  # Return features that weighs more than a2 percent in the data after PCA
  for (i in 1:length(importance.features)) {
    if (Reduce(`+`, importance.features[1:i]) > a2){
      return( as.numeric(names(importance.features[1:i])) )
    } 
  }
}

5.0.16 listfeat()

#' This function extracts a list of features that satisfy extract.PC()'s (a1* a2)
#' from 0.8*0.8 to 0.99*0.99 
#' 
#' @param mat A matrix.
#' @return A list of feature names.
listfeat <- function(mat) {
  featlist<-list(c(0))
  for (i in 80:99) {
    newlist<-list(c(extract.PC(mat,i/100,i/100)))
    if (identical(newlist[[1]],featlist[[length(featlist)]])==FALSE){
      featlist=c(featlist,newlist)
      }
  }
  featlist[[1]]<-NULL
  return(featlist)
}

5.0.17 bestARIMAX()

#' This function uses the least square method to choose features, taking into account
#' the effect of seasonal features
#' 
#' @param timeseries A data matrix.
#' @param featmatrix A matrix of features.
#' @param seamatrix A matrix of seasonal features.
#' @return The forcasting result.
bestARIMAX <- function(timeseries,featmatrix,seamatrix) {
  featlist <- listfeat(featmatrix)
  forresult <- NULL
  tsdata <- ts(timeseries,frequency = 1)
  for (i in 1:length(featlist)) {
    timematrix <-
      featmatrix[,which(colnames(featmatrix) %in% as.numeric(featlist[[i]]))]
    timematrix <- cbind(timematrix,seamatrix)
    ARIMAX <- auto.arima(tsdata[1:60],xreg = timematrix[1:60,])
    fore <- forecast(ARIMAX,xreg = timematrix[61:72,],h=12)
    result <- sum((as.numeric(tsdata[61:72])-as.numeric(fore$mean))^2)
    forresult <- cbind(forresult,result)
  }
  colnames(forresult)<-seq(1:length(featlist))
  return(forresult)
}

5.0.18 choosebestARIMAX()

#' This function chooses the best ARIMAX model
#' 
#' @param timeseries A data matrix.
#' @param featmatrix A matrix of features.
#' @param seamatrix A matrix of seasonal features.
#' @return The best ARIMAX model.
choosebestARIMAX <- function(timeseries,featmatrix,seamatrix,k) {
  featlist <- listfeat(featmatrix)
  tsdata <- ts(timeseries,frequency = 1)
  timematrix <-
    featmatrix[,which(colnames(featmatrix) %in% as.numeric(featlist[[k]]))]
  timematrix <- cbind(timematrix,seamatrix)
  ARIMAX <- auto.arima(tsdata[1:60],xreg = timematrix[1:60,])
  return(ARIMAX)
}

5.0.19 ARIMAXforecast()

#' This function makes the ARIMAX forecast
#' 
#' @param timeseries A data matrix.
#' @param featmatrix A matrix of features.
#' @param seamatrix A matrix of seasonal features.
#' @param k A number that stands for the column index.
#' @return The ARIMAX model forcasting result.
ARIMAXforecast <- function(ARIMAX,featmatrix,seamatrix,k) {
  featlist <- listfeat(featmatrix)
  timematrix <-
    featmatrix[,which(colnames(featmatrix) %in% as.numeric(featlist[[k]]))]
  timematrix <- cbind(timematrix,seamatrix)
  fore <- forecast(ARIMAX,xreg = timematrix[61:72,],h=12)
  return(fore)
}

5.0.20 splinecreate()

#' This function uses spline regression model to expand the dataset
#' Here we will use the spline model to create independent smooth regression model 
#' for each features. Then estimate new prediction points inside the original time 
#' series points. Thus expands the original quarterly data into a weekly data. 
#' During the process, a Gaussian noise will be added onto prediction values to 
#' make the prediction data more reasonable.
#' To deal with the problem of rank deficiency, we'll use full-rank matrix in the ARIMAX model. 
#' This means some more features will be eliminated from the original model. 
#' Moreover, a parameter k would be used to narrow down the variance of the 
#' Gaussian noise we apply artifically, in order to get a better-fitted model.
#' 
#' @param mat A matrix.
#' @param k A number that helps control the variance of the Gaussian noise.
#' @return A matrix that contains the regression spline results for each feature.
splinecreate <- function(mat,k) {
  res<-NULL
  for (i in 1:ncol(mat)) {
    sp <- smooth.spline(seq(1:72),mat[,i])
    spresult <- predict(sp,seq(from=0,by=1/13,length.out = 936))
    spfeat <- spresult$y + rnorm(936,sd=(sd(spresult$y))/k)
    res <- cbind(res,spfeat)
  } 
  colnames(res) <- colnames(mat)
  return(res)
}

5.0.21 cleardata()

#' This function imputes missing values for a dataset
#' 
#' @param mat A matrix to be cleaned.
#' @return A matrix whose NA's are all filled with column mean of the rest items.
cleardata <- function(mat) {
  for (i in 1:ncol(mat)) {
    mat[is.na(mat[,i]),i]<-mean(mat[,i],na.rm = T)
  }
  #remove data with only NA/NAN value
  mat = mat[,which(is.nan(mat[1,])==FALSE)]
  
  ## Below is a version that utilizes R's vectorization property
  # mat = 
  #   apply(mat, 2, function(column) 
  #     sapply(column, function(x) 
  #       ifelse(is.na(x),
  #              mean(column,na.rm = T) + 
  #                rnorm( sum(is.na(column) ), sd = sd(column, na.rm = T) ),
  #              x) ) )
  return(mat)
}

5.0.22 fullrank()

#' This function gets a full-rank sub-matrix to be used in analysis
#' 
#' @param mat A matrix.
#' @return A full-rank matrix
fullrank <- function(mat) {
  return(mat[,qr(mat)$pivot[seq_len(qr(mat)$rank)]])
}

5.0.23 rmv_miss_ftr()

#' This function removes features with all missing values
#'
#' This function deletes features that are missing for all time points 
#' associated with a certain country
#' @param countryName give a country name that is shown on the original dataset
rmv_miss_ftr = function(countryName = "Bulgaria"){
  DataSuperSample = 
    time_series %>% 
    filter(country == countryName) %>%
    select_if( function(col) sum(is.na(col)) != length(col) ) %>%
    select(-time, -country) %>%
    as.matrix() %>%
    cleardata() %>%
    as.data.frame()%>%
    # remove feature that only has one value
    select_if(function(col) sum(is.na(col)) == 0 ) %>%
    # remove feature that all the values are 0
    select_if(function(col) sum(col) != 0 ) %>%
    splinecreate() %>%
    as.data.frame()
  return(DataSuperSample)
}

##Concern: The name of country "German" has changed inside the dataset once.

5.0.24 Fit_ARIMA()

#' This function cleans the data and fits the ARIMA model
#'
#' @param country Give a country name that is shown on the original dataset
#' @param start Select the start year to create the ARIMA model
#' @param end Select the end year to create the ARIMA model
#' @param frequency The number of observations per unit of time. The same in function "ts"
#' @param feature Choose one feature to create the ARIMA model on
Fit_ARIMA <- function(country = 'Belgium', 
                      start = 2000, end = 2017, frequency = 20,
                      feature="Unemployment , Females, From 15-64 years, Total")
{
  # delete features that are missing for all time points
  DataSuperSample = rmv_miss_ftr(countryName = country)
  if (feature=="Unemployment , Females, From 15-64 years, Total") {
      Y = select(DataSuperSample, "Unemployment , Females, From 15-64 years, Total")
      X = select(DataSuperSample, -starts_with("Unemployment"))
  } else if (feature=="Gross domestic product at market prices") {
    Y = select(DataSuperSample, "Gross domestic product at market prices"); dim(Y)
    X = select(DataSuperSample, -matches("debt|Debt")); dim(X)  # 360 167
    print(paste0("dim(X)=(", dim(X)[1], ",", dim(X)[2], ");  ",
                 " dim(Y)=(", dim(Y)[1], ",", dim(Y)[2], ") ..."))
    # ensure full-rank design matrix, X
    X <- X[, qr(X)$pivot[seq_len(qr(X)$rank)]]; dim(X)  
  }
  else {
    print(paste0("This feature ", feature, " is not imlemented yet! Exiting Fit_ARIMA() method ...")) 
    return(NULL)
  }
  ts_Y <- ts(Y, start=c(start, 1), end=c(end, frequency), 
             frequency = frequency); length(ts_Y)
  set.seed(1234)
  fitArimaX = auto.arima(ts_Y, xreg=as.matrix(X) )
  return(fitArimaX)
}

5.0.25 preprocess_ARIMA()

#' This function preprocesses dataset of given countries to ensure full rank
#' Then, extend the Fit-ARIMA method to ensure testing-training 
#' modeling/assessment for 2 countries works
#'
#' @param country Give a country name that is shown on the original dataset
#' @param start Select the start year to create the ARIMA model
#' @param end Select the end year to create the ARIMA model
#' @param frequency The number of observations per unit of time. The same in function "ts"
#' @param feature Choose one feature to create the ARIMA model on
preprocess_ARIMA <- 
  function(country='Belgium',start=2000, end=2017, frequency=20,
           feature="Unemployment , Females, From 15-64 years, Total"){
  # delete features that are missing for all time points
  DataSuperSample = rmv_miss_ftr(countryName = country)

  print(paste0("Processing feature: ...", feature, "... "))
      
  if (feature == "Unemployment , Females, From 15-64 years, Total") {
      Y = select(DataSuperSample, 
                 "Unemployment , Females, From 15-64 years, Total")
      X = select(DataSuperSample, 
                 -starts_with("Unemployment"))
  } else if (feature == "Gross domestic product at market prices") {
      Y = select(DataSuperSample, 
                 "Gross domestic product at market prices"); dim(Y)
      X = select(DataSuperSample, 
                 -matches("debt|Debt") )
      X <- X [, -c(50:80)]; dim(X)  # 360 167
  } else {
    print(paste0("This feature: ...", 
                 feature, 
                 "... is not imlemented yet! Exiting preprocess_ARIMA() method ...")) 
    return(NULL)
  }
  # reduce the number of observations (matrix rows) to specified timerange
  len_1 <- (end + 1 - start) * frequency; print(paste0("dim(X)[1]=", len_1))
  X <- X[1:len_1 , qr(X[1:len_1 , ])$pivot[seq_len(qr(X[1:len_1 , ])$rank)]]; dim(X)  
  # ensure full-rank design matrix, X
  Y <- as.data.frame(Y[1:len_1 , ])
  print(paste0("dim(X)=(", dim(X)[1], ",", dim(X)[2], ");  ",         # 300 136
                 " dim(Y)=(", dim(Y)[1], ",", dim(Y)[2], ") ..."))    # 300 1
  return( list("X"=X, "Y"=Y) )
}

5.0.26 homologousX_features()

#' A general function that ensures the XReg predictors for 2 countries are homologous
#'
#' @param X_Country1 value inhereted from previous function "preprocess_ARIMA"
#' @param X_Country2 value inhereted from previous function "preprocess_ARIMA"
homologousX_features <- function (X_Country1, X_Country2){
  # Check if Xreg for Belgium and Bulgaria are homologous (same feature columns)
  common_cols <- intersect(colnames(X_Country1), colnames(X_Country2))
  X_Country1 <- subset(X_Country1, select = common_cols)
  X_Country2 <- subset(X_Country2, select = common_cols)
  print(paste0("dim(X1)=(", dim(X_Country1)[1], ",", dim(X_Country1)[2], ");  ", # 300 131
              " dim(X2)=(", dim(X_Country2)[1], ",", dim(X_Country2)[2], ")!"))  # 300 131
  return(list("X_Country1"=X_Country1, "X_Country2"=X_Country2))
}

5.0.27 fit_ARIMA()

#' This function sets up ARIMAX model with homologous features
#' It is based on two previous functions "preprocess_ARIMA" and "homologousX_features".
#' It gets the homologous features between two countries and create ARIMAX model on them.
#'
#' @param country1 Give a country name that is shown on the original dataset, ARIMAX model will be 
#' created based on the data of this country
#' @param country2 Give a country name that is shown on the original dataset, this country data will 
#' only be used to get the homologous features with country 1
#' @param start Select the start year to create the ARIMA model
#' @param end Select the end year to create the ARIMA model
#' @param frequency The number of observations per unit of time. The same in function "ts"
#' @param feature Choose one feature to create the ARIMA model on
fit_ARIMA <- 
  function(country1='Belgium', country2= 'Bulgaria', 
           start=2000, end=2014, frequency=20,
           feature="Gross domestic product at market prices"){
  # This function 
  preprocess_Country1 <- preprocess_ARIMA(country = country1, 
                    start=start, end=end, frequency=frequency, feature=feature)
  preprocess_Country2 <- preprocess_ARIMA(country = country2, 
                    start=start, end=end, frequency=frequency, feature=feature)
  ts_Y_Country1 <- ts(preprocess_Country1$Y, start=c(start, 1), 
            end=c(end, frequency), frequency = frequency); length(ts_Y_Country1)
  
  homoFeat <- homologousX_features(preprocess_Country1$X, preprocess_Country2$X)
  X_Country1  <- homoFeat$X_Country1
  X_Country2 <- homoFeat$X_Country2
  
  set.seed(1234)
  fitArimaX_Country1 = auto.arima(ts_Y_Country1, xreg=as.matrix(X_Country1))
  return(fitArimaX_Country1)
}

5.0.28 kSpaceTransform()

#' A generic function to Transform Data ={all predictors (X) and outcome (Y)} to k-space (Fourier domain)
#' For ForwardFT, set parameters as (rawData, FALSE, NULL)
#' For InverseFT, there are two parameters setting: (magnitudes, TRUE, reconPhasesToUse) or (FT_data, TRUE, NULL)
#'
#' @param data dataset that needs K-space transformation
kSpaceTransform <- function(data, inverse = FALSE, reconPhases = NULL) {
  # ForwardFT (rawData, FALSE, NULL)
  # InverseFT(magnitudes, TRUE, reconPhasesToUse) or InverseFT(FT_data, TRUE, NULL)
  FT_data <- array(complex(), c(dim(data)[1], dim(data)[2]))
  mag_FT_data <- array(complex(), c(dim(data)[1], dim(data)[2]))
  phase_FT_data <- array(complex(), c(dim(data)[1], dim(data)[2]))
  IFT_reconPhases_data <- array(complex(), c(dim(data)[1], dim(data)[2]))

  for (i in 1:dim(data)[2]) {
    if (inverse == FALSE | is.null(reconPhases)) {
      FT_data[ , i] <- fft(data[ , i], inverse)
      X2 <- FT_data[ , i]
      # plot(fftshift1D(log(Re(X2)+2)), main = "log(fftshift1D(Re(FFT(timeseries))))") 
      mag_FT_data[ , i] <- sqrt(Re(X2)^2+Im(X2)^2); 
      # plot(log(fftshift1D(Re(mag_FT_MCSI_data))), main = "log(Magnitude(FFT(timeseries)))") 
      phase_FT_data[ , i] <- atan2(Im(X2), Re(X2)); 
      # plot(Re(fftshift1D(phase_FT_MCSI_data[ , 1])), main = "Shift(Phase(FFT(timeseries)))")
    }
    else {  # for IFT synthesis using user-provided Phases, typically from kime-phase aggregators
      Real <- data[ , i] * cos(reconPhases[ , i])  
      Imaginary <- data[ , i] * sin(reconPhases[ , i]) 
      IFT_reconPhases_data[ ,i] <- 
          Re(fft(Real+1i*Imaginary, inverse = TRUE)/length(data[ , i]))
    }
  }
    ######### Test the FT-IFT analysis-synthesis back-and-forth transform process 
    #         to confirm calculations
    # X2 <- FT_data[ , 1]; mag_FT_data[ , 1] <- sqrt(Re(X2)^2+Im(X2)^2); 
    # phase_FT_data[ , 1] <- atan2(Im(X2), Re(X2)); 
    # Real2 = mag_FT_data[ , 1] * cos(phase_FT_data[ , 1])
    # Imaginary2 = mag_FT_data[ , 1] * sin(phase_FT_data[ , 1])
    # man_hat_X2 = Re(fft(Real2 + 1i*Imaginary2, inverse = T)/length(X2))
    # ifelse(abs(man_hat_X2[5] - data[5, 1]) < 0.001, "Perfect Syntesis", "Problems!!!")
    #########
  
    if (inverse == FALSE | is.null(reconPhases)) {
      return(list("magnitudes"=mag_FT_data, "phases"=phase_FT_data))
      # Use kSpaceTransform$magnitudes & kSpaceTransform$phases to retrieve teh Mags and Phases
    }
    else {
      return(IFT_reconPhases_data)
      # Use Re(kSpaceTransform) to extract spacetime Real-valued reconstructed data
    }
}

5.0.29 changetitle

#' Reshape the title
#'
#' @param titl vector contains titles that need to be reshaped
changetitle <- function(titl) {
  newtitle<-rep(NA,length(titl))
  for (i in 1:length(titl)) {
    tempchar<-substring(titl[i],1:nchar(titl[i]),1:nchar(titl[i]))
    for (j in 1:8) {
      k=25*j
      tempchar[which(tempchar==" ")[which(tempchar==" ")>k][1]]<-"\n"
    }
    tempchar<-paste(tempchar,collapse = "")
    newtitle[i]<-tempchar
  }
  return(newtitle)
}

5.0.30 plotPhaseDistributions()

#' Creating plot result for K-space transformation
#'
#' This function can choose to generate all plots related to the K-space transformation. 
#' Or we could choose features code to select features we like to see to generate K-space transformation result.
#'
#' @param dataFT data inherited from function "kSpaceTransform", K-space transformed dataset.
#' @param dataColnames feature names for the transformed dataset
#' @param size size of the text inside the plot
#' @param option two options avaliable: "All": to plot for all features, "Select": to select several features you wish to show 
#' @param select_feature if "option" is setted as "Select", then put inside a sequence to indicate the features you wish to show with this function.
plotPhaseDistributions <- function (dataFT, dataColnames, size=10, option="ALL",select_feature=NULL,...) {
  df.phase <- as.data.frame(Re(dataFT$phases))
  df.phase %>% gather() %>% head()
  colnames(df.phase) <- dataColnames
  phaseDistributions <- gather(df.phase)
  colnames(phaseDistributions) <- c("Feature", "Phase")
  if (is.null(size)) size=10
  
  # map the value as our x variable, and use facet_wrap to separate by the key column:
  if(option=="ALL"){
  ggplot(phaseDistributions, aes(Phase)) + 
    # geom_histogram(bins = 10) + 
    geom_histogram(aes(y=..density..), bins = 10) + 
    facet_wrap( ~Feature, scales = 'free_x') +
    xlim(-pi, pi) + 
    theme(strip.text.x = element_text(size = size, colour = "black", angle = 0))}
  else if(option=="Select"){
    choosefeat<-dataColnames[select_feature]
    NphaseDistributions<-phaseDistributions[which(phaseDistributions$Feature %in% choosefeat),]
  ggplot(NphaseDistributions, aes(Phase)) + 
    # geom_histogram(bins = 10) + 
    geom_histogram(aes(y=..density..), bins = 10) + 
    facet_wrap( ~Feature, scales = 'free_x') +
    xlim(-pi, pi) + 
    theme(strip.text.x = element_text(size = size, colour = "black", angle = 0))}
  else{
    return("Wrong input on option, Please try again")
  }
}

5.0.31 PS_ARIMAX_remodel()

#' Conduct three different phase synthesis methods and create ARIMAX model based on different method.
#' This function can reconstruct the dataset based on three phase synthesis methods: 
#' Nil-Phase Synthesis, Swapped-Phase Synthesis and Random-Phase Synthesis.
#' Also it can construct different ARIMAX model based on the phase synthesis method chosen. 
#'
#' @param X inherited from function "homologousX_features", we can get parameter X by 
#' "homologousX_features(country1,country2)$$X_Country1"
#' @param Y inherited from function "preprocess_ARIMA", we can get parameter Y by "preprocess_ARIMA$Y"
#' @param ts_Y_test time series vector, must be set from function "ts" and 
#' must be created based on testing set
#' @param option choose the phase synthesis method used in the reconstruction or ARIMAX model.
#' Three options are valid: "Nil-Phase", "Swapped-Phase", "Random-Phase"
#' @param result choose the result shown by this function. Two options are valid: 
#' "ARIMAX": this will show the ARIMAX model created by the chosen phase synthesis method. 
#' "reconstruct": this will show the reconstruction procedure based on the chosen phase synthesis method, 
#' but the ARIMAX model won't be built.
#' @param rename_Y rename the return result of Y
#' @return If the "result" parameter is setted as "reconstruct", then the reconstruction result will be shown. if setted as "ARIMAX", then the detailed ARIMAX model will be built.
PS_ARIMAX_remodel <- function(X,Y,ts_Y_test,option="Nil-Phase",result="ARIMAX",rename_Y="Y_GDP_Belgium") {
  temp_data<-cbind(X,Y)
  FT<-kSpaceTransform(temp_data, FALSE, NULL)
  if(option=="Nil-Phase"){
    nilPhase_FT_data <- array(complex(real=0, imaginary=0), c(dim(temp_data)[1], dim(temp_data)[2]))
    ##IFT_NilPhase_FT <- array(complex(), c(dim(temp_data)[1], dim(temp_data)[2]))
    #       Invert back to spacetime the FT_Belgium$magnitudes[ , i] signal with nil-phase
    IFT_NilPhase_FT<- Re(kSpaceTransform(FT$magnitudes, TRUE, nilPhase_FT_data))
    colnames(IFT_NilPhase_FT) <- c(colnames(X), rename_Y)

    #result
    if(result=="reconstruct"){
      return(list(
    dim(nilPhase_FT_data),dim(IFT_NilPhase_FT), dim(FT$magnitudes),
    colnames(IFT_NilPhase_FT), head(IFT_NilPhase_FT),IFT_NilPhase_FT)) # head(temp_Data)
    }
    #rename
    IFT_FT<-IFT_NilPhase_FT
  }
  else if(option=="Swapped-Phase"){
    
    swapped_phase_FT_data <- FT$phases
    colnames(swapped_phase_FT_data) <- c(colnames(X), rename_Y)
    swapped_phase_FT_data1 <- swapped_phase_FT_data
    IFT_SwappedPhase_FT <- array(complex(), c(dim(temp_data)[1], dim(temp_data)[2]))
    set.seed(12345)   # sample randomly Phase-columns for each of the 131 covariates (X)
    #swap_phase_FT_Belgium_indices <- sample(ncol(swapped_phase_FT_Belgium_data)-1)
    # for (j in 1:131) {  # for all coluns of the design Xreg matrix, excluding Y, randomly swap columns phases
    #  swapped_phase_FT_Belgium_data1[ , j] <- swapped_phase_FT_Belgium_data[, swap_phase_FT_Belgium_indices[j]]
    #}
    swapped_phase_FT_data1 <- as.data.frame(cbind(
      swapped_phase_FT_data[ , sample(ncol(swapped_phase_FT_data[ , 1:(ncol(swapped_phase_FT_data)-1)]))], 
      swapped_phase_FT_data[ , ncol(swapped_phase_FT_data)]))
    swapped_phase_FT_data <- swapped_phase_FT_data1
    colnames(swapped_phase_FT_data)[ncol(swapped_phase_FT_data)] <- rename_Y
    colnames(swapped_phase_FT_data)
    
    # Invert back to spacetime the FT_Belgium$magnitudes[ , i] signal using the feature swapped phases
    IFT_SwappedPhase_FT <- Re(kSpaceTransform(FT$magnitudes, TRUE, swapped_phase_FT_data))
    
    colnames(IFT_SwappedPhase_FT) <- c(colnames(X), rename_Y)

    #result
    if(result=="reconstruct"){
      return(list(
    dim(swapped_phase_FT_data) ,
    # [1] 360 132
    dim(swapped_phase_FT_data), dim(FT$phases),
    dim(IFT_SwappedPhase_FT), dim(FT$magnitudes),
    colnames(IFT_SwappedPhase_FT), tail(IFT_SwappedPhase_FT),IFT_SwappedPhase_FT)) # tail(temp_Data)
    }
    #rename
    IFT_FT<-IFT_SwappedPhase_FT
  }
  else if(option=="Random-Phase"){
    
    #randPhase_FT_data <- array(complex(), c(dim(temp_data)[1], dim(temp_data)[2]))
    IFT_RandPhase_FT <- array(complex(), c(dim(temp_data)[1], dim(temp_data)[2]))
    randPhase_FT_data <- FT$phases
    for (i in 1:(dim(randPhase_FT_data)[2] -1)) {
      if (i < dim(randPhase_FT_data)[2]) {
        set.seed(12345)   # sample randomly Phases for each of the 131 predictors covariates (X)
        randPhase_FT_data[ , i] <- FT$phases[sample(nrow(FT$phases)), i]
      } else {   } # for the Y outcome (Last Column) - do not change the phases of the Y
    }
    #       Invert back to spacetime the FT_Belgium$magnitudes[ , i] signal with avg-phase
    IFT_RandPhase_FT <- Re(kSpaceTransform(FT$magnitudes, TRUE, randPhase_FT_data))
    colnames(IFT_RandPhase_FT) <- c(colnames(X), rename_Y)
    
    #result
    if(result=="reconstruct"){
    return(  list(
    dim(randPhase_FT_data),    # ;  head(randPhase_FT_data)
    # [1] 360 132
    dim(IFT_RandPhase_FT), dim(FT$magnitudes) ,
    colnames(IFT_RandPhase_FT), tail(IFT_RandPhase_FT), # tail(temp_Data)
    dim(IFT_RandPhase_FT), head(Re(IFT_RandPhase_FT)), tail(Re(IFT_RandPhase_FT))))
    }
    #rename
    IFT_FT<-IFT_RandPhase_FT
  }
  if(result=="ARIMAX"){
  #construction of time-series analysis
  # Perform ARIMAX modeling on IFT_NilPhase_FT_Belgium; report (p,d,q) params and quality metrics AIC/BIC
  # library(forecast)
  IFT_FT_Y_train <- IFT_FT[1:300, ncol(IFT_FT)]
  IFT_FT_Y_test <- IFT_FT[301:360]
  
  # Training and Testing Data Covariates explaining the longitudinal outcome (Y)
  IFT_FT_X_train <- as.data.frame(IFT_FT)[1:300, 1:(ncol(IFT_FT)-1)]; dim(IFT_FT_X_train)
  IFT_FT_X_test <- as.data.frame(IFT_FT)[301:360, 1:(ncol(IFT_FT)-1)]; dim(IFT_FT_X_test)
  
  # Outcome Variable to be ARIMAX-modelled, as a timeseries
  ts_IFT_FT_Y_train <- 
    ts(IFT_FT_Y_train, start=c(2000,1), end=c(2014, 20), frequency = 20)
  
  set.seed(1234)
  modArima_IFT_FT_Y_train <- 
    auto.arima(ts_IFT_FT_Y_train, xreg=as.matrix(IFT_FT_X_train))

  pred_arimax <- forecast(modArima_IFT_FT_Y_train, xreg = as.matrix(IFT_FT_X_test))
  pred_arimax_2015_2017 <- 
    ts(pred_arimax$mean, frequency=20, start=c(2015,1), end=c(2017,20))
  # alternatively:
  # pred_arimax_1_0_1_2015_2017 <- predict(modArima_IFT_NilPhase_FT_Belgium_Y_train, 
  #                                              n.ahead = 3*20, newxreg = IFT_NilPhase_FT_Belgium_X_test)$pred
  sort(modArima_IFT_FT_Y_train$coef)[1:10]
  # Labor cost for LCI (compensation of employees plus taxes minus subsidies), effect=-1.5972295 
  #                                                                      ar1, effect=-1.4231617 
  #                                 Labor cost other than wages and salaries, effect=-1.2213214 
  #                                                                      ma1, effect=-0.9869571 
  #                                                                      ar2, effect=-0.8591937 
  #           Unemployment , Females, From 15-64 years, From 12 to 17 months, effect=-0.7075454 
  #             Unemployment , Total, From 15-64 years, From 18 to 23 months, effect=-0.5797656 
  #               Unemployment , Males, From 15-64 years, from 3 to 5 months, effect=-0.5026139 
  #                                                                     sar2, effect=-0.3450866 
  #             Unemployment , Males, From 15-64 years, from 24 to 47 months, effect=-0.2965540 
  return(list(
  training_set_length=length(IFT_FT_Y_train),
  testing_set_length=length(IFT_FT_Y_test),
  AR_MA=modArima_IFT_FT_Y_train$arma,
  ARIMAX_model=pred_arimax_2015_2017,
  feature_effect=sort(modArima_IFT_FT_Y_train$coef)[1:10],
  Correlation=cor(pred_arimax$mean, ts_Y_test), 
  Mean=mean(pred_arimax_2015_2017)))} 
  else{
    print(paste0("Wrong input for option or result, please try again.") )
    return(NULL)
  }
}

5.0.32 completeHomologousX_features()

#' This function ensures the XReg predictors for ALL 31 EU countries are homologous
#'
#' @param list_of_dfs a list of datasets of different countries
#' @return
completeHomologousX_features <- function (list_of_dfs) {
  # delete features that are missing at all time points
  for (j in 1:length(list_of_dfs)) {
    print(paste0("Pre-processing Country: ...", countryNames[j], "... "))
    data = list_of_dfs[[j]]
    data = data[ , colSums(is.na(data)) != nrow(data)]%>%
    select(-time, -country)%>%
    as.matrix()%>%
    cleardata()->DataMatrix
    DataMatrix = DataMatrix[ , colSums(is.na(DataMatrix)) == 0] # remove features with only 1 value
    DataMatrix = DataMatrix[ , colSums(DataMatrix) != 0] %>% # remove features with all values=0
    # Supersample 72 --*5--> 360 timepoints 
    splinecreate()%>%
    as.data.frame()->DataSuperSample # super-Sample the data
    # remove some of features  
    DataSuperSample = DataSuperSample[, -c(50:80)]; dim(X)  # 360 167
    # ensure full-rank design matrix, DataSuperSample
    DataSuperSample <- 
      DataSuperSample[ , qr(DataSuperSample)$pivot[seq_len(qr(DataSuperSample)$rank)]]
    print(paste0("dim()=(", dim(DataSuperSample)[1], ",", dim(DataSuperSample)[2], ") ..."))
    # update the current DF/Country
    list_of_dfs_CommonFeatures[[j]] <- DataSuperSample
  }
  
  # Identify All Xreg features that are homologous (same feature columns) across All 31 countries
  # Identify Common Columns (freatures)
  comCol <- Reduce(intersect, lapply(list_of_dfs_CommonFeatures, colnames))
  list_of_dfs_CommonFeatures <- lapply(list_of_dfs_CommonFeatures, function(x) x[comCol])
  
  for (j in 1:length(list_of_dfs_CommonFeatures)) {
    list_of_dfs_CommonFeatures[[j]] <- subset(list_of_dfs_CommonFeatures[[j]], select = comCol)
    print(paste0("dim(", countryNames[j], ")=(", dim(list_of_dfs_CommonFeatures[[j]])[1], 
                 ",", dim(list_of_dfs_CommonFeatures[[j]])[2], ")!"))  # 72 * 197
  }
  return(list_of_dfs_CommonFeatures)
}

5.0.33 index2CountryFeature()

#' This function maps to convert between 1D indices
#'
#' @param indx Indicate the feature you wish to see after transfering to 1D index
#' @return return the original country and feature code of the feature you wish to show
index2CountryFeature <- function(indx=1) { 
  if (indx<1 | indx>length(arimaModels_ARMA_coefs)) {
    cat("Index out of bounds: indx=", indx, "; must be between 1 and ", 
        length(arimaModels_ARMA_coefs), " ... Exiting ...")
    return (NULL)
  } else { 
    feature = (indx-1) %% (dim(list_of_dfs_CommonFeatures[[1]])[2])
    country = floor((indx - feature)/(dim(list_of_dfs_CommonFeatures[[1]])[2]))
    return(list("feature"=(feature+1), "country"=(country+1)))  }
}

5.0.34 countryFeature2Index()

#' This function transfer country and feature code to 1D index
#' An opposite procedure of function "index2CountryFeature"
#'
#' @param countryIndx indicate the country you wish to transfer 
#' @param featureIndx indicate the feature you wish to transfer
countryFeature2Index <- function(countryIndx=1, featureIndx=1) { 
  if (countryIndx<1 | countryIndx>(dim(list_of_dfs_CommonFeatures[[1]])[1]) |
      featureIndx<1 | featureIndx>(dim(list_of_dfs_CommonFeatures[[1]])[2])) {
    cat("Indices out of bounds: countryIndx=", countryIndx, "; featureIndx=", featureIndx, "; Exiting ...")
    return (NULL)
  } else { return (featureIndx + (countryIndx-1)*(dim(list_of_dfs_CommonFeatures[[1]])[2])) }
}

5.0.35 Spacetime_Kime_Rank()

Spacetime_Kime_Rank <- function(X,Y,signal="Weak",phase="Normal",result=1,Seed=4321,lam="min") {
  X = X[ , colSums(is.na(X)) == 0]
  if(signal=="Weak"){
    X<-X[,1:378]
  }
  if(signal=="Strong"){
    X<-X[,1:386]
  }
  if(phase=="Nil"){
    FT_aggregate_arima_vector_country_ranking_df <- 
      kSpaceTransform(aggregate_arima_vector_country_ranking_df, inverse = FALSE, reconPhases = NULL) 
    if(result==6){
      return(plotPhaseDistributions(FT_aggregate_arima_vector_country_ranking_df,
                             colnames(aggregate_arima_vector_country_ranking_df), size=4, cex=0.1))
    }
    IFT_FT_aggregate_arima_vector_country_ranking_df <- 
      kSpaceTransform(FT_aggregate_arima_vector_country_ranking_df$magnitudes, 
                      TRUE, FT_aggregate_arima_vector_country_ranking_df$phases)
    if(result==7){
    return(PS_ARIMAX_remodel(aggregate_arima_vector_country_ranking_df[,1:ncol(aggregate_arima_vector_country_ranking_df)-1],
                      Y,NULL,result = "reconstruct",rename_Y = "Yvec")[1:4])
    }
    IFT_NilPhase_FT_aggregate_arima_vector<-PS_ARIMAX_remodel(aggregate_arima_vector_country_ranking_df[,
        1:ncol(aggregate_arima_vector_country_ranking_df)-1],Y,NULL,result = "reconstruct",rename_Y = "Yvec")[[6]]
    rownames(IFT_NilPhase_FT_aggregate_arima_vector)<-rownames(X)
  }
  if(phase=="Swap"){
    FT_aggregate_arima_vector_country_ranking_df <- 
      kSpaceTransform(aggregate_arima_vector_country_ranking_df, inverse = FALSE, reconPhases = NULL) 
    swappedPhase_FT_aggregate_arima_vector <- FT_aggregate_arima_vector_country_ranking_df$phases
    IFT_SwappedPhase_FT_aggregate_arima_vector <- array(complex(), c(dim(temp_Data)[1], dim(temp_Data)[2]))
    set.seed(Seed)   # sample randomly Phase-columns for each of the 131 covariates (X)
    swappedPhase_FT_aggregate_arima_vector1 <- as.data.frame(cbind(
      swappedPhase_FT_aggregate_arima_vector[ , 
                                              sample(ncol(swappedPhase_FT_aggregate_arima_vector[ , 1:378]))],  # mix ARIMA signature phases
      swappedPhase_FT_aggregate_arima_vector[ , 
                                              sample(ncol(swappedPhase_FT_aggregate_arima_vector[ , 379:386]))],# mix the meta-data phases
      swappedPhase_FT_aggregate_arima_vector[ , 387]))                          # add correct Outcome phase
    swappedPhase_FT_aggregate_arima_vector <- swappedPhase_FT_aggregate_arima_vector1
    
    colnames(swappedPhase_FT_aggregate_arima_vector) <- colnames(temp_Data)
  }
  
    text0<-dim(X)
    set.seed(Seed)
    if(phase=="Normal"){
      Xmat<-X
      CVlassotext<-c("CV LASSO (using only Timeseries data): Number of Nonzero (Active) Coefficients")
      result2text<-c("Top 9 salient features (LASSO penalty)")
    }
    if(phase=="Nil"){
      Xmat<-IFT_NilPhase_FT_aggregate_arima_vector
      CVlassotext<-c("(Spacekime, Nil-phase) CV LASSO: Number of Nonzero (Active) Coefficients")
      result2text<-c("(Spacekime) Top 9 salient features (LASSO penalty)")
    }
    if(phase=="Swap"){
      Xmat<-swappedPhase_FT_aggregate_arima_vector
      CVlassotext<-c("(Spacekime, Swapped-Phases) CV LASSO: Number of Nonzero (Active) Coefficients")
      result2text<-c("(Spacekime, Swapped-Phases) Top 9 salient features (LASSO penalty)")
    }
    CVlasso<-cv.glmnet(data.matrix(Xmat[ , 1:ncol(Xmat)]), Y, alpha = 1, parallel=TRUE)
    if(result==1){
    plot(CVlasso)
    mtext(CVlassotext, side=3, line=2.5)
    }
    PREDlasso<-predict(CVlasso,s=ifelse(lam=="min",CVlasso$lambda.min,lam),newx=data.matrix(Xmat[,1:ncol(Xmat)]))
    Coeflist<-coef(CVlasso,s=ifelse(lam=="min",CVlasso$lambda.min,lam))
    Coeflist <- data.frame(Coeflist@Dimnames[[1]][Coeflist@i+1],Coeflist@x)
    names(Coeflist) <- c('Feature','EffectSize')
    text1<-arrange(Coeflist, -abs(EffectSize))[2:10, ] ##This place Normal and Nil phase are not the same. See through it.
    text2<-cor(Y, predLASSO_lim[, 1])
    text3<-varImp(CVlasso, lambda = ifelse(lam=="min",CVlasso$lambda.min,lam))
    if(result==2){
      COEFplot<-coef(CVlasso, s = ifelse(lam=="min",CVlasso$lambda.min,lam)) %>%  # "lambda.min"
        broom::tidy() %>%
        filter(row != "(Intercept)") %>%
        top_n(100, wt = abs(value)) %>%
        ggplot(aes(value, reorder(row, value), color = value > 0)) +
        geom_point(show.legend = FALSE, aes(size = abs(value))) +
        ggtitle(result2text) +
        xlab("Effect-size") +
        ylab(NULL)
      return(COEFplot)
    }
    if(phase=="Normal"){
    validation_lim <- data.frame(matrix(NA, nrow = dim(PREDlasso)[1], ncol=2), row.names=countryNames)
    validation_lim [ , 1] <- Y; validation_lim[ , 2] <- PREDlasso[, 1]
    colnames(validation_lim) <- c("Orig_Y", "LASSO")
    NN<-3
    LMylab<-c("LASSO (42*9 +8) param model")
    LMmain<-c("Observed (X) vs. LASSO-Predicted (Y) Overall Country Ranking, cor=%.02f")
    Xin_result4<-validation_lim[ , 1]
    Yin_result4<-validation_lim[ , 2]
    result4title<-c("Spacetime LASSO Predicted (y) vs. Observed (x) Overall Country Ranking, cor=%.02f")
    result4Y_T<-c( "Spacetime LASSO Predicted")
    }
    if(phase=="Nil"){
      validation_lim <- cbind(PREDlasso[, 1], 
                                   Xmat[ , 387], Y)  
      colnames(validation_lim) <- c("predLASSO_kime", "IFT_NilPhase_FT_Y", "Orig_Y")
      rownames(validation_lim)[11] <- "Germany"
      NN<-4
      LMylab<-c("IFT_NilPhase predLASSO_kime")
      LMmain<-c("Observed (x) vs. IFT_NilPhase Predicted (y) Overall Country Ranking, cor=%.02f")
      Xin_result4<-Y
      Yin_result4<-PREDlasso
      result4title<-c("Spacekime LASSO Predicted, Nil-Phases (y) vs. Observed (x) Overall Country Ranking, cor=%.02f")
      result4Y_T<-c("Spacekime LASSO Predicted, using Nil-Phases")
    }
    if(phase=="Swap"){
      set.seed(Seed)
      cvLASSO_lim = cv.glmnet(data.matrix(X[ , 1:ncol(X)]), Y, alpha = 1, parallel=TRUE)
      predLASSO_lim <-  predict(cvLASSO_lim, s = ifelse(lam=="min",cvLASSO_lim$lambda.min,lam), # cvLASSO_lim$lambda.min, 
                                newx = data.matrix(X[ , 1:ncol(X)]))
      IFT_NilPhase_FT_aggregate_arima_vector<-PS_ARIMAX_remodel(aggregate_arima_vector_country_ranking_df[,
            1:ncol(aggregate_arima_vector_country_ranking_df)-1],Y,NULL,result = "reconstruct",rename_Y = "Yvec")[[6]]
      rownames(IFT_NilPhase_FT_aggregate_arima_vector)<-rownames(X)
      set.seed(Seed)
      cvLASSO_nil_kime = cv.glmnet(data.matrix(IFT_NilPhase_FT_aggregate_arima_vector[ , 1:ncol(IFT_NilPhase_FT_aggregate_arima_vector)]), 
                                   Y, alpha = 1, parallel=TRUE)
      predLASSO_nil_kime <-  predict(cvLASSO_nil_kime, s = ifelse(lam=="min",cvLASSO_nil_kime$lambda.min,lam), 
                                     newx = data.matrix(IFT_NilPhase_FT_aggregate_arima_vector[ , 1:ncol(IFT_NilPhase_FT_aggregate_arima_vector)]))
      validation_lim <- cbind(PREDlasso[ , 1],predLASSO_lim[, 1], predLASSO_nil_kime[, 1], Y)  
      colnames(validation_kime_swapped) <- c("predLASSO_IFT_SwappedPhase","predLASSO (spacetime)", "predLASSO_IFT_NilPhase", "Orig_Y")
      NN<-5
      LMylab<-c("predLASSO_spacekime_swapped Country Overall Ranking")
      LMmain<-c("Observed (x) vs. Kime IFT_SwappedPhase_FT_Y (y) Overall Country Ranking, cor=%.02f")
      Xin_result4<-Y
      Yin_result4<-PREDlasso
      result4title<-c("Spacekime LASSO Predicted, Swapped-Phases (y) vs. Observed (x) Overall Country Ranking, cor=%.02f")
      result4Y_T<-c("Spacekime LASSO Predicted, using Swapped-Phases")
    }
    head(validation_nil_kime)
    text4<-dim(validation_lim)
    text5<-head(validation_lim)
    validation_lim <- as.data.frame(cbind(validation_lim, ifelse (validation_lim[, 1]<=30, 1, 0)))
    colnames(validation_lim)[NN] <- "Top30Rank"
    text6<-head(validation_lim)
    text7<-cor(validation_lim[ , 1], validation_lim[, NN-1])
    linFit_lim <- lm(validation_lim[ , 1] ~ validation_lim[, 2])
    if(result==3){
    plot(validation_lim[ , 1] ~ validation_lim[, NN-1],
         col="blue", xaxt='n', yaxt='n', pch = 16, cex=3,
         xlab="Observed Country Overall Ranking", ylab=LMylab,
         main = sprintf(LMmain, 
                        cor(validation_lim[ , 1], validation_lim[, NN-1])))
    abline(linFit_lim, lwd=3, col="red")
    }
    if(result==4){
      myPlot <- ggplot(as.data.frame(validation_lim), aes(x=Xin_result4,
                                                          y=Yin_result4, label=rownames(validation_lim))) +
        geom_smooth(method='lm') +
        geom_point() +
        # Color by groups
        # geom_text(aes(color=factor(rownames(validation_lim)))) +
        geom_label_repel(aes(label = rownames(validation_lim),
                             fill = factor(Top30Rank)), color = 'black', size = 5,  
                         point.padding = unit(0.3, "lines")) +
        # theme(legend.position = "bottom") +
        theme(legend.position = c(0.1, 0.9), 
              legend.text = element_text(colour="black", size=12, face="bold"),
              legend.title = element_text(colour="black", size=14, face="bold")) +
        scale_fill_discrete(name = "Country Overall Ranking", 
                            labels = c("Below 30 Rank", "Top 30 Rank")) +
        labs(title=sprintf(result4title,  cor(validation_lim[ , 1], validation_lim[, NN-1])),
             x ="Observed Overall Country Ranking (1 is 'best')", 
             y = "Spacetime LASSO Predicted")
      return(myPlot)
    }
    if(result==5){
      RESlist<-list(dimX=text0,Coef_first9=text1,COR_pred_true=text2,LASSO_Varimp=text3,dimLASSO=text4,LASSO_part_feature=text5,LASSO_rename_feature=text6,pred_COR=text7)
      return(RESlist)
    }
}

5.0.36 chooselambda()

#' Show the features.
#' 
#' @param cvlasso Lasso cross-validation result.
#' @param option put in 1 or 2. 1 will generate lambda for all different features choices. 2 will generate lambda for a particular number of features
#' @param k work when option is 2. Put in the number of features you wish to keep
#' @return
#' @examples
chooselambda <- function(cvlasso, option, k=NULL) {
  lambmat<-cbind(cvlasso$glmnet.fit$df,cvlasso$glmnet.fit$lambda)
  result<-tapply(lambmat[,2],lambmat[,1],max)
  kresult<-result[which(names(result)==as.factor(k))]
  if(option==1) {return(result)}
  else if (option==2) {return(kresult)}
  else (return("Not a valid option"))
}

5.0.37 randchoose()

#'randomly choose 60% of data to keep as training data
#'
#' @param matr dataset matrix that you wish to split training and testing set on.
#' @return
#' @examples
randchoose <- function(matr) {
  leng<-nrow(matr)
  se<-seq(1:leng)
  sam<-sample(se,as.integer(leng*0.6))
  return(sam)
}

5.0.38 MLcomp()

#' Compare prediction performance of different machine learning algorithms
#' 
#' @param fitlas A glmnet LASSO object. 
#' @param cvlas A cv.glmnet LASSO object.
#' @param train A training dataset.
#' @param test A test dataset.
#' @param option A number.
#' @return A table of prediction accuracy of Bagging, Random Forest, Adaboost,
#'         Logistic Regression and Support Vector Machine with diffrent number 
#'         of features.
MLcomp <- function(fitlas, cvlas, trn, test, option=1) {
  allfeat<-as.numeric(names(chooselambda(cvlasso = cvlas, option = 1)))
  allfeat<-allfeat[which(allfeat>4)]
  trainlist<-as.list(NULL)
  for (i in 1:length(allfeat)) {
    trainlist[[i]]<-trn[,which(colnames(trn) %in% 
                                 c(row.names(showfeatures(fitlas, chooselambda(cvlas = cvlas,1), allfeat[i])), "Rank"))]
  }
resultframe<-data.frame(origin=rep(NA,length(allfeat)))
rownames(resultframe)<-allfeat
resultframe$Decision_tree_bagging<-rep(NA,length(allfeat))
  for (i in 1:length(allfeat)) {
    set.seed(1234)
    eubag<-ipred::bagging(Rank~.,data = trainlist[[i]],nbagg=100)
    bagtest<-predict(eubag, eutest)
    bagagg<-bagtest==eutest$Rank
    accuracy<-prop.table(table(bagagg))[c("TRUE")]
    resultframe$Decision_tree_bagging[i]<-accuracy
  }
resultframe$Random_forest<-rep(NA,length(allfeat))
  for (i in 1:length(allfeat)) {
    set.seed(1234)
    eurf<-randomForest(Rank~.,data=trainlist[[i]])
    rftest<-predict(eurf,eutest)
    rfagg<-rftest==eutest$Rank
    accuracy<-prop.table(table(rfagg))[c("TRUE")]
    resultframe$Random_forest[i]<-accuracy
  }
resultframe$Decision_tree_adaboost<-rep(NA,length(allfeat))
  for (i in 1:length(allfeat)) {
    set.seed(1234)
    enada<-ada(Rank~.,data = trainlist[[i]],iter=50)
    adatest<-predict(enada,eutest)
    adaagg<-adatest==eutest$Rank
    accuracy<-prop.table(table(adaagg))[c("TRUE")]
    resultframe$Decision_tree_adaboost[i]<-accuracy
  }
resultframe$GLM<-rep(NA,length(allfeat))
  for (i in 1:length(allfeat)) {
    euglm<-glm(Rank~.,data = trainlist[[i]],family = "binomial")
    glmtest<-predict(euglm,eutest)
    glmtest<-ifelse(glmtest<0,0,1)
    glmagg<-glmtest==eutest$Rank
    accuracy<-prop.table(table(glmagg))[c("TRUE")]
    resultframe$GLM[i]<-accuracy
  }
resultframe$SVM_best_Gamma_Cost<-rep(NA,length(allfeat))  
  for (i in 1:length(allfeat)) {
    set.seed(1234)
    svmtune<-tune.svm(Rank~.,data = trainlist[[i]],gamma = 10^(-6:1),cost = 10^(-10:10))
    svmed<-svm(Rank~.,data=trainlist[[i]],gamma=svmtune$best.parameters[1],cost=svmtune$best.parameters[2])
    svmtest<-predict(svmed,eutest)
    svmagg<-svmtest==eutest$Rank
    accuracy<-prop.table(table(svmagg))[c("TRUE")]
    resultframe$SVM_best_Gamma_Cost[i]<-accuracy
  }
  resultframe$origin<-NULL
  if(option==1){return(resultframe)}
}

5.0.39 MLcompX()

#' Compare prediction performance of different machine learning algorithms
#' 
#' @param fitlas A glmnet LASSO object. 
#' @param cvlas A cv.glmnet LASSO object.
#' @param train A training dataset.
#' @param test A test dataset.
#' @param option A number.
#' @return A table of prediction accuracy of Bagging, Random Forest, Adaboost,
#'         Logistic Regression and Support Vector Machine with diffrent number 
#'         of features.
MLcompX <- function(fitlas, cvlas, trn, test, option=1) {
  allfeat<-as.numeric(names(chooselambda(cvlasso = cvlas, option = 1)))
  allfeat<-allfeat[which(allfeat>4)]
  trainlist<-as.list(NULL)
  for (i in 1:length(allfeat)) {
    trainlist[[i]]<-trn[,which(colnames(trn) %in% 
                                 c(row.names(showfeatures(fitlas, chooselambda(cvlas = cvlas,1), allfeat[i])), "Rank"))]
  }
 
  resultXframe<-data.frame(origin=rep(NA,length(allfeat)))
  rownames(resultXframe)<-allfeat
  resultXframe$Decision_tree_bagging<-rep(NA,length(allfeat))
  for (i in 1:length(allfeat)) {
    #ERROR HANDLING
    possibleError <- tryCatch(
        function () {
          set.seed(1234)
          Xbag<-ipred::bagging(Rank~ . ,data = trainlist[[i]], nbagg=100, 
                               control=rpart.control(minsplit=2, cp=0.1, xval=10))
          bagtest<-predict(Xbag, Xtest)
          bagagg<-bagtest==Xtest$Rank
          accuracy<-prop.table(table(bagagg))[c("TRUE")]
          resultXframe$Decision_tree_bagging[i]<-accuracy
        },
        error=function(e) e
    )
    if(inherits(possibleError, "error")) next
    # print(i)
  }

  resultXframe$Random_forest<-rep(NA,length(allfeat))
  for (i in 1:length(allfeat)) {
    set.seed(1234)
    Xrf<-randomForest(Rank~.,data=trainlist[[i]])
    rftest<-predict(Xrf,test)
    rfagg<-rftest==test$Rank
    accuracy<-prop.table(table(rfagg))[c("TRUE")]
    resultXframe$Random_forest[i]<-accuracy
  }

  resultXframe$Decision_tree_adaboost<-rep(NA,length(allfeat))
  for (i in 1:length(allfeat)) {
    set.seed(1234)
    Xada<-ada(Rank~.,data = trainlist[[i]],iter=50)
    adatest<-predict(Xada,test)
    adaagg<-adatest==test$Rank
    accuracy<-prop.table(table(adaagg))[c("TRUE")]
    resultXframe$Decision_tree_adaboost[i]<-accuracy
  }

  resultXframe$GLM<-rep(NA,length(allfeat))
  for (i in 1:length(allfeat)) {
    euglm<-glm(Rank~.,data = trainlist[[i]],family = "binomial")
    glmtest<-predict(euglm,test)
    glmtest<-ifelse(glmtest<0,0,1)
    glmagg<-glmtest==test$Rank
    accuracy<-prop.table(table(glmagg))[c("TRUE")]
    resultXframe$GLM[i]<-accuracy
  }

  resultXframe$SVM_best_Gamma_Cost<-rep(NA,length(allfeat))  
  for (i in 1:length(allfeat)) {
    set.seed(1234)
    svmtune<-tune.svm(Rank~.,data = trainlist[[i]],gamma = 10^(-6:1),cost = 10^(-10:10))
    svmed<-svm(Rank~.,data=trainlist[[i]],gamma=svmtune$best.parameters[1],cost=svmtune$best.parameters[2])
    svmtest<-predict(svmed,test)
    svmagg<-svmtest==test$Rank
    accuracy<-prop.table(table(svmagg))[c("TRUE")]
    resultXframe$SVM_best_Gamma_Cost[i]<-accuracy
  }
  resultXframe$origin<-NULL
  if(option==1){return(resultXframe)}
}

TCIU Basic Functions

Back To Contents

SOCR Team

June 2019