SOCR ≫ TCIU Website ≫ TCIU GitHub ≫

# Get this figure: fig <- get_figure("MattSundquist", 4064)
# Get this figure's data: data <- get_figure("MattSundquist", 4064)$data
# Add data to this figure: p <- add_trace(p, x=c(4, 5), y=c(4, 5), kwargs=list(filename="Klein bottle", fileopt="extend"))
# Get y data of first trace: y1 <- get_figure("MattSundquist", 4064)$data[[1]]$y

library(plotly)

1 Background

The function fftshift() is useful for visualizing the Fourier transform with the zero-frequency component in the middle of the spectrum. Its inverse counterpart, ifftshift(), is needed to rearrange again the indices appropriately after the IFT is employed, so that the image is correctly reconstructed in spacetime. The FT only computes half of the frequency spectrum corresponding to the non-negative (positive and zero if the length(f) is odd) frequencies in order to save computation time. To preserve the dimensions of the output \(\hat{f}=FT(f)\), the second half of the frequency spectrum (the complex conjugate of the first half) is just added at the end of this vector. In a 1D setting, the result of fft() is:

\(0\ 1\ 2\ 3\ ...\ (freq\ bins > 0)\ ... {\frac{Fs}{2}}\) and \(-{\frac{Fs}{2}}\ ... \ (freq\ bins < 0)\ ...\ -3\ -2\ -1.\)

where \(F_s\) is the frequency sample. The fftshift method sets the zero-frequency component in the center of the array, i.e., it just shifts (offsets) the second part with the negative frequency bins to the beginning and the first part to the end of the resulting FT vector, or matrix. Thus, the shifted discrete FT can be nicely plotted in the center covering the frequency spectrum from \(-{\frac{Fs}{2}}\) on the left to \({\frac{Fs}{2}}\) on the right. This is not necessary, but is used for better visualization aesthetics. To synthesize back the correct image, after using fftshift on the FT signal, we always have to undo that re-indexing by using ifftshift() on the inverse-FT.

# FFT SHIFT
fftshift <- function(img_ff, dim = -1) {

  rows <- dim(img_ff)[1]    
  cols <- dim(img_ff)[2]
  # planes <- dim(img_ff)[3]

  swap_up_down <- function(img_ff) {
    rows_half <- ceiling(rows/2)
    return(rbind(img_ff[((rows_half+1):rows), (1:cols)], img_ff[(1:rows_half), (1:cols)]))
  }

  swap_left_right <- function(img_ff) {
    cols_half <- ceiling(cols/2)
    return(cbind(img_ff[1:rows, ((cols_half+1):cols)], img_ff[1:rows, 1:cols_half]))
  }
  
  #swap_side2side <- function(img_ff) {
  #  planes_half <- ceiling(planes/2)
  #  return(cbind(img_ff[1:rows, 1:cols, ((planes_half+1):planes)], img_ff[1:rows, 1:cols, 1:planes_half]))
  #}

  if (dim == -1) {
    img_ff <- swap_up_down(img_ff)
    return(swap_left_right(img_ff))
  }
  else if (dim == 1) {
    return(swap_up_down(img_ff))
  }
  else if (dim == 2) {
    return(swap_left_right(img_ff))
  }
  else if (dim == 3) {
    # Use the `abind` package to bind along any dimension a pair of multi-dimensional arrays
    # install.packages("abind")
    library(abind)
    
    planes <- dim(img_ff)[3]
    rows_half <- ceiling(rows/2)
    cols_half <- ceiling(cols/2)
    planes_half <- ceiling(planes/2)
    
    img_ff <- abind(img_ff[((rows_half+1):rows), (1:cols), (1:planes)], 
                    img_ff[(1:rows_half), (1:cols), (1:planes)], along=1)
    img_ff <- abind(img_ff[1:rows, ((cols_half+1):cols), (1:planes)], 
                    img_ff[1:rows, 1:cols_half, (1:planes)], along=2)
    img_ff <- abind(img_ff[1:rows, 1:cols, ((planes_half+1):planes)], 
                    img_ff[1:rows, 1:cols, 1:planes_half], along=3)
    return(img_ff)
  }
  else {
    stop("Invalid dimension parameter")
  }
}

ifftshift <- function(img_ff, dim = -1) {

  rows <- dim(img_ff)[1]    
  cols <- dim(img_ff)[2]    

  swap_up_down <- function(img_ff) {
    rows_half <- floor(rows/2)
    return(rbind(img_ff[((rows_half+1):rows), (1:cols)], img_ff[(1:rows_half), (1:cols)]))
  }

  swap_left_right <- function(img_ff) {
    cols_half <- floor(cols/2)
    return(cbind(img_ff[1:rows, ((cols_half+1):cols)], img_ff[1:rows, 1:cols_half]))
  }

  if (dim == -1) {
    img_ff <- swap_left_right(img_ff)
    return(swap_up_down(img_ff))
  }
  else if (dim == 1) {
    return(swap_up_down(img_ff))
  }
  else if (dim == 2) {
    return(swap_left_right(img_ff))
  }
  else if (dim == 3) {
    # Use the `abind` package to bind along any dimension a pair of multi-dimensional arrays
    # install.packages("abind")
    library(abind)
    
    planes <- dim(img_ff)[3]
    rows_half <- floor(rows/2)
    cols_half <- floor(cols/2)
    planes_half <- floor(planes/2)
    
    img_ff <- abind(img_ff[1:rows, 1:cols, ((planes_half+1):planes)], 
                    img_ff[1:rows, 1:cols, 1:planes_half], along=3)
    img_ff <- abind(img_ff[1:rows, ((cols_half+1):cols), (1:planes)], 
                    img_ff[1:rows, 1:cols_half, (1:planes)], along=2)
    img_ff <- abind(img_ff[((rows_half+1):rows), (1:cols), (1:planes)], 
                    img_ff[(1:rows_half), (1:cols), (1:planes)], along=1)
    return(img_ff)
  }
  else {
    stop("Invalid dimension parameter")
  }
}

2 Exegeneous Feature Time-series analysis

We can use the UCI ML Air Quality Dataset to demonstrate the effect of kime-direction on the analysis of the longitudinal data. The Air Quality data consists of \(9358\) hourly-averaged responses from an array of 5 sensors embedded in an Air Quality Chemical Multisensor Device. These measurements were obtained in a significantly polluted area during a one year period (March 2004 to February 2005). The features include Concentrations for CO, Non Metanic Hydrocarbons, Benzene, Total Nitrogen Oxides (NOx), and Nitrogen Dioxide (NO2).

The attributes in the CSV file include:

  • Date (DD/MM/YYYY)
  • Time (HH.MM.SS)
  • True hourly averaged concentration CO in mg/m^3 (reference analyzer)
  • PT08.S1 (tin oxide) hourly averaged sensor response (nominally CO targeted)
  • True hourly averaged overall Non Metanic Hydro-carbons concentration in microg/m^3 (reference analyzer)
  • True hourly averaged Benzene concentration in microg/m^3 (reference analyzer)
  • PT08.S2 (Titania) hourly averaged sensor response (nominally NMHC targeted)
  • True hourly averaged NOx concentration in ppb (reference analyzer)
  • PT08.S3 (tungsten oxide) hourly averaged sensor response (nominally NOx targeted)
  • True hourly averaged NO2 concentration in microg/m^3 (reference analyzer)
  • PT08.S4 (tungsten oxide) hourly averaged sensor response (nominally NO2 targeted)
  • PT08.S5 (indium oxide) hourly averaged sensor response (nominally O3 targeted)
  • Temperature in ?C
  • Relative Humidity (%)
  • AH Absolute Humidity
aqi_data <- read.csv("https://umich.instructure.com/files/8208336/download?download_frd=1")
summary(aqi_data)
##      Date               Time               CO.GT.         PT08.S1.CO.  
##  Length:9471        Length:9471        Min.   :-200.00   Min.   :-200  
##  Class :character   Class :character   1st Qu.:   0.60   1st Qu.: 921  
##  Mode  :character   Mode  :character   Median :   1.50   Median :1053  
##                                        Mean   : -34.21   Mean   :1049  
##                                        3rd Qu.:   2.60   3rd Qu.:1221  
##                                        Max.   :  11.90   Max.   :2040  
##                                        NA's   :114       NA's   :114   
##     NMHC.GT.         C6H6.GT.        PT08.S2.NMHC.       NOx.GT.      
##  Min.   :-200.0   Min.   :-200.000   Min.   :-200.0   Min.   :-200.0  
##  1st Qu.:-200.0   1st Qu.:   4.000   1st Qu.: 711.0   1st Qu.:  50.0  
##  Median :-200.0   Median :   7.900   Median : 895.0   Median : 141.0  
##  Mean   :-159.1   Mean   :   1.866   Mean   : 894.6   Mean   : 168.6  
##  3rd Qu.:-200.0   3rd Qu.:  13.600   3rd Qu.:1105.0   3rd Qu.: 284.0  
##  Max.   :1189.0   Max.   :  63.700   Max.   :2214.0   Max.   :1479.0  
##  NA's   :114      NA's   :114        NA's   :114      NA's   :114     
##   PT08.S3.NOx.     NO2.GT.         PT08.S4.NO2.   PT08.S5.O3.    
##  Min.   :-200   Min.   :-200.00   Min.   :-200   Min.   :-200.0  
##  1st Qu.: 637   1st Qu.:  53.00   1st Qu.:1185   1st Qu.: 700.0  
##  Median : 794   Median :  96.00   Median :1446   Median : 942.0  
##  Mean   : 795   Mean   :  58.15   Mean   :1391   Mean   : 975.1  
##  3rd Qu.: 960   3rd Qu.: 133.00   3rd Qu.:1662   3rd Qu.:1255.0  
##  Max.   :2683   Max.   : 340.00   Max.   :2775   Max.   :2523.0  
##  NA's   :114    NA's   :114       NA's   :114    NA's   :114     
##        T                  RH                AH               X          
##  Min.   :-200.000   Min.   :-200.00   Min.   :-200.0000   Mode:logical  
##  1st Qu.:  10.900   1st Qu.:  34.10   1st Qu.:   0.6923   NA's:9471     
##  Median :  17.200   Median :  48.60   Median :   0.9768                 
##  Mean   :   9.778   Mean   :  39.49   Mean   :  -6.8376                 
##  3rd Qu.:  24.100   3rd Qu.:  61.90   3rd Qu.:   1.2962                 
##  Max.   :  44.600   Max.   :  88.70   Max.   :   2.2310                 
##  NA's   :114        NA's   :114       NA's   :114                       
##    X.1         
##  Mode:logical  
##  NA's:9471     
##                
##                
##                
##                
## 
aqi_data.ts <- ts(aqi_data, start=c(2004,3), freq=24) # hourly sampling rate

dateTime <- as.POSIXct(paste(aqi_data$Date, aqi_data$Time), format="%m/%d/%Y %H:%M:%S")

# set up training and testing time-periods
alltrain.ts <- window(aqi_data.ts, end=c(2004,3))
allvalid.ts <- window(aqi_data.ts, start=c(2005,1))

# Estimate the ARIMAX model
library(forecast)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
fitArimaX <- auto.arima(aqi_data$CO.GT., xreg= as.matrix(aqi_data[ , 
                                  c("PT08.S1.CO.", "NMHC.GT.", "C6H6.GT.", "PT08.S2.NMHC.",
                                    "NOx.GT.", "PT08.S3.NOx.", "NO2.GT.", "PT08.S4.NO2.",
                                    "PT08.S5.O3.", "T", "RH", "AH")]))
fitArimaX
## Series: aqi_data$CO.GT. 
## Regression with ARIMA(1,0,1) errors 
## 
## Coefficients:
##          ar1      ma1  intercept  PT08.S1.CO.  NMHC.GT.  C6H6.GT.
##       0.9758  -0.5825   -40.4356       0.0056    0.0452   -2.9162
## s.e.  0.0025   0.0099    22.5593       0.0120    0.0071    0.5165
##       PT08.S2.NMHC.  NOx.GT.  PT08.S3.NOx.  NO2.GT.  PT08.S4.NO2.  PT08.S5.O3.
##              0.1017   0.0363       -0.0157   0.0091       -0.0196      -0.0091
## s.e.         0.0188   0.0057        0.0073   0.0098        0.0095       0.0044
##             T       RH      AH
##       -0.3853  -0.2147  3.4773
## s.e.   0.3318   0.1219  0.6934
## 
## sigma^2 estimated as 1202:  log likelihood=-46447.71
## AIC=92927.43   AICc=92927.49   BIC=93041.73
# Predict prospective CO concentration
pred_length <- 24*30 # 1 month forward forecasting
predArrivals <- predict(fitArimaX, n.ahead = pred_length, 
                        newxreg = aqi_data[c((9471-pred_length+1):9471), c(4:15)])
#plot(predArrivals$pred, main="Forward time-series predictions (fitArimaX)")
plot(forecast(fitArimaX, xreg = as.matrix(aqi_data[c((9471-pred_length+1):9471), c(4:15)])))
## Warning in forecast.forecast_ARIMA(fitArimaX, xreg = as.matrix(aqi_data[c((9471
## - : Upper prediction intervals are not finite.

library(plotly)
library(magrittr)
# generate date sequence
arimaForecast <- forecast(fitArimaX, xreg = as.matrix(aqi_data[c((9471-pred_length+1):9471), c(4:15)]))
## Warning in forecast.forecast_ARIMA(fitArimaX, xreg = as.matrix(aqi_data[c((9471
## - : Upper prediction intervals are not finite.
str(arimaForecast )
## List of 10
##  $ method   : chr "Regression with ARIMA(1,0,1) errors"
##  $ model    :List of 19
##   ..$ coef     : Named num [1:15] 0.97577 -0.58251 -40.43563 0.00562 0.04517 ...
##   .. ..- attr(*, "names")= chr [1:15] "ar1" "ma1" "intercept" "PT08.S1.CO." ...
##   ..$ sigma2   : num 1202
##   ..$ var.coef : num [1:15, 1:15] 6.22e-06 -9.86e-06 -3.02e-03 -1.36e-06 -1.69e-07 ...
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ : chr [1:15] "ar1" "ma1" "intercept" "PT08.S1.CO." ...
##   .. .. ..$ : chr [1:15] "ar1" "ma1" "intercept" "PT08.S1.CO." ...
##   ..$ mask     : logi [1:15] TRUE TRUE TRUE TRUE TRUE TRUE ...
##   ..$ loglik   : num -46448
##   ..$ aic      : num 92927
##   ..$ arma     : int [1:7] 1 1 0 0 1 0 0
##   ..$ residuals: Time-Series [1:9471] from 1 to 9471: 11.79 5.48 4.3 3.47 8.06 ...
##   ..$ call     : language auto.arima(y = aqi_data$CO.GT., xreg = c(1360, 1292, 1402, 1376, 1272,  1197, 1185, 1136, 1094, 1010, 1011, 1066,| __truncated__ ...
##   ..$ series   : chr "aqi_data$CO.GT."
##   ..$ code     : int 1
##   ..$ n.cond   : num 0
##   ..$ nobs     : int 9357
##   ..$ model    :List of 10
##   .. ..$ phi  : num 0.976
##   .. ..$ theta: num -0.583
##   .. ..$ Delta: num(0) 
##   .. ..$ Z    : num [1:2] 1 0
##   .. ..$ a    : num [1:2] 0.932 0
##   .. ..$ P    : num [1:2, 1:2] 4.218 -0.583 -0.583 0.339
##   .. ..$ T    : num [1:2, 1:2] 0.976 0 1 0
##   .. ..$ V    : num [1:2, 1:2] 1 -0.583 -0.583 0.339
##   .. ..$ h    : num 0
##   .. ..$ Pn   : num [1:2, 1:2] 4.218 -0.583 -0.583 0.339
##   ..$ bic      : num 93042
##   ..$ aicc     : num 92927
##   ..$ xreg     : num [1:9471, 1:12] 1360 1292 1402 1376 1272 ...
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ : NULL
##   .. .. ..$ : chr [1:12] "PT08.S1.CO." "NMHC.GT." "C6H6.GT." "PT08.S2.NMHC." ...
##   ..$ x        : Time-Series [1:9471] from 1 to 9471: 2.6 2 2.2 2.2 1.6 1.2 1.2 1 0.9 0.6 ...
##   ..$ fitted   : Time-Series [1:9471] from 1 to 9471: -9.19 -3.48 -2.1 -1.27 -6.46 ...
##   ..- attr(*, "class")= chr [1:3] "forecast_ARIMA" "ARIMA" "Arima"
##  $ level    : num [1:2] 80 95
##  $ mean     : Time-Series [1:720] from 9358 to 10077: -11.29 -6.94 -12.26 -13.45 -36.93 ...
##   ..- attr(*, "names")= chr [1:720] "8752" "8753" "8754" "8755" ...
##  $ lower    : Time-Series [1:720, 1:2] from 9358 to 10077: -102.5 -98.2 -103.5 -104.7 -128.2 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : chr [1:2] "80%" "95%"
##  $ upper    : Time-Series [1:720, 1:2] from 9358 to 10077: 80 84.3 79 77.8 54.3 ...
##   ..- attr(*, "dimnames")=List of 2
##   .. ..$ : NULL
##   .. ..$ : chr [1:2] "80%" "95%"
##  $ x        : Time-Series [1:9471] from 1 to 9471: 2.6 2 2.2 2.2 1.6 1.2 1.2 1 0.9 0.6 ...
##  $ series   : chr "aqi_data$CO.GT."
##  $ fitted   : Time-Series [1:9471] from 1 to 9471: -9.19 -3.48 -2.1 -1.27 -6.46 ...
##  $ residuals: Time-Series [1:9471] from 1 to 9471: 11.79 5.48 4.3 3.47 8.06 ...
##  - attr(*, "class")= chr "forecast"
plot_ly(x=~dateTime, y=~arimaForecast$fitted, 
                  name="ARIMA Model", type = "scatter", mode='lines+markers')  
# Figure 6.9 ================================================
# aqi_data[is.na(aqi_data)] <- 0
# y     <- aqi_data$PT08.S1.CO.
# t     <- 1:dim(aqi_data)[1]
# # range <- diff(range(y))
# 
# # Compute the spectral decomposition (harmonics)
# ff_harmonicsPlotLy = function(x=NULL, n=NULL, up=10L, plot=TRUE, add=F, main=NULL, ...) {
#   # The discrete Fourier transformation
#   dff = fft(x)
#   # time
#   t = seq(from = 1, to = length(x))
#   
#   # Upsampled time
#   nt = seq(from = 1, to = length(x)+1-1/up, by = 1/up)
#  
#   #New spectrum
#   ndff = array(data = 0, dim = c(length(nt), 1L))
#   ndff[1] = dff[1] # mean, DC component
#   if(n != 0){
#     ndff[2:(n+1)] = dff[2:(n+1)] # positive frequencies come first
#     ndff[length(ndff):(length(ndff) - n + 1)] = dff[length(x):(length(x) - n + 1)] # negative frequencies
#   }
#   
#   # Invert the FT
#   indff = fft(ndff/length(y), inverse = TRUE)
#   idff = fft(dff/length(y), inverse = TRUE)
#   if(plot){
#     plot(x = dateTime, y = x, pch = 16L, xlab = "Time", ylab = "Measurement", 
#           col = rgb(red = 0.5, green = 0.5, blue = 0.5, alpha = 0.5),
#           main = ifelse(is.null(main), paste(n, "harmonics"), main))
#       lines(y = Mod(idff), x = dateTime, col = adjustcolor(1L, alpha = 0.5))
#     
#       plot_ly(x=~dateTime, y=~x, name="ARIMA Model", type = "scatter", mode='lines+markers')  %>%
#         add_trace(x=~dateTime, y =~ Mod(idff), 
#             name="First 3 Harmonics Model",
#             type = "scatter", mode='lines+markers') %>%
#         layout(title = "Air Quality with First 3 Harmonics Model")
#   }
#   ret = data.frame(time = dateTime, y = Mod(indff))
#   return(ret)
# }
# 
# # Apply ff_harmonics to the timeseries as x, specifying the number of harmonics (n) and 
# #      the upsampling (so we plot points in time beside the original ones)
# result = ff_harmonicsPlotLy(x = y, n = 12L, up = 100L, col = 2L, lwd=3, cex=2)

We can first explore the time-course harmonics of the data.

aqi_data[is.na(aqi_data)] <- 0
y     <- aqi_data$PT08.S1.CO.
t     <- 1:dim(aqi_data)[1]
# range <- diff(range(y))

# Compute the spectral decomposition (harmonics)
ff_harmonics = function(x=NULL, n=NULL, up=10L, plot=TRUE, add=F, main=NULL, ...) {
  # The discrete Fourier transformation
  dff = fft(x)
  # time
  t = seq(from = 1, to = length(x))
  
  # Upsampled time
  nt = seq(from = 1, to = length(x)+1-1/up, by = 1/up)
 
  #New spectrum
  ndff = array(data = 0, dim = c(length(nt), 1L))
  ndff[1] = dff[1] # mean, DC component
  if(n != 0){
    ndff[2:(n+1)] = dff[2:(n+1)] # positive frequencies come first
    ndff[length(ndff):(length(ndff) - n + 1)] = dff[length(x):(length(x) - n + 1)] # negative frequencies
  }
  
  # Invert the FT
  indff = fft(ndff/length(y), inverse = TRUE)
  idff = fft(dff/length(y), inverse = TRUE)
  if(plot){
    if(!add){
      plot(x = t, y = x, pch = 16L, xlab = "Time", ylab = "Measurement", 
          col = rgb(red = 0.5, green = 0.5, blue = 0.5, alpha = 0.5),
          main = ifelse(is.null(main), paste(n, "harmonics"), main))
      lines(y = Mod(idff), x = t, col = adjustcolor(1L, alpha = 0.5))
    }
    lines(y = Mod(indff), x = nt, ...)
  }
  ret = data.frame(time = nt, y = Mod(indff))
  return(ret)
}

# Apply ff_harmonics to the timeseries as x, specifying the number of harmonics (n) and 
#      the upsampling (so we plot points in time beside the original ones)
result = ff_harmonics(x = y, n = 12L, up = 100L, col = 2L, lwd=3, cex=2)

# We can add the fourth-to-twelveth harmonics and look at their sum (as a series difference, 12-3)
add4to12_harmonics = ff_harmonics(x = y, n = 12L, up = 10L, col = 2L, plot = FALSE)
add4to12_harmonics$y <- add4to12_harmonics$y - ff_harmonics(x = y, n = 3L, up = 10L, plot = T, col = 2L, lwd=3)$y

plot(add4to12_harmonics, pch = 16L, xlab = "Time", ylab = "Measurement", 
     main = "Sum of all harmonics up to order 12", type = "l", col = 2)

# Harmonics plot of multiple frequencies (waves) in different colors
colors = rainbow(14, alpha = 0.6)
# ff_harmonics(x = y, n = 28L, up = 100L, col = colors[1], add=T)
for(i in 1:14){
  ad = ifelse(i == 1, FALSE, TRUE)
  ff_harmonics(x = y, n = i, up = 100L, col = colors[i], add = ad, 
               lwd= 3, main = "All waves up to 14th harmonic")
}

2.1 Kime-Phases Circular distribution plots

# install.packages("circular")
library("circular")
## 
## Attaching package: 'circular'
## The following object is masked from 'package:plotly':
## 
##     wind
## The following objects are masked from 'package:stats':
## 
##     sd, var
set.seed(1234)
x <- rvonmises(n=1000, mu=circular(pi/5), kappa=3)
y <- rvonmises(n=1000, mu=circular(-pi/3), kappa=5)
z <- rvonmises(n=1000, mu=circular(0), kappa=10)
resx <- density(x, bw=25)
res <- plot(resx, points.plot=TRUE, xlim=c(-1.5,1), ylim=c(-1.1, 1.5), offset=1.1, shrink=1.2, lwd=3)
resy <- density(y, bw=25)
lines(resy, points.plot=TRUE, col=2, points.col=2, plot.info=res, offset=1.1, shrink=1.45, lwd=3)
resz <- density(z, bw=25)
lines(resz, points.plot=TRUE, col=3, points.col=3, plot.info=res, offset=1.1, shrink=1.2, lwd=3)

There is a key difference between spacekime data analytics and spacetime data modeling and inference. This contrast is based on the fact that in spacetime, statistical results are obtained by aggregating repeated (IID) dataset samples or measuring identical replicate cohorts under identical conditions. In spacekime, reliable inference can be made on a single sample, if the kime direction angles are known. Indeed the latter are generally no observable, however, they can be estimated, inferred, or approximated. As the FT and IFT are linear functionals, addition, averaging and multiplication by constants are preserved by the forward and inverse Fourier transforms. Therefore, if we have a number of phase estimates in k-space, we can aggregate these (e.g., by averaging them) and use the resulting assemblage phase to synthesize the data in spacekime. If the composite phases are indeed representative of the process kime orientation, then the reconstructed spacekime inference is expected to be valid even if we used a single sample. In a way, spacekime inference allows a dual representation of the central limit theorem, which guarantees the convergence of sample averages to their corresponding population mean counterparts.

In light of this analytics duality, we can now perform a traditional ARIMA modeling of CO concentration (outcome: PT08.S1.CO.) based on several covariates, e.g., predictors: NMHC.GT., C6H6.GT., PT08.S2.NMHC., NOx.GT., PT08.S3.NOx., NO2.GT., PT08.S4.NO2., PT08.S5.O3., T, RH, and AH.

dim(aqi_data)
## [1] 9471   17
epochs_aqi_data <- as.matrix(aqi_data[c(1:9000) , -c(1:3, 16:17)])
is.matrix(epochs_aqi_data); dim(epochs_aqi_data)
## [1] TRUE
## [1] 9000   12
dim(epochs_aqi_data) <- c(9, 1000, 12)
dim(epochs_aqi_data); identical(epochs_aqi_data[9, 1000, 12], aqi_data[9000, 12+3])
## [1]    9 1000   12
## [1] TRUE
epochs_aqi_data_1 <- epochs_aqi_data[1, , ]; dim(epochs_aqi_data_1)
## [1] 1000   12
# 1D timeseries FFT SHIFT
fftshift1D <- function(img_ff) {
  rows <- length(img_ff)   
  rows_half <- ceiling(rows/2)
  return(append(img_ff[(rows_half+1):rows], img_ff[1:rows_half]))
}

# 1. Transform all 9 signals to k-space (Fourier domain)
x1 <- c(1:1000)
FT_epochs_aqi_data <- array(complex(), c(9, 1000, 12))
mag_FT_epochs_aqi_data <- array(complex(), c(9, 1000, 12))
phase_FT_epochs_aqi_data <- array(complex(), c(9, 1000, 12))
for (i in 1:9) {
  FT_epochs_aqi_data[i, , ] <- fft(epochs_aqi_data[i, , ])
  X2 <- FT_epochs_aqi_data[i, , ]
  # plot(fftshift1D(log(Re(X2)+2)), main = "log(fftshift1D(Re(FFT(timeseries))))") 
  mag_FT_epochs_aqi_data[i, , ] <- sqrt(Re(X2)^2+Im(X2)^2); 
  # plot(log(fftshift1D(Re(X2_mag))), main = "log(Magnitude(FFT(timeseries)))") 
  phase_FT_epochs_aqi_data[i, , ] <- atan2(Im(X2), Re(X2)); 
  # plot(fftshift1D(X2_phase), main = "Shift(Phase(FFT(timeseries)))")
}

### Test the process to confirm calculations
# X2<-FT_epochs_aqi_data[1,,];X2_mag<-mag_FT_epochs_aqi_data[1,,];X2_phase<-phase_FT_epochs_aqi_data[1,,]
# Real2 = X2_mag * cos(X2_phase)
# Imaginary2 = X2_mag * sin(X2_phase)
# man_hat_X2 = Re(fft(Real2 + 1i*Imaginary2, inverse = T)/length(X2))
# ifelse(abs(man_hat_X2[5,10] - epochs_aqi_data[1, 5, 10]) < 0.001, "Perfect Syntesis", "Problems!!!")
#######

# 2. Invert back to spacetime the epochs_aqi_data_1 signal with nil phase
Real = mag_FT_epochs_aqi_data[1, , ] * cos(0)  # cos(phase_FT_epochs_aqi_data[1, , ])
Imaginary = mag_FT_epochs_aqi_data[1, , ] * sin(0)   # sin(phase_FT_epochs_aqi_data[1, , ])
ift_NilPhase_X2mag = Re(fft(Real+1i*Imaginary, inverse = T)/length(FT_epochs_aqi_data[1,,]))
# display(ift_NilPhase_X2mag, method = "raster")
# dim(ift_NilPhase_X2mag); View(ift_NilPhase_X2mag); # compare to View(epochs_aqi_data[1, , ])

# 3. Perform ARIMAX modeling of ift_NilPhase_X2mag; report (p,d,q) params and quality metrics AIC/BIC
library(forecast)
fitArimaX_nil <- auto.arima(ift_NilPhase_X2mag[ , 1], xreg= ift_NilPhase_X2mag[ , 2:12])
fitArimaX_nil
## Series: ift_NilPhase_X2mag[, 1] 
## Regression with ARIMA(2,0,1) errors 
## 
## Coefficients:
##          ar1      ar2      ma1  intercept    xreg1   xreg2    xreg3   xreg4
##       1.1141  -0.1457  -0.7892   503.3455  -0.4028  0.1366  -0.5146  1.0961
## s.e.  0.2064   0.1571   0.1821    73.4212   0.1087  0.1123   0.1072  0.1059
##        xreg5   xreg6   xreg7   xreg8    xreg9  xreg10   xreg11
##       1.2195  1.3063  1.2087  1.1491  -0.4823  0.0315  -0.4640
## s.e.  0.1029  0.1564  0.1023  0.1101   0.1049  0.1125   0.1076
## 
## sigma^2 estimated as 30448:  log likelihood=-6573.68
## AIC=13179.36   AICc=13179.91   BIC=13257.88
# Regression with ARIMA(2,0,1) errors 
#Coefficients:
#         ar1      ar2      ma1  intercept    xreg1   xreg2    xreg3   xreg4   xreg5   xreg6   xreg7   xreg8
#      1.1141  -0.1457  -0.7892   503.3455  -0.4028  0.1366  -0.5146  1.0961  1.2195  1.3063  1.2087  1.1491
#s.e.  0.2064   0.1571   0.1821    73.4212   0.1087  0.1123   0.1072  0.1059  0.1029  0.1564  0.1023  0.1101
#        xreg9  xreg10   xreg11
#      -0.4823  0.0315  -0.4640
#s.e.   0.1049  0.1125   0.1076
# sigma^2 estimated as 30448:  log likelihood=-6573.68 AIC=13179.36   AICc=13179.91   BIC=13257.88

# Predict prospective CO concentration
pred_length <- 24*7 # 1 week forward forecasting
predArrivals <- predict(fitArimaX_nil, n.ahead = pred_length, newxreg = ift_NilPhase_X2mag[800:1000, 2:12])
## Warning in cbind(intercept = rep(1, n.ahead), newxreg): number of rows of result
## is not a multiple of vector length (arg 1)
## Warning in z[[1L]] + xm: longer object length is not a multiple of shorter
## object length
#plot(predArrivals$pred, main="Forward time-series predictions (fitArimaX)")
plot(forecast(fitArimaX_nil, xreg = ift_NilPhase_X2mag[800:1000, 2:12]), main = "ARIMAX(2,0,1) Model Forecasting (1,001:1,200)", ylim = c(500, 6000))
lines(c(1001:1200), epochs_aqi_data[2, c(1:200), 1], col = "red", lwd = 2, lty=2)  
# overlay original data to show Nil-Phase effects on reconstruction
legend("top", bty="n", legend=c("Prediction via Nil-Phase Reconstruction", "Real Timeseries (CO)"),
       col=c("blue", "red"), lty=c(1,2), lwd=c(2,2), cex=0.9, x.intersp=0.5)

# 4. Compute the *average phase* across the eight series 2:9
phase_Avg <- apply(phase_FT_epochs_aqi_data, c(2,3), mean); dim(phase_Avg); phase_Avg[1:5 , 1:5]
## [1] 1000   12
##               [,1]          [,2]         [,3]          [,4]         [,5]
## [1,]  0.0000000+0i  3.0047664+0i -1.038689+0i  0.6640319+0i 0.6095669+0i
## [2,] -1.4212860+0i -0.1016919+0i -2.648523+0i -0.5298284+0i 2.3392869+0i
## [3,] -1.4819520+0i  0.4516082+0i -2.289320+0i -0.9302969+0i 2.5178117+0i
## [4,]  0.1747996+0i -2.5427688+0i -1.433284+0i  1.6936688+0i 2.6054166+0i
## [5,] -2.0177463+0i -0.8160858+0i  3.018639+0i -1.0881887+0i 2.7825180+0i
# 5. Invert epochs_aqi_data_1 signal to spacetime using average-phase
Real = mag_FT_epochs_aqi_data[1, , ] * cos(phase_Avg)
Imaginary = mag_FT_epochs_aqi_data[1, , ] * sin(phase_Avg)
ift_AvgPhase_X2mag = Re(fft(Real+1i*Imaginary, inverse = T)/length(FT_epochs_aqi_data[1,,]))
# display(ift_AvgPhase_X2mag, method = "raster")
# dim(ift_AvgPhase_X2mag); View(ift_AvgPhase_X2mag); # compare to View(epochs_aqi_data[1, , ])

# 6. Perform ARIMAX modeling on ift_AvgPhase_X2mag; report (p, d, q) parameters and quality metrics
fitArimaX_avg <- auto.arima(ift_AvgPhase_X2mag[ , 1], xreg= ift_NilPhase_X2mag[ , 2:12])
fitArimaX_avg
## Series: ift_AvgPhase_X2mag[, 1] 
## Regression with ARIMA(2,0,3) errors 
## 
## Coefficients:
##          ar1     ar2     ma1      ma2     ma3  intercept   xreg1   xreg2
##       0.3295  0.2384  0.2673  -0.0061  0.1573   742.8001  0.5838  0.2809
## s.e.  0.1354  0.1150  0.1363   0.0650  0.0451    97.1678  0.1700  0.1776
##         xreg3   xreg4    xreg5   xreg6   xreg7   xreg8    xreg9  xreg10
##       -0.6497  1.2399  -0.0261  1.0818  0.2540  0.3065  -0.4052  0.3511
## s.e.   0.1468  0.1695   0.1710  0.2394  0.1706  0.1665   0.1450  0.1791
##        xreg11
##       -0.4577
## s.e.   0.1709
## 
## sigma^2 estimated as 82982:  log likelihood=-7073.9
## AIC=14183.8   AICc=14184.5   BIC=14272.14
# ARIMA(2,0,3) 
# Coefficients:
#         ar1     ar2     ma1      ma2     ma3  intercept   xreg1   xreg2    xreg3   xreg4    xreg5   xreg6
#      0.3295  0.2384  0.2673  -0.0061  0.1573   742.8001  0.5838  0.2809  -0.6497  1.2399  -0.0261  1.0818
# s.e.  0.1354  0.1150  0.1363   0.0650  0.0451    97.1676  0.1700  0.1776   0.1468  0.1695   0.1710  0.2394
#       xreg7   xreg8    xreg9  xreg10   xreg11
#      0.2540  0.3065  -0.4052  0.3511  -0.4577
# s.e.  0.1706  0.1665   0.1450  0.1791   0.1709
# sigma^2 estimated as 82982:  log likelihood=-7073.9  AIC=14183.8   AICc=14184.5   BIC=14272.14

# 7. Perform ARIMAX modeling on epochs_aqi_data[1,,]; report (p,d,q) parameters and quality metrics
fitArimaX_orig <- auto.arima(epochs_aqi_data[1, , 1], xreg= epochs_aqi_data[1, , 2:12])
fitArimaX_orig
## Series: epochs_aqi_data[1, , 1] 
## Regression with ARIMA(0,1,4) errors 
## 
## Coefficients:
##           ma1      ma2     ma3      ma4   xreg1   xreg2   xreg3   xreg4
##       -0.6227  -0.0504  0.0067  -0.2011  0.0788  6.2927  0.0893  0.0180
## s.e.   0.0327   0.0366  0.0396   0.0307  0.0212  1.6588  0.0553  0.0152
##         xreg5    xreg6   xreg7   xreg8   xreg9  xreg10    xreg11
##       -0.0522  -0.0127  0.1809  0.1788  6.4129  1.7568  -11.9521
## s.e.   0.0206   0.0256  0.0273  0.0118  0.7105  0.2722    1.8927
## 
## sigma^2 estimated as 2291:  log likelihood=-5275.11
## AIC=10582.23   AICc=10582.78   BIC=10660.73
# Regression with ARIMA(1,1,4) errors 
# Coefficients:
#         ar1      ma1     ma2     ma3      ma4   xreg1   xreg2   xreg3   xreg4    xreg5    xreg6   xreg7
#      0.2765  -0.8891  0.1268  0.0304  -0.1766  0.0804  6.1495  0.0986  0.0163  -0.0482  -0.0110  0.1833
#s.e.  0.1294   0.1272  0.0933  0.0450   0.0384  0.0213  1.6611  0.0554  0.0152   0.0207   0.0257  0.0274
#       xreg8   xreg9  xreg10    xreg11
#      0.1765  6.5374  1.7939  -12.0697
#s.e.  0.0118  0.7141  0.2724    1.8905
# sigma^2 estimated as 2287:  log likelihood=-5273.65   AIC=10581.29   AICc=10581.92   BIC=10664.71

# 8. Compare the analytics results from #3, #6, and #7

# Generate a table with results

### correlations
cor_orig_obs <- format(cor(forecast(fitArimaX_orig, xreg = epochs_aqi_data[2, c(801:1000), 2:12])$mean, 
    epochs_aqi_data[2, c(1:200), 1]), digits=3); cor_orig_obs
## [1] "-0.0114"
cor_orig_nil <- format(cor(forecast(fitArimaX_orig, xreg = epochs_aqi_data[2, c(801:1000), 2:12])$mean, 
    forecast(fitArimaX_nil, xreg = ift_NilPhase_X2mag[801:1000, 2:12])$mean), digits=3); cor_orig_nil
## [1] "0.077"
cor_orig_avg <- format(cor(forecast(fitArimaX_orig, xreg = epochs_aqi_data[2, c(801:1000), 2:12])$mean, 
    forecast(fitArimaX_avg, xreg = ift_AvgPhase_X2mag[801:1000, 2:12])$mean), digits=3); cor_orig_avg
## [1] "0.309"
### plots
plot(forecast(fitArimaX_orig, xreg = epochs_aqi_data[1, 800:1000, 2:12]), 
     main = sprintf("ARIMAX Model Forecasting (1,001:1,200): Corr(TrueObs,Orig)=%s", cor_orig_obs),
     xlim=c(800, 1200), ylim = c(500, 2000), col="black", lwd = 1, lty=1)
#lines(c(1001:1200), forecast(fitArimaX_orig, xreg = epochs_aqi_data[2, c(801:1000), 2:12])$mean, 
#      col = "black", lwd = 1, lty=1)  # Original=True Phase reconstruction
lines(c(1001:1200), forecast(fitArimaX_nil, xreg = ift_NilPhase_X2mag[801:1000, 2:12])$mean, 
      col = "purple", lwd = 1, lty=1)
lines(c(1001:1200), forecast(fitArimaX_avg, xreg = ift_AvgPhase_X2mag[801:1000, 2:12])$mean, 
      col = "red", lwd = 1, lty=1)
lines(c(1001:1200), epochs_aqi_data[2, c(1:200), 1], col = "green", lwd = 1, lty=1)  
legend("topleft", bty="n", legend=c(
  sprintf("Original (Correct Phases): Corr(Orig, TrueObs)=%s", cor_orig_obs), 
  sprintf("Prediction via Nil-Phase Reconstruction: Corr(Orig, Nil)=%s", cor_orig_nil), 
  sprintf("Prediction via Average-Phase Reconstruction: Corr(Orig, Avg)=%s", cor_orig_avg), 
  sprintf("Real CO Timeseries (Epoch 2)"), 
  "Orig (True Phase) ARIMA(1,1,4) Model Forecast"), 
  col=c("black", "purple", "red", "green", "blue"), lty=c(1,1,1,1), lwd=c(2,2,2,2), cex=0.9, 
  x.intersp=1, text.width=c(0.085,0.235,0.35, 0.3), xjust=0, yjust=0)
## Warning in w0 * rep.int(0:(ncol - 1), rep.int(n.legpercol, ncol)): longer object
## length is not a multiple of shorter object length

#### Zoom in
plot(forecast(fitArimaX_orig, xreg = epochs_aqi_data[1, 800:1000, 2:12]), 
     main = sprintf("ARIMAX Model Forecasting (1,001:1,200): Corr(TrueObs,Orig)=%s", cor_orig_obs),
     xlim=c(950, 1050), ylim = c(500, 2000), col="black", lwd = 1, lty=1)
#lines(c(1001:1200), forecast(fitArimaX_orig, xreg = epochs_aqi_data[2, c(801:1000), 2:12])$mean, 
#      col = "black", lwd = 1, lty=1)  # Original=True Phase reconstruction
lines(c(1001:1200), forecast(fitArimaX_nil, xreg = ift_NilPhase_X2mag[801:1000, 2:12])$mean, 
      col = "purple", lwd = 1, lty=1)
lines(c(1001:1200), forecast(fitArimaX_avg, xreg = ift_AvgPhase_X2mag[801:1000, 2:12])$mean, 
      col = "red", lwd = 1, lty=1)
lines(c(1001:1200), epochs_aqi_data[2, c(1:200), 1], col = "green", lwd = 1, lty=1)  
legend("topleft", bty="n", legend=c(
  sprintf("Original (Correct Phases): Corr(Orig, TrueObs)=%s", cor_orig_obs), 
  sprintf("Prediction via Nil-Phase Reconstruction: Corr(Orig, Nil)=%s", cor_orig_nil), 
  sprintf("Prediction via Average-Phase Reconstruction: Corr(Orig, Avg)=%s", cor_orig_avg), 
  sprintf("Real CO Timeseries (Epoch 2)"), 
  "Orig (True Phase) ARIMA(1,1,4) Model Forecast"), 
  col=c("black", "purple", "red", "green", "blue"), lty=c(1,1,1,1), lwd=c(2,2,2,2), cex=0.9, 
  x.intersp=1, text.width=c(0.085,0.235,0.35, 0.3), xjust=0, yjust=0)
## Warning in w0 * rep.int(0:(ncol - 1), rep.int(n.legpercol, ncol)): longer object
## length is not a multiple of shorter object length

An alternative data analytic approach involves using the Fourier transform applied to the complete 2D data-matrix (rows=time, columns=features), inverting it back in spacetime, and investigating the effect of the timeseries analysis with and without using the correct kime-directions (phases). Knowing the kime-directions is expected to produce better analytical results (e.g., lower bias and lower dispersion).

dim(aqi_data)
## [1] 9471   17
# 1. Transform the 2D matrix to k-space (Fourier domain)
aqi_data1 <- aqi_data[ , 3:15]   # remove string columns
aqi_data_complete <- as.matrix(aqi_data1[complete.cases(aqi_data1), ])
dim(aqi_data_complete)    # ; display(aqi_data_complete, method = "raster") 
## [1] 9471   13
FT_aqi_data <- fft(aqi_data_complete)
X2 <- FT_aqi_data  # display(FT_aqi_data, method = "raster") 
mag_FT_aqi_data <- sqrt(Re(X2)^2+Im(X2)^2) 
  # plot(log(fftshift1D(Re(X2_mag))), main = "log(Magnitude(FFT(timeseries)))") 
phase_FT_aqi_data <- atan2(Im(X2), Re(X2)) 

### Test the process to confirm calculations
# X2<-FT_aqi_data; X2_mag <- mag_FT_aqi_data; X2_phase<-phase_FT_aqi_data
# Real2 = X2_mag * cos(X2_phase)
# Imaginary2 = X2_mag * sin(X2_phase)
# man_hat_X2 = Re(fft(Real2 + 1i*Imaginary2, inverse = T)/length(X2))
# ifelse(abs(man_hat_X2[5,10] - aqi_data1[5, 10]) < 0.001, "Perfect Syntesis", "Problems!!!")
#######

# 2. Invert back to spacetime the epochs_aqi_data_1 signal with nil phase
Real = mag_FT_aqi_data * cos(0)  # cos(phase_FT_aqi_data)
Imaginary = mag_FT_aqi_data * sin(0)   # sin(phase_FT_aqi_data)
ift_NilPhase_X2mag = Re(fft(Real+1i*Imaginary, inverse = T)/length(FT_aqi_data))
# display(ift_NilPhase_X2mag, method = "raster")
# dim(ift_NilPhase_X2mag); View(ift_NilPhase_X2mag); # compare to View(aqi_data1)
summary(aqi_data_complete); summary(ift_NilPhase_X2mag, method = "raster")
##      CO.GT.        PT08.S1.CO.      NMHC.GT.         C6H6.GT.       
##  Min.   :-200.0   Min.   :-200   Min.   :-200.0   Min.   :-200.000  
##  1st Qu.:   0.6   1st Qu.: 915   1st Qu.:-200.0   1st Qu.:   3.900  
##  Median :   1.5   Median :1050   Median :-200.0   Median :   7.800  
##  Mean   : -33.8   Mean   :1036   Mean   :-157.2   Mean   :   1.843  
##  3rd Qu.:   2.6   3rd Qu.:1218   3rd Qu.:-200.0   3rd Qu.:  13.500  
##  Max.   :  11.9   Max.   :2040   Max.   :1189.0   Max.   :  63.700  
##  PT08.S2.NMHC.       NOx.GT.        PT08.S3.NOx.       NO2.GT.       
##  Min.   :-200.0   Min.   :-200.0   Min.   :-200.0   Min.   :-200.00  
##  1st Qu.: 704.0   1st Qu.:  47.0   1st Qu.: 631.0   1st Qu.:  51.00  
##  Median : 890.0   Median : 139.0   Median : 791.0   Median :  95.00  
##  Mean   : 883.8   Mean   : 166.6   Mean   : 785.4   Mean   :  57.45  
##  3rd Qu.:1102.0   3rd Qu.: 281.5   3rd Qu.: 957.0   3rd Qu.: 132.00  
##  Max.   :2214.0   Max.   :1479.0   Max.   :2683.0   Max.   : 340.00  
##   PT08.S4.NO2.   PT08.S5.O3.           T                  RH         
##  Min.   :-200   Min.   :-200.0   Min.   :-200.000   Min.   :-200.00  
##  1st Qu.:1171   1st Qu.: 689.0   1st Qu.:  10.700   1st Qu.:  33.50  
##  Median :1440   Median : 936.0   Median :  17.000   Median :  48.10  
##  Mean   :1375   Mean   : 963.3   Mean   :   9.661   Mean   :  39.01  
##  3rd Qu.:1658   3rd Qu.:1250.0   3rd Qu.:  24.000   3rd Qu.:  61.70  
##  Max.   :2775   Max.   :2523.0   Max.   :  44.600   Max.   :  88.70  
##        AH           
##  Min.   :-200.0000  
##  1st Qu.:   0.6768  
##  Median :   0.9711  
##  Mean   :  -6.7553  
##  3rd Qu.:   1.2915  
##  Max.   :   2.2310
##      CO.GT.       PT08.S1.CO.        NMHC.GT.         C6H6.GT.      
##  Min.   : 1143   Min.   :-185.1   Min.   :-392.3   Min.   : -65.27  
##  1st Qu.: 1789   1st Qu.: 138.1   1st Qu.: 188.4   1st Qu.: 228.36  
##  Median : 1982   Median : 213.8   Median : 254.5   Median : 298.23  
##  Mean   : 2100   Mean   : 218.9   Mean   : 257.7   Mean   : 304.04  
##  3rd Qu.: 2223   3rd Qu.: 291.7   3rd Qu.: 324.4   3rd Qu.: 373.90  
##  Max.   :41122   Max.   :3787.7   Max.   :1323.0   Max.   :2876.22  
##  PT08.S2.NMHC.        NOx.GT.         PT08.S3.NOx.        NO2.GT.       
##  Min.   : -93.41   Min.   :  32.15   Min.   :-183.04   Min.   :-183.04  
##  1st Qu.:  97.44   1st Qu.: 392.22   1st Qu.:  22.36   1st Qu.:  22.36  
##  Median : 152.62   Median : 458.39   Median :  78.28   Median :  78.28  
##  Mean   : 167.31   Mean   : 476.19   Mean   :  85.98   Mean   :  85.98  
##  3rd Qu.: 217.35   3rd Qu.: 533.20   3rd Qu.: 139.73   3rd Qu.: 139.73  
##  Max.   :4611.64   Max.   :7979.90   Max.   :3896.77   Max.   :3896.77  
##   PT08.S4.NO2.      PT08.S5.O3.            T                 RH        
##  Min.   :  32.15   Min.   : -93.41   Min.   : -65.27   Min.   :-392.3  
##  1st Qu.: 392.22   1st Qu.:  97.44   1st Qu.: 228.36   1st Qu.: 188.4  
##  Median : 458.39   Median : 152.62   Median : 298.23   Median : 254.5  
##  Mean   : 476.19   Mean   : 167.31   Mean   : 304.04   Mean   : 257.7  
##  3rd Qu.: 533.20   3rd Qu.: 217.35   3rd Qu.: 373.90   3rd Qu.: 324.4  
##  Max.   :7979.90   Max.   :4611.64   Max.   :2876.22   Max.   :1323.0  
##        AH        
##  Min.   :-185.1  
##  1st Qu.: 138.1  
##  Median : 213.8  
##  Mean   : 218.9  
##  3rd Qu.: 291.7  
##  Max.   :3787.7
# 3. Perform 2D modeling of ift_NilPhase_X2mag, e.g., 
###   LASSO "CO.GT." ~ "PT08.S1.CO."+"NMHC.GT."+"C6H6.GT."+"PT08.S2.NMHC."+"NOx.GT."+"PT08.S3.NOx."+ "NO2.GT."+"PT08.S4.NO2."+"PT08.S5.O3."+"T"+"RH"+"AH"
library(glmnet)
## Warning: package 'glmnet' was built under R version 4.1.2
## Loading required package: Matrix
## Loaded glmnet 4.1-3
library(arm)
## Warning: package 'arm' was built under R version 4.1.2
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:plotly':
## 
##     select
## Loading required package: lme4
## 
## arm (Version 1.12-2, built: 2021-10-15)
## Working directory is E:/Ivo.dir/Eclipse_Projects/HTML5_WebSite/TCIU/Chapter6
library(knitr) 
## Warning: package 'knitr' was built under R version 4.1.2
y <- ift_NilPhase_X2mag[ , 1]      # CO as outcome variable
X <- ift_NilPhase_X2mag[ , 2:13]   # remaining features are predictors

set.seed(1234)
train = sample(1 : nrow(X), round((4/5) * nrow(X)))
test = -train

# subset training data
yTrain = y[train]
XTrain = X[train, ]
# subset test data
yTest = y[test]
XTest = X[test, ]

#### Model Estimation & Selection ####
# Estimate models: glmnet automatically standardizes the predictors
fitRidge = glmnet(XTrain, yTrain, alpha = 0)  # Ridge Regression
fitLASSO = glmnet(XTrain, yTrain, alpha = 1)  # The LASSO

### Plot Solution Path #### LASSO
plot(fitLASSO, xvar="lambda", label="TRUE")  # add label to upper x-axis
mtext("(Nil-Phase) LASSO regularizer: Number of Nonzero (Active) Coefficients", side=3, line=2.5)

### Plot Solution Path #### Ridge
plot(fitRidge, xvar="lambda", label="TRUE") # add label to upper x-axis
mtext("(Nil-Phase) Ridge regularizer: Number of Nonzero (Active) Coefficients", side=3, line=2.5)

#### 10-fold cross validation ##### LASSO
library("glmnet")
library(doParallel)
## Loading required package: foreach
## Loading required package: iterators
## Loading required package: parallel
registerDoParallel(6)
set.seed(1234)  # set seed 
cvLASSO = cv.glmnet(XTrain, yTrain, alpha = 1, parallel=TRUE) # (10-fold) cross validation for the LASSO
plot(cvLASSO); mtext("(Nil-Phase) CV LASSO: Number of Nonzero (Active) Coefficients", side=3, line=2.5)

### Predict and Report LASSO MSE 
predLASSO <-  predict(cvLASSO, s = cvLASSO$lambda.1se, newx = XTest)
testMSE_LASSO <- mean((predLASSO - yTest)^2); testMSE_LASSO
## [1] 38880.76
# LASSO coefficient estimates 
betaHatLASSO = as.double(coef(fitLASSO, s = cvLASSO$lambda.1se))  # s is lambda
coefplot(betaHatLASSO[2:12], sd = rep(0, 11), pch=1, col.pts = "red", cex.pts = 2)
legend("bottomright", "(Nil-Phase) LASSO", col = "blue", bty = "o", cex = 1)

# 4. Perform LASSO modeling on ift_TruePhase_X2mag 
X2<-FT_aqi_data; X2_mag <- mag_FT_aqi_data; X2_phase<-phase_FT_aqi_data
Real2 = X2_mag * cos(X2_phase)
Imaginary2 = X2_mag * sin(X2_phase)
man_hat_X2 = Re(fft(Real2 + 1i*Imaginary2, inverse = T)/length(X2))

y3 <- man_hat_X2[ , 1]      # CO as outcome variable
X3 <- man_hat_X2[ , 2:13]   # remaining features are predictors

# subset training data
y3Train = y3[train]
X3Train = X3[train, ]
# subset test data
y3Test = y3[test]
X3Test = X3[test, ]

#### Model Estimation & Selection ####
# Estimate models: glmnet automatically standardizes the predictors
fitRidge3 = glmnet(X3Train, y3Train, alpha = 0)  # Ridge Regression
fitLASSO3 = glmnet(X3Train, y3Train, alpha = 1)  # The LASSO

### Plot Solution Path #### LASSO
plot(fitLASSO3, xvar="lambda", label="TRUE")  # add label to upper x-axis
mtext("(True-Phase) LASSO regularizer: Number of Nonzero (Active) Coefficients", side=3, line=2.5)

### Plot Solution Path #### Ridge
plot(fitRidge3, xvar="lambda", label="TRUE") # add label to upper x-axis
mtext("(True-Phase) Ridge regularizer: Number of Nonzero (Active) Coefficients", side=3, line=2.5)

#### 10-fold cross validation ##### LASSO
library("glmnet")
library(doParallel)
registerDoParallel(6)
set.seed(1234)  # set seed 
cvLASSO3 = cv.glmnet(X3Train, y3Train, alpha = 1, parallel=TRUE) # (10-fold) cross validation for the LASSO
plot(cvLASSO3); mtext("(True-Phase) CV LASSO: Number of Nonzero (Active) Coefficients", side=3, line=2.5)

### Predict and Report LASSO MSE 
predLASSO3 <-  predict(cvLASSO3, s = cvLASSO3$lambda.1se, newx = X3Test)
testMSE_LASSO3 <- mean((predLASSO3 - y3Test)^2); testMSE_LASSO3
## [1] 2876.822
# LASSO coefficient estimates 
betaHatLASSO3 = as.double(coef(fitLASSO3, s = cvLASSO3$lambda.1se))  # s is lambda
coefplot(betaHatLASSO3[2:12], sd = rep(0, 11), pch=1, col.pts = "red", cex.pts = 2)
legend("bottomright", "(True-Phase)LASSO", col = "blue", bty = "o", cex = 1.5)

# 5. Compare the analytics results from #3, #6, and #7

# Stop local cluster
stopImplicitCluster()
SOCR Resource Visitor number Web Analytics SOCR Email