1 TCIU Kime ARIMAX on ED Econ Data

library(dplyr)
library(ggplot2)
library(plot3D)
library(scatterplot3d)
library(plotly)
library(forecast)
library(ggplot2)
library(reshape2)
library(tidyr)
library(TSplotly)

load("Fig1.8_to_1.12.RData")

1.1 Data Source Type

MCSI_Data <- read.csv(unzip("~/UM_Michigan_Consumer_Sentiment_Index_Data.csv.zip"))

dim(MCSI_Data)

## [1] 279242    107

# [1] 279242    107

# extract features
# ICS                INDEX OF CONSUMER SENTIMENT
# ICC                INDEX OF CURRENT ECONOMIC CONDITIONS                
# GOVT               GOVERNMENT ECONOMIC POLICY
# DUR                DURABLES BUYING ATTITUDES 
# HOM                HOME BUYING ATTITUDES
# CAR                VEHICLE BUYING ATTITUDES
# INCOME             TOTAL HOUSEHOLD INCOME - CURRENT DOLLARS
# AGE                AGE OF RESPONDENT
# EDUC               EDUCATION OF RESPONDENT


DT::datatable(head(MCSI_Data,1000), fillContainer = TRUE)

MCSI_Data_short <- 
  MCSI_Data[ , which(names(MCSI_Data) %in% 
                       c("YYYYMM", "ICS", "ICC", "GOVT", "DUR",
                         "HOM", "CAR", "INCOME", "AGE", "EDUC"))]
dim(MCSI_Data_short)

## [1] 279242     10

# [1] 279242      10
# Compute the monthly avarages across all participants
MCSI_Data_monthAvg <- aggregate(.~YYYYMM, data = MCSI_Data_short, mean)
dim(MCSI_Data_monthAvg)

## [1] 492  10

# [1] 492  10
# colnames(MCSI_Data_monthAvg)

# convert YYYMM to Date
MCSI_Data_monthAvg$YYYYMM <- 
  as.Date(as.character(
    as.POSIXct(paste0(as.character(MCSI_Data_monthAvg$YYYYMM),"01"), format = "%Y%m%d")))
head(MCSI_Data_monthAvg$YYYYMM)

## [1] "1978-01-01" "1978-02-01" "1978-03-01" "1978-04-01" "1978-05-01"
## [6] "1978-06-01"

# View(MCSI_Data_monthAvg)

# plot all features on the same plot (across time/months)

MCSI_Data_monthAvg_melt <- 
  melt(MCSI_Data_monthAvg , id.vars = 'YYYYMM', variable.name = 'series')

1.2 Figure 1.5

# plot on same grid, each series colored differently -- 
# good if the series have same scale
#ggplot(MCSI_Data_monthAvg_melt, aes(YYYYMM, value), log10="y") + 
#  geom_line(aes(colour = series)) 
# Exclude INCOME as its too large!
ggplot(MCSI_Data_monthAvg_melt[MCSI_Data_monthAvg_melt$series!="INCOME", ], 
       aes(YYYYMM, value)) + 
    geom_line(aes(linetype=series, colour = series), size=2) +
    geom_point(aes(shape=series, colour = series), size=0.3) +
      # geom_line(aes(colour = series)) +
    geom_smooth(aes(colour = series), se = TRUE) +
    coord_trans(y="log10") +
    xlab("Time (monthly)") + ylab("Index Values (log-scale)") +
    # scale_x_date(labels = date_label("%m-%Y")) +
    scale_x_date(date_breaks = "12 month", date_labels =  "%m-%Y")  +
    theme(axis.text.x = element_text(angle = 45, hjust = 1),
          text = element_text(size=20))+
    theme(legend.position="top")

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Interactive plot here

updatemenus <- list(
  list(
    xanchor="left",
    yanchor="top",
    active = -1,
    type= 'buttons',
    buttons = list(
      list(
        label = "ALL",
        method = "update",
        args = list(list(visible = c(TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE)),
                    list(title = "All Indexes"))),
      list(
        label = "ICS",
        method = "update",
        args = list(list(visible = c(FALSE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE)),
                    list(title = "ICS"))),
      list(
        label = "ICC",
        method = "update",
        args = list(list(visible = c(FALSE,FALSE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE)),
                    list(title = "ICC"))),
      list(
        label = "GOVT",
        method = "update",
        args = list(list(visible = c(FALSE,FALSE,FALSE,TRUE,FALSE,FALSE,FALSE,FALSE)),
                    list(title = "GOVT"))),
      list(
        label = "DUR",
        method = "update",
        args = list(list(visible = c(FALSE,FALSE,FALSE,FALSE,TRUE,FALSE,FALSE,FALSE)),
                    list(title = "DUR"))),
      list(
        label = "HOM",
        method = "update",
        args = list(list(visible = c(FALSE,FALSE,FALSE,FALSE,FALSE,TRUE,FALSE,FALSE)),
                    list(title = "HOM"))),
      list(
        label = "CAR",
        method = "update",
        args = list(list(visible = c(FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,TRUE,FALSE)),
                    list(title = "CAR"))),
      list(
        label = "AGE",
        method = "update",
        args = list(list(visible = c(FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,TRUE)),
                    list(title = "AGE"))),
      list(
        label = "EDUC",
        method = "update",
        args = list(list(visible = c(TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE)),
                    list(title = "EDUC")))
      )
  )
)
PYdf<-GtoP_trans(MCSI_Data_monthAvg_melt[MCSI_Data_monthAvg_melt$series!="INCOME", ],NAME="series",X="YYYYMM",Y="value")

## Loading required package: zoo

## Warning: package 'zoo' was built under R version 3.5.3

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

PYdf$INCOME<-NULL
PYdf<-log10(PYdf)
plot_ly(type="scatter",mode="lines")%>%
  add_lines(x=as.yearmon(rownames(PYdf)),text=rownames(PYdf),y=PYdf$ICS,name="ICS",line=list(color="powderblue"))%>%
  add_lines(x=as.yearmon(rownames(PYdf)),text=rownames(PYdf),y=PYdf$ICC,name="ICC",line=list(color="red"))%>%
  add_lines(x=as.yearmon(rownames(PYdf)),text=rownames(PYdf),y=PYdf$GOVT,name="GOVT",line=list(color="green"))%>%
  add_lines(x=as.yearmon(rownames(PYdf)),text=rownames(PYdf),y=PYdf$DUR,name="DUR",line=list(color="orange"))%>%
  add_lines(x=as.yearmon(rownames(PYdf)),text=rownames(PYdf),y=PYdf$HOM,name="HOM",line=list(color="purple"))%>%
  add_lines(x=as.yearmon(rownames(PYdf)),text=rownames(PYdf),y=PYdf$CAR,name="CAR",line=list(color="pink"))%>%
  add_lines(x=as.yearmon(rownames(PYdf)),text=rownames(PYdf),y=PYdf$AGE,name="AGE",line=list(color="brown"))%>%
  add_lines(x=as.yearmon(rownames(PYdf)),text=rownames(PYdf),y=PYdf$EDUC,name="EDUC",line=list(color="black"))%>%
  layout(title= list(text="Time series for 8 indexes",font=list(family = "Times New Roman",size = 16,color = "black" )),
           paper_bgcolor='rgb(255,255,255)', plot_bgcolor='rgb(229,229,229)',
           xaxis = list(title ="Time (monthly)",
                        gridcolor = 'rgb(255,255,255)',
                        showgrid = TRUE,
                        showline = FALSE,
                        showticklabels = TRUE,
                        tickcolor = 'rgb(127,127,127)',
                        ticks = 'outside',
                        zeroline = FALSE),
           yaxis = list(title = "Index Values (log10-scale)",
                        gridcolor = 'rgb(255,255,255)',
                        showgrid = TRUE,
                        showline = FALSE,
                        showticklabels = TRUE,
                        tickcolor = 'rgb(127,127,127)',
                        ticks = 'outside',
                        zeroline = FALSE),
         updatemenus=updatemenus)

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have 8.
## Consider specifying shapes manually if you must have them.

## Warning: Removed 984 rows containing missing values (geom_point).

## png 
##   2

# Next, we want to take this example one step further and demonstrate the foundation of
# spacekime analytics using one oversimplified example that illustrates the tradeoffs
# between traditional spacetime inference and their spacekime analytical counterparts.

colnames(MCSI_Data_monthAvg)

##  [1] "YYYYMM" "ICS"    "ICC"    "GOVT"   "DUR"    "HOM"    "CAR"   
##  [8] "INCOME" "AGE"    "EDUC"

MCSI_Data_monthAvg_ts <- 
  ts(MCSI_Data_monthAvg, start=c(1978,1), end=c(2018, 12), frequency = 12)
class(MCSI_Data_monthAvg_ts)

## [1] "mts"    "ts"     "matrix"

dim(MCSI_Data_monthAvg_ts)

## [1] 492  10

# [1] 492  10
# Time-series plots: 41 years * 12 months = 492 time points
plot.ts(MCSI_Data_monthAvg_ts)

plot(MCSI_Data_monthAvg_ts, plot.type="m")

# See MCSI docs: https://data.sca.isr.umich.edu/fetchdoc.php?docid=45121
# Outcome to Predict: ICS = INDEX OF CONSUMER SENTIMENT
Y <- MCSI_Data_monthAvg$ICS

# Covariates explaining the longitudinal outcome: 
# GOVT, DUR, HOM, CAR, INCOME, EDUC
X <- cbind(MCSI_Data_monthAvg$GOVT, MCSI_Data_monthAvg$DUR, 
           MCSI_Data_monthAvg$HOM, MCSI_Data_monthAvg$CAR, 
           MCSI_Data_monthAvg$INCOME, MCSI_Data_monthAvg$EDUC)

# Outcome Variable to be modelled, as a timeseries
MCSI_Data_monthAvg_ts_Y <- ts(Y, start=c(1978,1), end=c(2018, 12), frequency = 12)

############################################################################
# Fit ARIMAX model on entire dataset, validate on prospective data (2019-2020) using (noisy) simulated XReg
modArima <- auto.arima(MCSI_Data_monthAvg_ts_Y, xreg=X)
modArima

## Series: MCSI_Data_monthAvg_ts_Y 
## Regression with ARIMA(2,0,1)(1,0,1)[12] errors 
## 
## Coefficients:
##          ar1      ar2      ma1    sar1     sma1  intercept     xreg1
##       1.4289  -0.4577  -0.6788  0.5851  -0.5346   151.4591  -11.2435
## s.e.  0.1885   0.1704   0.1649  0.3503   0.3637     8.4220    1.1118
##          xreg2    xreg3    xreg4  xreg5   xreg6
##       -10.7791  -2.7576  -0.9281  0e+00  1.5743
## s.e.    1.0010   0.9115   0.9593  1e-04  1.8224
## 
## sigma^2 estimated as 8.266:  log likelihood=-1212.44
## AIC=2450.88   AICc=2451.64   BIC=2505.46

# Regression with ARIMA(2,0,1)(1,0,1)[12] errors 
# Coefficients:
#         ar1      ar2      ma1    sar1     sma1  intercept     xreg1     xreg2    xreg3    xreg4  xreg5
#      1.4289  -0.4577  -0.6788  0.5851  -0.5346   151.4591  -11.2435  -10.7791  -2.7576  -0.9281  0e+00
#s.e.  0.1885   0.1704   0.1649  0.3503   0.3637     8.4220    1.1118    1.0010   0.9115   0.9593  1e-04
#       xreg6
#      1.5743
#s.e.  1.8224
# sigma^2 estimated as 8.266:  log likelihood=-1212.44
# AIC=2450.88   AICc=2451.64   BIC=2505.46

# Predict prospective ICS = INDEX OF CONSUMER SENTIMENT over next 2 years
pred_length <- 2 * 12 # 2-years of monthly forward forecasting (2017-2018)
start_period <- 468  # 39 years (2017-1978)*12

# Noise-generating function to generate some synthetic predictors (X_new) and test the ARIMA(2,0,1)(1,0,1)[12] model
noiseGen <- function(data) { 
  if (is.vector(data)) 
      return( data + rnorm(length(data), mean(data), sd(data))) 
  else 
      return(data + matrix(rnorm(dim(data)[1] * dim(data)[2], 0, 1), dim(data)[1]))
}

X_new <- noiseGen(X[(start_period+1):(start_period+24), ])  # just noise-corrupt the months 301:324 predictors

for (i in 1:6){
  # just noise-corrupt the months 468:492 predictors
  X_new[ ,i] <- noiseGen(X[(start_period+1):(start_period+24), i])  
}
# Check X_new, relative to X
# head (X[(start_period+1):(start_period+24), ])
# head(X_new)
# dim(X)
# dim(X_new)

pred2years_ICS <- predict(modArima, n.ahead = pred_length, newxreg = X_new)
#plot(pred2years_ICS$pred, main="Forward time-series predictions (fitArimaX)")
# Plot data (4-yrs) and prediciton model, CI's (prospective 2-yrs)
plot(forecast(modArima, xreg = X_new), lwd=2, include=48, xlab="Time", 
     ylab="ICS = INDEX OF CONSUMER SENTIMENT (US)",
     main = "(2020-2021) Forecasting US ICS based on ARIMAX(2,0,1) Model 
     \n using prospective regression estimates of GOVT, DUR, HOM, CAR, INCOME & EDUC")

# autoplot(forecast(modArima, xreg = X_new))

Interactive plot here

TSplot(48,modArima,X_new,TITLE = "(2020-2021) Forecasting US ICS based on ARIMAX(2,0,1) Model using prospective regression estimates of GOVT, DUR, HOM, CAR, INCOME & EDUC",Xlab ="Time",Ylab = "ICS = INDEX OF CONSUMER SENTIMENT (US)",title_size = 8,ts_original = "Original time series",ts_forecast = "Predicted time series")

################################################################################################
# Fit model only on 39 years (1978-2016), and Evaluate on testing data (2017-2018)
Y_train <- MCSI_Data_monthAvg$ICS[1:(39*12)]
length(Y_train)/12

## [1] 39

Y_test <- MCSI_Data_monthAvg$ICS[((39*12)+1):length(MCSI_Data_monthAvg$ICS)] 
length(Y_test)/12

## [1] 2

# Training and Testing Data Covariates explaining the longitudinal outcome (Y): 
# GOVT, DUR, HOM, CAR, INCOME, EDUC
X_train <- 
  as.data.frame(cbind(MCSI_Data_monthAvg$GOVT, MCSI_Data_monthAvg$DUR, 
                      MCSI_Data_monthAvg$HOM, MCSI_Data_monthAvg$CAR, 
                      MCSI_Data_monthAvg$INCOME, MCSI_Data_monthAvg$EDUC))[1:(39*12), ]
dim(X_train)

## [1] 468   6

X_test <- 
  as.data.frame(cbind(MCSI_Data_monthAvg$GOVT, MCSI_Data_monthAvg$DUR, 
                      MCSI_Data_monthAvg$HOM, MCSI_Data_monthAvg$CAR, 
                      MCSI_Data_monthAvg$INCOME,
                      MCSI_Data_monthAvg$EDUC))[((39*12)+1):length(MCSI_Data_monthAvg$ICS), ]
dim(X_test)

## [1] 24  6

# Outcome Variable to be modelled, as a timeseries
MCSI_Data_monthAvg_ts_Y_train <- ts(Y_train, start=c(1978,1), end=c(2016, 12), frequency = 12)
MCSI_Data_monthAvg_ts_Y_test <- ts(Y_test, start=c(2017,1), end=c(2018, 12), frequency = 12)

# Find ARIMAX model
modArima_train <- auto.arima(MCSI_Data_monthAvg_ts_Y_train, xreg=as.matrix(X_train))

modArima_train

## Series: MCSI_Data_monthAvg_ts_Y_train 
## Regression with ARIMA(2,0,1)(1,0,1)[12] errors 
## 
## Coefficients:
##          ar1      ar2      ma1    sar1     sma1  intercept        V1
##       1.4447  -0.4737  -0.6944  0.5781  -0.5307   152.4668  -11.4417
## s.e.  0.1915   0.1719   0.1667  0.4193   0.4340     8.6771    1.1624
##             V2       V3       V4     V5      V6
##       -10.9797  -2.8305  -0.7844  0e+00  1.6079
## s.e.    1.0447   0.9301   1.0023  1e-04  1.8702
## 
## sigma^2 estimated as 8.564:  log likelihood=-1161.29
## AIC=2348.59   AICc=2349.39   BIC=2402.52

# Regression with ARIMA(2,0,1)(1,0,1)[12] errors 
# Coefficients:
#         ar1      ar2      ma1    sar1     sma1  intercept        V1        V2       V3       V4     V5
#      1.4447  -0.4737  -0.6944  0.5781  -0.5307   152.4668  -11.4417  -10.9797  -2.8305  -0.7844  0e+00
#s.e.  0.1915   0.1719   0.1667  0.4193   0.4340     8.6771    1.1624    1.0447   0.9301   1.0023  1e-04
#          V6
#      1.6079
#s.e.  1.8702
# sigma^2 estimated as 8.564:  log likelihood=-1161.29
# AIC=2348.59   AICc=2349.39   BIC=2402.52

pred2years_ICS <- predict(modArima_train, n.ahead = pred_length, frequency=12, newxreg = X_test)
#plot(pred2years_ICS$pred, main="Forward time-series predictions (fitArimaX)")
# pred_arimax_2_0_1 <- forecast(modArima_train, xreg = X_test)
# pred_arimax_2_0_1_2017_2018 <- 
#   ts(pred_arimax_2_0_1$fitted, frequency=12, start=c(2017,1), end=c(2018,12))

1.3 Figure 1.6

# Plot ICS training Data, Testing ICS and Predicted ICS
plot(forecast(modArima_train, xreg = as.matrix(X_test) ), lwd=2, xlab="Time", 
     ylab="ICS = INDEX OF CONSUMER SENTIMENT (US)",
     main = "(2017-2018) Forecasting US ICS based on fitting ARIMAX(2,0,1) Model on 1978-2016 data
      \n using regression effect estimates of GOVT, DUR, HOM, CAR, INCOME & EDUC")
lines(MCSI_Data_monthAvg_ts_Y_test, col = "red", lwd = 4, lty=1)  
legend(1990, 60, bty="n", legend=c("Training ICS Data (1978-2016)", 
                                   "ARIMAX-model ICS Forecasting (2017-2018)",
                                   "MCSI Official ICS (2017-2018)"),
       col=c("black", "blue", "red"), 
       lty=c(1,1,1), lwd=c(2,2,4), cex=1.2, x.intersp=0.5, y.intersp=0.7)

Interactive Plot here

newline<-ADDline(TS = MCSI_Data_monthAvg_ts_Y_test,linetype = "TS",Name = "MCSI Official ICS (2017-2018)")

TSplot("all",modArima_train,as.matrix(X_test),TITLE = "ZOOM: (2017-2018) Forecasting US ICS based on fitting ARIMAX(2,0,1) Model on 1978-2016 data using regression effect estimates of GOVT, DUR, HOM, CAR, INCOME & EDUC",Ylab ="ICS = INDEX OF CONSUMER SENTIMENT (US)",Xlab ="Time",title_size = 8,ts_original = "Training ICS Data (1978-2016)",ts_forecast = "ARIMAX-model ICS Forecasting (2017-2018)")%>%
  add_lines(x=newline$X,text=newline$TEXT,y=newline$Y,name=newline$NAME,line=list(color="grey"))

## png 
##   2

# Zoom in on a smaller interval (4-years prior and 2-years forecasting)
plot(forecast(modArima_train, xreg = as.matrix(X_test) ), include=48, lwd=2, xlab="Time", 
     ylab="ICS = INDEX OF CONSUMER SENTIMENT (US)",
     main = "ZOOM: (2017-2018) Forecasting US ICS based on fitting ARIMAX(2,0,1) Model on 1978-2016 data
      \n using regression effect estimates of GOVT, DUR, HOM, CAR, INCOME & EDUC")
lines(MCSI_Data_monthAvg_ts_Y_test, col = "red", lwd = 4, lty=1)  
legend("topleft", bty="n", legend=c("Training ICS Data (2013-2016)", 
                                   "ARIMAX-model ICS Forecasting (2017-2018)",
                                   "MCSI Official ICS (2017-2018)"),
       col=c("black", "blue", "red"), 
       lty=c(1,1,1), lwd=c(2,2,4), cex=1.2, x.intersp=1.5, y.intersp=0.7)
text(2014, 91, "Training Region (1978-2016)\n Model(ICS) -> ARIMAX(p,q,r)\n XReg={X_i}, 1<=i<=6", cex=1.5)
text(2018, 79, "Validation Region (2017-2018)\n hat(ICS) <- ARIMAX(2,0,1)\n XReg={X_i}, 1<=i<=6", cex=1.5)

Interactive plot here

newline<-ADDline(TS = MCSI_Data_monthAvg_ts_Y_test,linetype = "TS",Name = "MCSI Official ICS (2017-2018)")

TSplot(48,modArima_train,as.matrix(X_test),TITLE = "(2017-2018) Forecasting US ICS based on fitting ARIMAX(2,0,1) Model on 1978-2016 data using regression effect estimates of GOVT, DUR, HOM, CAR, INCOME & EDUC",Ylab ="ICS = INDEX OF CONSUMER SENTIMENT (US)",Xlab ="Time",title_size = 8,ts_original = "Training ICS Data (2013-2016)\nTraining Region (1978-2016)\n Model(ICS) -> ARIMAX(p,q,r)\n XReg={X_i}, 1<=i<=6",ts_forecast = "ARIMAX-model ICS Forecasting (2017-2018)\nValidation Region (2017-2018)\n hat(ICS) <- ARIMAX(2,0,1)\n XReg={X_i}, 1<=i<=6")%>%
  add_lines(x=newline$X,text=newline$TEXT,y=newline$Y,name=newline$NAME,line=list(color="grey"))

##############################################################################################
# FT/Spacekime Analytics
# 1D timeseries FFT SHIFT
fftshift1D <- function(img_ff) {
  rows <- length(img_ff)   
  rows_half <- ceiling(rows/2)
  return(append(img_ff[(rows_half+1):rows], img_ff[1:rows_half]))
}

# 1. Transform all 6 predictors (X) and outcome (Y) series to k-space (Fourier domain)
# Transform entire Xreg matrix (12*39 = 468 PLUS 12*2 = 24, a total of 492 time points: Training + Testing)
# But transform only the Training Y outcome (12*39 = 468 time points: Training)
MCSI_data <- cbind(Y_train, X_train)
dim(MCSI_data)

## [1] 468   7

colnames(MCSI_data) <- c("Y_train", "GOVT", "DUR", "HOM", "CAR", "INCOME", "EDUC")
#head(MCSI_data)
colnames(X_test) <- c("GOVT", "DUR", "HOM", "CAR", "INCOME", "EDUC")
head(X_test)

##         GOVT      DUR      HOM      CAR    INCOME     EDUC
## 469 3.147368 1.843860 1.814035 2.150877 100026.35 4.514035
## 470 3.443463 1.906360 2.008834 2.259717 101614.96 4.514134
## 471 3.291740 1.970123 2.082601 2.168717 102438.36 4.434095
## 472 3.347594 1.764706 1.957219 2.062389  99627.75 4.525847
## 473 3.249129 1.864111 2.094077 2.466899 102524.11 4.505226
## 474 3.448763 1.998233 2.151943 2.388693 109156.26 4.505300

#   MCSI_data_numeric <- as.matrix(MCSI_data)  # remove column names for FT processing

FT_MCSI_data <- array(complex(), c(468, 7))
mag_FT_MCSI_data <- array(complex(), c(468, 7))
phase_FT_MCSI_data <- array(complex(), c(468, 7))
IFT_NilPhase_FT_MCSI_data <- cbind(Y_train, X_train)        # array( , c(468, 7))
IFT_ScramblePhase_FT_MCSI_data <- cbind(Y_train, X_train)   # array( , c(468, 7))
IFT_LoadedPhase_FT_MCSI_data <- cbind(Y_train, X_train)   # array( , c(468, 7))

for (i in 1:7) {
  FT_MCSI_data[ , i] <- fft(MCSI_data[ , i])
  X2 <- FT_MCSI_data[ , i]
  # plot(fftshift1D(log(Re(X2)+2)), main = "log(fftshift1D(Re(FFT(timeseries))))") 
  mag_FT_MCSI_data[ , i] <- sqrt(Re(X2)^2+Im(X2)^2); 
  # plot(log(fftshift1D(Re(mag_FT_MCSI_data))), main = "log(Magnitude(FFT(timeseries)))") 
  phase_FT_MCSI_data[ , i] <- atan2(Im(X2), Re(X2)); 
  # plot(Re(fftshift1D(phase_FT_MCSI_data[ , 1])), main = "Shift(Phase(FFT(timeseries)))")
}

######### Test the FT-IFT analysis-synthesis back-and-forth transofrm process to confirm calculations
#           X2 <- FT_MCSI_data[ , 1]; mag_FT_MCSI_data[ , 1] <- sqrt(Re(X2)^2+Im(X2)^2); 
#           phase_FT_MCSI_data[ , 1] <- atan2(Im(X2), Re(X2)); 
# Real2 = mag_FT_MCSI_data[ , 1] * cos(phase_FT_MCSI_data[ , 1])
# Imaginary2 = mag_FT_MCSI_data[ , 1] * sin(phase_FT_MCSI_data[ , 1])
# man_hat_X2 = Re(fft(Real2 + 1i*Imaginary2, inverse = T)/length(X2))
# ifelse(abs(man_hat_X2[5] - MCSI_data[5, 1]) < 0.001, "Perfect Syntesis", "Problems!!!")
#########

# Examine the Kime-direction Distributions of the Phases for all 7 features (predictors + outcome)
df.phase_FT_MCSI_data <- as.data.frame(Re(phase_FT_MCSI_data))
df.phase_FT_MCSI_data %>% 
  gather() %>% 
  head()

##   key     value
## 1  V1  0.000000
## 2  V1 -2.845538
## 3  V1 -1.074318
## 4  V1  1.744811
## 5  V1  0.832915
## 6  V1  1.627984

colnames(df.phase_FT_MCSI_data) <- colnames(MCSI_data)
colnames(df.phase_FT_MCSI_data)[1] <- "ICS"
phaseDistributions <- gather(df.phase_FT_MCSI_data)
colnames(phaseDistributions) <- c("Feature", "Phase")

1.4 Figure 1.8

# map the value as our x variable, and use facet_wrap to separate by the key column:
ggplot(phaseDistributions, aes(Phase)) + 
  # geom_histogram(bins = 10) + 
  geom_histogram(aes(y=..density..), bins = 10) + 
  facet_wrap( ~Feature, scales = 'free_x') +
  xlim(-pi, pi) + 
  theme(strip.text.x = element_text(size = 16, colour = "black", angle = 0))

## png 
##   2

#### Perform the Space-kime transform separately for the X_test data
dim(X_test) # [1] 24  6

## [1] 24  6

colnames(X_test)

## [1] "GOVT"   "DUR"    "HOM"    "CAR"    "INCOME" "EDUC"

head(X_test)

##         GOVT      DUR      HOM      CAR    INCOME     EDUC
## 469 3.147368 1.843860 1.814035 2.150877 100026.35 4.514035
## 470 3.443463 1.906360 2.008834 2.259717 101614.96 4.514134
## 471 3.291740 1.970123 2.082601 2.168717 102438.36 4.434095
## 472 3.347594 1.764706 1.957219 2.062389  99627.75 4.525847
## 473 3.249129 1.864111 2.094077 2.466899 102524.11 4.505226
## 474 3.448763 1.998233 2.151943 2.388693 109156.26 4.505300

FT_X_test <- array(complex(), c(24, 6))
mag_FT_X_test <- array(complex(), c(24, 6))
phase_FT_X_test <- array(complex(), c(24, 6))
IFT_NilPhase_FT_X_test <- X_test        # array( , c(24, 6))
IFT_ScramblePhase_FT_X_test <- X_test   # array( , c(24, 6)
IFT_LoadedPhase_FT_X_test <- X_test   # array( , c(24, 6)

for (i in 1:6) {
  FT_X_test[ , i] <- fft(X_test[ , i])
  X2 <- FT_X_test[ , i]
  mag_FT_X_test[ , i] <- sqrt(Re(X2)^2+Im(X2)^2); 
  phase_FT_X_test[ , i] <- atan2(Im(X2), Re(X2)); 
}

##########################################################################################
# 2. Nil-Phase reconstruction
#       Invert back to spacetime the mag_FT_MCSI_data[ , i] & mag_FT_X_test [, i] signals with nil-phase
for (i in 1:7) {
  Real <- mag_FT_MCSI_data[ , i] * cos(0)  
  Imaginary <- mag_FT_MCSI_data[ , i] * sin(0) 
  IFT_NilPhase_FT_MCSI_data[ ,i] <- 
    Re(fft(Real+1i*Imaginary, inverse = T)/length(FT_MCSI_data[ , i]))
# display(ift_NilPhase_X2mag, method = "raster")
# dim(IFT_NilPhase_FT_MCSI_data); View(IFT_NilPhase_FT_MCSI_data); # compare to View(MCSI_data[ , ])
}
colnames(IFT_NilPhase_FT_MCSI_data) <- colnames(MCSI_data)
dim(IFT_NilPhase_FT_MCSI_data)

## [1] 468   7

dim(FT_MCSI_data)

## [1] 468   7

colnames(IFT_NilPhase_FT_MCSI_data)

## [1] "Y_train" "GOVT"    "DUR"     "HOM"     "CAR"     "INCOME"  "EDUC"

head(IFT_NilPhase_FT_MCSI_data)

##    Y_train     GOVT      DUR      HOM      CAR   INCOME     EDUC
## 1 207.9947 7.096082 6.361494 7.448879 6.702678 225647.6 6.456717
## 2 158.7957 5.514798 4.119853 5.316651 4.422884 160111.0 5.162829
## 3 141.7572 5.090193 3.742753 4.849988 4.050338 143263.9 5.044055
## 4 133.9254 4.841114 3.482007 4.525654 3.756915 130886.9 4.913845
## 5 127.0829 4.662767 3.358568 4.331847 3.639068 125015.0 4.861882
## 6 122.8747 4.585099 3.284439 4.192362 3.508368 121664.0 4.790130

head(MCSI_data)

##    Y_train     GOVT      DUR      HOM      CAR   INCOME     EDUC
## 1 87.68719 3.287462 2.255352 2.577982 3.094801 15542.81 3.409786
## 2 90.29462 3.368831 2.645022 2.677922 3.205195 16283.55 3.466667
## 3 82.79319 3.586345 2.424364 2.534137 2.962517 16686.75 3.484605
## 4 84.97138 3.625538 2.400287 2.552367 2.988522 16291.25 3.487805
## 5 85.62600 3.525480 2.392648 2.615706 3.007519 16643.69 3.491228
## 6 79.01301 3.694864 2.348943 2.566465 2.956193 16019.64 3.469789

for (i in 1:6) {
  Real <- mag_FT_X_test[ , i] * cos(0)  
  Imaginary <- mag_FT_X_test[ , i] * sin(0) 
  IFT_NilPhase_FT_X_test[ ,i] <- 
    Re(fft(Real+1i*Imaginary, inverse = T)/length(FT_X_test[ , i]))
}
head(IFT_NilPhase_FT_X_test)

##         GOVT      DUR      HOM      CAR   INCOME     EDUC
## 469 3.751431 2.303827 2.806630 3.152860 119636.8 4.666088
## 470 3.368501 1.872374 2.306036 2.500152 108714.2 4.502943
## 471 3.310008 1.858239 2.217383 2.506473 104066.2 4.498433
## 472 3.286447 1.911739 2.221120 2.385652 105367.7 4.532361
## 473 3.171287 1.801815 2.191204 2.324749 102780.5 4.510567
## 474 3.178573 1.829411 2.063818 2.243006 106010.0 4.490027

head(X_test)

##         GOVT      DUR      HOM      CAR    INCOME     EDUC
## 469 3.147368 1.843860 1.814035 2.150877 100026.35 4.514035
## 470 3.443463 1.906360 2.008834 2.259717 101614.96 4.514134
## 471 3.291740 1.970123 2.082601 2.168717 102438.36 4.434095
## 472 3.347594 1.764706 1.957219 2.062389  99627.75 4.525847
## 473 3.249129 1.864111 2.094077 2.466899 102524.11 4.505226
## 474 3.448763 1.998233 2.151943 2.388693 109156.26 4.505300

# 3. Perform ARIMAX modeling on IFT_NilPhase_FT_MCSI_data; report (p,d,q) params and quality metrics AIC/BIC
# library(forecast)
IFT_NilPhase_FT_MCSI_data_Y_train <- IFT_NilPhase_FT_MCSI_data$Y_train[1:(39*12)]
length(Y_train)/12

## [1] 39

# Y_test <- IFT_NilPhase_FT_MCSI_data$Y_train[(39*12+1):length(IFT_NilPhase_FT_MCSI_data$Y_train)]; length(Y_test)/12

# Training and Testing Data Covariates explaining the longitudinal outcome (Y): 
# GOVT, DUR, HOM, CAR, INCOME, EDUC
IFT_NilPhase_FT_MCSI_data_X_train <- 
  as.data.frame(cbind(IFT_NilPhase_FT_MCSI_data$GOVT, IFT_NilPhase_FT_MCSI_data$DUR, 
                      IFT_NilPhase_FT_MCSI_data$HOM, IFT_NilPhase_FT_MCSI_data$CAR, 
                      IFT_NilPhase_FT_MCSI_data$INCOME, 
                      IFT_NilPhase_FT_MCSI_data$EDUC))[1:(39*12), ]
dim(X_train)

## [1] 468   6

colnames(IFT_NilPhase_FT_MCSI_data_X_train) <- colnames(IFT_NilPhase_FT_X_test)

# Outcome Variable to be modelled, as a timeseries
MCSI_Data_monthAvg_ts_Y_train_IFT_NilPhase <- 
         ts(IFT_NilPhase_FT_MCSI_data_Y_train, start=c(1978,1), end=c(2016, 12), frequency = 12)

# Find ARIMAX model
modArima_train_IFT_NilPhase <- 
  auto.arima(MCSI_Data_monthAvg_ts_Y_train_IFT_NilPhase,
             xreg=as.matrix(IFT_NilPhase_FT_MCSI_data_X_train))
modArima_train_IFT_NilPhase

## Series: MCSI_Data_monthAvg_ts_Y_train_IFT_NilPhase 
## Regression with ARIMA(1,0,0)(1,0,0)[12] errors 
## 
## Coefficients:

## Warning in sqrt(diag(x$var.coef)): NaNs produced

##          ar1    sar1  intercept     GOVT     DUR     HOM     CAR  INCOME
##       0.8194  0.1554    26.3831  10.6846  8.6527  2.3138  2.2177   2e-04
## s.e.  0.0316  0.0480     5.9317   1.3501  1.1259  1.1826  1.0817     NaN
##          EDUC
##       -3.5998
## s.e.   1.8950
## 
## sigma^2 estimated as 3.401:  log likelihood=-946.65
## AIC=1913.31   AICc=1913.79   BIC=1954.79

# Regression with ARIMA(1,0,0)(1,0,0)[12] errors 
# Coefficients:
#  ar1    sar1  intercept       V1      V2      V3      V4     V5       V6
# 0.8194  0.1554    26.3831  10.6846  8.6527  2.3138  2.2177  2e-04  -3.5998
# s.e.  0.0316  0.0480     5.9317   1.3501  1.1259  1.1826  1.0817    NaN   1.8950
# sigma^2 estimated as 3.401:  log likelihood=-946.65, AIC=1913.31   AICc=1913.79   BIC=1954.79

pred_arimax_1_0_1 <- forecast(modArima_train_IFT_NilPhase, xreg = as.matrix(IFT_NilPhase_FT_X_test))
pred_arimax_1_0_1_2017_2018 <- 
  ts(pred_arimax_1_0_1$mean, frequency=12, start=c(2017,1), end=c(2018,12))

pred_arimax_1_0_1_2017_2018

##            Jan       Feb       Mar       Apr       May       Jun       Jul
## 2017 109.49273  96.86326  94.40218  94.00355  90.78807  90.22039  89.67892
## 2018  86.67948  86.83445  87.33288  87.35519  85.83416  87.25408  88.46184
##            Aug       Sep       Oct       Nov       Dec
## 2017  87.80911  86.21182  87.76334  87.41553  87.34237
## 2018  88.17067  87.68511  90.43932  90.34170  92.14105

# alternatively:
# pred_arimax_1_0_1_2017_2018 <- predict(modArima_train_IFT_NilPhase, 
#                                              n.ahead = pred_length, newxreg = X_test)$pred

####################################################################
# 4. Scrambled-Phase reconstruction
#   Invert back to spacetime the mag_FT_MCSI_data[ , i] signal using scrambled-phases
#   randomly shaffle the rows of the Phases-matrix (Training & Testing XReg Data)
shuffle_phase_FT_MCSI_data <- phase_FT_MCSI_data

for (i in 1:7) {
  set.seed(12345)   # sample randomly Phases for each of the 7 covariates (X & Y)
  shuffle_phase_FT_MCSI_data[ , i] <- phase_FT_MCSI_data[sample(nrow(phase_FT_MCSI_data)), i]
  Real <- mag_FT_MCSI_data[ , i] * cos(Re(shuffle_phase_FT_MCSI_data[ , i]))  
  Imaginary <- mag_FT_MCSI_data[ , i] * sin(Re(shuffle_phase_FT_MCSI_data[ , i])) 
  IFT_ScramblePhase_FT_MCSI_data[ ,i] <- 
    Re(fft(Real+1i*Imaginary, inverse = T)/length(FT_MCSI_data[ , i]))
}
colnames(IFT_ScramblePhase_FT_MCSI_data) <- colnames(MCSI_data)
dim(IFT_ScramblePhase_FT_MCSI_data)

## [1] 468   7

dim(FT_MCSI_data)

## [1] 468   7

colnames(IFT_ScramblePhase_FT_MCSI_data)

## [1] "Y_train" "GOVT"    "DUR"     "HOM"     "CAR"     "INCOME"  "EDUC"

head(IFT_ScramblePhase_FT_MCSI_data)

##    Y_train      GOVT       DUR       HOM         CAR     INCOME      EDUC
## 1 77.44748 -2.637215 -2.480036 -2.284504  0.33511452 -114200.58 -3.387427
## 2 85.39286 -2.513407 -2.625748 -2.451977  0.03138933  -83208.34 -2.977489
## 3 84.07092 -2.506406 -2.529063 -2.526502 -0.06944438  -72815.95 -2.958866
## 4 86.95059 -2.613794 -2.630362 -2.463728 -0.32197237  -68345.87 -2.688935
## 5 92.90797 -2.729300 -2.654001 -2.681969 -0.66209018  -66130.08 -2.817957
## 6 91.22433 -2.657121 -2.554884 -2.437078 -0.50665855  -64333.23 -2.784244

# Perform similar scrambling of the phases separately for X_test (24 * 6) data
shuffle_phase_FT_X_test <- phase_FT_X_test
for (i in 1:6) {
  set.seed(12345)   # sample randomly Phases for each of the 6 predictors covariates (X)
  shuffle_phase_FT_X_test[ , i] <- phase_FT_MCSI_data[sample(nrow(phase_FT_X_test)), i]
  Real <- mag_FT_X_test[ , i] * cos(Re(shuffle_phase_FT_X_test[ , i]))  
  Imaginary <- mag_FT_X_test[ , i] * sin(Re(shuffle_phase_FT_X_test[ , i])) 
  IFT_ScramblePhase_FT_X_test[ ,i] <- 
    Re(fft(Real+1i*Imaginary, inverse = T)/length(FT_X_test[ , i]))
}
dim(IFT_ScramblePhase_FT_X_test)

## [1] 24  6

head(IFT_ScramblePhase_FT_X_test)

##          GOVT       DUR      HOM       CAR    INCOME       EDUC
## 469 -2.863100 0.6084955 1.850718 0.5141494 103116.73 -0.5176098
## 470 -2.921598 0.6317009 1.830746 0.3558439 105675.98 -0.5179213
## 471 -3.037529 0.6453448 1.974847 0.5262837 110766.41 -0.5206675
## 472 -2.874540 0.4232556 1.717884 0.3813262 101077.52 -0.5157843
## 473 -3.071031 0.6279396 1.798611 0.2153813  98585.91 -0.5217236
## 474 -3.219981 0.6048978 1.958505 0.2836233 104378.21 -0.5034275

#######################################################
# 5. Perform ARIMAX modeling on IFT_ScramblePhase_FT_MCSI_data; report (p,d,q) params and quality metrics AIC/BIC
IFT_ScramblePhase_FT_MCSI_data_Y_train <- 
  IFT_ScramblePhase_FT_MCSI_data$Y_train[1:(39*12)]

length(Y_train)/12

## [1] 39

# Training and Testing Data Covariates explaining the longitudinal outcome (Y): 
# GOVT, DUR, HOM, CAR, INCOME, EDUC
IFT_ScramblePhase_FT_MCSI_data_X_train <- 
  as.data.frame(cbind(IFT_ScramblePhase_FT_MCSI_data$GOVT, IFT_ScramblePhase_FT_MCSI_data$DUR, 
                      IFT_ScramblePhase_FT_MCSI_data$HOM, IFT_ScramblePhase_FT_MCSI_data$CAR, 
                      IFT_ScramblePhase_FT_MCSI_data$INCOME,
                      IFT_ScramblePhase_FT_MCSI_data$EDUC))[1:(39*12), ]
dim(IFT_ScramblePhase_FT_MCSI_data_X_train)

## [1] 468   6

# Outcome Variable to be modelled, as a timeseries
MCSI_Data_monthAvg_ts_Y_train_IFT_ScramblePhase <- 
  ts(IFT_ScramblePhase_FT_MCSI_data_Y_train, start=c(1978,1), end=c(2016, 12), frequency = 12)
# unchanged: MCSI_Data_monthAvg_ts_Y_test <- ts(IFT_ScramblePhase_FT_MCSI_data_Y_train, start=c(2017,1), end=c(2018, 12), frequency = 12)

# Find ARIMAX model
modArima_train_IFT_ScramblePhase <- 
  auto.arima(MCSI_Data_monthAvg_ts_Y_train_IFT_ScramblePhase,
             xreg=as.matrix(IFT_ScramblePhase_FT_MCSI_data_X_train))

modArima_train_IFT_ScramblePhase

## Series: MCSI_Data_monthAvg_ts_Y_train_IFT_ScramblePhase 
## Regression with ARIMA(1,0,2)(2,0,0)[12] errors 
## 
## Coefficients:

## Warning in sqrt(diag(x$var.coef)): NaNs produced

##          ar1      ma1      ma2     sar1    sar2  intercept       V1
##       0.9570  -0.0508  -0.1066  -0.0249  0.0320     62.736  -7.4009
## s.e.  0.0151   0.0511   0.0516   0.0483  0.0497        NaN   1.3351
##            V2       V3       V4   V5      V6
##       -6.2064  -2.1374  -3.2478    0  4.5433
## s.e.   1.0909   1.1224   1.1051  NaN  1.9499
## 
## sigma^2 estimated as 6.45:  log likelihood=-1095.3
## AIC=2216.6   AICc=2217.4   BIC=2270.53

# Regression with ARIMA(1,0,2)(2,0,0)[12] errors 
# Coefficients:
#  ar1      ma1      ma2     sar1    sar2  intercept       V1       V2       V3       V4   V5      V6
#0.9570  -0.0508  -0.1066  -0.0249  0.0320     62.736  -7.4009  -6.2064  -2.1374  -3.2478    0  4.5433
#s.e.  0.0151   0.0511   0.0516   0.0483  0.0497        NaN   1.3351   1.0909   1.1224   1.1051  NaN  1.9499
# sigma^2 estimated as 6.45:  log likelihood=-1095.3 AIC=2216.6   AICc=2217.4   BIC=2270.53

pred_arimax_1_1_3 <- 
  forecast(modArima_train_IFT_ScramblePhase, 
           xreg =as.matrix(IFT_ScramblePhase_FT_X_test))

## Warning in forecast.Arima(modArima_train_IFT_ScramblePhase, xreg =
## as.matrix(IFT_ScramblePhase_FT_X_test)): xreg contains different column
## names from the xreg used in training. Please check that the regressors are
## in the same order.

pred_arimax_1_1_3_2017_2018 <- 
  ts(pred_arimax_1_1_3$mean, frequency=12, start=c(2017,1), end=c(2018,12))
pred_arimax_1_1_3_2017_2018

##           Jan      Feb      Mar      Apr      May      Jun      Jul
## 2017 68.95487 70.20348 70.58534 71.88092 72.60548 73.85421 73.43008
## 2018 74.13513 75.01577 74.08203 75.26787 73.70892 73.61762 74.36375
##           Aug      Sep      Oct      Nov      Dec
## 2017 73.80472 73.40466 72.42727 73.05006 72.67080
## 2018 73.02887 74.04394 73.84603 73.17739 74.30894

#pred_arimax_1_1_3_2017_2018_pred <- predict(modArima_train_IFT_ScramblePhase, n.ahead = pred_length, newxreg = IFT_ScramblePhase_FT_X_test)$pred

######################################################################
# 6. Loaded-Phase reconstruction: Phase' = \pi/8 + Phase
#   Invert back to spacetime the mag_FT_MCSI_data[ , i] signal using Loaded-phases
# define a pi-wrapper calculator to ensure phase-arithmetic stays within [-pi : pi)
pi_wrapper <- function (x) {  
  if(abs(x%%(2*pi)) <= pi) return(x%%(2*pi))
  else if (-2*pi <= x%%(2*pi) & x%%(2*pi) < -pi) return (2*pi + x%%(2*pi))
  else if (pi <= x%%(2*pi) & x%%(2*pi) < 2*pi) return (x%%(2*pi) - 2*pi)
  else return (0)
}

loaded_phase_FT_MCSI_data <- Re(phase_FT_MCSI_data)
for (i in 1:dim(phase_FT_MCSI_data)[1]) {
  for (j in 1:dim(phase_FT_MCSI_data)[2])
  loaded_phase_FT_MCSI_data[i,j] <- pi_wrapper(pi/8 + Re(phase_FT_MCSI_data[i,j]))
}
# head(loaded_phase_FT_MCSI_data)
# Compare Loaded Phases to Native Phases
    df.loadedPhase_FT_MCSI_data <- as.data.frame(loaded_phase_FT_MCSI_data)
    df.loadedPhase_FT_MCSI_data %>% gather() %>% head()

##   key      value
## 1  V1  0.3926991
## 2  V1 -2.4528385
## 3  V1 -0.6816193
## 4  V1  2.1375102
## 5  V1  1.2256141
## 6  V1  2.0206829

    colnames(df.loadedPhase_FT_MCSI_data) <- colnames(MCSI_data)
    colnames(df.loadedPhase_FT_MCSI_data)[1] <- "ICS"
    loadedPhaseDistributions <- gather(df.loadedPhase_FT_MCSI_data)
    colnames(loadedPhaseDistributions) <- c("Feature", "Phase")
    
    # map the value as our x variable, and use facet_wrap to separate by the key column:
    ggplot(loadedPhaseDistributions, aes(Phase)) + 
      geom_histogram(aes(y=..density..), bins = 10) + 
      facet_wrap( ~Feature, scales = 'free_x') +
      xlim(-pi, pi) + 
      theme(strip.text.x = element_text(size = 16, colour = "black", angle = 0))

for (i in 1:7) {
  Real <- mag_FT_MCSI_data[ , i] * cos(loaded_phase_FT_MCSI_data[ , i])  
  Imaginary <- mag_FT_MCSI_data[ , i] * sin(loaded_phase_FT_MCSI_data[ , i]) 
  IFT_LoadedPhase_FT_MCSI_data[ ,i] <- 
    Re(fft(Real+1i*Imaginary, inverse = T)/length(FT_MCSI_data[ , i]))
}
colnames(IFT_LoadedPhase_FT_MCSI_data) <- colnames(MCSI_data)
dim(IFT_LoadedPhase_FT_MCSI_data)

## [1] 468   7

dim(FT_MCSI_data)

## [1] 468   7

head(IFT_LoadedPhase_FT_MCSI_data)

##    Y_train     GOVT      DUR      HOM      CAR   INCOME     EDUC
## 1 81.01240 3.037219 2.083673 2.381744 2.859224 14359.69 3.150231
## 2 83.42135 3.112394 2.443681 2.474077 2.961214 15044.04 3.202782
## 3 76.49093 3.313351 2.239820 2.341237 2.737009 15416.54 3.219355
## 4 78.50332 3.349560 2.217576 2.358080 2.761035 15051.15 3.222312
## 5 79.10811 3.257119 2.210519 2.416597 2.778585 15376.77 3.225474
## 6 72.99850 3.413609 2.170140 2.371105 2.731167 14800.22 3.205667

head(MCSI_data)

##    Y_train     GOVT      DUR      HOM      CAR   INCOME     EDUC
## 1 87.68719 3.287462 2.255352 2.577982 3.094801 15542.81 3.409786
## 2 90.29462 3.368831 2.645022 2.677922 3.205195 16283.55 3.466667
## 3 82.79319 3.586345 2.424364 2.534137 2.962517 16686.75 3.484605
## 4 84.97138 3.625538 2.400287 2.552367 2.988522 16291.25 3.487805
## 5 85.62600 3.525480 2.392648 2.615706 3.007519 16643.69 3.491228
## 6 79.01301 3.694864 2.348943 2.566465 2.956193 16019.64 3.469789

#########################################################################
# 7. Perform ARIMAX modeling on IFT_LoadedPhase_FT_MCSI_data; report (p,d,q) params and quality metrics AIC/BIC
IFT_LoadedPhase_FT_MCSI_data_Y_train <- IFT_LoadedPhase_FT_MCSI_data$Y_train[1:(39*12)]

length(IFT_LoadedPhase_FT_MCSI_data_Y_train)/12

## [1] 39

# Y_test <- IFT_NilPhase_FT_MCSI_data$Y_train[(39*12+1):length(IFT_NilPhase_FT_MCSI_data$Y_train)]; length(Y_test)/12

# Training and Testing Data Covariates explaining the longitudinal outcome (Y): 
# GOVT, DUR, HOM, CAR, INCOME, EDUC
IFT_LoadedPhase_FT_MCSI_data_X_train <- 
  as.data.frame(cbind(IFT_LoadedPhase_FT_MCSI_data$GOVT, IFT_LoadedPhase_FT_MCSI_data$DUR, 
                      IFT_LoadedPhase_FT_MCSI_data$HOM, IFT_LoadedPhase_FT_MCSI_data$CAR, 
                      IFT_LoadedPhase_FT_MCSI_data$INCOME,
                      IFT_LoadedPhase_FT_MCSI_data$EDUC))[1:(39*12),]
dim(IFT_LoadedPhase_FT_MCSI_data_X_train)

## [1] 468   6

# Outcome Variable to be modelled, as a timeseries
MCSI_Data_monthAvg_ts_Y_train_IFT_LoadedPhase <- 
  ts(IFT_LoadedPhase_FT_MCSI_data_Y_train, start=c(1978,1), end=c(2016, 12), frequency = 12)
# unchanged: MCSI_Data_monthAvg_ts_Y_test <- ts(IFT_LoadedPhase_FT_MCSI_data_Y_train, start=c(2017,1), end=c(2018, 12), frequency = 12)

# Find ARIMAX model
modArima_train_IFT_LoadedPhase <- 
  auto.arima(MCSI_Data_monthAvg_ts_Y_train_IFT_LoadedPhase,
             xreg=as.matrix(IFT_LoadedPhase_FT_MCSI_data_X_train))
modArima_train_IFT_LoadedPhase

## Series: MCSI_Data_monthAvg_ts_Y_train_IFT_LoadedPhase 
## Regression with ARIMA(2,0,1)(1,0,1)[12] errors 
## 
## Coefficients:
##          ar1      ar2      ma1    sar1     sma1  intercept        V1
##       1.4447  -0.4737  -0.6944  0.5781  -0.5307   140.8610  -11.4417
## s.e.  0.1915   0.1719   0.1667  0.4193   0.4340     8.0166    1.1624
##             V2       V3       V4     V5      V6
##       -10.9797  -2.8305  -0.7844  0e+00  1.6079
## s.e.    1.0447   0.9301   1.0023  1e-04  1.8702
## 
## sigma^2 estimated as 7.31:  log likelihood=-1124.24
## AIC=2274.48   AICc=2275.28   BIC=2328.41

# Regression with ARIMA(2,0,1)(1,0,1)[12] errors 
# Coefficients:
#  ar1      ar2      ma1    sar1     sma1  intercept        V1        V2       V3       V4     V5 V6
# 1.4447  -0.4737  -0.6944  0.5781  -0.5307   140.8610  -11.4417  -10.9797  -2.8305  -0.7844  0e+00  1.6079
# s.e.  0.1915   0.1719   0.1667  0.4193   0.4340     8.0166    1.1624    1.0447   0.9301   1.0023  1e-04  1.8702
# sigma^2 estimated as 7.31:  log likelihood=-1124.24 AIC=2274.48   AICc=2275.28   BIC=2328.41

# pred_arimax_2_0_1_Loaded <- forecast(modArima_train_IFT_LoadedPhase, xreg = X_test)
#pred_arimax_2_0_1_Loaded_2017_2018 <- 
#  ts(pred_arimax_2_0_1_Loaded$fitted, frequency=12, start=c(2017,1), end=c(2018,12))
pred_arimax_2_0_1_Loaded_2017_2018 <- 
  predict(modArima_train_IFT_LoadedPhase, n.ahead = pred_length, newxreg = X_test)$pred

1.5 Figure 1.9

# 8. Final VIZ: Zoom in on a smaller interval
# windows(width=10, height=8)
plot(forecast(modArima_train, xreg = as.matrix(X_test)),                             # Spacetime ARIMA forecast
     include=48, lwd=4, xlab="Time", ylab="ICS = INDEX OF CONSUMER SENTIMENT (US)", ylim=c(65,110),
     main = "Spacetime vs. Spacekime Analytics: (2017-2018) US ICS Forecasting\n
      based on fitting ARIMAX Models on raw & kime-transformed 1978-2016 data")
lines(MCSI_Data_monthAvg_ts_Y_test, col = "red", lwd = 4, lty=1)          # Observed Y_Test timeseries
lines(pred_arimax_1_0_1_2017_2018, col = "green", lwd = 4, lty=1)         # Nil Phase reconstr
lines(pred_arimax_1_1_3_2017_2018, col = "purple", lwd = 4, lty=1)        # Scramble Phase recon
lines(pred_arimax_2_0_1_Loaded_2017_2018, col = "brown", lwd = 4, lty=1)  # Loaded Phase recon
lines(26+pred_arimax_1_1_3_2017_2018, col = "orange", lwd = 4, lty=1)        # Offset Scramble Phase recon
legend("topleft", bty="n", legend=c("Training ICS Data (2013-2016)", 
                        "ARIMAX(2,0,1)-model ICS Forecasting (2017-2018)",
                        "MCSI-reported Official Index (2017-2018)",
                        "ARIMAX(1,0,0) Nil-Phase recon model ICS Forecasting (2017-2018)",
                        "ARIMAX(1,0,2) Scrambled-Phase recon model ICS Forecasting (2017-2018)",
                        "ARIMAX(2,0,1) Loaded-Phase recon model ICS Forecasting (2017-2018)",
                        "ARIMAX(1,0,2) Offset Scrambled-Phase recon model ICS Forecasting (2017-2018)"),
       col=c("black", "blue", "red", "green", "purple", "brown", "orange"), 
       lty=c(1,1,1,1,1,1,1), lwd=c(4,4,4,4,4,4,4), cex=1.5, x.intersp=1.5, y.intersp=0.4)
text(2015.4, 66, expression(atop(paste("Training Region (1978-2016)"), 
                paste(Model(ICS) %->% "ARIMAX(p, q, r) ;  ", 
                      XReg %==% X[i], " ", i %in% {1 : 6}))), cex=1.5)
text(2018, 66, expression(atop(paste("Validation Region (2017-2018)"), 
        paste(hat(ICS) %<-% "ARIMAX( . , . , . ) ;  ", 
              XReg %==% X[i], " ", i %in% {1 : 6}))), cex=1.5)

Interactive plot here

nl1<-ADDline(TS = MCSI_Data_monthAvg_ts_Y_test,linetype = "TS",Name = "MCSI-reported Official Index (2017-2018)")
nl2<-ADDline(TS = pred_arimax_1_0_1_2017_2018,linetype = "TS",Name = "ARIMAX(1,0,0) Nil-Phase\nrecon model ICS Forecasting (2017-2018)")
nl3<-ADDline(TS = pred_arimax_1_1_3_2017_2018,linetype = "TS",Name = "ARIMAX(1,0,2) Scrambled-Phase\nrecon model ICS Forecasting (2017-2018)")
nl4<-ADDline(TS = pred_arimax_2_0_1_Loaded_2017_2018,linetype = "TS",Name = "ARIMAX(2,0,1) Loaded-Phase\nrecon model ICS Forecasting (2017-2018)")
nl5<-ADDline(TS = 26+pred_arimax_1_1_3_2017_2018,linetype = "TS",Name = "ARIMAX(1,0,2) Offset Scrambled-Phase\nrecon model ICS Forecasting (2017-2018)")

TSplot(48,modArima_train,as.matrix(X_test),TITLE = "(2017-2018) Forecasting US ICS based on fitting ARIMAX(2,0,1) Model on 1978-2016 data using regression effect estimates of GOVT, DUR, HOM, CAR, INCOME & EDUC",Ylab ="ICS = INDEX OF CONSUMER SENTIMENT (US)",Xlab ="Time",title_size = 8,ts_original = "Training ICS Data (2013-2016)\nTraining Region (1978-2016)\n Model(ICS) -> ARIMAX(p,q,r)\n XReg={X_i}, 1<=i<=6",ts_forecast = "ARIMAX-model ICS Forecasting (2017-2018)\nValidation Region (2017-2018)\n hat(ICS) <- ARIMAX(2,0,1)\n XReg={X_i}, 1<=i<=6")%>%
  add_lines(x=nl1$X,text=nl1$TEXT,y=nl1$Y,name=nl1$NAME,line=list(color="grey"))%>%
  add_lines(x=nl2$X,text=nl2$TEXT,y=nl2$Y,name=nl2$NAME,line=list(color="blue"))%>%
  add_lines(x=nl3$X,text=nl3$TEXT,y=nl3$Y,name=nl3$NAME,line=list(color="purple"))%>%
  add_lines(x=nl4$X,text=nl4$TEXT,y=nl4$Y,name=nl4$NAME,line=list(color="brown"))%>%
  add_lines(x=nl5$X,text=nl5$TEXT,y=nl5$Y,name=nl5$NAME,line=list(color="orange"))

## Warning in forecast.Arima(ARIMAmodel, xreg = XREG): xreg contains different
## column names from the xreg used in training. Please check that the
## regressors are in the same order.

## Warning in forecast.Arima(modArima_train, xreg = as.matrix(X_test)): xreg
## contains different column names from the xreg used in training. Please
## check that the regressors are in the same order.

## png 
##   2

#### Full 41-year plot
plot(forecast(modArima_train, xreg = as.matrix(X_test)),  # include=48,              # Spacetime ARIMA forecast
     lwd=4, xlab="Time", ylab="ICS = INDEX OF CONSUMER SENTIMENT (US)", ylim=c(55,125),
     main = "Spacetime vs. Spacekime Analytics: (2017-2018) US ICS Forecasting\n
     based on fitting ARIMAX Models on raw & kime-transformed 1978-2016 data")
lines(MCSI_Data_monthAvg_ts_Y_test, col = "red", lwd = 4, lty=1)          # Observed Y_Test timeseries
lines(pred_arimax_1_0_1_2017_2018, col = "green", lwd = 4, lty=1)         # Nil Phase reconstr
lines(pred_arimax_1_1_3_2017_2018, col = "purple", lwd = 4, lty=1)        # Scramble Phase recon
lines(pred_arimax_2_0_1_Loaded_2017_2018, col = "brown", lwd = 4, lty=1)  # Loaded Phase recon
lines(26+pred_arimax_1_1_3_2017_2018, col = "orange", lwd = 4, lty=1)        # Offset Scramble Phase recon
legend("topleft", bty="n", legend=c("Training ICS Data (2013-2016)", 
                                    "ARIMAX(2,0,1)-model ICS Forecasting (2017-2018)",
                                    "MCSI-reported Official Index (2017-2018)",
                                    "ARIMAX(1,0,0) Nil-Phase recon model ICS Forecasting (2017-2018)",
                                    "ARIMAX(1,0,2) Scrambled-Phase recon model ICS Forecasting (2017-2018)",
                                    "ARIMAX(2,0,1) Loaded-Phase recon model ICS Forecasting (2017-2018)",
                                    "ARIMAX(1,0,2) Offset Scrambled-Phase recon model ICS Forecasting (2017-2018)"),
       col=c("black", "blue", "red", "green", "purple", "brown", "orange"), 
       lty=c(1,1,1,1,1,1,1), lwd=c(4,4,4,4,4,4,4), cex=1.2, x.intersp=1.5, y.intersp=0.4)

Interactive plot here

nl1<-ADDline(TS = MCSI_Data_monthAvg_ts_Y_test,linetype = "TS",Name = "MCSI-reported Official Index (2017-2018)")
nl2<-ADDline(TS = pred_arimax_1_0_1_2017_2018,linetype = "TS",Name = "ARIMAX(1,0,0) Nil-Phase\nrecon model ICS Forecasting (2017-2018)")
nl3<-ADDline(TS = pred_arimax_1_1_3_2017_2018,linetype = "TS",Name = "ARIMAX(1,0,2) Scrambled-Phase\nrecon model ICS Forecasting (2017-2018)")
nl4<-ADDline(TS = pred_arimax_2_0_1_Loaded_2017_2018,linetype = "TS",Name = "ARIMAX(2,0,1) Loaded-Phase\nrecon model ICS Forecasting (2017-2018)")
nl5<-ADDline(TS = 26+pred_arimax_1_1_3_2017_2018,linetype = "TS",Name = "ARIMAX(1,0,2) Offset Scrambled-Phase\nrecon model ICS Forecasting (2017-2018)")

TSplot("all",modArima_train,as.matrix(X_test),TITLE = "(2017-2018) Forecasting US ICS based on fitting ARIMAX(2,0,1) Model on 1978-2016 data using regression effect estimates of GOVT, DUR, HOM, CAR, INCOME & EDUC",Ylab ="ICS = INDEX OF CONSUMER SENTIMENT (US)",Xlab ="Time",title_size = 8,ts_original = "Training ICS Data (2013-2016)\nTraining Region (1978-2016)\n Model(ICS) -> ARIMAX(p,q,r)\n XReg={X_i}, 1<=i<=6",ts_forecast = "ARIMAX-model ICS Forecasting (2017-2018)\nValidation Region (2017-2018)\n hat(ICS) <- ARIMAX(2,0,1)\n XReg={X_i}, 1<=i<=6")%>%
  add_lines(x=nl1$X,text=nl1$TEXT,y=nl1$Y,name=nl1$NAME,line=list(color="grey"))%>%
  add_lines(x=nl2$X,text=nl2$TEXT,y=nl2$Y,name=nl2$NAME,line=list(color="blue"))%>%
  add_lines(x=nl3$X,text=nl3$TEXT,y=nl3$Y,name=nl3$NAME,line=list(color="purple"))%>%
  add_lines(x=nl4$X,text=nl4$TEXT,y=nl4$Y,name=nl4$NAME,line=list(color="brown"))%>%
  add_lines(x=nl5$X,text=nl5$TEXT,y=nl5$Y,name=nl5$NAME,line=list(color="orange"))

## Warning in forecast.Arima(ARIMAmodel, xreg = XREG): xreg contains different
## column names from the xreg used in training. Please check that the
## regressors are in the same order.

## Warning in forecast.Arima(modArima_train, xreg = as.matrix(X_test)): xreg
## contains different column names from the xreg used in training. Please
## check that the regressors are in the same order.

## png 
##   2

2 Appendix: Functions Used

2.1 fftshift1D()

# 1D timeseries FFT SHIFT
# A special case of imlementaiton of `fftshift` for 1D arrays
#' This function is useful for visualizing the 1D Fourier transform with the zero-frequency 
#' component in the middle of the spectrum.
#' 
#' @param img_ff A Fourier transform of a 1D signal.
#' @return A properly shifted FT of the 1D array.
#' 
fftshift1D <- function(img_ff) {
  rows <- length(img_ff)   
  rows_half <- ceiling(rows/2)
  return(append(img_ff[(rows_half+1):rows], img_ff[1:rows_half]))
}

2.2 noiseGen()

# Noise-generating function to generate some synthetic predictors (X_new) 
# and test the ARIMA(2,0,1)(1,0,1)[12] model
#' This function is useful for generating noises for a given dataset 
#' 
#' @param data A data in the form of vector or of a matrix 
#' @return A updated version of data where noise is also included
#' 
noiseGen <- function(data) { 
  if (is.vector(data)) 
      return( data + rnorm(length(data), mean(data), sd(data))) 
  else 
      return(data + matrix(rnorm(dim(data)[1] * dim(data)[2], 0, 1), dim(data)[1]))
}

2.3 pi_wrapper()

# define a pi-wrapper calculator to ensure phase-arithmetic stays within [-pi : pi)
#' This function makes sure that phase-arithmetic is between [-pi, pi]
#' 
#' @param x A numeric value
#' @return A number that satifies one of the 4 cases specified below
#' 
pi_wrapper <- function (x) {  
  if(abs(x%%(2*pi)) <= pi) return(x%%(2*pi))
  else if (-2*pi <= x%%(2*pi) & x%%(2*pi) < -pi) return (2*pi + x%%(2*pi))
  else if (pi <= x%%(2*pi) & x%%(2*pi) < 2*pi) return (x%%(2*pi) - 2*pi)
  else return (0)
}

Figure 1.8, 1.9, 1.11 & 1.12

Back To Index

SOCR Team

June 2019

1 TCIU Kime ARIMAX on ED Econ Data

1.1 Data Source Type

1.2 Figure 1.5

1.3 Figure 1.6

1.4 Figure 1.8

1.5 Figure 1.9

2 Appendix: Functions Used

2.1 fftshift1D()

2.2 noiseGen()

2.3 pi_wrapper()