SOCR ≫ | TCIU ≫ | Topics ≫ |
library(dplyr)
library(ggplot2)
library(plot3D)
library(scatterplot3d)
library(plotly)
library(forecast)
library(ggplot2)
library(reshape2)
library(tidyr)
library(TSplotly)
load("Fig1.8_to_1.12.RData")
MCSI_Data <- read.csv(unzip("~/UM_Michigan_Consumer_Sentiment_Index_Data.csv.zip"))
dim(MCSI_Data)
## [1] 279242 107
# [1] 279242 107
# extract features
# ICS INDEX OF CONSUMER SENTIMENT
# ICC INDEX OF CURRENT ECONOMIC CONDITIONS
# GOVT GOVERNMENT ECONOMIC POLICY
# DUR DURABLES BUYING ATTITUDES
# HOM HOME BUYING ATTITUDES
# CAR VEHICLE BUYING ATTITUDES
# INCOME TOTAL HOUSEHOLD INCOME - CURRENT DOLLARS
# AGE AGE OF RESPONDENT
# EDUC EDUCATION OF RESPONDENT
DT::datatable(head(MCSI_Data,1000), fillContainer = TRUE)
MCSI_Data_short <-
MCSI_Data[ , which(names(MCSI_Data) %in%
c("YYYYMM", "ICS", "ICC", "GOVT", "DUR",
"HOM", "CAR", "INCOME", "AGE", "EDUC"))]
dim(MCSI_Data_short)
## [1] 279242 10
# [1] 279242 10
# Compute the monthly avarages across all participants
MCSI_Data_monthAvg <- aggregate(.~YYYYMM, data = MCSI_Data_short, mean)
dim(MCSI_Data_monthAvg)
## [1] 492 10
# [1] 492 10
# colnames(MCSI_Data_monthAvg)
# convert YYYMM to Date
MCSI_Data_monthAvg$YYYYMM <-
as.Date(as.character(
as.POSIXct(paste0(as.character(MCSI_Data_monthAvg$YYYYMM),"01"), format = "%Y%m%d")))
head(MCSI_Data_monthAvg$YYYYMM)
## [1] "1978-01-01" "1978-02-01" "1978-03-01" "1978-04-01" "1978-05-01"
## [6] "1978-06-01"
# View(MCSI_Data_monthAvg)
# plot all features on the same plot (across time/months)
MCSI_Data_monthAvg_melt <-
melt(MCSI_Data_monthAvg , id.vars = 'YYYYMM', variable.name = 'series')
# plot on same grid, each series colored differently --
# good if the series have same scale
#ggplot(MCSI_Data_monthAvg_melt, aes(YYYYMM, value), log10="y") +
# geom_line(aes(colour = series))
# Exclude INCOME as its too large!
ggplot(MCSI_Data_monthAvg_melt[MCSI_Data_monthAvg_melt$series!="INCOME", ],
aes(YYYYMM, value)) +
geom_line(aes(linetype=series, colour = series), size=2) +
geom_point(aes(shape=series, colour = series), size=0.3) +
# geom_line(aes(colour = series)) +
geom_smooth(aes(colour = series), se = TRUE) +
coord_trans(y="log10") +
xlab("Time (monthly)") + ylab("Index Values (log-scale)") +
# scale_x_date(labels = date_label("%m-%Y")) +
scale_x_date(date_breaks = "12 month", date_labels = "%m-%Y") +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
text = element_text(size=20))+
theme(legend.position="top")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
Interactive plot here
updatemenus <- list(
list(
xanchor="left",
yanchor="top",
active = -1,
type= 'buttons',
buttons = list(
list(
label = "ALL",
method = "update",
args = list(list(visible = c(TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE)),
list(title = "All Indexes"))),
list(
label = "ICS",
method = "update",
args = list(list(visible = c(FALSE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE)),
list(title = "ICS"))),
list(
label = "ICC",
method = "update",
args = list(list(visible = c(FALSE,FALSE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE)),
list(title = "ICC"))),
list(
label = "GOVT",
method = "update",
args = list(list(visible = c(FALSE,FALSE,FALSE,TRUE,FALSE,FALSE,FALSE,FALSE)),
list(title = "GOVT"))),
list(
label = "DUR",
method = "update",
args = list(list(visible = c(FALSE,FALSE,FALSE,FALSE,TRUE,FALSE,FALSE,FALSE)),
list(title = "DUR"))),
list(
label = "HOM",
method = "update",
args = list(list(visible = c(FALSE,FALSE,FALSE,FALSE,FALSE,TRUE,FALSE,FALSE)),
list(title = "HOM"))),
list(
label = "CAR",
method = "update",
args = list(list(visible = c(FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,TRUE,FALSE)),
list(title = "CAR"))),
list(
label = "AGE",
method = "update",
args = list(list(visible = c(FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,TRUE)),
list(title = "AGE"))),
list(
label = "EDUC",
method = "update",
args = list(list(visible = c(TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE)),
list(title = "EDUC")))
)
)
)
PYdf<-GtoP_trans(MCSI_Data_monthAvg_melt[MCSI_Data_monthAvg_melt$series!="INCOME", ],NAME="series",X="YYYYMM",Y="value")
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 3.5.3
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
PYdf$INCOME<-NULL
PYdf<-log10(PYdf)
plot_ly(type="scatter",mode="lines")%>%
add_lines(x=as.yearmon(rownames(PYdf)),text=rownames(PYdf),y=PYdf$ICS,name="ICS",line=list(color="powderblue"))%>%
add_lines(x=as.yearmon(rownames(PYdf)),text=rownames(PYdf),y=PYdf$ICC,name="ICC",line=list(color="red"))%>%
add_lines(x=as.yearmon(rownames(PYdf)),text=rownames(PYdf),y=PYdf$GOVT,name="GOVT",line=list(color="green"))%>%
add_lines(x=as.yearmon(rownames(PYdf)),text=rownames(PYdf),y=PYdf$DUR,name="DUR",line=list(color="orange"))%>%
add_lines(x=as.yearmon(rownames(PYdf)),text=rownames(PYdf),y=PYdf$HOM,name="HOM",line=list(color="purple"))%>%
add_lines(x=as.yearmon(rownames(PYdf)),text=rownames(PYdf),y=PYdf$CAR,name="CAR",line=list(color="pink"))%>%
add_lines(x=as.yearmon(rownames(PYdf)),text=rownames(PYdf),y=PYdf$AGE,name="AGE",line=list(color="brown"))%>%
add_lines(x=as.yearmon(rownames(PYdf)),text=rownames(PYdf),y=PYdf$EDUC,name="EDUC",line=list(color="black"))%>%
layout(title= list(text="Time series for 8 indexes",font=list(family = "Times New Roman",size = 16,color = "black" )),
paper_bgcolor='rgb(255,255,255)', plot_bgcolor='rgb(229,229,229)',
xaxis = list(title ="Time (monthly)",
gridcolor = 'rgb(255,255,255)',
showgrid = TRUE,
showline = FALSE,
showticklabels = TRUE,
tickcolor = 'rgb(127,127,127)',
ticks = 'outside',
zeroline = FALSE),
yaxis = list(title = "Index Values (log10-scale)",
gridcolor = 'rgb(255,255,255)',
showgrid = TRUE,
showline = FALSE,
showticklabels = TRUE,
tickcolor = 'rgb(127,127,127)',
ticks = 'outside',
zeroline = FALSE),
updatemenus=updatemenus)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have 8.
## Consider specifying shapes manually if you must have them.
## Warning: Removed 984 rows containing missing values (geom_point).
## png
## 2
# Next, we want to take this example one step further and demonstrate the foundation of
# spacekime analytics using one oversimplified example that illustrates the tradeoffs
# between traditional spacetime inference and their spacekime analytical counterparts.
colnames(MCSI_Data_monthAvg)
## [1] "YYYYMM" "ICS" "ICC" "GOVT" "DUR" "HOM" "CAR"
## [8] "INCOME" "AGE" "EDUC"
MCSI_Data_monthAvg_ts <-
ts(MCSI_Data_monthAvg, start=c(1978,1), end=c(2018, 12), frequency = 12)
class(MCSI_Data_monthAvg_ts)
## [1] "mts" "ts" "matrix"
dim(MCSI_Data_monthAvg_ts)
## [1] 492 10
# [1] 492 10
# Time-series plots: 41 years * 12 months = 492 time points
plot.ts(MCSI_Data_monthAvg_ts)
plot(MCSI_Data_monthAvg_ts, plot.type="m")
# See MCSI docs: https://data.sca.isr.umich.edu/fetchdoc.php?docid=45121
# Outcome to Predict: ICS = INDEX OF CONSUMER SENTIMENT
Y <- MCSI_Data_monthAvg$ICS
# Covariates explaining the longitudinal outcome:
# GOVT, DUR, HOM, CAR, INCOME, EDUC
X <- cbind(MCSI_Data_monthAvg$GOVT, MCSI_Data_monthAvg$DUR,
MCSI_Data_monthAvg$HOM, MCSI_Data_monthAvg$CAR,
MCSI_Data_monthAvg$INCOME, MCSI_Data_monthAvg$EDUC)
# Outcome Variable to be modelled, as a timeseries
MCSI_Data_monthAvg_ts_Y <- ts(Y, start=c(1978,1), end=c(2018, 12), frequency = 12)
############################################################################
# Fit ARIMAX model on entire dataset, validate on prospective data (2019-2020) using (noisy) simulated XReg
modArima <- auto.arima(MCSI_Data_monthAvg_ts_Y, xreg=X)
modArima
## Series: MCSI_Data_monthAvg_ts_Y
## Regression with ARIMA(2,0,1)(1,0,1)[12] errors
##
## Coefficients:
## ar1 ar2 ma1 sar1 sma1 intercept xreg1
## 1.4289 -0.4577 -0.6788 0.5851 -0.5346 151.4591 -11.2435
## s.e. 0.1885 0.1704 0.1649 0.3503 0.3637 8.4220 1.1118
## xreg2 xreg3 xreg4 xreg5 xreg6
## -10.7791 -2.7576 -0.9281 0e+00 1.5743
## s.e. 1.0010 0.9115 0.9593 1e-04 1.8224
##
## sigma^2 estimated as 8.266: log likelihood=-1212.44
## AIC=2450.88 AICc=2451.64 BIC=2505.46
# Regression with ARIMA(2,0,1)(1,0,1)[12] errors
# Coefficients:
# ar1 ar2 ma1 sar1 sma1 intercept xreg1 xreg2 xreg3 xreg4 xreg5
# 1.4289 -0.4577 -0.6788 0.5851 -0.5346 151.4591 -11.2435 -10.7791 -2.7576 -0.9281 0e+00
#s.e. 0.1885 0.1704 0.1649 0.3503 0.3637 8.4220 1.1118 1.0010 0.9115 0.9593 1e-04
# xreg6
# 1.5743
#s.e. 1.8224
# sigma^2 estimated as 8.266: log likelihood=-1212.44
# AIC=2450.88 AICc=2451.64 BIC=2505.46
# Predict prospective ICS = INDEX OF CONSUMER SENTIMENT over next 2 years
pred_length <- 2 * 12 # 2-years of monthly forward forecasting (2017-2018)
start_period <- 468 # 39 years (2017-1978)*12
# Noise-generating function to generate some synthetic predictors (X_new) and test the ARIMA(2,0,1)(1,0,1)[12] model
noiseGen <- function(data) {
if (is.vector(data))
return( data + rnorm(length(data), mean(data), sd(data)))
else
return(data + matrix(rnorm(dim(data)[1] * dim(data)[2], 0, 1), dim(data)[1]))
}
X_new <- noiseGen(X[(start_period+1):(start_period+24), ]) # just noise-corrupt the months 301:324 predictors
for (i in 1:6){
# just noise-corrupt the months 468:492 predictors
X_new[ ,i] <- noiseGen(X[(start_period+1):(start_period+24), i])
}
# Check X_new, relative to X
# head (X[(start_period+1):(start_period+24), ])
# head(X_new)
# dim(X)
# dim(X_new)
pred2years_ICS <- predict(modArima, n.ahead = pred_length, newxreg = X_new)
#plot(pred2years_ICS$pred, main="Forward time-series predictions (fitArimaX)")
# Plot data (4-yrs) and prediciton model, CI's (prospective 2-yrs)
plot(forecast(modArima, xreg = X_new), lwd=2, include=48, xlab="Time",
ylab="ICS = INDEX OF CONSUMER SENTIMENT (US)",
main = "(2020-2021) Forecasting US ICS based on ARIMAX(2,0,1) Model
\n using prospective regression estimates of GOVT, DUR, HOM, CAR, INCOME & EDUC")
# autoplot(forecast(modArima, xreg = X_new))
Interactive plot here
TSplot(48,modArima,X_new,TITLE = "(2020-2021) Forecasting US ICS based on ARIMAX(2,0,1) Model using prospective regression estimates of GOVT, DUR, HOM, CAR, INCOME & EDUC",Xlab ="Time",Ylab = "ICS = INDEX OF CONSUMER SENTIMENT (US)",title_size = 8,ts_original = "Original time series",ts_forecast = "Predicted time series")
################################################################################################
# Fit model only on 39 years (1978-2016), and Evaluate on testing data (2017-2018)
Y_train <- MCSI_Data_monthAvg$ICS[1:(39*12)]
length(Y_train)/12
## [1] 39
Y_test <- MCSI_Data_monthAvg$ICS[((39*12)+1):length(MCSI_Data_monthAvg$ICS)]
length(Y_test)/12
## [1] 2
# Training and Testing Data Covariates explaining the longitudinal outcome (Y):
# GOVT, DUR, HOM, CAR, INCOME, EDUC
X_train <-
as.data.frame(cbind(MCSI_Data_monthAvg$GOVT, MCSI_Data_monthAvg$DUR,
MCSI_Data_monthAvg$HOM, MCSI_Data_monthAvg$CAR,
MCSI_Data_monthAvg$INCOME, MCSI_Data_monthAvg$EDUC))[1:(39*12), ]
dim(X_train)
## [1] 468 6
X_test <-
as.data.frame(cbind(MCSI_Data_monthAvg$GOVT, MCSI_Data_monthAvg$DUR,
MCSI_Data_monthAvg$HOM, MCSI_Data_monthAvg$CAR,
MCSI_Data_monthAvg$INCOME,
MCSI_Data_monthAvg$EDUC))[((39*12)+1):length(MCSI_Data_monthAvg$ICS), ]
dim(X_test)
## [1] 24 6
# Outcome Variable to be modelled, as a timeseries
MCSI_Data_monthAvg_ts_Y_train <- ts(Y_train, start=c(1978,1), end=c(2016, 12), frequency = 12)
MCSI_Data_monthAvg_ts_Y_test <- ts(Y_test, start=c(2017,1), end=c(2018, 12), frequency = 12)
# Find ARIMAX model
modArima_train <- auto.arima(MCSI_Data_monthAvg_ts_Y_train, xreg=as.matrix(X_train))
modArima_train
## Series: MCSI_Data_monthAvg_ts_Y_train
## Regression with ARIMA(2,0,1)(1,0,1)[12] errors
##
## Coefficients:
## ar1 ar2 ma1 sar1 sma1 intercept V1
## 1.4447 -0.4737 -0.6944 0.5781 -0.5307 152.4668 -11.4417
## s.e. 0.1915 0.1719 0.1667 0.4193 0.4340 8.6771 1.1624
## V2 V3 V4 V5 V6
## -10.9797 -2.8305 -0.7844 0e+00 1.6079
## s.e. 1.0447 0.9301 1.0023 1e-04 1.8702
##
## sigma^2 estimated as 8.564: log likelihood=-1161.29
## AIC=2348.59 AICc=2349.39 BIC=2402.52
# Regression with ARIMA(2,0,1)(1,0,1)[12] errors
# Coefficients:
# ar1 ar2 ma1 sar1 sma1 intercept V1 V2 V3 V4 V5
# 1.4447 -0.4737 -0.6944 0.5781 -0.5307 152.4668 -11.4417 -10.9797 -2.8305 -0.7844 0e+00
#s.e. 0.1915 0.1719 0.1667 0.4193 0.4340 8.6771 1.1624 1.0447 0.9301 1.0023 1e-04
# V6
# 1.6079
#s.e. 1.8702
# sigma^2 estimated as 8.564: log likelihood=-1161.29
# AIC=2348.59 AICc=2349.39 BIC=2402.52
pred2years_ICS <- predict(modArima_train, n.ahead = pred_length, frequency=12, newxreg = X_test)
#plot(pred2years_ICS$pred, main="Forward time-series predictions (fitArimaX)")
# pred_arimax_2_0_1 <- forecast(modArima_train, xreg = X_test)
# pred_arimax_2_0_1_2017_2018 <-
# ts(pred_arimax_2_0_1$fitted, frequency=12, start=c(2017,1), end=c(2018,12))
# Plot ICS training Data, Testing ICS and Predicted ICS
plot(forecast(modArima_train, xreg = as.matrix(X_test) ), lwd=2, xlab="Time",
ylab="ICS = INDEX OF CONSUMER SENTIMENT (US)",
main = "(2017-2018) Forecasting US ICS based on fitting ARIMAX(2,0,1) Model on 1978-2016 data
\n using regression effect estimates of GOVT, DUR, HOM, CAR, INCOME & EDUC")
lines(MCSI_Data_monthAvg_ts_Y_test, col = "red", lwd = 4, lty=1)
legend(1990, 60, bty="n", legend=c("Training ICS Data (1978-2016)",
"ARIMAX-model ICS Forecasting (2017-2018)",
"MCSI Official ICS (2017-2018)"),
col=c("black", "blue", "red"),
lty=c(1,1,1), lwd=c(2,2,4), cex=1.2, x.intersp=0.5, y.intersp=0.7)
Interactive Plot here
newline<-ADDline(TS = MCSI_Data_monthAvg_ts_Y_test,linetype = "TS",Name = "MCSI Official ICS (2017-2018)")
TSplot("all",modArima_train,as.matrix(X_test),TITLE = "ZOOM: (2017-2018) Forecasting US ICS based on fitting ARIMAX(2,0,1) Model on 1978-2016 data using regression effect estimates of GOVT, DUR, HOM, CAR, INCOME & EDUC",Ylab ="ICS = INDEX OF CONSUMER SENTIMENT (US)",Xlab ="Time",title_size = 8,ts_original = "Training ICS Data (1978-2016)",ts_forecast = "ARIMAX-model ICS Forecasting (2017-2018)")%>%
add_lines(x=newline$X,text=newline$TEXT,y=newline$Y,name=newline$NAME,line=list(color="grey"))
## png
## 2
# Zoom in on a smaller interval (4-years prior and 2-years forecasting)
plot(forecast(modArima_train, xreg = as.matrix(X_test) ), include=48, lwd=2, xlab="Time",
ylab="ICS = INDEX OF CONSUMER SENTIMENT (US)",
main = "ZOOM: (2017-2018) Forecasting US ICS based on fitting ARIMAX(2,0,1) Model on 1978-2016 data
\n using regression effect estimates of GOVT, DUR, HOM, CAR, INCOME & EDUC")
lines(MCSI_Data_monthAvg_ts_Y_test, col = "red", lwd = 4, lty=1)
legend("topleft", bty="n", legend=c("Training ICS Data (2013-2016)",
"ARIMAX-model ICS Forecasting (2017-2018)",
"MCSI Official ICS (2017-2018)"),
col=c("black", "blue", "red"),
lty=c(1,1,1), lwd=c(2,2,4), cex=1.2, x.intersp=1.5, y.intersp=0.7)
text(2014, 91, "Training Region (1978-2016)\n Model(ICS) -> ARIMAX(p,q,r)\n XReg={X_i}, 1<=i<=6", cex=1.5)
text(2018, 79, "Validation Region (2017-2018)\n hat(ICS) <- ARIMAX(2,0,1)\n XReg={X_i}, 1<=i<=6", cex=1.5)
Interactive plot here
newline<-ADDline(TS = MCSI_Data_monthAvg_ts_Y_test,linetype = "TS",Name = "MCSI Official ICS (2017-2018)")
TSplot(48,modArima_train,as.matrix(X_test),TITLE = "(2017-2018) Forecasting US ICS based on fitting ARIMAX(2,0,1) Model on 1978-2016 data using regression effect estimates of GOVT, DUR, HOM, CAR, INCOME & EDUC",Ylab ="ICS = INDEX OF CONSUMER SENTIMENT (US)",Xlab ="Time",title_size = 8,ts_original = "Training ICS Data (2013-2016)\nTraining Region (1978-2016)\n Model(ICS) -> ARIMAX(p,q,r)\n XReg={X_i}, 1<=i<=6",ts_forecast = "ARIMAX-model ICS Forecasting (2017-2018)\nValidation Region (2017-2018)\n hat(ICS) <- ARIMAX(2,0,1)\n XReg={X_i}, 1<=i<=6")%>%
add_lines(x=newline$X,text=newline$TEXT,y=newline$Y,name=newline$NAME,line=list(color="grey"))
##############################################################################################
# FT/Spacekime Analytics
# 1D timeseries FFT SHIFT
fftshift1D <- function(img_ff) {
rows <- length(img_ff)
rows_half <- ceiling(rows/2)
return(append(img_ff[(rows_half+1):rows], img_ff[1:rows_half]))
}
# 1. Transform all 6 predictors (X) and outcome (Y) series to k-space (Fourier domain)
# Transform entire Xreg matrix (12*39 = 468 PLUS 12*2 = 24, a total of 492 time points: Training + Testing)
# But transform only the Training Y outcome (12*39 = 468 time points: Training)
MCSI_data <- cbind(Y_train, X_train)
dim(MCSI_data)
## [1] 468 7
colnames(MCSI_data) <- c("Y_train", "GOVT", "DUR", "HOM", "CAR", "INCOME", "EDUC")
#head(MCSI_data)
colnames(X_test) <- c("GOVT", "DUR", "HOM", "CAR", "INCOME", "EDUC")
head(X_test)
## GOVT DUR HOM CAR INCOME EDUC
## 469 3.147368 1.843860 1.814035 2.150877 100026.35 4.514035
## 470 3.443463 1.906360 2.008834 2.259717 101614.96 4.514134
## 471 3.291740 1.970123 2.082601 2.168717 102438.36 4.434095
## 472 3.347594 1.764706 1.957219 2.062389 99627.75 4.525847
## 473 3.249129 1.864111 2.094077 2.466899 102524.11 4.505226
## 474 3.448763 1.998233 2.151943 2.388693 109156.26 4.505300
# MCSI_data_numeric <- as.matrix(MCSI_data) # remove column names for FT processing
FT_MCSI_data <- array(complex(), c(468, 7))
mag_FT_MCSI_data <- array(complex(), c(468, 7))
phase_FT_MCSI_data <- array(complex(), c(468, 7))
IFT_NilPhase_FT_MCSI_data <- cbind(Y_train, X_train) # array( , c(468, 7))
IFT_ScramblePhase_FT_MCSI_data <- cbind(Y_train, X_train) # array( , c(468, 7))
IFT_LoadedPhase_FT_MCSI_data <- cbind(Y_train, X_train) # array( , c(468, 7))
for (i in 1:7) {
FT_MCSI_data[ , i] <- fft(MCSI_data[ , i])
X2 <- FT_MCSI_data[ , i]
# plot(fftshift1D(log(Re(X2)+2)), main = "log(fftshift1D(Re(FFT(timeseries))))")
mag_FT_MCSI_data[ , i] <- sqrt(Re(X2)^2+Im(X2)^2);
# plot(log(fftshift1D(Re(mag_FT_MCSI_data))), main = "log(Magnitude(FFT(timeseries)))")
phase_FT_MCSI_data[ , i] <- atan2(Im(X2), Re(X2));
# plot(Re(fftshift1D(phase_FT_MCSI_data[ , 1])), main = "Shift(Phase(FFT(timeseries)))")
}
######### Test the FT-IFT analysis-synthesis back-and-forth transofrm process to confirm calculations
# X2 <- FT_MCSI_data[ , 1]; mag_FT_MCSI_data[ , 1] <- sqrt(Re(X2)^2+Im(X2)^2);
# phase_FT_MCSI_data[ , 1] <- atan2(Im(X2), Re(X2));
# Real2 = mag_FT_MCSI_data[ , 1] * cos(phase_FT_MCSI_data[ , 1])
# Imaginary2 = mag_FT_MCSI_data[ , 1] * sin(phase_FT_MCSI_data[ , 1])
# man_hat_X2 = Re(fft(Real2 + 1i*Imaginary2, inverse = T)/length(X2))
# ifelse(abs(man_hat_X2[5] - MCSI_data[5, 1]) < 0.001, "Perfect Syntesis", "Problems!!!")
#########
# Examine the Kime-direction Distributions of the Phases for all 7 features (predictors + outcome)
df.phase_FT_MCSI_data <- as.data.frame(Re(phase_FT_MCSI_data))
df.phase_FT_MCSI_data %>%
gather() %>%
head()
## key value
## 1 V1 0.000000
## 2 V1 -2.845538
## 3 V1 -1.074318
## 4 V1 1.744811
## 5 V1 0.832915
## 6 V1 1.627984
colnames(df.phase_FT_MCSI_data) <- colnames(MCSI_data)
colnames(df.phase_FT_MCSI_data)[1] <- "ICS"
phaseDistributions <- gather(df.phase_FT_MCSI_data)
colnames(phaseDistributions) <- c("Feature", "Phase")
# map the value as our x variable, and use facet_wrap to separate by the key column:
ggplot(phaseDistributions, aes(Phase)) +
# geom_histogram(bins = 10) +
geom_histogram(aes(y=..density..), bins = 10) +
facet_wrap( ~Feature, scales = 'free_x') +
xlim(-pi, pi) +
theme(strip.text.x = element_text(size = 16, colour = "black", angle = 0))
## png
## 2
#### Perform the Space-kime transform separately for the X_test data
dim(X_test) # [1] 24 6
## [1] 24 6
colnames(X_test)
## [1] "GOVT" "DUR" "HOM" "CAR" "INCOME" "EDUC"
head(X_test)
## GOVT DUR HOM CAR INCOME EDUC
## 469 3.147368 1.843860 1.814035 2.150877 100026.35 4.514035
## 470 3.443463 1.906360 2.008834 2.259717 101614.96 4.514134
## 471 3.291740 1.970123 2.082601 2.168717 102438.36 4.434095
## 472 3.347594 1.764706 1.957219 2.062389 99627.75 4.525847
## 473 3.249129 1.864111 2.094077 2.466899 102524.11 4.505226
## 474 3.448763 1.998233 2.151943 2.388693 109156.26 4.505300
FT_X_test <- array(complex(), c(24, 6))
mag_FT_X_test <- array(complex(), c(24, 6))
phase_FT_X_test <- array(complex(), c(24, 6))
IFT_NilPhase_FT_X_test <- X_test # array( , c(24, 6))
IFT_ScramblePhase_FT_X_test <- X_test # array( , c(24, 6)
IFT_LoadedPhase_FT_X_test <- X_test # array( , c(24, 6)
for (i in 1:6) {
FT_X_test[ , i] <- fft(X_test[ , i])
X2 <- FT_X_test[ , i]
mag_FT_X_test[ , i] <- sqrt(Re(X2)^2+Im(X2)^2);
phase_FT_X_test[ , i] <- atan2(Im(X2), Re(X2));
}
##########################################################################################
# 2. Nil-Phase reconstruction
# Invert back to spacetime the mag_FT_MCSI_data[ , i] & mag_FT_X_test [, i] signals with nil-phase
for (i in 1:7) {
Real <- mag_FT_MCSI_data[ , i] * cos(0)
Imaginary <- mag_FT_MCSI_data[ , i] * sin(0)
IFT_NilPhase_FT_MCSI_data[ ,i] <-
Re(fft(Real+1i*Imaginary, inverse = T)/length(FT_MCSI_data[ , i]))
# display(ift_NilPhase_X2mag, method = "raster")
# dim(IFT_NilPhase_FT_MCSI_data); View(IFT_NilPhase_FT_MCSI_data); # compare to View(MCSI_data[ , ])
}
colnames(IFT_NilPhase_FT_MCSI_data) <- colnames(MCSI_data)
dim(IFT_NilPhase_FT_MCSI_data)
## [1] 468 7
dim(FT_MCSI_data)
## [1] 468 7
colnames(IFT_NilPhase_FT_MCSI_data)
## [1] "Y_train" "GOVT" "DUR" "HOM" "CAR" "INCOME" "EDUC"
head(IFT_NilPhase_FT_MCSI_data)
## Y_train GOVT DUR HOM CAR INCOME EDUC
## 1 207.9947 7.096082 6.361494 7.448879 6.702678 225647.6 6.456717
## 2 158.7957 5.514798 4.119853 5.316651 4.422884 160111.0 5.162829
## 3 141.7572 5.090193 3.742753 4.849988 4.050338 143263.9 5.044055
## 4 133.9254 4.841114 3.482007 4.525654 3.756915 130886.9 4.913845
## 5 127.0829 4.662767 3.358568 4.331847 3.639068 125015.0 4.861882
## 6 122.8747 4.585099 3.284439 4.192362 3.508368 121664.0 4.790130
head(MCSI_data)
## Y_train GOVT DUR HOM CAR INCOME EDUC
## 1 87.68719 3.287462 2.255352 2.577982 3.094801 15542.81 3.409786
## 2 90.29462 3.368831 2.645022 2.677922 3.205195 16283.55 3.466667
## 3 82.79319 3.586345 2.424364 2.534137 2.962517 16686.75 3.484605
## 4 84.97138 3.625538 2.400287 2.552367 2.988522 16291.25 3.487805
## 5 85.62600 3.525480 2.392648 2.615706 3.007519 16643.69 3.491228
## 6 79.01301 3.694864 2.348943 2.566465 2.956193 16019.64 3.469789
for (i in 1:6) {
Real <- mag_FT_X_test[ , i] * cos(0)
Imaginary <- mag_FT_X_test[ , i] * sin(0)
IFT_NilPhase_FT_X_test[ ,i] <-
Re(fft(Real+1i*Imaginary, inverse = T)/length(FT_X_test[ , i]))
}
head(IFT_NilPhase_FT_X_test)
## GOVT DUR HOM CAR INCOME EDUC
## 469 3.751431 2.303827 2.806630 3.152860 119636.8 4.666088
## 470 3.368501 1.872374 2.306036 2.500152 108714.2 4.502943
## 471 3.310008 1.858239 2.217383 2.506473 104066.2 4.498433
## 472 3.286447 1.911739 2.221120 2.385652 105367.7 4.532361
## 473 3.171287 1.801815 2.191204 2.324749 102780.5 4.510567
## 474 3.178573 1.829411 2.063818 2.243006 106010.0 4.490027
head(X_test)
## GOVT DUR HOM CAR INCOME EDUC
## 469 3.147368 1.843860 1.814035 2.150877 100026.35 4.514035
## 470 3.443463 1.906360 2.008834 2.259717 101614.96 4.514134
## 471 3.291740 1.970123 2.082601 2.168717 102438.36 4.434095
## 472 3.347594 1.764706 1.957219 2.062389 99627.75 4.525847
## 473 3.249129 1.864111 2.094077 2.466899 102524.11 4.505226
## 474 3.448763 1.998233 2.151943 2.388693 109156.26 4.505300
# 3. Perform ARIMAX modeling on IFT_NilPhase_FT_MCSI_data; report (p,d,q) params and quality metrics AIC/BIC
# library(forecast)
IFT_NilPhase_FT_MCSI_data_Y_train <- IFT_NilPhase_FT_MCSI_data$Y_train[1:(39*12)]
length(Y_train)/12
## [1] 39
# Y_test <- IFT_NilPhase_FT_MCSI_data$Y_train[(39*12+1):length(IFT_NilPhase_FT_MCSI_data$Y_train)]; length(Y_test)/12
# Training and Testing Data Covariates explaining the longitudinal outcome (Y):
# GOVT, DUR, HOM, CAR, INCOME, EDUC
IFT_NilPhase_FT_MCSI_data_X_train <-
as.data.frame(cbind(IFT_NilPhase_FT_MCSI_data$GOVT, IFT_NilPhase_FT_MCSI_data$DUR,
IFT_NilPhase_FT_MCSI_data$HOM, IFT_NilPhase_FT_MCSI_data$CAR,
IFT_NilPhase_FT_MCSI_data$INCOME,
IFT_NilPhase_FT_MCSI_data$EDUC))[1:(39*12), ]
dim(X_train)
## [1] 468 6
colnames(IFT_NilPhase_FT_MCSI_data_X_train) <- colnames(IFT_NilPhase_FT_X_test)
# Outcome Variable to be modelled, as a timeseries
MCSI_Data_monthAvg_ts_Y_train_IFT_NilPhase <-
ts(IFT_NilPhase_FT_MCSI_data_Y_train, start=c(1978,1), end=c(2016, 12), frequency = 12)
# Find ARIMAX model
modArima_train_IFT_NilPhase <-
auto.arima(MCSI_Data_monthAvg_ts_Y_train_IFT_NilPhase,
xreg=as.matrix(IFT_NilPhase_FT_MCSI_data_X_train))
modArima_train_IFT_NilPhase
## Series: MCSI_Data_monthAvg_ts_Y_train_IFT_NilPhase
## Regression with ARIMA(1,0,0)(1,0,0)[12] errors
##
## Coefficients:
## Warning in sqrt(diag(x$var.coef)): NaNs produced
## ar1 sar1 intercept GOVT DUR HOM CAR INCOME
## 0.8194 0.1554 26.3831 10.6846 8.6527 2.3138 2.2177 2e-04
## s.e. 0.0316 0.0480 5.9317 1.3501 1.1259 1.1826 1.0817 NaN
## EDUC
## -3.5998
## s.e. 1.8950
##
## sigma^2 estimated as 3.401: log likelihood=-946.65
## AIC=1913.31 AICc=1913.79 BIC=1954.79
# Regression with ARIMA(1,0,0)(1,0,0)[12] errors
# Coefficients:
# ar1 sar1 intercept V1 V2 V3 V4 V5 V6
# 0.8194 0.1554 26.3831 10.6846 8.6527 2.3138 2.2177 2e-04 -3.5998
# s.e. 0.0316 0.0480 5.9317 1.3501 1.1259 1.1826 1.0817 NaN 1.8950
# sigma^2 estimated as 3.401: log likelihood=-946.65, AIC=1913.31 AICc=1913.79 BIC=1954.79
pred_arimax_1_0_1 <- forecast(modArima_train_IFT_NilPhase, xreg = as.matrix(IFT_NilPhase_FT_X_test))
pred_arimax_1_0_1_2017_2018 <-
ts(pred_arimax_1_0_1$mean, frequency=12, start=c(2017,1), end=c(2018,12))
pred_arimax_1_0_1_2017_2018
## Jan Feb Mar Apr May Jun Jul
## 2017 109.49273 96.86326 94.40218 94.00355 90.78807 90.22039 89.67892
## 2018 86.67948 86.83445 87.33288 87.35519 85.83416 87.25408 88.46184
## Aug Sep Oct Nov Dec
## 2017 87.80911 86.21182 87.76334 87.41553 87.34237
## 2018 88.17067 87.68511 90.43932 90.34170 92.14105
# alternatively:
# pred_arimax_1_0_1_2017_2018 <- predict(modArima_train_IFT_NilPhase,
# n.ahead = pred_length, newxreg = X_test)$pred
####################################################################
# 4. Scrambled-Phase reconstruction
# Invert back to spacetime the mag_FT_MCSI_data[ , i] signal using scrambled-phases
# randomly shaffle the rows of the Phases-matrix (Training & Testing XReg Data)
shuffle_phase_FT_MCSI_data <- phase_FT_MCSI_data
for (i in 1:7) {
set.seed(12345) # sample randomly Phases for each of the 7 covariates (X & Y)
shuffle_phase_FT_MCSI_data[ , i] <- phase_FT_MCSI_data[sample(nrow(phase_FT_MCSI_data)), i]
Real <- mag_FT_MCSI_data[ , i] * cos(Re(shuffle_phase_FT_MCSI_data[ , i]))
Imaginary <- mag_FT_MCSI_data[ , i] * sin(Re(shuffle_phase_FT_MCSI_data[ , i]))
IFT_ScramblePhase_FT_MCSI_data[ ,i] <-
Re(fft(Real+1i*Imaginary, inverse = T)/length(FT_MCSI_data[ , i]))
}
colnames(IFT_ScramblePhase_FT_MCSI_data) <- colnames(MCSI_data)
dim(IFT_ScramblePhase_FT_MCSI_data)
## [1] 468 7
dim(FT_MCSI_data)
## [1] 468 7
colnames(IFT_ScramblePhase_FT_MCSI_data)
## [1] "Y_train" "GOVT" "DUR" "HOM" "CAR" "INCOME" "EDUC"
head(IFT_ScramblePhase_FT_MCSI_data)
## Y_train GOVT DUR HOM CAR INCOME EDUC
## 1 77.44748 -2.637215 -2.480036 -2.284504 0.33511452 -114200.58 -3.387427
## 2 85.39286 -2.513407 -2.625748 -2.451977 0.03138933 -83208.34 -2.977489
## 3 84.07092 -2.506406 -2.529063 -2.526502 -0.06944438 -72815.95 -2.958866
## 4 86.95059 -2.613794 -2.630362 -2.463728 -0.32197237 -68345.87 -2.688935
## 5 92.90797 -2.729300 -2.654001 -2.681969 -0.66209018 -66130.08 -2.817957
## 6 91.22433 -2.657121 -2.554884 -2.437078 -0.50665855 -64333.23 -2.784244
# Perform similar scrambling of the phases separately for X_test (24 * 6) data
shuffle_phase_FT_X_test <- phase_FT_X_test
for (i in 1:6) {
set.seed(12345) # sample randomly Phases for each of the 6 predictors covariates (X)
shuffle_phase_FT_X_test[ , i] <- phase_FT_MCSI_data[sample(nrow(phase_FT_X_test)), i]
Real <- mag_FT_X_test[ , i] * cos(Re(shuffle_phase_FT_X_test[ , i]))
Imaginary <- mag_FT_X_test[ , i] * sin(Re(shuffle_phase_FT_X_test[ , i]))
IFT_ScramblePhase_FT_X_test[ ,i] <-
Re(fft(Real+1i*Imaginary, inverse = T)/length(FT_X_test[ , i]))
}
dim(IFT_ScramblePhase_FT_X_test)
## [1] 24 6
head(IFT_ScramblePhase_FT_X_test)
## GOVT DUR HOM CAR INCOME EDUC
## 469 -2.863100 0.6084955 1.850718 0.5141494 103116.73 -0.5176098
## 470 -2.921598 0.6317009 1.830746 0.3558439 105675.98 -0.5179213
## 471 -3.037529 0.6453448 1.974847 0.5262837 110766.41 -0.5206675
## 472 -2.874540 0.4232556 1.717884 0.3813262 101077.52 -0.5157843
## 473 -3.071031 0.6279396 1.798611 0.2153813 98585.91 -0.5217236
## 474 -3.219981 0.6048978 1.958505 0.2836233 104378.21 -0.5034275
#######################################################
# 5. Perform ARIMAX modeling on IFT_ScramblePhase_FT_MCSI_data; report (p,d,q) params and quality metrics AIC/BIC
IFT_ScramblePhase_FT_MCSI_data_Y_train <-
IFT_ScramblePhase_FT_MCSI_data$Y_train[1:(39*12)]
length(Y_train)/12
## [1] 39
# Training and Testing Data Covariates explaining the longitudinal outcome (Y):
# GOVT, DUR, HOM, CAR, INCOME, EDUC
IFT_ScramblePhase_FT_MCSI_data_X_train <-
as.data.frame(cbind(IFT_ScramblePhase_FT_MCSI_data$GOVT, IFT_ScramblePhase_FT_MCSI_data$DUR,
IFT_ScramblePhase_FT_MCSI_data$HOM, IFT_ScramblePhase_FT_MCSI_data$CAR,
IFT_ScramblePhase_FT_MCSI_data$INCOME,
IFT_ScramblePhase_FT_MCSI_data$EDUC))[1:(39*12), ]
dim(IFT_ScramblePhase_FT_MCSI_data_X_train)
## [1] 468 6
# Outcome Variable to be modelled, as a timeseries
MCSI_Data_monthAvg_ts_Y_train_IFT_ScramblePhase <-
ts(IFT_ScramblePhase_FT_MCSI_data_Y_train, start=c(1978,1), end=c(2016, 12), frequency = 12)
# unchanged: MCSI_Data_monthAvg_ts_Y_test <- ts(IFT_ScramblePhase_FT_MCSI_data_Y_train, start=c(2017,1), end=c(2018, 12), frequency = 12)
# Find ARIMAX model
modArima_train_IFT_ScramblePhase <-
auto.arima(MCSI_Data_monthAvg_ts_Y_train_IFT_ScramblePhase,
xreg=as.matrix(IFT_ScramblePhase_FT_MCSI_data_X_train))
modArima_train_IFT_ScramblePhase
## Series: MCSI_Data_monthAvg_ts_Y_train_IFT_ScramblePhase
## Regression with ARIMA(1,0,2)(2,0,0)[12] errors
##
## Coefficients:
## Warning in sqrt(diag(x$var.coef)): NaNs produced
## ar1 ma1 ma2 sar1 sar2 intercept V1
## 0.9570 -0.0508 -0.1066 -0.0249 0.0320 62.736 -7.4009
## s.e. 0.0151 0.0511 0.0516 0.0483 0.0497 NaN 1.3351
## V2 V3 V4 V5 V6
## -6.2064 -2.1374 -3.2478 0 4.5433
## s.e. 1.0909 1.1224 1.1051 NaN 1.9499
##
## sigma^2 estimated as 6.45: log likelihood=-1095.3
## AIC=2216.6 AICc=2217.4 BIC=2270.53
# Regression with ARIMA(1,0,2)(2,0,0)[12] errors
# Coefficients:
# ar1 ma1 ma2 sar1 sar2 intercept V1 V2 V3 V4 V5 V6
#0.9570 -0.0508 -0.1066 -0.0249 0.0320 62.736 -7.4009 -6.2064 -2.1374 -3.2478 0 4.5433
#s.e. 0.0151 0.0511 0.0516 0.0483 0.0497 NaN 1.3351 1.0909 1.1224 1.1051 NaN 1.9499
# sigma^2 estimated as 6.45: log likelihood=-1095.3 AIC=2216.6 AICc=2217.4 BIC=2270.53
pred_arimax_1_1_3 <-
forecast(modArima_train_IFT_ScramblePhase,
xreg =as.matrix(IFT_ScramblePhase_FT_X_test))
## Warning in forecast.Arima(modArima_train_IFT_ScramblePhase, xreg =
## as.matrix(IFT_ScramblePhase_FT_X_test)): xreg contains different column
## names from the xreg used in training. Please check that the regressors are
## in the same order.
pred_arimax_1_1_3_2017_2018 <-
ts(pred_arimax_1_1_3$mean, frequency=12, start=c(2017,1), end=c(2018,12))
pred_arimax_1_1_3_2017_2018
## Jan Feb Mar Apr May Jun Jul
## 2017 68.95487 70.20348 70.58534 71.88092 72.60548 73.85421 73.43008
## 2018 74.13513 75.01577 74.08203 75.26787 73.70892 73.61762 74.36375
## Aug Sep Oct Nov Dec
## 2017 73.80472 73.40466 72.42727 73.05006 72.67080
## 2018 73.02887 74.04394 73.84603 73.17739 74.30894
#pred_arimax_1_1_3_2017_2018_pred <- predict(modArima_train_IFT_ScramblePhase, n.ahead = pred_length, newxreg = IFT_ScramblePhase_FT_X_test)$pred
######################################################################
# 6. Loaded-Phase reconstruction: Phase' = \pi/8 + Phase
# Invert back to spacetime the mag_FT_MCSI_data[ , i] signal using Loaded-phases
# define a pi-wrapper calculator to ensure phase-arithmetic stays within [-pi : pi)
pi_wrapper <- function (x) {
if(abs(x%%(2*pi)) <= pi) return(x%%(2*pi))
else if (-2*pi <= x%%(2*pi) & x%%(2*pi) < -pi) return (2*pi + x%%(2*pi))
else if (pi <= x%%(2*pi) & x%%(2*pi) < 2*pi) return (x%%(2*pi) - 2*pi)
else return (0)
}
loaded_phase_FT_MCSI_data <- Re(phase_FT_MCSI_data)
for (i in 1:dim(phase_FT_MCSI_data)[1]) {
for (j in 1:dim(phase_FT_MCSI_data)[2])
loaded_phase_FT_MCSI_data[i,j] <- pi_wrapper(pi/8 + Re(phase_FT_MCSI_data[i,j]))
}
# head(loaded_phase_FT_MCSI_data)
# Compare Loaded Phases to Native Phases
df.loadedPhase_FT_MCSI_data <- as.data.frame(loaded_phase_FT_MCSI_data)
df.loadedPhase_FT_MCSI_data %>% gather() %>% head()
## key value
## 1 V1 0.3926991
## 2 V1 -2.4528385
## 3 V1 -0.6816193
## 4 V1 2.1375102
## 5 V1 1.2256141
## 6 V1 2.0206829
colnames(df.loadedPhase_FT_MCSI_data) <- colnames(MCSI_data)
colnames(df.loadedPhase_FT_MCSI_data)[1] <- "ICS"
loadedPhaseDistributions <- gather(df.loadedPhase_FT_MCSI_data)
colnames(loadedPhaseDistributions) <- c("Feature", "Phase")
# map the value as our x variable, and use facet_wrap to separate by the key column:
ggplot(loadedPhaseDistributions, aes(Phase)) +
geom_histogram(aes(y=..density..), bins = 10) +
facet_wrap( ~Feature, scales = 'free_x') +
xlim(-pi, pi) +
theme(strip.text.x = element_text(size = 16, colour = "black", angle = 0))
for (i in 1:7) {
Real <- mag_FT_MCSI_data[ , i] * cos(loaded_phase_FT_MCSI_data[ , i])
Imaginary <- mag_FT_MCSI_data[ , i] * sin(loaded_phase_FT_MCSI_data[ , i])
IFT_LoadedPhase_FT_MCSI_data[ ,i] <-
Re(fft(Real+1i*Imaginary, inverse = T)/length(FT_MCSI_data[ , i]))
}
colnames(IFT_LoadedPhase_FT_MCSI_data) <- colnames(MCSI_data)
dim(IFT_LoadedPhase_FT_MCSI_data)
## [1] 468 7
dim(FT_MCSI_data)
## [1] 468 7
head(IFT_LoadedPhase_FT_MCSI_data)
## Y_train GOVT DUR HOM CAR INCOME EDUC
## 1 81.01240 3.037219 2.083673 2.381744 2.859224 14359.69 3.150231
## 2 83.42135 3.112394 2.443681 2.474077 2.961214 15044.04 3.202782
## 3 76.49093 3.313351 2.239820 2.341237 2.737009 15416.54 3.219355
## 4 78.50332 3.349560 2.217576 2.358080 2.761035 15051.15 3.222312
## 5 79.10811 3.257119 2.210519 2.416597 2.778585 15376.77 3.225474
## 6 72.99850 3.413609 2.170140 2.371105 2.731167 14800.22 3.205667
head(MCSI_data)
## Y_train GOVT DUR HOM CAR INCOME EDUC
## 1 87.68719 3.287462 2.255352 2.577982 3.094801 15542.81 3.409786
## 2 90.29462 3.368831 2.645022 2.677922 3.205195 16283.55 3.466667
## 3 82.79319 3.586345 2.424364 2.534137 2.962517 16686.75 3.484605
## 4 84.97138 3.625538 2.400287 2.552367 2.988522 16291.25 3.487805
## 5 85.62600 3.525480 2.392648 2.615706 3.007519 16643.69 3.491228
## 6 79.01301 3.694864 2.348943 2.566465 2.956193 16019.64 3.469789
#########################################################################
# 7. Perform ARIMAX modeling on IFT_LoadedPhase_FT_MCSI_data; report (p,d,q) params and quality metrics AIC/BIC
IFT_LoadedPhase_FT_MCSI_data_Y_train <- IFT_LoadedPhase_FT_MCSI_data$Y_train[1:(39*12)]
length(IFT_LoadedPhase_FT_MCSI_data_Y_train)/12
## [1] 39
# Y_test <- IFT_NilPhase_FT_MCSI_data$Y_train[(39*12+1):length(IFT_NilPhase_FT_MCSI_data$Y_train)]; length(Y_test)/12
# Training and Testing Data Covariates explaining the longitudinal outcome (Y):
# GOVT, DUR, HOM, CAR, INCOME, EDUC
IFT_LoadedPhase_FT_MCSI_data_X_train <-
as.data.frame(cbind(IFT_LoadedPhase_FT_MCSI_data$GOVT, IFT_LoadedPhase_FT_MCSI_data$DUR,
IFT_LoadedPhase_FT_MCSI_data$HOM, IFT_LoadedPhase_FT_MCSI_data$CAR,
IFT_LoadedPhase_FT_MCSI_data$INCOME,
IFT_LoadedPhase_FT_MCSI_data$EDUC))[1:(39*12),]
dim(IFT_LoadedPhase_FT_MCSI_data_X_train)
## [1] 468 6
# Outcome Variable to be modelled, as a timeseries
MCSI_Data_monthAvg_ts_Y_train_IFT_LoadedPhase <-
ts(IFT_LoadedPhase_FT_MCSI_data_Y_train, start=c(1978,1), end=c(2016, 12), frequency = 12)
# unchanged: MCSI_Data_monthAvg_ts_Y_test <- ts(IFT_LoadedPhase_FT_MCSI_data_Y_train, start=c(2017,1), end=c(2018, 12), frequency = 12)
# Find ARIMAX model
modArima_train_IFT_LoadedPhase <-
auto.arima(MCSI_Data_monthAvg_ts_Y_train_IFT_LoadedPhase,
xreg=as.matrix(IFT_LoadedPhase_FT_MCSI_data_X_train))
modArima_train_IFT_LoadedPhase
## Series: MCSI_Data_monthAvg_ts_Y_train_IFT_LoadedPhase
## Regression with ARIMA(2,0,1)(1,0,1)[12] errors
##
## Coefficients:
## ar1 ar2 ma1 sar1 sma1 intercept V1
## 1.4447 -0.4737 -0.6944 0.5781 -0.5307 140.8610 -11.4417
## s.e. 0.1915 0.1719 0.1667 0.4193 0.4340 8.0166 1.1624
## V2 V3 V4 V5 V6
## -10.9797 -2.8305 -0.7844 0e+00 1.6079
## s.e. 1.0447 0.9301 1.0023 1e-04 1.8702
##
## sigma^2 estimated as 7.31: log likelihood=-1124.24
## AIC=2274.48 AICc=2275.28 BIC=2328.41
# Regression with ARIMA(2,0,1)(1,0,1)[12] errors
# Coefficients:
# ar1 ar2 ma1 sar1 sma1 intercept V1 V2 V3 V4 V5 V6
# 1.4447 -0.4737 -0.6944 0.5781 -0.5307 140.8610 -11.4417 -10.9797 -2.8305 -0.7844 0e+00 1.6079
# s.e. 0.1915 0.1719 0.1667 0.4193 0.4340 8.0166 1.1624 1.0447 0.9301 1.0023 1e-04 1.8702
# sigma^2 estimated as 7.31: log likelihood=-1124.24 AIC=2274.48 AICc=2275.28 BIC=2328.41
# pred_arimax_2_0_1_Loaded <- forecast(modArima_train_IFT_LoadedPhase, xreg = X_test)
#pred_arimax_2_0_1_Loaded_2017_2018 <-
# ts(pred_arimax_2_0_1_Loaded$fitted, frequency=12, start=c(2017,1), end=c(2018,12))
pred_arimax_2_0_1_Loaded_2017_2018 <-
predict(modArima_train_IFT_LoadedPhase, n.ahead = pred_length, newxreg = X_test)$pred
# 8. Final VIZ: Zoom in on a smaller interval
# windows(width=10, height=8)
plot(forecast(modArima_train, xreg = as.matrix(X_test)), # Spacetime ARIMA forecast
include=48, lwd=4, xlab="Time", ylab="ICS = INDEX OF CONSUMER SENTIMENT (US)", ylim=c(65,110),
main = "Spacetime vs. Spacekime Analytics: (2017-2018) US ICS Forecasting\n
based on fitting ARIMAX Models on raw & kime-transformed 1978-2016 data")
lines(MCSI_Data_monthAvg_ts_Y_test, col = "red", lwd = 4, lty=1) # Observed Y_Test timeseries
lines(pred_arimax_1_0_1_2017_2018, col = "green", lwd = 4, lty=1) # Nil Phase reconstr
lines(pred_arimax_1_1_3_2017_2018, col = "purple", lwd = 4, lty=1) # Scramble Phase recon
lines(pred_arimax_2_0_1_Loaded_2017_2018, col = "brown", lwd = 4, lty=1) # Loaded Phase recon
lines(26+pred_arimax_1_1_3_2017_2018, col = "orange", lwd = 4, lty=1) # Offset Scramble Phase recon
legend("topleft", bty="n", legend=c("Training ICS Data (2013-2016)",
"ARIMAX(2,0,1)-model ICS Forecasting (2017-2018)",
"MCSI-reported Official Index (2017-2018)",
"ARIMAX(1,0,0) Nil-Phase recon model ICS Forecasting (2017-2018)",
"ARIMAX(1,0,2) Scrambled-Phase recon model ICS Forecasting (2017-2018)",
"ARIMAX(2,0,1) Loaded-Phase recon model ICS Forecasting (2017-2018)",
"ARIMAX(1,0,2) Offset Scrambled-Phase recon model ICS Forecasting (2017-2018)"),
col=c("black", "blue", "red", "green", "purple", "brown", "orange"),
lty=c(1,1,1,1,1,1,1), lwd=c(4,4,4,4,4,4,4), cex=1.5, x.intersp=1.5, y.intersp=0.4)
text(2015.4, 66, expression(atop(paste("Training Region (1978-2016)"),
paste(Model(ICS) %->% "ARIMAX(p, q, r) ; ",
XReg %==% X[i], " ", i %in% {1 : 6}))), cex=1.5)
text(2018, 66, expression(atop(paste("Validation Region (2017-2018)"),
paste(hat(ICS) %<-% "ARIMAX( . , . , . ) ; ",
XReg %==% X[i], " ", i %in% {1 : 6}))), cex=1.5)
Interactive plot here
nl1<-ADDline(TS = MCSI_Data_monthAvg_ts_Y_test,linetype = "TS",Name = "MCSI-reported Official Index (2017-2018)")
nl2<-ADDline(TS = pred_arimax_1_0_1_2017_2018,linetype = "TS",Name = "ARIMAX(1,0,0) Nil-Phase\nrecon model ICS Forecasting (2017-2018)")
nl3<-ADDline(TS = pred_arimax_1_1_3_2017_2018,linetype = "TS",Name = "ARIMAX(1,0,2) Scrambled-Phase\nrecon model ICS Forecasting (2017-2018)")
nl4<-ADDline(TS = pred_arimax_2_0_1_Loaded_2017_2018,linetype = "TS",Name = "ARIMAX(2,0,1) Loaded-Phase\nrecon model ICS Forecasting (2017-2018)")
nl5<-ADDline(TS = 26+pred_arimax_1_1_3_2017_2018,linetype = "TS",Name = "ARIMAX(1,0,2) Offset Scrambled-Phase\nrecon model ICS Forecasting (2017-2018)")
TSplot(48,modArima_train,as.matrix(X_test),TITLE = "(2017-2018) Forecasting US ICS based on fitting ARIMAX(2,0,1) Model on 1978-2016 data using regression effect estimates of GOVT, DUR, HOM, CAR, INCOME & EDUC",Ylab ="ICS = INDEX OF CONSUMER SENTIMENT (US)",Xlab ="Time",title_size = 8,ts_original = "Training ICS Data (2013-2016)\nTraining Region (1978-2016)\n Model(ICS) -> ARIMAX(p,q,r)\n XReg={X_i}, 1<=i<=6",ts_forecast = "ARIMAX-model ICS Forecasting (2017-2018)\nValidation Region (2017-2018)\n hat(ICS) <- ARIMAX(2,0,1)\n XReg={X_i}, 1<=i<=6")%>%
add_lines(x=nl1$X,text=nl1$TEXT,y=nl1$Y,name=nl1$NAME,line=list(color="grey"))%>%
add_lines(x=nl2$X,text=nl2$TEXT,y=nl2$Y,name=nl2$NAME,line=list(color="blue"))%>%
add_lines(x=nl3$X,text=nl3$TEXT,y=nl3$Y,name=nl3$NAME,line=list(color="purple"))%>%
add_lines(x=nl4$X,text=nl4$TEXT,y=nl4$Y,name=nl4$NAME,line=list(color="brown"))%>%
add_lines(x=nl5$X,text=nl5$TEXT,y=nl5$Y,name=nl5$NAME,line=list(color="orange"))
## Warning in forecast.Arima(ARIMAmodel, xreg = XREG): xreg contains different
## column names from the xreg used in training. Please check that the
## regressors are in the same order.
## Warning in forecast.Arima(modArima_train, xreg = as.matrix(X_test)): xreg
## contains different column names from the xreg used in training. Please
## check that the regressors are in the same order.
## png
## 2
#### Full 41-year plot
plot(forecast(modArima_train, xreg = as.matrix(X_test)), # include=48, # Spacetime ARIMA forecast
lwd=4, xlab="Time", ylab="ICS = INDEX OF CONSUMER SENTIMENT (US)", ylim=c(55,125),
main = "Spacetime vs. Spacekime Analytics: (2017-2018) US ICS Forecasting\n
based on fitting ARIMAX Models on raw & kime-transformed 1978-2016 data")
lines(MCSI_Data_monthAvg_ts_Y_test, col = "red", lwd = 4, lty=1) # Observed Y_Test timeseries
lines(pred_arimax_1_0_1_2017_2018, col = "green", lwd = 4, lty=1) # Nil Phase reconstr
lines(pred_arimax_1_1_3_2017_2018, col = "purple", lwd = 4, lty=1) # Scramble Phase recon
lines(pred_arimax_2_0_1_Loaded_2017_2018, col = "brown", lwd = 4, lty=1) # Loaded Phase recon
lines(26+pred_arimax_1_1_3_2017_2018, col = "orange", lwd = 4, lty=1) # Offset Scramble Phase recon
legend("topleft", bty="n", legend=c("Training ICS Data (2013-2016)",
"ARIMAX(2,0,1)-model ICS Forecasting (2017-2018)",
"MCSI-reported Official Index (2017-2018)",
"ARIMAX(1,0,0) Nil-Phase recon model ICS Forecasting (2017-2018)",
"ARIMAX(1,0,2) Scrambled-Phase recon model ICS Forecasting (2017-2018)",
"ARIMAX(2,0,1) Loaded-Phase recon model ICS Forecasting (2017-2018)",
"ARIMAX(1,0,2) Offset Scrambled-Phase recon model ICS Forecasting (2017-2018)"),
col=c("black", "blue", "red", "green", "purple", "brown", "orange"),
lty=c(1,1,1,1,1,1,1), lwd=c(4,4,4,4,4,4,4), cex=1.2, x.intersp=1.5, y.intersp=0.4)
Interactive plot here
nl1<-ADDline(TS = MCSI_Data_monthAvg_ts_Y_test,linetype = "TS",Name = "MCSI-reported Official Index (2017-2018)")
nl2<-ADDline(TS = pred_arimax_1_0_1_2017_2018,linetype = "TS",Name = "ARIMAX(1,0,0) Nil-Phase\nrecon model ICS Forecasting (2017-2018)")
nl3<-ADDline(TS = pred_arimax_1_1_3_2017_2018,linetype = "TS",Name = "ARIMAX(1,0,2) Scrambled-Phase\nrecon model ICS Forecasting (2017-2018)")
nl4<-ADDline(TS = pred_arimax_2_0_1_Loaded_2017_2018,linetype = "TS",Name = "ARIMAX(2,0,1) Loaded-Phase\nrecon model ICS Forecasting (2017-2018)")
nl5<-ADDline(TS = 26+pred_arimax_1_1_3_2017_2018,linetype = "TS",Name = "ARIMAX(1,0,2) Offset Scrambled-Phase\nrecon model ICS Forecasting (2017-2018)")
TSplot("all",modArima_train,as.matrix(X_test),TITLE = "(2017-2018) Forecasting US ICS based on fitting ARIMAX(2,0,1) Model on 1978-2016 data using regression effect estimates of GOVT, DUR, HOM, CAR, INCOME & EDUC",Ylab ="ICS = INDEX OF CONSUMER SENTIMENT (US)",Xlab ="Time",title_size = 8,ts_original = "Training ICS Data (2013-2016)\nTraining Region (1978-2016)\n Model(ICS) -> ARIMAX(p,q,r)\n XReg={X_i}, 1<=i<=6",ts_forecast = "ARIMAX-model ICS Forecasting (2017-2018)\nValidation Region (2017-2018)\n hat(ICS) <- ARIMAX(2,0,1)\n XReg={X_i}, 1<=i<=6")%>%
add_lines(x=nl1$X,text=nl1$TEXT,y=nl1$Y,name=nl1$NAME,line=list(color="grey"))%>%
add_lines(x=nl2$X,text=nl2$TEXT,y=nl2$Y,name=nl2$NAME,line=list(color="blue"))%>%
add_lines(x=nl3$X,text=nl3$TEXT,y=nl3$Y,name=nl3$NAME,line=list(color="purple"))%>%
add_lines(x=nl4$X,text=nl4$TEXT,y=nl4$Y,name=nl4$NAME,line=list(color="brown"))%>%
add_lines(x=nl5$X,text=nl5$TEXT,y=nl5$Y,name=nl5$NAME,line=list(color="orange"))
## Warning in forecast.Arima(ARIMAmodel, xreg = XREG): xreg contains different
## column names from the xreg used in training. Please check that the
## regressors are in the same order.
## Warning in forecast.Arima(modArima_train, xreg = as.matrix(X_test)): xreg
## contains different column names from the xreg used in training. Please
## check that the regressors are in the same order.
## png
## 2
# 1D timeseries FFT SHIFT
# A special case of imlementaiton of `fftshift` for 1D arrays
#' This function is useful for visualizing the 1D Fourier transform with the zero-frequency
#' component in the middle of the spectrum.
#'
#' @param img_ff A Fourier transform of a 1D signal.
#' @return A properly shifted FT of the 1D array.
#'
fftshift1D <- function(img_ff) {
rows <- length(img_ff)
rows_half <- ceiling(rows/2)
return(append(img_ff[(rows_half+1):rows], img_ff[1:rows_half]))
}
# Noise-generating function to generate some synthetic predictors (X_new)
# and test the ARIMA(2,0,1)(1,0,1)[12] model
#' This function is useful for generating noises for a given dataset
#'
#' @param data A data in the form of vector or of a matrix
#' @return A updated version of data where noise is also included
#'
noiseGen <- function(data) {
if (is.vector(data))
return( data + rnorm(length(data), mean(data), sd(data)))
else
return(data + matrix(rnorm(dim(data)[1] * dim(data)[2], 0, 1), dim(data)[1]))
}
# define a pi-wrapper calculator to ensure phase-arithmetic stays within [-pi : pi)
#' This function makes sure that phase-arithmetic is between [-pi, pi]
#'
#' @param x A numeric value
#' @return A number that satifies one of the 4 cases specified below
#'
pi_wrapper <- function (x) {
if(abs(x%%(2*pi)) <= pi) return(x%%(2*pi))
else if (-2*pi <= x%%(2*pi) & x%%(2*pi) < -pi) return (2*pi + x%%(2*pi))
else if (pi <= x%%(2*pi) & x%%(2*pi) < 2*pi) return (x%%(2*pi) - 2*pi)
else return (0)
}