SOCR ≫ | TCIU Website ≫ | TCIU GitHub ≫ |
library(dplyr)
library(plotly)
library(pracma)
library(ggplot2)
require(magrittr)
library(gridExtra)
#super sample the dataset
cleardata <- function(mat) {
for (i in 1:ncol(mat)) {
mat[is.na(mat[,i]),i]<-mean(mat[,i],na.rm = T) + rnorm(sum(is.na(mat[,i])),sd = sd(mat[,i],na.rm = T))
}
return(mat)
}
# 1. Find all "Common" freatures (highly-observed and congruent Econ indicators)
countryNames <- unique(time_series$country); length(countryNames); # countryNames
# initialize 3D array of DF's that will store the data for each of hte countries into a 2D frame
countryData <- list() # countryData[[listID==Country]][1-time-72, 1-feature-197]
for (i in 1:length(countryNames)) {
countryData[[i]] <- filter(time_series, country == countryNames[i])
}
# Check countryData[[2]][2, 3] == Belgium[2,3]
list_of_dfs_CommonFeatures <- list() # list of data for supersampled countries 360 * 197
# 2. General function that ensures the XReg predictors for ALL 31 EU countries are homologous
completeHomologousX_features <- function (list_of_dfs) {
# delete features that are missing at all time points
for (j in 1:length(list_of_dfs)) {
print(paste0("Pre-processing Country: ...", countryNames[j], "... "))
data = list_of_dfs[[j]]
data = data[ , colSums(is.na(data)) != nrow(data)]
data = select(data, -time, -country)
DataMatrix = as.matrix(data)
DataMatrix = cleardata(DataMatrix)
DataMatrix = DataMatrix[ , colSums(is.na(DataMatrix)) == 0] # remove features with only 1 value
DataMatrix = DataMatrix[ , colSums(DataMatrix) != 0] # remove features with all values=0
# Supersample 72 --*5--> 360 timepoints
#DataMatrix = splinecreate(DataMatrix)
DataSuperSample = as.data.frame(DataMatrix) # super-Sample the data
# remove some of features
#DataSuperSample = DataSuperSample[, -c(50:80)]; dim(X) # 360 167
# ensure full-rank design matrix, DataSuperSample
DataSuperSample <-
DataSuperSample[ , qr(DataSuperSample)$pivot[seq_len(qr(DataSuperSample)$rank)]]
print(paste0("dim()=(", dim(DataSuperSample)[1], ",", dim(DataSuperSample)[2], ") ..."))
# update the current DF/Country
list_of_dfs_CommonFeatures[[j]] <- DataSuperSample
}
# Identify All Xreg features that are homologous (same feature columns) across All 31 countries
# Identify Common Columns (freatures)
comCol <- Reduce(intersect, lapply(list_of_dfs_CommonFeatures, colnames))
list_of_dfs_CommonFeatures <- lapply(list_of_dfs_CommonFeatures, function(x) x[comCol])
for (j in 1:length(list_of_dfs_CommonFeatures)) {
list_of_dfs_CommonFeatures[[j]] <- subset(list_of_dfs_CommonFeatures[[j]], select = comCol)
print(paste0("dim(", countryNames[j], ")=(", dim(list_of_dfs_CommonFeatures[[j]])[1],
",", dim(list_of_dfs_CommonFeatures[[j]])[2], ")!")) # 72 * 197
}
return(list_of_dfs_CommonFeatures)
}
# Test completeHomologousX_features: dim(AllCountries)=(360,42)!
list_of_dfs_CommonFeatures <- completeHomologousX_features(countryData);
length(list_of_dfs_CommonFeatures); dim(list_of_dfs_CommonFeatures[[1]]) # Austria data matrix 360*42
x2 = seq(from = 1, to = 11, length.out = 50)
# drop the first row to avoid real part value of 0
y2 = seq(from = -5, to = 5, length.out = 50)
# drop the first column to avoid imaginary part value of 0
XY = expand.grid(X=x2, Y=y2)
complex_xy = mapply(complex, real=XY$X,imaginary=XY$Y)
X<-1:72
time_points <- seq(0+0.01, 2*pi, length.out = 72)
# create the LT
NuLT = function(datax, datay, inputz, k = 3, fitwarning = FALSE, mirror = FALSE, range = 2*pi) {
datax = as.numeric(datax)
datay = as.numeric(datay)
n = length(datax)
x1 = n/(n+0.5)*((datax-min(datax))/(max(datax)-min(datax)))*range
if(mirror){
x1 = c(x1,rev(2*range - x1))/2
n = 2*n
datay = c(datay, rev(datay))
#plot(x1, datay)
}
#generate the coefficients in indefinite integral of t^n*exp(-zt)
coef = 1;
coefm = as.matrix(coef)
for(i in 1:k){
coefm = cbind(coefm,0)
coef = c(coef*i,1)
coefm = rbind(coefm,coef)
}
# these coefficients ordered by ^0, ^1, ^2, ... in column format
# compute 1, z, z^2...,z^k
zz = cbind(1,inputz)
zt = inputz
for (i in 2:k){
zt = zt*inputz
zz = cbind(zz,zt)
}
zd = zt*inputz
# compute 1, x, x^2...,x^k
tx = x1;
xm = cbind(1,x1)
for (i in 2:k){
tx = tx*x1
xm = cbind(xm,tx)
}
# sum over intervals
result = 0*inputz
ii = 1
while(ii+k<=n)
{
A = xm[seq(ii,ii+k),c(0:k+1)]
b = datay[seq(ii,ii+k)]
# polyfit might be faster when using polynomial basis, while matrix inverse, `solve()`,
# is the more general approach that works for any function basis
polyc = as.numeric(solve(A,b))
#ordered by ^0, ^1, ^2, ... in column format
# Enter a new function variable qualityCheck=FALSE
# check fit quality; this step can be skipped for speed/efficiency
# if (qualityCheck) { .... }
if (fitwarning){
xx = seq(A[1,2],A[k+1,2],length.out = 100);
yy = polyval(rev(polyc),xx)
if(max(abs(yy-mean(b)))>2*max(abs(b-mean(b)))){
print(c("Warning: Poor Fit at ",ii,", Largest Deviation is",max(abs(yy-mean(b)))))
print(c("Spline Polynomial is", polyc),3);
#print(c(polyval(rev(polyc),A[,2]),b))
plot(xx, yy, main="Polynomial fit", ylab="", type="l", col="blue")
lines(A[,2],b, col="red")
legend("topleft",c("fit","data"),fill=c("blue","red"))
print(" ")
}
}
# Use vector/matrix operations to avoid looping,
# some of the expressions look weird
# May need to actually compare the efficiency/speed of
# vector based vs. standard numeric calculations
m1 = t(t(polyc*coefm)*A[1,])
m11 = as.numeric(tapply(m1, col(m1)-row(m1), sum))[0:k+1]
m2 = t(t(polyc*coefm)*A[k+1,])
m22 = as.numeric(tapply(m2, col(m2)-row(m2), sum))[0:k+1]
intgl = (exp(-inputz*A[1,2])*colSums(t(zz)*m11)-
exp(-inputz*A[k+1,2])*colSums(t(zz)*m22))/zd
result = result+intgl
ii=ii+k
}
# Computations over the last interval
if(ii<n){
nk = n-ii;
A = xm[seq(ii,ii+nk),c(0:nk+1)]
b = datay[seq(ii,ii+nk)]
nc = as.numeric(solve(A,b))
nc = c(nc,seq(0,0,length.out = k-nk))
A = xm[seq(ii,ii+nk),]
m1 = t(t(nc*coefm)*A[1,])
m11 = as.numeric(tapply(m1, col(m1)-row(m1), sum))[0:k+1]
m2 = t(t(nc*coefm)*A[nk+1,])
m22 = as.numeric(tapply(m2, col(m2)-row(m2), sum))[0:k+1]
# cc = colSums(coefm*polyc)
intgl = (exp(-inputz*A[1,2])*colSums(t(zz)*m11)-
exp(-inputz*A[nk+1,2])*colSums(t(zz)*m22))/zd
result = result+intgl
}
#offset = 0.05*pi
#result = result + datay[n]*(exp(-2*pi*inputz)-exp(-(2*pi+offset)*inputz))/inputz
return(result)
}
magnitude_feature<-lapply(1:33,function(i) Mod(tensor_all[,i,,]))
phase_feature<-lapply(1:33,function(i) atan2(Im(tensor_all[,i,,]), Re(tensor_all[,i,,])))
#xy2<-expand.grid(1:20,1:20)
colorscale = cbind(seq(0, 1, by=1/(length(x2) - 1)), rainbow(length(x2)))
commonefeature<-colnames(list_of_dfs_CommonFeatures[[31]])
p_feature<- plot_ly(hoverinfo="none", showscale = FALSE) %>% layout(title=commonefeature[31])
for (j in 1:5)
{
for (i in 1:6){
xx2<-1:50+50*(j-1)
yy2<-1:50+50*(i-1)
p_feature <- p_feature %>%
add_trace(x=xx2,y=yy2, z =magnitude_feature[[31]][j+(i-1)*5,,],
surfacecolor=phase_feature[[31]][j+(i-1)*5,,], colorscale=colorscale, #Phase-based color
type = 'surface',name=substr(countryNames[j+(i-1)*5],0,50),opacity=0.7,showlegend=TRUE)
}
}