SOCR ≫ TCIU Website ≫ TCIU GitHub ≫

1 Data Preprocessing

Loading all required packages.

library(dplyr)
library(arm)
library(tidyr)
library(ggplot2)
library(ggrepel)
library(plot3D)
library(scatterplot3d)
library(plotly)
library(fastDummies)
library(forecast)

# Directory that holds the Chapter 6 materials. It becomes the working
# directory, so relative file names used later resolve against it.
tciu_chapter6_dir <- "C:/Users/dinov/Desktop/Ivo.dir/Eclipse_Projects/HTML5_WebSite/TCIU/Chapter6/"
setwd(tciu_chapter6_dir)

# Restore the objects stored in the previously computed workspace file.
load(paste0(tciu_chapter6_dir, "EuEco_Yuming.RData"))

Load and preprocess the EU economic data.

# Read the long-format EU table and drop its 5th column, leaving the four
# columns renamed below.
eu <- read.csv(
  "Master_Aggregate_EU_Econ_Data_11_29_2018_TimeTransform.csv",
  stringsAsFactors = FALSE
)[, -5]
colnames(eu) <- c("country", "time", "feature", "value")

# Drop the EU-25 aggregate, rows whose country is the literal "D1_Country"
# (presumably stray header rows -- TODO confirm), and rows with no country.
eu <- filter(
  eu,
  !country %in% c("European Union (25 countries)", "D1_Country", "")
)

# Strip ":" and "," from the raw values and convert the whole column at once.
# Entries that are not numeric after stripping become NA (as.numeric warns).
# The conversion is vectorized: no per-row closure calls, and it stays a
# numeric vector even when the table has zero rows.
eu$value <- as.numeric(gsub(":|,", "", eu$value))

# Rows without a feature label cannot be placed in any later reshaping.
eu <- filter(eu, feature != "")
dim(eu)
## [1] 667368      4

2 Reformat the data into a 3D array (country \(\times\) time \(\times\) feature)

# Build a country x time x feature array of values. The dimnames are the
# sorted unique labels of each key column.
unq_country <- sort(unique(eu$country))
unq_time <- sort(unique(eu$time))
unq_fea <- sort(unique(eu$feature))
num_country <- length(unq_country)
num_time <- length(unq_time)
num_fea <- length(unq_fea)
eu <- arrange(eu, country, time, feature)
eu_3d_array <- array(
  NA,
  dim = c(num_country, num_time, num_fea),
  dimnames = list(unq_country, unq_time, unq_fea)
)

# Place every row by its own (country, time, feature) labels rather than by
# its position in the sorted table. Positional arithmetic is only valid when
# the table holds exactly one row per cell; a missing or repeated observation
# shifts every later value into the wrong cell.
cell_index <- cbind(
  match(eu$country, unq_country),
  match(eu$time, unq_time),
  match(eu$feature, unq_fea)
)
# Rows with an NA key have no cell to go to (sort() drops NA labels), and NA
# subscripts are not allowed in assignment, so skip them.
addressable <- stats::complete.cases(cell_index)
# When several rows map to the same cell, the last one in sorted order wins.
eu_3d_array[cell_index[addressable, , drop = FALSE]] <- eu$value[addressable]

# Preview: first 10 countries x first 10 time points of the first feature.
eu_3d_array[1:10, 1:10, 1]
##                 2000Q1 2000Q2 2000Q3 2000Q4  2001Q1    2001Q2 2001Q3 2001Q4
## Austria             NA     NA  165.6     NA      NA        NA    0.0 1306.2
## Belgium        42016.2 1398.5 1454.3  182.9  9320.7  2220.900  358.5 5000.2
## Bulgaria        1414.7    2.5    3.3   94.7 16174.0 -7002.800 8708.0  888.4
## Croatia             NA     NA 1326.2     NA    12.1   474.800   15.0     NA
## Cyprus             7.7 1061.6    0.8    0.8   419.3     8.200    5.7  403.1
## Czech Republic  1989.8  191.1     NA 9265.2      NA        NA  386.8     NA
## Denmark             NA     NA  115.7    3.2     6.1    78.854     NA    5.6
## Estonia           -4.0   61.7    7.7   -3.8      NA    10.800   -6.0   85.2
## Finland          525.8  148.2   -2.3     NA     6.9     5.000 2811.7   28.3
## France              NA     NA 1547.7     NA      NA  1270.600   16.4     NA
##                2002Q1 2002Q2
## Austria            NA  762.0
## Belgium         894.0  998.3
## Bulgaria       1782.6 3446.1
## Croatia         801.7  274.1
## Cyprus            9.5     NA
## Czech Republic     NA     NA
## Denmark            NA    4.8
## Estonia          -2.7    0.2
## Finland          -6.2 1444.2
## France          791.1    4.2

3 3D Data visualization

# Prepare a plotting copy of the long table and draw one 3D line per country.
eu <- arrange(eu, time, feature, country)

# Create the plotting table explicitly so this chunk does not depend on an
# object left over from a previously loaded workspace. The call is
# namespace-qualified so it resolves to dplyr even when another attached
# package (e.g. MASS, which arm is expected to attach) masks select().
eu_visualization <- dplyr::select(eu, time, feature, country, value)

# Turn labels such as "2000Q1" into the number 2000.1 for the time axis.
eu_visualization$time <- as.numeric(
  gsub("Q", ".", eu_visualization$time, fixed = TRUE)
)
eu_visualization$country <- as.factor(eu_visualization$country)
eu_visualization$value <- as.numeric(eu_visualization$value)
# Keep only the first 20 characters of each feature name (character vector).
eu_visualization$feature <- substr(
  as.character(eu_visualization$feature), 1, 20
)

plot_ly(eu_visualization, x = ~time, y = ~country, z = ~value, color = ~feature,split = ~ country,type = 'scatter3d', mode = 'lines')

4 Time series format

#Find the duplicates
eu_time_series <- na.omit(eu)
allFeatures = as.character(unique(eu_time_series$feature))
allTime = unique(eu_time_series$time)
allCountry = as.character(unique(eu_time_series$country))
allCombination = length(allFeatures)*length(allTime)*length(allCountry)
dup = c()
for (i in 1:length(allFeatures)){
  for (j in 1:length(allCountry)){
    for (k in 1:length(allTime)){
      if (nrow(filter(eu_time_series,country == allCountry[j] & 
                      feature == allFeatures[i] & time == allTime[k]))>1){
        dup = c(dup,as.character(allFeatures[i]))
        break
      }
    }
    break
  }
}
dup # These features have mutiple observations at the same time point
## [1] "Employment by sex, age and educational attainment level, Total, From 15 to 64 years, All ISCED 2011 levels"
## [2] "Labor cost for LCI excluding bonuses"                                                                      
## [3] "Labor costs other than wages or salaries"                                                                  
## [4] "Labour cost for LCI (compensation of employees plus taxes minus subsidies)"                                
## [5] "Labour cost for LCI excluding bonuses"                                                                     
## [6] "Labour costs other than wages and salaries"                                                                
## [7] "Wages and salaries (total)"

Remove duplicates.

# Drop every feature flagged in `dup` (features with several observations for
# the same country and time point). Filtering on `dup` itself, rather than on
# a hand-copied list of names, keeps this step in sync with the detection
# step whenever the data changes. `eu_time_series` comes from na.omit(), so
# there are no NA feature labels to consider here.
removeDup <- filter(eu_time_series, !(feature %in% dup))

# Reshape to wide format: one row per country/time pair, one column per
# remaining feature. spread() requires each (country, time, feature) key to
# be unique, which is why the duplicated features are removed first.
time_series <- spread(removeDup, feature, value)
dim(time_series)
## [1] 2232  197

Additional details, examples, and modeling strategies for interpreting the EU Economics data are available in TCIU Chapter 6 (Applications).

SOCR Resource Visitor number Web Analytics SOCR Email