SOCR ≫ | DSPA ≫ | Topics ≫ |
#Put all packages here
library(doParallel)
registerDoParallel(10, cores = detectCores() -2)
library(glmnet)
library(e1071)
library("randomForest")
library(ada)
library(adabag)
library(caret)
library(kernlab)
library(cluster)
library(ipred)
library(ggplot2)
library("factoextra")
library("FactoMineR")
Use hierarchical, k-means and spectral clustering to generate derived-computed phenotypes of countries. Do these derived lables relate (correspond to) the overall (OA) country ranking?
load("Fig5.23_24.RData")
# View(aggregate_arima_vector_country_ranking_df)
dim(aggregate_arima_vector_country_ranking_df)
## [1] 31 387
# 31(countries) 387(fetaures)
# Features = country-index + 386 features (378 time-series derivatives + 8 meta-data features)
# To choose features we like to have based on lasso
chooselambda <- function(cvlasso, option, k=NULL) {
lambmat<-cbind(cvlasso$glmnet.fit$df,cvlasso$glmnet.fit$lambda)
result<-tapply(lambmat[,2],lambmat[,1],max)
kresult<-result[which(names(result)==as.factor(k))]
if(option==1) {return(result)}
else if (option==2) {return(kresult)}
else (return("Not a valid option"))
}
showfeatures <- function(object, lambda, k ,...) {
lam<-lambda[which(names(lambda)==as.factor(k))]
beta <- predict(object, s = lam, type = "coef")
if(is.list(beta)) {
out <- do.call("cbind", lapply(beta, function(x) x[,1]))
out <- as.data.frame(out)
s <- rowSums(out)
out <- out[which(s)!=0,,drop=FALSE]
} else {out<-data.frame(Overall = beta[,1])
out<-out[which(out!=0),,drop=FALSE]
}
out <- abs(out[rownames(out) != "(Intercept)",,drop = FALSE])
out
}
randchoose <- function(matr) {
leng<-nrow(matr)
se<-seq(1:leng)
sam<-sample(se,as.integer(leng*0.6))
return(sam)
}
MLcomp <- function(fitlas, cvlas, trn, test, option=1) {
allfeat<-as.numeric(names(chooselambda(cvlasso = cvlas, option = 1)))
allfeat<-allfeat[which(allfeat>4)]
trainlist<-as.list(NULL)
for (i in 1:length(allfeat)) {
trainlist[[i]]<-trn[,which(colnames(trn) %in%
c(row.names(showfeatures(fitlas, chooselambda(cvlas = cvlas,1), allfeat[i])), "Rank"))]
}
resultframe<-data.frame(origin=rep(NA,length(allfeat)))
rownames(resultframe)<-allfeat
resultframe$Decision_tree_bagging<-rep(NA,length(allfeat))
for (i in 1:length(allfeat)) {
set.seed(1234)
eubag<-ipred::bagging(Rank~.,data = trainlist[[i]],nbagg=100)
bagtest<-predict(eubag, eutest)
bagagg<-bagtest==eutest$Rank
accuracy<-prop.table(table(bagagg))[c("TRUE")]
resultframe$Decision_tree_bagging[i]<-accuracy
}
resultframe$Random_forest<-rep(NA,length(allfeat))
for (i in 1:length(allfeat)) {
set.seed(1234)
eurf<-randomForest(Rank~.,data=trainlist[[i]])
rftest<-predict(eurf,eutest)
rfagg<-rftest==eutest$Rank
accuracy<-prop.table(table(rfagg))[c("TRUE")]
resultframe$Random_forest[i]<-accuracy
}
resultframe$Decision_tree_adaboost<-rep(NA,length(allfeat))
for (i in 1:length(allfeat)) {
set.seed(1234)
enada<-ada(Rank~.,data = trainlist[[i]],iter=50)
adatest<-predict(enada,eutest)
adaagg<-adatest==eutest$Rank
accuracy<-prop.table(table(adaagg))[c("TRUE")]
resultframe$Decision_tree_adaboost[i]<-accuracy
}
resultframe$GLM<-rep(NA,length(allfeat))
for (i in 1:length(allfeat)) {
euglm<-glm(Rank~.,data = trainlist[[i]],family = "binomial")
glmtest<-predict(euglm,eutest)
glmtest<-ifelse(glmtest<0,0,1)
glmagg<-glmtest==eutest$Rank
accuracy<-prop.table(table(glmagg))[c("TRUE")]
resultframe$GLM[i]<-accuracy
}
resultframe$SVM_best_Gamma_Cost<-rep(NA,length(allfeat))
for (i in 1:length(allfeat)) {
set.seed(1234)
svmtune<-tune.svm(Rank~.,data = trainlist[[i]],gamma = 10^(-6:1),cost = 10^(-10:10))
svmed<-svm(Rank~.,data=trainlist[[i]],gamma=svmtune$best.parameters[1],cost=svmtune$best.parameters[2])
svmtest<-predict(svmed,eutest)
svmagg<-svmtest==eutest$Rank
accuracy<-prop.table(table(svmagg))[c("TRUE")]
resultframe$SVM_best_Gamma_Cost[i]<-accuracy
}
resultframe$origin<-NULL
if(option==1){return(resultframe)}
}
# resultXframe <- MLcomp(fitLASSO, cvLASSO, Xtrain, Xtest, 1)
MLcompX <- function(fitlas, cvlas, trn, test, option=1) {
allfeat<-as.numeric(names(chooselambda(cvlasso = cvlas, option = 1)))
allfeat<-allfeat[which(allfeat>4)]
trainlist<-as.list(NULL)
for (i in 1:length(allfeat)) {
trainlist[[i]]<-trn[,which(colnames(trn) %in%
c(row.names(showfeatures(fitlas, chooselambda(cvlas = cvlas,1), allfeat[i])), "Rank"))]
}
resultXframe<-data.frame(origin=rep(NA,length(allfeat)))
rownames(resultXframe)<-allfeat
resultXframe$Decision_tree_bagging<-rep(NA,length(allfeat))
for (i in 1:length(allfeat)) {
#ERROR HANDLING
possibleError <- tryCatch(
function () {
set.seed(1234)
Xbag<-ipred::bagging(Rank~ . ,data = trainlist[[i]], nbagg=100,
control=rpart.control(minsplit=2, cp=0.1, xval=10))
bagtest<-predict(Xbag, Xtest)
bagagg<-bagtest==Xtest$Rank
accuracy<-prop.table(table(bagagg))[c("TRUE")]
resultXframe$Decision_tree_bagging[i]<-accuracy
},
error=function(e) e
)
if(inherits(possibleError, "error")) next
# print(i)
}
resultXframe$Random_forest<-rep(NA,length(allfeat))
for (i in 1:length(allfeat)) {
set.seed(1234)
Xrf<-randomForest(Rank~.,data=trainlist[[i]])
rftest<-predict(Xrf,test)
rfagg<-rftest==test$Rank
accuracy<-prop.table(table(rfagg))[c("TRUE")]
resultXframe$Random_forest[i]<-accuracy
}
resultXframe$Decision_tree_adaboost<-rep(NA,length(allfeat))
for (i in 1:length(allfeat)) {
set.seed(1234)
Xada<-ada(Rank~.,data = trainlist[[i]],iter=50)
adatest<-predict(Xada,test)
adaagg<-adatest==test$Rank
accuracy<-prop.table(table(adaagg))[c("TRUE")]
resultXframe$Decision_tree_adaboost[i]<-accuracy
}
resultXframe$GLM<-rep(NA,length(allfeat))
for (i in 1:length(allfeat)) {
euglm<-glm(Rank~.,data = trainlist[[i]],family = "binomial")
glmtest<-predict(euglm,test)
glmtest<-ifelse(glmtest<0,0,1)
glmagg<-glmtest==test$Rank
accuracy<-prop.table(table(glmagg))[c("TRUE")]
resultXframe$GLM[i]<-accuracy
}
resultXframe$SVM_best_Gamma_Cost<-rep(NA,length(allfeat))
for (i in 1:length(allfeat)) {
set.seed(1234)
svmtune<-tune.svm(Rank~.,data = trainlist[[i]],gamma = 10^(-6:1),cost = 10^(-10:10))
svmed<-svm(Rank~.,data=trainlist[[i]],gamma=svmtune$best.parameters[1],cost=svmtune$best.parameters[2])
svmtest<-predict(svmed,test)
svmagg<-svmtest==test$Rank
accuracy<-prop.table(table(svmagg))[c("TRUE")]
resultXframe$SVM_best_Gamma_Cost[i]<-accuracy
}
resultXframe$origin<-NULL
if(option==1){return(resultXframe)}
}
eudata <- aggregate_arima_vector_country_ranking_df
colnames(eudata) <- c("country",colnames(eudata[,-1]))
eudata <- eudata[ , -ncol(eudata)]
Y<-aggregate_arima_vector_country_ranking_df$OA
# Compelte data 386 features (378 + 8)
X<-eudata[,-ncol(eudata)]; dim(X)
## [1] 31 385
# TS-derivative features only (378)
X378 <- X[, -c(379:386)]; dim(X378)
## [1] 31 378
countryinfo<-as.character(X[,1])
countryinfo[11]<-"Germany"
X<-X[,-1]
keeplist<-NULL
for (i in 1:ncol(X)) {
if(FALSE %in% (X[,i]==mean(X[,i]))) {keeplist<-c(keeplist,i)}
}
X<-X[,keeplist]; dim(X)
## [1] 31 290
# Reduced to 378 features
#countryinfo<-as.character(X378[,1])
#countryinfo[11]<-"Germany"
#X378<-X378[,-1]
#keeplist<-NULL
#for (i in 1:ncol(X378)) {
# if(FALSE %in% (X378[,i]==mean(X378[,i]))) {keeplist<-c(keeplist,i)}
#}
#X378<-X378[,keeplist]; dim(X378)
fitLASSO <- glmnet(as.matrix(X), Y, alpha = 1)
#cross-validation
cvLASSO = cv.glmnet(data.matrix(X), Y, alpha = 1, parallel=TRUE)
# fitLASSO <- glmnet(as.matrix(X378), Y, alpha = 1)
#library(doParallel)
#registerDoParallel(5)
#cross-validation
#cvLASSO = cv.glmnet(data.matrix(X378), Y, alpha = 1, parallel=TRUE)
Feature 5.23 A
eusample<-X
eusample$Rank<-as.factor(ifelse(Y<30, 1, 0))
set.seed(1234)
eutrain<-eusample[randchoose(eusample), ]
set.seed(1234)
eutest<-eusample[-randchoose(eusample), ]
eusample378 <- X378
eusample378$Rank <- as.factor(ifelse(Y<30, 1, 0))
set.seed(1234)
eutrain378 <- eusample378[randchoose(eusample378), ]
set.seed(1234)
eutest378 <- eusample378[-randchoose(eusample378), ]
resultframe <- MLcomp(fitLASSO, cvLASSO, eutrain, eutest, 1)
resultframe_386_ST <- resultframe
# View(resultframe_386_ST)
# resultframe_378_ST <- MLcomp(fitLASSO, cvLASSO, eutrain378, eutest378, 1)
# Display results
resultframe$features<-as.factor(as.numeric(rownames(resultframe)))
ppframe<-data.frame(NULL)
for (i in 1:5) {
FM <- data.frame(resultframe[,i], resultframe$features,
Methods<-rep(colnames(resultframe)[i], nrow(resultframe)))
ppframe<-rbind(ppframe, FM)
}
colnames(ppframe)<-c("Accuracy", "Features", "Methods")
ggplot(ppframe, aes(x=Features, y=Accuracy, colour=Methods,
group=Methods, shape=Methods))+
geom_line(position=position_dodge(0.2), lwd=2)+
ylim(0.2, 1.0) +
geom_point(size=5, position=position_dodge(0.2))+
theme(legend.position="top", legend.text=element_text(size=16))+
ggtitle("Spacetime (386 features): Compare ML Forecasting Results")+
theme(
axis.text=element_text(size=16),
plot.title = element_text(size=18, face="bold.italic"),
axis.title.x = element_text(size=14, face="bold"),
axis.title.y = element_text(size=14, face="bold"))
png("Figures/Fig5.23A.png",width = 1920,height = 1080,res = 100)
ggplot(ppframe, aes(x=Features, y=Accuracy, colour=Methods,
group=Methods, shape=Methods))+
geom_line(position=position_dodge(0.2), lwd=2)+
ylim(0.2, 1.0) +
geom_point(size=5, position=position_dodge(0.2))+
theme(legend.position="top", legend.text=element_text(size=16))+
ggtitle("Spacetime (386 features): Compare ML Forecasting Results")+
theme(
axis.text=element_text(size=16),
plot.title = element_text(size=18, face="bold.italic"),
axis.title.x = element_text(size=14, face="bold"),
axis.title.y = element_text(size=14, face="bold"))
dev.off()
## quartz_off_screen
## 2
Feature 5.23 B
# spacetime (ST) 378_ST
resultframe_378_ST$features<-as.factor(as.numeric(rownames(resultframe_378_ST)))
ppframe_378_ST<-data.frame(NULL)
for (i in 1:5) {
FM_378_ST <- data.frame(resultframe_378_ST[,i], resultframe_378_ST$features,
Methods<-rep(colnames(resultframe_378_ST)[i], nrow(resultframe_378_ST)))
ppframe_378_ST<-rbind(ppframe_378_ST, FM_378_ST)
}
colnames(ppframe_378_ST)<-c("Accuracy", "Features", "Methods")
ggplot(ppframe_378_ST, aes(x=Features, y=Accuracy, colour=Methods,
group=Methods, shape=Methods))+
geom_line(position=position_dodge(0.2), lwd=2)+
ylim(0.2, 1.0) +
geom_point(size=5, position=position_dodge(0.2))+
theme(legend.position="top", legend.text=element_text(size=16))+
ggtitle("Spacetime (386 features): Compare ML Forecasting Results")+
theme(
axis.text=element_text(size=16),
plot.title = element_text(size=18, face="bold.italic"),
axis.title.x = element_text(size=14, face="bold"),
axis.title.y = element_text(size=14, face="bold"))
png("Figures/Fig5.23B.png",width = 1920,height = 1080,res = 100)
ggplot(ppframe_378_ST, aes(x=Features, y=Accuracy, colour=Methods,
group=Methods, shape=Methods))+
geom_line(position=position_dodge(0.2), lwd=2)+
ylim(0.2, 1.0) +
geom_point(size=5, position=position_dodge(0.2))+
theme(legend.position="top", legend.text=element_text(size=16))+
ggtitle("Spacetime (386 features): Compare ML Forecasting Results")+
theme(
axis.text=element_text(size=16),
plot.title = element_text(size=18, face="bold.italic"),
axis.title.x = element_text(size=14, face="bold"),
axis.title.y = element_text(size=14, face="bold"))
dev.off()
## quartz_off_screen
## 2
Feature 5.23 C
showfeatures(fitLASSO, chooselambda(cvLASSO,1), 10)
## Overall
## Feature_1_ArimaVec_8 5.256360e-01
## Feature_16_ArimaVec_4 6.716879e-01
## Feature_19_ArimaVec_8 3.949426e-01
## Feature_22_ArimaVec_4 1.796214e+00
## Feature_25_ArimaVec_1 9.911928e-04
## Feature_25_ArimaVec_4 4.972196e-01
## Feature_25_ArimaVec_5 3.717699e-01
## Feature_27_ArimaVec_3 1.693775e-02
## Feature_35_ArimaVec_4 9.390697e-01
## IncomeGroup 1.252439e+01
feat_5 <- predict(fitLASSO, s = chooselambda(cvLASSO,2,10), newx = data.matrix(X))
df1 <- as.data.frame(rbind(as.numeric(feat_5),Y),
row.names = c("Predicted Rank","OA Rank"))
colnames(df1) <- countryNames
df1 # View(t(df1))
## Austria Belgium Bulgaria Croatia Cyprus Czech Republic
## Predicted Rank 21.48021 22.01051 32.74413 21.15846 34.84274 21.90391
## OA Rank 18.00000 19.00000 38.00000 28.00000 50.00000 25.00000
## Denmark Estonia Finland France Germany Greece
## Predicted Rank 17.78768 21.21233 15.60224 16.10767 14.64532 21.15785
## OA Rank 10.00000 32.00000 1.00000 16.00000 12.00000 26.00000
## Hungary Iceland Ireland Italy Latvia Lithuania
## Predicted Rank 24.47112 35.20295 21.07326 20.1141 33.74456 35.48394
## OA Rank 33.00000 36.00000 17.00000 23.0000 36.00000 34.00000
## Luxembourg Malta Netherlands Norway Poland Portugal
## Predicted Rank 19.53351 35.39727 16.31461 11.33358 33.08849 22.12424
## OA Rank 5.00000 50.00000 8.00000 6.00000 29.00000 26.00000
## Romania Slovakia Slovenia Spain Sweden Switzerland
## Predicted Rank 35.33505 20.73411 22.59695 19.70497 18.95008 14.89101
## OA Rank 39.00000 31.00000 24.00000 26.00000 3.00000 2.00000
## United Kingdom
## Predicted Rank 16.25316
## OA Rank 14.00000
# Clustering
cluster5 <- X[, which(colnames(X) %in%
row.names(showfeatures(fitLASSO, chooselambda(cvLASSO,1), 10)))]
rownames(cluster5) <- countryNames # countryinfo
#1. hierarchical clustering
scaled_cluster5 <- scale(cluster5)
##deal with NAN values
#scaled_country<-scaled_country[,which(is.nan(scaled_country[1,])==FALSE)]
dis_SC5 <- dist(scaled_cluster5)
H_clust_SC5 <- hclust(dis_SC5)
H_clust_SC5 <- eclust(scaled_cluster5, k=5, "hclust")
fviz_dend(H_clust_SC5, rect = TRUE, cex=0.5)
png("Figures/Fig5.23C.png",width = 1920,height = 1080,res = 100)
fviz_dend(H_clust_SC5, rect = TRUE, cex=0.5)
dev.off()
## quartz_off_screen
## 2
# fviz_dend(H_clust_SC5, lwd=2, rect = TRUE)
Feature 5.23 D
# ST 378
cluster5_378_ST <- X378[, which(colnames(X378) %in%
row.names(showfeatures(fitLASSO, chooselambda(cvLASSO,1), 10)))]
rownames(cluster5_378_ST) <- countryNames # countryinfo
#1. hierarchical clustering
scaled_cluster5_378_ST <- scale(cluster5_378_ST)
dis_SC5_378_ST <- dist(scaled_cluster5_378_ST)
H_clust_SC5_378_ST <- hclust(dis_SC5_378_ST)
H_clust_SC5_378_ST <- eclust(scaled_cluster5_378_ST, k=5, "hclust")
fviz_dend(H_clust_SC5_378_ST,rect = TRUE, cex=0.5)
png("Figures/Fig5.23D.png",width = 1920,height = 1080,res = 100)
fviz_dend(H_clust_SC5_378_ST,rect = TRUE, cex=0.5)
dev.off()
## quartz_off_screen
## 2
** 2.1 Lasso features selection**
** 2.2 Comparison of different ML algorithms of different feature numbers**
** 2.3 Clustering**
dim(IFT_SwappedPhase_FT_aggregate_arima_vector)
## [1] 31 387
# [1] 31 387
eudata_SwappedPhase <- IFT_SwappedPhase_FT_aggregate_arima_vector
colnames(eudata_SwappedPhase) <- c("country", colnames(eudata_SwappedPhase[,-1]))
eudata_SwappedPhase <- as.data.frame(eudata_SwappedPhase[ , -ncol(eudata_SwappedPhase)])
Y <- as.data.frame(IFT_SwappedPhase_FT_aggregate_arima_vector)$OA
# Compelte data 386 features (378 + 8)
X <- eudata_SwappedPhase
countryinfo<-as.character(X[,1])
countryinfo[11]<-"Germany"
X<-X[,-1]
keeplist<-NULL
for (i in 1:ncol(X)) {
if(FALSE %in% (X[,i]==mean(X[,i]))) {keeplist<-c(keeplist,i)}
}
X<-X[,keeplist]; dim(X) # 31 343
## [1] 31 343
# Reduced to 378 features
# TS-derivative features only (378)
# X378 <- X[, -c(379:386)]; dim(X378)
#countryinfo<-as.character(X378[,1])
#countryinfo[11]<-"Germany"
#X378<-X378[,-1]
#keeplist<-NULL
#for (i in 1:ncol(X378)) {
# if(FALSE %in% (X378[,i]==mean(X378[,i]))) {keeplist<-c(keeplist,i)}
#}
#X378<-X378[,keeplist]; dim(X378)
fitLASSO_X <- glmnet(as.matrix(X), Y, alpha = 1)
#cross-validation
cvLASSO_X = cv.glmnet(data.matrix(X), Y, alpha = 1, parallel=TRUE)
# fitLASSO_X <- glmnet(as.matrix(X378), Y, alpha = 1)
#library(doParallel)
#registerDoParallel(5)
#cross-validation
#cvLASSO_X = cv.glmnet(data.matrix(X378), Y, alpha = 1, parallel=TRUE)
Feature 5.24 A
#test training data setup
Xsample <- X
Xsample$Rank <- as.factor(ifelse(Y<30, 1, 0))
set.seed(1234)
Xtrain <- Xsample[randchoose(Xsample), ]
set.seed(1234)
Xtest <- Xsample[-randchoose(Xsample), ]
#Xsample378 <- X378
#Xsample378$Rank <- as.factor(ifelse(Y<30, 1, 0))
#set.seed(1234)
#Xtrain378 <- Xsample378[randchoose(Xsample378), ]
#set.seed(1234)
#Xtest378 <- Xsample378[-randchoose(Xsample378), ]
resultXframe <- MLcompX(fitLASSO_X, cvLASSO_X, Xtrain, Xtest, 1)
resultXframe_386_SK_Swapped <- resultXframe
# View(resultXframe_386_SK_Swapped)
# resultXframe_378_SK_Swapped <- MLcompX(fitLASSO_X, cvLASSO_X, Xtrain378, Xtest378, 1)
# Display results
resultXframe$features<-as.factor(as.numeric(rownames(resultXframe)))
ppframeX<-data.frame(NULL)
for (i in 1:5) {
FM <- data.frame(resultXframe[,i], resultXframe$features,
Methods<-rep(colnames(resultXframe)[i], nrow(resultXframe)))
ppframeX<-rbind(ppframeX, FM)
}
colnames(ppframeX)<-c("Accuracy", "Features", "Methods")
ggplot(ppframeX, aes(x=Features, y=Accuracy, colour=Methods,
group=Methods, shape=Methods))+
geom_line(position=position_dodge(0.2), lwd=2)+
ylim(0.2, 1.0) +
geom_point(size=5, position=position_dodge(0.2))+
theme(legend.position="top", legend.text=element_text(size=16))+
ggtitle("Spacekime Swapped-Phases (386 features): Compare ML Forecasting Results")+
theme(
axis.text=element_text(size=16),
plot.title = element_text(size=18, face="bold.italic"),
axis.title.x = element_text(size=14, face="bold"),
axis.title.y = element_text(size=14, face="bold"))
png("Figures/Fig5.24A.png",width = 1920,height = 1080,res = 100)
ggplot(ppframeX, aes(x=Features, y=Accuracy, colour=Methods,
group=Methods, shape=Methods))+
geom_line(position=position_dodge(0.2), lwd=2)+
ylim(0.2, 1.0) +
geom_point(size=5, position=position_dodge(0.2))+
theme(legend.position="top", legend.text=element_text(size=16))+
ggtitle("Spacekime Swapped-Phases (386 features): Compare ML Forecasting Results")+
theme(
axis.text=element_text(size=16),
plot.title = element_text(size=18, face="bold.italic"),
axis.title.x = element_text(size=14, face="bold"),
axis.title.y = element_text(size=14, face="bold"))
dev.off()
## quartz_off_screen
## 2
# spacetime (ST) 378_ST
resultframe_378_ST$features<-as.factor(as.numeric(rownames(resultframe_378_ST)))
ppframe_378_ST<-data.frame(NULL)
for (i in 1:5) {
FM_378_ST <- data.frame(resultframe_378_ST[,i], resultframe_378_ST$features,
Methods<-rep(colnames(resultframe_378_ST)[i], nrow(resultframe_378_ST)))
ppframe_378_ST<-rbind(ppframe_378_ST, FM_378_ST)
}
colnames(ppframe_378_ST)<-c("Accuracy", "Features", "Methods")
ggplot(ppframe_378_ST, aes(x=Features, y=Accuracy, colour=Methods,
group=Methods, shape=Methods))+
geom_line(position=position_dodge(0.2), lwd=2)+
ylim(0.2, 1.0) +
geom_point(size=5, position=position_dodge(0.2))+
theme(legend.position="top", legend.text=element_text(size=16))+
ggtitle("Spacetime (386 features): Compare ML Forecasting Results")+
theme(
axis.text=element_text(size=16),
plot.title = element_text(size=18, face="bold.italic"),
axis.title.x = element_text(size=14, face="bold"),
axis.title.y = element_text(size=14, face="bold"))
Feature 5.24 B
##################### for resultXframe_378_SK_Swapped
resultXframe_378_SK_Swapped$features<-as.factor(as.numeric(rownames(resultXframe_378_SK_Swapped)))
ppframeX<-data.frame(NULL)
for (i in 1:5) {
FM <- data.frame(resultXframe_378_SK_Swapped[, i], resultXframe_378_SK_Swapped$features,
Methods<-rep(colnames(resultXframe_378_SK_Swapped)[i], nrow(resultXframe_378_SK_Swapped)))
ppframeX<-rbind(ppframeX, FM)
}
colnames(ppframeX)<-c("Accuracy", "Features", "Methods")
ggplot(ppframeX, aes(x=Features, y=Accuracy, colour=Methods,
group=Methods, shape=Methods))+
geom_line(position=position_dodge(0.2), lwd=2)+
ylim(0.2, 1.0) +
geom_point(size=5, position=position_dodge(0.2))+
theme(legend.position="top", legend.text=element_text(size=16))+
ggtitle("Spacekime Swapped-Phases (386 features): Compare ML Forecasting Results")+
theme(
axis.text=element_text(size=16),
plot.title = element_text(size=18, face="bold.italic"),
axis.title.x = element_text(size=14, face="bold"),
axis.title.y = element_text(size=14, face="bold"))
png("Figures/Fig5.24B.png",width = 1920,height = 1080,res = 100)
ggplot(ppframeX, aes(x=Features, y=Accuracy, colour=Methods,
group=Methods, shape=Methods))+
geom_line(position=position_dodge(0.2), lwd=2)+
ylim(0.2, 1.0) +
geom_point(size=5, position=position_dodge(0.2))+
theme(legend.position="top", legend.text=element_text(size=16))+
ggtitle("Spacekime Swapped-Phases (386 features): Compare ML Forecasting Results")+
theme(
axis.text=element_text(size=16),
plot.title = element_text(size=18, face="bold.italic"),
axis.title.x = element_text(size=14, face="bold"),
axis.title.y = element_text(size=14, face="bold"))
dev.off()
## quartz_off_screen
## 2
Feature 5.24 C
showfeatures(fitLASSO_X, chooselambda(cvLASSO_X, 1), 10)
## Overall
## Feature_2_ArimaVec_8 1.232679e+15
## Feature_23_ArimaVec_7 4.201288e+13
## Feature_29_ArimaVec_2 1.117510e-05
## Feature_34_ArimaVec_4 1.197363e+00
## Feature_36_ArimaVec_3 9.500284e-02
## Feature_37_ArimaVec_5 2.179608e+00
## Feature_38_ArimaVec_2 9.531233e-03
## Feature_41_ArimaVec_5 8.925103e-02
## Feature_42_ArimaVec_7 1.992485e+14
## HI 6.623288e-01
feat_5 <- predict(fitLASSO_X, s = chooselambda(cvLASSO_X, 2, 10), newx = data.matrix(X))
df1 <- as.data.frame(rbind(as.numeric(feat_5), Y),
row.names = c("Predicted Rank","OA Rank"))
colnames(df1) <- countryNames
df1 # View(t(df1))
## Austria Belgium Bulgaria Croatia Cyprus Czech Republic
## Predicted Rank 16.8738 23.3342 30.74162 27.44991 35.05499 24.99034
## OA Rank 18.0000 19.0000 38.00000 28.00000 50.00000 25.00000
## Denmark Estonia Finland France Germany Greece
## Predicted Rank 17.10833 28.00065 16.82787 14.9732 15.42112 25.1666
## OA Rank 10.00000 32.00000 1.00000 16.0000 12.00000 26.0000
## Hungary Iceland Ireland Italy Latvia Lithuania
## Predicted Rank 29.03048 29.60176 23.17419 27.23579 29.7126 32.9529
## OA Rank 33.00000 36.00000 17.00000 23.00000 36.0000 34.0000
## Luxembourg Malta Netherlands Norway Poland Portugal
## Predicted Rank 13.19769 39.70136 13.37373 16.06833 26.81967 21.50892
## OA Rank 5.00000 50.00000 8.00000 6.00000 29.00000 26.00000
## Romania Slovakia Slovenia Spain Sweden Switzerland
## Predicted Rank 29.61407 22.33643 22.26078 20.61835 17.08804 12.77284
## OA Rank 39.00000 31.00000 24.00000 26.00000 3.00000 2.00000
## United Kingdom
## Predicted Rank 15.57164
## OA Rank 14.00000
# Clustering
cluster5 <- X[, which(colnames(X) %in%
row.names(showfeatures(fitLASSO_X, chooselambda(cvLASSO_X, 1), 10)))]
rownames(cluster5) <- countryNames # countryinfo
#1. hierarchical clustering
scaled_cluster5 <- scale(cluster5)
##deal with NAN values
#scaled_country<-scaled_country[,which(is.nan(scaled_country[1,])==FALSE)]
dis_SC5 <- dist(scaled_cluster5)
H_clust_SC5 <- hclust(dis_SC5)
H_clust_SC5 <- eclust(scaled_cluster5, k=5, "hclust")
png("Figures/Fig5.24C.png",width = 1920,height = 1080,res = 100)
fviz_dend(H_clust_SC5, rect = TRUE, cex=0.5)
dev.off()
## quartz_off_screen
## 2
fviz_dend(H_clust_SC5, rect = TRUE, cex=0.5)
# fviz_dend(H_clust_SC5, lwd=2, rect = TRUE)
Feature 5.24 D
# ST 378
cluster5_378_SK <- X378[, which(colnames(X378) %in%
row.names(showfeatures(fitLASSO_X, chooselambda(cvLASSO_X, 1), 10)))]
rownames(cluster5_378_SK) <- countryNames # countryinfo
#1. hierarchical clustering
scaled_cluster5_378_SK <- scale(cluster5_378_SK)
dis_SC5_378_SK <- dist(scaled_cluster5_378_SK)
H_clust_SC5_378_SK <- hclust(dis_SC5_378_SK)
H_clust_SC5_378_SK <- eclust(scaled_cluster5_378_SK, k=5, "hclust")
fviz_dend(H_clust_SC5_378_SK,rect = TRUE, cex=0.5)
png("Figures/Fig5.24D.png",width = 1920,height = 1080,res = 100)
fviz_dend(H_clust_SC5_378_SK,rect = TRUE, cex=0.5)
dev.off()
## quartz_off_screen
## 2