empirical_results - RebeccaSalles/TSPred GitHub Wiki
Some empirical results of TSPred:
This page provides code for obtaining some empirical results of TSPred regarding the task of predicting the fifth gap of missing observations of the CATS dataset.
Different approaches were adopted combining data preprocessing and data modeling, either by the machine learning model SVM or by the deep learning model CNN. The time series in CATS are mostly nonstationary, thus a data preprocessing step was also adopted, based on the application of the splitting-based nonstationary time series transform EMD. The linear ARIMA model is selected as the baseline, and the results from each approach are ranked based on MSE prediction errors.
Loading the package
library(TSPred)
# Install the required TensorFlow version when it is missing or older than 2.0.
# tf_version() yields NULL when no TensorFlow installation is found, and
# `if (NULL < "2.0")` would fail with a zero-length condition, so the missing
# case is checked explicitly before comparing versions.
tf_installed <- tensorflow::tf_version()
if (is.null(tf_installed) || tf_installed < "2.0") {
  tensorflow::install_tensorflow(version = "2.0.0")
}
Defining the dataset
# Load the CATS series and their continuation (the held-out observations).
data(CATS, CATS.cont)

# Only the fifth column of each data frame is used in this experiment.
# The test set has as many rows as the continuation of that series.
test_len <- nrow(CATS.cont[5])

# Full series: known observations followed by the continuation.
data <- rbind(CATS[5], CATS.cont[5])
Defining required objects for time series prediction
Defining data processing objects
# Subsetting object configured with the length of the test set.
data_subsetting <- subsetting(test_len = test_len)

# Candidate transforms, grouped by kind in named lists so that the approaches
# further down can pick them by name.
mapping_based <- list(BCT = BoxCoxT(lambda = NULL))
splitting_based <- list(
  EMD = EMD(),
  WT = WT()
)
normalization <- list(
  MM = MinMax(),
  AN = AN()
)

# Sliding-window object with a window length of 5.
sliding_window <- SW(window_len = 5)
Defining modeling objects
# Baseline linear model.
ARIMA_model <- ARIMA()

# Machine learning models: both are given the sliding window and the min-max
# normalization object. `[[` is required here so that the MinMax object itself
# is passed, not a one-element list wrapping it.
CNN_model <- Tensor_CNN(
  sw = sliding_window,
  proc = list(norm = normalization[["MM"]])
)
SVM_model <- SVM(
  sw = sliding_window,
  proc = list(norm = normalization[["MM"]])
)
Defining evaluation objects
# MSE evaluation object; passed as the `evaluating` component of every
# tspred() specification on this page.
mse_eval <- MSE_eval()
Time series prediction approaches
ARIMA (baseline)
# Baseline specification: ARIMA with no extra processing step, scored by MSE.
tspred_arima <- tspred(
  subsetting = data_subsetting,
  modeling = ARIMA_model,
  evaluating = list(MSE = mse_eval)
)

# Run the baseline workflow on the full series.
tspred_arima_run <- workflow(
  tspred_arima,
  data = data,
  prep_test = TRUE,
  onestep = TRUE,
  eval_fitness = FALSE
)
CNN (does not beat the baseline)
# CNN specification without any additional processing step, scored by MSE.
tspred_cnn <- tspred(
  subsetting = data_subsetting,
  processing = NULL,
  modeling = CNN_model,
  evaluating = list(MSE = mse_eval)
)

# Run the CNN workflow with the same settings as the baseline run.
tspred_cnn_run <- workflow(
  tspred_cnn,
  data = data,
  prep_test = TRUE,
  onestep = TRUE,
  eval_fitness = FALSE
)
CNN+EMD (does not beat the baseline)
# CNN specification with the EMD transform as processing step, scored by MSE.
# `[[` extracts the EMD object itself from the named list of transforms.
tspred_cnn_proc <- tspred(
  subsetting = data_subsetting,
  processing = list(EMD = splitting_based[["EMD"]]),
  modeling = CNN_model,
  evaluating = list(MSE = mse_eval)
)

# Run the CNN+EMD workflow with the same settings as the baseline run.
tspred_cnn_proc_run <- workflow(
  tspred_cnn_proc,
  data = data,
  prep_test = TRUE,
  onestep = TRUE,
  eval_fitness = FALSE
)
SVM (does not beat the baseline)
# SVM specification without any additional processing step, scored by MSE.
tspred_svm <- tspred(
  subsetting = data_subsetting,
  processing = NULL,
  modeling = SVM_model,
  evaluating = list(MSE = mse_eval)
)

# Run the SVM workflow with the same settings as the baseline run.
tspred_svm_run <- workflow(
  tspred_svm,
  data = data,
  prep_test = TRUE,
  onestep = TRUE,
  eval_fitness = FALSE
)
SVM+EMD (beats the baseline)
# =========== SVM+EMD (beats the baseline) =============
# SVM specification with the EMD transform as processing step, scored by MSE.
# `[[` extracts the EMD object itself from the named list of transforms.
tspred_svm_proc <- tspred(
  subsetting = data_subsetting,
  processing = list(EMD = splitting_based[["EMD"]]),
  modeling = SVM_model,
  evaluating = list(MSE = mse_eval)
)

# Run the SVM+EMD workflow with the same settings as the baseline run.
tspred_svm_proc_run <- workflow(
  tspred_svm_proc,
  data = data,
  prep_test = TRUE,
  onestep = TRUE,
  eval_fitness = FALSE
)
SVM+(BCT+EMD) (beats the baseline)
# SVM specification with two processing steps, listed as BCT (Box-Cox) first
# and EMD second, scored by MSE. `[[` extracts each transform object itself
# from its named list.
tspred_svm_proc_2 <- tspred(
  subsetting = data_subsetting,
  processing = list(
    BCT = mapping_based[["BCT"]],
    EMD = splitting_based[["EMD"]]
  ),
  modeling = SVM_model,
  evaluating = list(MSE = mse_eval)
)

# Run the SVM+(BCT+EMD) workflow with the same settings as the baseline run.
tspred_svm_proc_2_run <- workflow(
  tspred_svm_proc_2,
  data = data,
  prep_test = TRUE,
  onestep = TRUE,
  eval_fitness = FALSE
)
Benchmarking prediction results
# Executed machine learning workflows to be compared against the baseline.
# NOTE(review): tspred_svm_proc_2_run (SVM+(BCT+EMD)) is not part of this
# list - confirm that leaving it out of the benchmark is intentional.
MLM_tests <- list(
  tspred_cnn_run,
  tspred_cnn_proc_run,
  tspred_svm_run,
  tspred_svm_proc_run
)

# Benchmark the ARIMA baseline run against the runs above, ranked by MSE.
benchmarking <- benchmark(tspred_arima_run, MLM_tests, rank.by = "MSE")
Discussions:
- TSPred prediction results are competitive when compared with the errors produced by the CATS competitors.
- The demand for the adoption of a suitable baseline model is noticeable.
- The CNN and SVM could not outperform the baseline.
- Introducing nonstationarity treatment resulted in smaller prediction errors.
Plotting prediction results
# Gather the predictions of every benchmarked model into one long data frame
# with columns `time`, `pred` and `model`, ready for plotting.
ranked_objs <- benchmarking$ranked_tspred_objs

# Turn the names of the ranked objects into short plot labels: drop the first
# "MinMax-" and "Tensor_" occurrences and replace the first "-" with "+".
models <- stringr::str_remove(names(ranked_objs), "MinMax-")
models <- stringr::str_remove(models, "Tensor_")
models <- stringr::str_replace(models, "-", "+")

# One data frame per model, bound together once at the end (no growing inside
# a loop, and no detour through a character matrix for the numeric columns).
predictions <- do.call(rbind, lapply(seq_along(ranked_objs), function(m) {
  obj <- ranked_objs[[m]]

  # Prefer the postprocessed predictions and fall back to the raw ones. Fail
  # loudly when neither exists rather than reusing another model's values.
  pred <- if (!is.null(obj$pred$postp)) {
    obj$pred$postp[[1]]
  } else if (!is.null(obj$pred$raw)) {
    obj$pred$raw[[1]]
  } else {
    stop("No predictions found for model '", models[m], "'", call. = FALSE)
  }

  # 981:1000 are the observation indices given to the predicted values; the
  # same hard-coded range is used when plotting the observed series.
  data.frame(
    time = as.numeric(981:1000),
    pred = as.numeric(pred),
    model = models[m],
    stringsAsFactors = FALSE
  )
}))

# Keep the ranking order of the models in legends and colour assignment.
predictions$model <- factor(predictions$model, levels = models)
library(ggplot2)
library(RColorBrewer)

# Five colours from the "Set1" palette; each figure picks its own subset.
colors <- brewer.pal(5, "Set1")

# Figure 1: predictions of ARIMA, SVM and EMD+SVM. The dashed gray line draws
# column V5 of CATS.cont over observations 981 to 1000.
svm_models <- c("ARIMA", "SVM", "EMD+SVM")
ggplot(
  predictions[predictions$model %in% svm_models, ],
  aes(x = time, y = pred, col = model)
) +
  geom_line(
    data = CATS.cont[5],
    aes(x = 981:1000, y = V5),
    size = 1.1,
    col = "gray45",
    linetype = "dashed"
  ) +
  geom_line(size = 1) +
  geom_point(size = 2) +
  labs(x = "Observation", y = "", col = "Model", shape = "Model") +
  scale_colour_manual(values = colors[1:3]) +
  theme_bw()

# Figure 2: the same layout for ARIMA, CNN and EMD+CNN, using palette entries
# 2, 4 and 5.
cnn_models <- c("ARIMA", "CNN", "EMD+CNN")
ggplot(
  predictions[predictions$model %in% cnn_models, ],
  aes(x = time, y = pred, col = model)
) +
  geom_line(
    data = CATS.cont[5],
    aes(x = 981:1000, y = V5),
    size = 1.1,
    col = "gray45",
    linetype = "dashed"
  ) +
  geom_line(size = 1) +
  geom_point(size = 2) +
  labs(x = "Observation", y = "", col = "Model") +
  scale_colour_manual(values = colors[c(2, 4:5)]) +
  theme_bw()