- Dear jury,
We are students from the Faculty of Economics and Social Sciences, University of Plovdiv Paisii Hilendarski. As we are first-year students and have not studied any programming languages, we tried to forecast the value of the cryptocurrency with R. Unfortunately, while working with the file price_data1442.csv, none of the functions we know gave satisfactory results. We look forward to receiving your comments on the solution we have proposed. Thank you in advance.
PU FESS Datathon case solution
# I. Holt-Winters exponential smoothing forecasting method
# Set the working directory. Copy the input file price_data1442.csv into the
# C:\RData directory before running this script.
setwd("C:/RData")
# Read the input data.
df <- read.csv("price_data1442.csv")
# Inspect the data structure.
str(df)
# Keep only one column: price (#4 in the input file price_data1442.csv).
mydata <- df[[4]]
str(mydata)
# Simple exponential smoothing: beta = FALSE switches off the trend component
# and gamma = FALSE switches off the seasonal component.
mydataforecasts <- HoltWinters(mydata, beta = FALSE, gamma = FALSE)
mydataforecasts
#
#Result:
#Holt-Winters exponential smoothing without trend and without seasonal component.
#
#Call:
#HoltWinters(x = mydata, beta = FALSE, gamma = FALSE)
#Smoothing parameters:
# alpha: 0.999935
# beta : FALSE
# gamma: FALSE
#
#Coefficients:
# [,1]
#a 8507.579
#
# => HoltWinters – Not suitable for forecasting this cryptocurrency.
#
# Note: With this method we forecast the price value for every observation made.
#
# II. Holt's exponential smoothing (trend, no seasonal component)
# Only the seasonal component is switched off (gamma = FALSE), so the fitted
# model contains a level and a trend term.
mydataforecasts <- HoltWinters(x = mydata, gamma = FALSE)
mydataforecasts
#
# Recorded output:
# Holt-Winters exponential smoothing with trend and without seasonal component.
#
# Call:
# HoltWinters(x = mydata, gamma = FALSE)
#
# Smoothing parameters:
# alpha: 1
# beta : 0.0002208343
# gamma: FALSE
#
# Coefficients:
# [,1]
# a 8507.5800000
# b -0.2355879
#
# Sum of squared errors of the fitted model.
mydataforecasts[["SSE"]]
# Recorded output: [1] 32634137
# The authors judged this error to be very large and concluded that this
# variant of Holt-Winters is not suitable for forecasting cryptocurrency prices.
# III. Decomposition, correlation and ARIMA analysis
# Reference video:
# https://www.youtube.com/watch?v=N_XKJqr-VT4&ab_channel=MichaelGrogan
# Install ggplot2 only when it is missing, then load it.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)
setwd("C:/RData")
df <- read.csv("price_data1442.csv")
str(df)
# Select only two columns for our data frame: time (column #2) and
# price (column #4).
mydata <- df[, c(2, 4)]
str(mydata)
#
# Trend, seasonality and error extraction
# The price column is a plain numeric vector, not a `ts` object with a seasonal
# frequency. decompose() stops with "time series has no or less than 2
# periods" for such input, which previously aborted the script. The errors are
# now caught and reported so the rest of the script keeps running;
# `decomposed` is NULL whenever decomposition is not possible.
# TODO(review): to obtain a real decomposition, build the series with
# ts(mydata[, 2], frequency = <observations per seasonal cycle>) once the
# sampling interval of the data is confirmed.
tsprice <- mydata[, 2]
decomposed <- NULL
for (decomposition_type in c("multiplicative", "additive")) {
  decomposed <- tryCatch(
    decompose(tsprice, type = decomposition_type),
    error = function(e) {
      message(
        "decompose(type = \"", decomposition_type, "\") failed: ",
        conditionMessage(e)
      )
      NULL
    }
  )
}
# plot(decomposed)
# stlRes <- stl(tsprice, s.window = "periodic")
# Decomposition is not possible while the series has no seasonal frequency.
# Correlation test between price and time.
# cor.test() runs a single method per call; passing all three method names is
# identical to the default and performs only Pearson's test, so that method is
# now requested explicitly.
cor.test(mydata$price, as.numeric(mydata$time), method = "pearson")
# Recorded result: cor = -0.3958351
# A coefficient of about -0.40 indicates a moderate negative linear
# relationship between price and time rather than no correlation.
# De-trend the crypto time series with a linear regression of price on time.
trModel <- lm(mydata$price ~ mydata$time)
# resid(trModel) contains the de-trended series.
plot(resid(trModel), type = "l")
trModel
# Recorded result:
# Call:
# lm(formula = mydata$price ~ mydata$time)
# Coefficients:
# (Intercept) mydata$time
# 10942.3519 -0.1233
#
# Auto ARIMA forecasting
# Install each required package only when it is missing, then load all three.
for (pkg in c("MASS", "tseries", "forecast")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library("MASS")
library("tseries")
library("forecast")
# Raw (untransformed) prices from rows 2 to 13758, used without any conversion.
currencyprice <- mydata[["price"]][2:13758]
currencyprice
# Autocorrelation and partial autocorrelation of the raw prices (20 lags).
acf(x = currencyprice, lag.max = 20)
pacf(x = currencyprice, lag.max = 20)
# Authors' conclusion from these plots: the series is not stationary.
# Natural logarithm of the prices from rows 2 to 13758.
lncurrencyprice <- log(mydata[["price"]][2:13758])
lncurrencyprice
# Autocorrelation and partial autocorrelation of the log prices (20 lags).
acf(x = lncurrencyprice, lag.max = 20)
pacf(x = lncurrencyprice, lag.max = 20)
# Authors' conclusion from these plots: the log series is not stationary.
# Log difference
# lncurrencyprice is already a plain numeric vector holding the log prices of
# rows 2:13758, so it is differenced directly; `$price` is invalid on an
# atomic vector and re-indexing with 2:13758 would run past its end.
diffcurrencyprice <- diff(lncurrencyprice, lag = 1)
diffcurrencyprice
# Inspect the differenced series itself (previously the undifferenced
# `currencyprice` was plotted here by mistake).
acf(diffcurrencyprice, lag.max = 20)
pacf(diffcurrencyprice, lag.max = 20)
# NOTE(review): the earlier conclusion "the stationarity is missing again" was
# drawn from plots of the undifferenced series; re-check it against the plots
# above.
# First difference of the raw prices (rows 2 to 13758).
diffcurrencyprice <- diff(mydata$price[2:13758], lag = 1)
diffcurrencyprice
# Inspect the differenced series itself (previously the undifferenced
# `currencyprice` was plotted here by mistake).
acf(diffcurrencyprice, lag.max = 20)
pacf(diffcurrencyprice, lag.max = 20)
# NOTE(review): the earlier conclusion "the stationarity is missing again" was
# drawn from plots of the undifferenced series; re-check it against the plots
# above.
# Augmented Dickey-Fuller tests on the price levels and on the differenced
# series; a p-value below 0.05 indicates that the series is stationary.
adf.test(mydata$price[2:13758]) # recorded p-value = 0.04
adf.test(diffcurrencyprice) # recorded p-value = 0.01
plot(diffcurrencyprice, type = "l")
# Time series, ACF and PACF of the series passed to ARIMA.
# diffcurrencyprice is already a plain numeric vector of differences, so it is
# converted to a `ts` object as a whole; `$price` is invalid on an atomic
# vector and the index range 2:13758 would run past its end.
pricearima <- ts(diffcurrencyprice)
acf(pricearima, lag.max = 20)
pacf(pricearima, lag.max = 20)
# Authors' conclusion: a stationary model is not appropriate for this data.
# NOTE(review): re-check this conclusion against the plots above.
# Auto ARIMA forecast, applicable when the input series is stationary.
# Fit the model to the prepared series and show the selected specification.
fitcurrency <- auto.arima(pricearima)
fitcurrency
# Forecast 1440 steps ahead, then print and plot the forecast object.
forecastedvalues <- forecast(fitcurrency, h = 1440)
forecastedvalues
plot(forecastedvalues)
# Extract the point forecasts as a plain numeric vector.
forecastedvaluesextracted <- as.numeric(forecastedvalues[["mean"]])
forecastedvaluesextracted
# Percentage error of the forecasts against the held-out prices.
# The ARIMA model is fitted to first differences of the price, so its point
# forecasts are price changes. They are accumulated from the last training
# price (row 13758) to get forecasted price levels before comparing them with
# the actual prices.
last_train_row <- 13758
forecast_horizon <- length(forecastedvaluesextracted)
# Take exactly as many actual prices as there are forecasts; the previous
# range 13759:15199 held one value more than the 1440 forecasts.
actual_price <- mydata$price[last_train_row + seq_len(forecast_horizon)]
forecasted_price <- mydata$price[last_train_row] +
  cumsum(forecastedvaluesextracted)
# NOTE: this reuses the name `df`, replacing the input data frame read earlier.
df <- data.frame(actual_price, forecasted_price)
col_headings <- c("Actual price", "Forecasted price")
names(df) <- col_headings
# attach(df) was dropped: the columns are accessed explicitly below.
percentage_error <- (df[["Actual price"]] - df[["Forecasted price"]]) /
  df[["Actual price"]]
percentage_error
# Mean signed percentage error; positive and negative errors cancel out.
mean(percentage_error)
# Ljung-Box tests for remaining autocorrelation in the ARIMA residuals at
# lags 5, 10 and 15.
# residuals() replaces `fitcurrency$resid`, which relied on partial matching
# of the `residuals` component.
fit_residuals <- residuals(fitcurrency)
Box.test(fit_residuals, lag = 5, type = "Ljung-Box")
Box.test(fit_residuals, lag = 10, type = "Ljung-Box")
Box.test(fit_residuals, lag = 15, type = "Ljung-Box")