################################################# # ARIMA Models: U.S. COVID-19 CASES AND DEATHS # ################################################# remove(list=ls()) # Load packages library(readxl) library(fpp3) library(tsibble) library(tsibbledata) library(tidyverse) library(fable) library(dplyr) library(lubridate) library(ggplot2) library(cowplot) library(ggpubr) library(feasts) # Read data, we need sheet 2 and use the col names of the file, col_names = TRUE setwd("C:/Documents/Classes/Econ4115/Code") data <- read_excel(na.omit("covid-data.xlsx"), sheet = 1, col_names = TRUE) head(data) # To read in data from a different country, just type in the appropriate iso_code. data1 <- data %>% filter(iso_code == "USA", date > "2020-01-31") %>% mutate(Daily = as_date(date)) %>% select(-date) %>% as_tsibble(key = location, index = Daily) ############# # TRIM DATA # ############# data1 <- data1 %>% slice(30:931) ############################## # COVID-19: DAILY U.S. CASES # ############################## # Regular time series plot of daily U.S. COVID-19 cases data1 %>% autoplot(new_cases) + ggtitle("U.S.: Daily COVID-19 Cases") + ylab("Cases") + xlab("Day") # Partial plots of regular and seasonal differenced data data1 %>% gg_tsdisplay(difference(new_cases,7), plot_type='partial') data1 %>% gg_tsdisplay(new_cases %>% difference(7) %>% difference(),plot_type='partial') # Estimating a seasonal ARIMA model and residual diagnostics fit <- data1 %>% model(arima = ARIMA(new_cases ~ pdq(1,1,0) + PDQ(0,1,1))) fit %>% gg_tsresiduals() # Forecasts and plot fit %>% forecast(h=14) %>% autoplot(slice(data1,(n()-90):n())) # Automated ARIMA model selection data1 %>% model(ARIMA(new_cases)) # Box-Ljung test for white-noise residuals # Degrees of freedom (dof) is the number of ARIMA parameters estimated augment(fit) %>% features(.resid, ljung_box, lag = 14, dof = 2) # Forecasts using automated ARIMA model data1 %>% model(ARIMA(new_cases ~ pdq(2,0,3) + PDQ(0,1,1))) %>% forecast() %>% autoplot(slice(data1,(n()-30):n())) + ylab("U.S. Daily COVID Cases") + xlab("Day") ############################### # COVID-19: DAILY U.S. DEATHS # ############################### # Regular time series plot of daily U.S. COVID-19 deaths data1 %>% autoplot(new_deaths) + ggtitle("U.S.: Daily COVID-19 Deaths") + ylab("Deaths") + xlab("Day") # Partial plots of regular and seasonal differenced data data1 %>% gg_tsdisplay(difference(new_deaths,7), plot_type='partial') data1 %>% gg_tsdisplay(new_deaths %>% difference(7) %>% difference(),plot_type='partial') # Estimating a seasonal ARIMA model and residual diagnostics fit2 <- data1 %>% model(arima = ARIMA(new_deaths ~ pdq(2,0,0) + PDQ(0,1,1))) fit2 %>% gg_tsresiduals() # Forecasts and plot fit2 %>% forecast(h=14) %>% autoplot(slice(data1,(n()-90):n())) # Automated ARIMA model selection data1 %>% model(ARIMA(new_deaths)) # Box-Ljung test for white-noise residuals # Degrees of freedom (dof) is the number of ARIMA parameters estimated augment(fit2) %>% features(.resid, ljung_box, lag = 14, dof = 3) # Forecast using automated ARIMA model data1 %>% model(ARIMA(new_deaths ~ pdq(1,0,2) + PDQ(0,1,2))) %>% forecast() %>% autoplot(slice(data1,(n()-90):n())) + ylab("U.S. Daily COVID Deaths") + xlab("Day") ######################################################### # ARIMA vs. ETS: FORECASTING U.S. DAILY COVID-19 CASES # ######################################################### # Time series cross validation comparison of ARIMA vs. ETS # (Takes too long so commenting out) # data1 %>% slice(-n()) %>% stretch_tsibble(.init = 10) %>% # model(ETS(new_cases),ARIMA(new_cases)) %>% # forecast(h = 1) %>% accuracy(data1) # Use data up to July 15 as the training set train <- data1 %>% filter(Daily <= "2022-07-15") # Select and estimate the ARIMA model fit_arima <- train %>% model(ARIMA(new_cases)) report(fit_arima) # ARIMA residual diagnostics gg_tsresiduals(fit_arima, lag_max = 14) augment(fit_arima) %>% features(.resid, ljung_box, lag = 14, dof = 6) # Select and estimate the ETS model fit_ets <- train %>% model(ETS(new_cases)) report(fit_ets) # ETS residual diagnostics fit_ets %>% gg_tsresiduals(lag_max = 14) augment(fit_ets) %>% features(.resid, ljung_box, lag = 14, dof = 6) # Generate forecasts and compare accuracy over the test set bind_rows( fit_arima %>% accuracy(), fit_ets %>% accuracy(), fit_arima %>% forecast(h = "30 days") %>% accuracy(data1), fit_ets %>% forecast(h = "30 days") %>% accuracy(data1)) # Generate forecasts from winning model (ARIMA, in this case) data1 %>% model(ARIMA(new_cases)) %>% forecast(h="14 days") %>% autoplot(slice(data1,(n()-90):n()))