# 4230 Spring 2018 # some packages we may use: # install.packages("quantmod") require(quantmod) # install.packages("forecast") require(forecast) # install.packages("fBasics") require(fBasics) # install.packages("sandwich") require(sandwich) # install.packages("lmtest") require(lmtest) # install.packages("nlme") # require(nlme) # install.packages("car") require(car) # install.packages("vars") require(vars) # ts() can transform data frames, vectors, and arrays into ts object # often a time series function will automatically make a ts object getSymbols("SPY",from="2000-01-03") # default pulls from yahoo, could do src="google". default time span is 1/3/07 to present # SPY is the symbol for the S&P 500 Index dim(SPY) # see dimensions of dataset head(SPY) #see first six rows SPY.dsrtn = ts(100*dailyReturn(SPY,leading=FALSE,type='arithmetic')) SPY.dlrtn = 100*dailyReturn(SPY,leading=FALSE,type='log') SPY.msrtn = 100*monthlyReturn(SPY,leading=FALSE,type='arithmetic') SPY.mlrtn = 100*monthlyReturn(SPY,leading=FALSE,type='log') chartSeries(SPY,type=c("line"),theme="white") plot.ts(SPY.dsrtn) chartSeries(SPY.dlrtn,type=c("line"),theme="white",TA=NULL) # Individual stocks: SRE (Sempra Energy) getSymbols("SRE",from="2000-01-03") # default pulls from yahoo, could do src="google". default time span is 1/3/07 to present dim(SRE) # see dimensions of dataset head(SRE) #see first six rows chartSeries(SRE,type=c("line"),theme="white",TA=NULL) SRE.dsrtn = 100*dailyReturn(SRE,leading=FALSE,type='arithmetic') SRE.dlrtn = 100*dailyReturn(SRE,leading=FALSE,type='log') SRE.msrtn = 100*monthlyReturn(SRE,leading=FALSE,type='arithmetic') SRE.mlrtn = 100*monthlyReturn(SRE,leading=FALSE,type='log') chartSeries(SRE.dlrtn,type=c("line"),theme="white",TA=NULL) chartSeries(SRE.mlrtn,type=c("line"),theme="white",TA=NULL) # WTI getSymbols("MCOILWTICO",src="FRED") # Oil & gas drilling index getSymbols("IPN213111N",src="FRED") # Henry Hub getSymbols("MHHNGSP",src="FRED") # Brent getSymbols("MCOILBRENTEU",src="FRED") # LNG Asia getSymbols("PNGASJPUSDM",src="FRED") # Additional commodities to illustrate spurious regression # Global Coffee getSymbols("PCOFFOTMUSDM",src="FRED") # Global Fish getSymbols("PSALMUSDM",src="FRED") # transform them into ts() objects with same time frame boil=ts(MCOILBRENTEU$MCOILBRENTEU,freq=12,start=1987+(4/12)) boil=ts(boil[c(57:362)],freq=12,start=1992) lng=ts(PNGASJPUSDM$PNGASJPUSDM,freq=12,start=1992) coff = ts(PCOFFOTMUSDM$PCOFFOTMUSDM,freq=12,start=1980) coff = ts(coff[c(145:450)],freq=12,start=1992) slm = ts(PSALMUSDM$PSALMUSDM,freq=12,start=1980) slm = ts(slm[c(145:450)],freq=12,start=1992) # consider the global coffee price vs brent oil # bind them and plot them to see what's going on tsoilcoff = cbind(boil,coff) ts.plot(tsoilcoff) # both are probably nonstationary (random walks) reg_cb = lm(coff ~ boil) # very high t-stat, pretty good R-squared summary(reg_cb) # but residuals look non-stationary ts.plot(residuals(reg_cb)) # need to regress changes on changes reg2_cb = lm(diff(coff) ~ diff(boil)) summary(reg2_cb) # fairly low t-stat, R-squared almost zero plot(diff(boil),diff(coff)) # coffee-brent oil relationship was spurious in levels # fish price index vs brent oil ts.plot(slm) ar(na.omit(diff(slm))) # plot the two series in levels, some correlation appears plot(boil,slm) reg3 = lm(slm ~ boil) summary(reg3) # high t-stats and R-squared ts.plot(residuals(reg3)) # two different ways to plot the time series tsfish = cbind(boil,slm) ts.plot(tsfish) require(MTS) MTSplot(tsfish) # need to look in changes reg3d = lm(diff(slm) ~ diff(boil)) summary(reg3d) # a true relationship still exists - oil is a major input to fishing plot(diff(boil),diff(slm)) # residuals still have autocorrelation ts.plot(residuals(reg3d)) acf(residuals(reg3d)) tsdisplay(residuals(reg3d)) # Try just using HAC standard errors summary(reg3d) vcovHAC(reg3d) coeftest(reg3d,vcov=vcovHAC(reg3d)) # test efficient markets hypothesis for salmon and brent oil # lm() may not understand the lag() function or other ts() options # could do more awkward thing, ensure conformable dimensions boil.rtn = lag(diff(boil)) lag.dboil = boil.rtn[-NROW(boil.rtn)] boil.rtn = boil.rtn[-1] # show what a lag looks like - show the data tsboil.rtn = cbind(boil.rtn,lag.dboil) summary(lm(boil.rtn ~ lag.dboil)) # ARIMA modeling # basic: ar1 = arima(boil.rtn,order=c(1,0,0)) summary(ar1) ma1 = arima(boil.rtn,order=c(0,0,1)) summary(ma1) # for AR only, and letting package pick a reasonable order # fits AR(1) ar(boil.rtn,order.max = 5) auto.arima(boil.rtn) # not too bad residuals: tsdisplay(residuals(ar1)) # in order to use "forecast" we have to use the model we fit # and use that model to forecast (not forecast on raw data) fcast.boilrt = forecast(ar1,h=100) plot(fcast.boilrt) # zoom in by including only the first 100 observations of # the original series: plot(fcast.boilrt,include=100) # Important: the long run forecast is just the mean, mu # forecast with a holdout sample to see how well we did fit_no_holds <- arima(boil.rtn[-c(250:304)],order=c(1,0,0)) fcast_no_holds <- forecast(fit_no_holds,h=104) plot(fcast_no_holds,main=" ",include=100) lines(ts(boil.rtn))