####################################################
# Earnings Regression with Multicollinearity Tests #
####################################################
#
# Reads the wage data set, inspects pairwise correlations, fits an earnings
# regression, and measures multicollinearity with variance inflation factors
# (VIFs), computed both with car::vif() and by hand from auxiliary regressions.

# clean all variables
# NOTE(review): this wipes every object in the current workspace; kept so the
# script behaves as before when run in a shared session.
rm(list = ls())

# options and libraries
library(readxl)
library(car)

# read data
# NOTE(review): machine-specific working directory; the relative path passed
# to read_excel() below depends on it.
setwd("c:/Documents/Classes/Econ4230/R_and_Stata/")
data <- read_excel("Data/wage.xlsx")

# check for missing values (number of NAs in each column)
colSums(is.na(data))

# scatterplot matrix
pairs(data)

# correlation matrix
cor(data)

# correlation matrix on rhs variables only
agesq <- data$age^2
rhs <- data.frame(
  agesq,
  age = data$age,
  exp = data$exp,
  grade = data$grade,
  union = data$union
)
cor(rhs)

# original regression
# agesq is created above as a stand-alone vector; if `data` has no column of
# that name, lm() picks it up from the workspace rather than from `data`.
regOrig <- lm(wage ~ age + agesq + grade + union, data = data)

# calculate VIFs using CAR package
vif(regOrig)

# calculate VIFs by hand
# auxiliary regressions: each regressor of regOrig on all the other regressors
regAge <- lm(age ~ agesq + grade + union, data = data)
regAgesq <- lm(agesq ~ age + grade + union, data = data)
regGrade <- lm(grade ~ age + agesq + union, data = data)
regUnion <- lm(union ~ grade + age + agesq, data = data)

# calculate VIFs: VIF_j = 1 / (1 - R^2_j), with R^2_j taken from the auxiliary
# regression of regressor j
VIFage <- 1 / (1 - summary(regAge)$r.squared)
VIFagesq <- 1 / (1 - summary(regAgesq)$r.squared)
VIFgrade <- 1 / (1 - summary(regGrade)$r.squared)
VIFunion <- 1 / (1 - summary(regUnion)$r.squared)

# show the hand-computed VIFs in the same order as the regressors of regOrig,
# so they can be compared directly with the vif(regOrig) output above
c(age = VIFage, agesq = VIFagesq, grade = VIFgrade, union = VIFunion)