// Adv Econ Theory II
// Hausman and Taylor Instrumental Variables Estimation for Panel Data
// Alfredo Cisneros-Pineda
// Remi Morin Chasse
// April 16, 2013

cls;

/* PANEL DATA ESTIMATION -- MEASURING THE RETURNS TO SCHOOL */

/* Load Data and Define Variables */
load path = /Users/rem/Dropbox/Enconometrics_II_presentation;
load data[4166,12] = data.txt;

// Only rows 2..4166 are used below (row 1 is presumably a header row -- confirm
// against data.txt). That leaves n*T = 595*7 = 4165 observations.

// X1: Time-variant variables uncorrelated with ui
wks   = data[2:4166,2];
south = data[2:4166,5];
smsa  = data[2:4166,6];
ms    = data[2:4166,7];

// X2: Time-variant variables correlated with ui
expe  = data[2:4166,1];
expe2 = expe.^2;
occ   = data[2:4166,3];
ind   = data[2:4166,4];
unio  = data[2:4166,9];

// Z1: Time-invariant variables uncorrelated with ui
fem = data[2:4166,8];
blk = data[2:4166,11];

// Z2: Time-invariant variables correlated with ui
edu = data[2:4166,10];

// Dependent variable
lwage = data[2:4166,12];

// Balanced panel: T periods for each of n individuals. The code below assumes
// the rows are sorted by individual, then by time.
T = 7;
n = 595;

x1 = wks~south~smsa~ms;
x2 = expe~expe2~occ~ind~unio;
z1 = fem~blk;
z2 = edu;

xmat = ones(n*T,1)~x1~x2~z1~z2;
k = cols(xmat);

/* ***************** */
/* Pooled Regression */
/* ***************** */
b = inv(xmat'*xmat)*(xmat'*lwage);
residp = lwage - xmat*b;
eep = residp'*residp;
varb = (eep/(n*T-k))*inv(xmat'*xmat);

// Standard errors: take the diagonal FIRST, then the square root. sqrt() applied
// to the full covariance matrix also hits the negative off-diagonal elements.
seb = sqrt(diag(varb));
tstat = b ./ seb;

print "Pooled OLS Estimates Std. Error tstats";
print b~seb~tstat;
print;

/* *********************** */
/* Fixed-Effects Estimator */
/* *********************** */

/* LSDV Approach */

// Individual identifier: 1,1,...,1,2,2,...,n (each id repeated T times)
indiv = seqa(1,1,n) .*. ones(T,1);

// Time identifier: 1..T repeated for each individual
time1 = seqa(1,1,T);
time2 = ones(n,1) .*. time1;

// Dummy variables for individuals (nT x n): column i flags the rows of individual i
D = eye(n) .*. ones(T,1);

// Dummy variables for time. DTall holds all T columns (used for the period
// means below); DT drops the last period (t = T) to serve as regressors.
DTall = ones(n,1) .*. eye(T);
DT = DTall[.,1:T-1];

// Time-variant matrix of independent variables in fixed effects
xmat1 = x1~x2;
k1 = cols(xmat1);

// Mean-creator matrix: Meaner*x replaces each observation by its individual mean
Meaner = D*inv(D'*D)*D';

// Idempotent matrix that creates Xit - Xbar_i
MD = eye(n*T) - Meaner;

xmat1MD = MD*xmat1;
lwageMD = MD*lwage;
xmat1meaner = Meaner*xmat1;
lwagemeaner = Meaner*lwage;

// Period means Xbar_t and Ybar_t (T x k1 and T x 1): sum over individuals / n
xmat1meant = (DTall'*xmat1)./n;
lwagemeant = (DTall'*lwage)./n;

// Individual means Xbar_i and Ybar_i (n x k1 and n x 1): sum over periods / T
xmat1meani = (D'*xmat1)./T;
lwagemeani = (D'*lwage)./T;

// 1. Direct regression on the regressors plus individual and time dummies
xmat2 = x1~x2~D~DT;
XX2 = xmat2'*xmat2;
invxx2 = inv(XX2);

// 2. Partial partitioned regression: within-transformed regressors plus a constant
xmat1mdones = (xmat1MD~ones(n*T,1));

// 3. Partitioned regression with both sets of fixed-effects dummies (individuals
//    and time): x_it - xbar_i - xbar_t + xbar. The dependent variable must be
//    transformed with ITS OWN period means (lwagemeant), not those of the regressors.
xmat1MDD = xmat1 - (xmat1meani.*.ones(T,1)) - (ones(n,1).*.xmat1meant) + (ones(n*T,1).*.(meanc(xmat1)'));
lwageMDD = lwage - (lwagemeani.*.ones(T,1)) - (ones(n,1).*.lwagemeant) + (ones(n*T,1).*.(meanc(lwage)'));

// Fixed effects for individuals and time
bfe = invxx2*xmat2'*lwage;
bfe2 = inv(xmat1mdones'*xmat1mdones)*(xmat1mdones'*lwageMD);
//bfe3 = inv(xmat1mDD'*xmat1mDD)*xmat1MDD'*lwagemdd;

// By using 1, Direct Regression
residfe = lwage - xmat2*bfe;
eefe = residfe'*residfe;

// Residual degrees of freedom = observations - regressors; xmat2 has
// k1 + n + (T-1) columns.
varbfe = (eefe/(rows(xmat2)-cols(xmat2)))*invxx2;
sefe = sqrt(diag(varbfe));
tfe = bfe ./ sefe;

// Report only the k1 slope coefficients, not the dummy coefficients
print " FE Estimates Std. Error tstats";
print bfe[1:k1]~sefe[1:k1]~tfe[1:k1];
print;

// Why is the estimator bad?
// There is perfect correlation between the full set of individual and time
// dummies and Experience: regress expe on the dummies and inspect the residual.
xmat3 = D~DT;
b_expe = inv(xmat3'*xmat3)*xmat3'*expe;
expe_hat = expe - xmat3*b_expe;      // residual of expe after projecting on D~DT

// Mean and variance of that residual. The variance is the mean of the SQUARED
// deviations; squaring the sum of the deviations would always give ~0.
print " Mean Variance";
print meanc(expe_hat)~(sumc((expe_hat - meanc(expe_hat)).^2)/(n*T));
print;

// Fixed effects with only individual dummies (within estimator)
xxmdinv = inv(xmat1MD'*xmat1MD);
bfe4 = xxmdinv*(xmat1MD'*lwageMD);

// Estimated individual effects: a_i = ybar_i - xbar_i * b
ai = lwagemeani - xmat1meani*bfe4;

residfe = lwage - xmat1*bfe4 - D*ai;
eefe = residfe'*residfe;
varbfe = (eefe/(n*T-n-k1))*xxmdinv;

// Diagonal first, then square root (avoids sqrt of negative covariances)
sefe = sqrt(diag(varbfe));
tfe = bfe4 ./ sefe;

print " FE Estimates Std. Error tstats";
print bfe4~sefe~tfe;
print;

// Calculating Sigma E
// resid_star = y - X*b_within still CONTAINS the individual effects; it is kept
// because the next step of the procedure uses it. sigma_e^2 itself must come from
// the within (LSDV) residuals, i.e. from eefe computed above.
resid_star = lwage - xmat1*bfe4;
s2e = eefe/(n*T-n-k1);

// Instrumental Variables: We can use Z1 as it is uncorrelated with ui.
// Z2, on the other hand, is correlated with ui. We need to build a variable
// correlated with Z2 but uncorrelated with ui. We use x1 and z1 as instrumental
// variables, both uncorrelated with ui.
iv = x1~z1;

// Note that x1 must have at least as many variables as z2 (order condition).
// Order condition for identification: here K1 = 4 (columns of x1) > L2 = 1
// (columns of z2).

// The model contains an intercept, so the constant is included both as a
// regressor and as an instrument in the steps below.
c1 = ones(n*T,1);

// Step 2 dependent variable: group means of the within residuals, repeated T
// times. With these, the step-2 residual variance estimates
// sigma*^2 = sigma_u^2 + sigma_e^2/T.
dbar = Meaner*resid_star;

// First stage: project z2 on the instruments (constant, x1, z1)
ivc = c1~iv;
biv = inv(ivc'*ivc)*ivc'*z2;

// We obtain predicted z2 from the instrumental variables
z2bar = ivc*biv;

// We regress on the constant, z1 and z2bar (second stage of 2SLS)
iv2 = c1~z1~z2bar;
biv2 = inv(iv2'*iv2)*iv2'*dbar;

// 2SLS residuals are formed with the ACTUAL z2, not its fitted value;
// otherwise the first-stage error would be added to the residual variance.
uiEitbar = dbar - (c1~z1~z2)*biv2;

// We obtain the whole sigma*^2 = Sigma_u^2 + (Sigma_e^2)/T
// NOTE(review): the divisor n*T-k-1 is kept from the original; it does not
// match the number of step-2 regressors -- confirm the intended degrees of freedom.
s2star = (uiEitbar'*uiEitbar)/(n*T-k-1);
s2u = s2star - s2e/T;

// A negative variance estimate would make theta undefined; fall back to 0
// (theta = 0, i.e. no quasi-demeaning) and say so.
if s2u <= 0;
    print "Warning: estimated sigma_u^2 is not positive; setting it to 0.";
    s2u = 0;
endif;

/* Random Effects */
// GLS quasi-demeaning weight
theta = 1 - (sqrt(s2e)/(sqrt(T*s2u + s2e)));
Mdtheta = eye(n*T) - theta*Meaner;

ystar = Mdtheta*lwage;
xstar = Mdtheta*xmat;

xsxsinv = inv(xstar'*xstar);
bre = xsxsinv*(xstar'*ystar);
residre = ystar - xstar*bre;

// xstar has k columns (the intercept is already counted in k)
s2re = (residre'*residre)/(n*T-k);
varbre = s2re*xsxsinv;
tre = bre ./ sqrt(diag(varbre));

print "RE Estimates tstats";
print bre~tre;
print;

// Instrumental Variables
// Instrument set: constant, within-transformed x1 and x2, z1, and the individual
// means of x1 (the first cols(x1) columns of xmat1meaner, since xmat1 = x1~x2).
v = c1~xmat1md~z1~xmat1meaner[.,1:cols(x1)];

// Pieces of the projection onto v, computed once and reused
vvinv = inv(v'*v);
vxs = v'*xstar;
xpxinv = inv(vxs'*vvinv*vxs);

bht = xpxinv*(vxs'*vvinv*(v'*ystar));
residht = ystar - xstar*bht;
s2ht = (residht'*residht)/(n*T-k);
varbht = s2ht*xpxinv;
tht = bht ./ sqrt(diag(varbht));

print "IV Estimates tstats";
print bht~tht;
print;