%%%%%%%%%%%%%%%%%%%%%% % Bootstrap % %%%%%%%%%%%%%%%%%%%%%% close all clear all clc %%% Swithces doresampleresiduals = 0; doresamplepairedXY = 1; if (doresampleresiduals) %% %%%%% Generating Population Data over OLS assumtions, e~Uiid(0,1) %%%%% N = 1000; % Population size x1 = ones(N, 1); x2 = linspace(-10,10,N)'; % Poblational Explanatory variable 1 e = rand(N, 1); %Errors Uniformly distributed beta2 = 0.04; % True population parameter histogram(e); y = x1 + beta2*x2 + e; % Poblational dependent variable, True DGP X = [x1 x2]; p_OLS = (X'*X)\(X'*y); M = [y x1 x2]; %%%% Sample and OLS over sample %%%% n = 30; % Small sample to avoid TLC k = 2; sample_index = randi(N, n, 1); % Sample index S = M(sample_index,:); % Take the sample s_x2 = S(:,3); % Sample of x2 s_y = S(:,1); % Sample of y s_X = [S(:,2) s_x2]; % Sample matrix X s_OLS = (s_X'*s_X)\(s_X'*s_y); % OLS for the sample s_eh = s_y-s_X*s_OLS; % Sample errors estimated (e-hat) s_S2 = (s_eh'*s_eh)/(n-k); % Sample sigma2 estimated s_var_OLS = diag(s_S2./(s_X'*s_X)); % Sample var(b)-hat s_t = s_OLS./(s_var_OLS.^0.5); % t-stat over Ho: beta_k = 0, assuming e~N(0,sigma2) s_pv = tcdf(s_t, n-k, 'upper'); alpha = 0.05; s_qr_l = tinv(1-(alpha/2), n-k); s_qr_u = tinv(alpha/2, n-k); s_CI_l = s_OLS(2)-(s_var_OLS(2)^0.5)*abs(s_qr_l); % Lower boundarie of the confidence interval s_CI_u = s_OLS(2)+(s_var_OLS(2)^0.5)*abs(s_qr_u); % Upper boundarie of the confidence interval disp(['Est of b2 for OLS = ' num2str(s_OLS(2))... ' Confidence interval for t-student = ' '(' num2str(s_CI_l) ', ' num2str(s_CI_u) ')']) %%% Bootstrap errors resampling %%% B = 999; % Size of bootstrap I = n; % Size of the bootstrap sample, resample b_OLS = zeros(B, k); for b = 1:B rs_eh = zeros(n,1); for i = 1:I rs_eh(i) = s_eh(randi(n)); %take a random observation for the vector of e-hat and save it end b_y = s_X*s_OLS + rs_eh; b_OLS(b,:) = (s_X'*s_X)\(s_X'*b_y); end mean(b_OLS(:,2)); var(b_OLS(:,2)); b_qr_l = quantile(b_OLS(:,2), (1-(alpha/2))); b_qr_u = quantile(b_OLS(:,2), alpha/2); b_CI_l = s_OLS(2)-(s_var_OLS(2)^0.5)*abs(b_qr_l); % Lower boundarie of the confidence interval b_CI_u = s_OLS(2)+(s_var_OLS(2)^0.5)*abs(b_qr_u); % Upper boundarie of the confidence interval disp(['Est of b2 for OLS = ' num2str(s_OLS(2))... ' Confidence interval for bootstrap = ' '(' num2str(b_CI_l) ', ' num2str(b_CI_u) ')']) histogram(b_OLS(:,2)) end % if (doresampleresiduals) if (doresamplepairedXY) %% clc close all clear all %%%%% Generating Population Data with heteroscedasticity %%%%% N = 1000; % Population size x1 = ones(N, 1); x2 = linspace(0,10,N)'; % Poblational Explanatory variable 1 e = randn(N,1); %Errors Normal distributed with heteroscedasticity beta2 = 2; % True population parameter histogram(e); y = x1 + beta2*x2 + e.*x2.^2; % Poblational dependent variable, True DGP X = [x1 x2]; p_OLS = (X'*X)\(X'*y); M = [y x1 x2]; %%%% Sample and OLS over sample %%%% n = 20; % Small sample to avoid TLC k = 2; sample_index = randi(N, n, 1); % Sample index S = M(sample_index,:); % Take the sample s_x2 = S(:,3); % Sample of x2 s_y = S(:,1); % Sample of y s_X = [S(:,2) s_x2]; % Sample matrix X scatter(s_y,s_x2) lsline s_OLS = (s_X'*s_X)\(s_X'*s_y); % OLS for the sample s_eh = s_y-s_X*s_OLS; % Sample errors estimated (e-hat) s_S2 = (s_eh'*s_eh)/(n-k); % Sample sigma2 estimated s_var_OLS = diag(s_S2./(s_X'*s_X)); % Sample var(b)-hat s_t = s_OLS./(s_var_OLS.^0.5); % t-stat over Ho: beta_k = 0, assuming e~N(0,sigma2) s_pv = tcdf(s_t, n-k, 'upper'); disp(['Est of b2 for sample = ' num2str(s_OLS(2))... ' SD of b2 for sample = ' num2str(s_var_OLS(2)^0.5)... ' t-stat of b2 for sample = ' num2str(s_t(2))... ' p-value for b2 assuming e~N(0,sigma2) = ' num2str(s_pv(2))]) %%% Bootstrap XY paired or case sample %%% B = 999; % Size of bootstrap I = n; % Size of the bootstrap sample, resample b_OLS = zeros(B, k); for b = 1:B rs_xy = zeros(n,3); for i = 1:I rs_xy(i,:) = S(randi(n),:); %take a random observation for the sample matrix S end b_X = rs_xy(:,2:3); b_y = rs_xy(:,1); b_OLS(b,:) = (b_X'*b_X)\(b_X'*b_y); end b_e_OLS = mean(b_OLS); %boostraped expectation(b) b_var_OLS = var(b_OLS); % bootstraped var(b)-hat b_t = b_e_OLS./(b_var_OLS.^0.5); % t-stat over Ho: beta_k = 0) b_pv = tcdf(b_t, n-k, 'upper'); disp(['Est of b2 for bootstrap = ' num2str(b_e_OLS(2))... ' SD of b2 for bootstrap = ' num2str(b_var_OLS(2)^0.5)... ' t-stat of b2 for bootstrap = ' num2str(b_t(2))... ' p-value for b2 boostraped = ' num2str(b_pv(2))]) histogram(b_OLS(:,2)) end