This example shows a regression module that calculates statistics that are associated with a linear regression.
/* Regression Routine */
/* Given X and Y, this fits Y = X B + E */
/* by least squares. */
proc iml;
start reg;
n = nrow(x); /* number of observations */
k = ncol(x); /* number of variables */
xpx = x`*x; /* crossproducts */
xpy = x`*y;
xpxi = inv(xpx); /* inverse crossproducts */
b = xpxi*xpy; /* parameter estimates */
yhat = x*b; /* predicted values */
resid = y-yhat; /* residuals */
sse = resid`*resid; /* sum of squared errors */
dfe = n-k; /* degrees of freedom error */
mse = sse/dfe; /* mean squared error */
rmse = sqrt(mse); /* root mean squared error */
covb = xpxi#mse; /* covariance of estimates */
stdb = sqrt(vecdiag(covb)); /* standard errors */
t = b/stdb; /* ttest for estimates=0 */
probt = 1-probf(t#t,1,dfe); /* significance probability */
print name b stdb t probt;
s = diag(1/stdb);
corrb = s*covb*s; /* correlation of estimates */
print ,"Covariance of Estimates", covb[r=name c=name] ,
"Correlation of Estimates",corrb[r=name c=name] ;
if nrow(tval)=0 then return; /* is a t value specified? */
projx = x*xpxi*x`; /* hat matrix */
vresid = (i(n)-projx)*mse; /* covariance of residuals */
vpred = projx#mse; /* covariance of predicted values */
h = vecdiag(projx); /* hat leverage values */
lowerm = yhat-tval#sqrt(h*mse); /* low. conf lim for mean */
upperm = yhat+tval#sqrt(h*mse); /* upper lim. for mean */
lower = yhat-tval#sqrt(h*mse+mse); /* lower lim. for indiv*/
upper = yhat+tval#sqrt(h*mse+mse);/* upper lim. for indiv */
print ,,"Predicted Values, Residuals, and Limits" ,,
y yhat resid h lowerm upperm lower upper;
finish reg;
/* Routine to test a linear combination of the estimates */
/* given L, this routine tests hypothesis that LB = 0. */
start test;
dfn=nrow(L);
Lb=L*b;
vLb=L*xpxi*L`;
q=Lb`*inv(vLb)*Lb /dfn;
f=q/mse;
prob=1-probf(f,dfn,dfe);
print ,f dfn dfe prob;
finish test;
/* Run it on population of U.S. for decades beginning 1790 */
x= { 1 1 1,
1 2 4,
1 3 9,
1 4 16,
1 5 25,
1 6 36,
1 7 49,
1 8 64 };
y= {3.929,5.308,7.239,9.638,12.866,17.069,23.191,31.443};
name={"Intercept", "Decade", "Decade**2" };
tval=2.57; /* for 5 df at 0.025 level to get 95% conf. int. */
reset fw=7;
run reg;
do;
print ,"TEST Coef for Linear";
L={0 1 0 };
run test;
print ,"TEST Coef for Linear,Quad";
L={0 1 0,0 0 1};
run test;
print ,"TEST Linear+Quad = 0";
L={0 1 1 };
run test;
end;
The results are shown in Output 9.3.1.
Output 9.3.1: Regression Results
Intercept |
5.06934 |
0.96559 |
5.24997 |
0.00333 |
Decade |
-1.1099 |
0.4923 |
-2.2546 |
0.07385 |
Decade**2 |
0.53964 |
0.0534 |
10.106 |
0.00016 |
0.93237 |
-0.4362 |
0.04277 |
-0.4362 |
0.24236 |
-0.0257 |
0.04277 |
-0.0257 |
0.00285 |
1 |
-0.9177 |
0.8295 |
-0.9177 |
1 |
-0.9762 |
0.8295 |
-0.9762 |
1 |
Predicted Values, Residuals, and Limits |
3.929 |
4.49904 |
-0.57 |
0.70833 |
3.00202 |
5.99606 |
2.17419 |
6.82389 |
5.308 |
5.00802 |
0.29998 |
0.27976 |
4.06721 |
5.94883 |
2.99581 |
7.02023 |
7.239 |
6.59627 |
0.64273 |
0.23214 |
5.73926 |
7.45328 |
4.62185 |
8.57069 |
9.638 |
9.26379 |
0.37421 |
0.27976 |
8.32298 |
10.2046 |
7.25158 |
11.276 |
12.866 |
13.0106 |
-0.1446 |
0.27976 |
12.0698 |
13.9514 |
10.9984 |
15.0228 |
17.069 |
17.8367 |
-0.7677 |
0.23214 |
16.9797 |
18.6937 |
15.8622 |
19.8111 |
23.191 |
23.742 |
-0.551 |
0.27976 |
22.8012 |
24.6828 |
21.7298 |
25.7542 |
31.443 |
30.7266 |
0.71638 |
0.70833 |
29.2296 |
32.2236 |
28.4018 |
33.0515 |
TEST Coef for Linear,Quad |