Skip to content

Commit 9054052

Browse files
authored
Implement (non-)robust residual prediction test (#127)
* Implement (non-)robust residual prediction test * Use robust in test. * Fix test
1 parent ece43da commit 9054052

File tree

2 files changed

+31
-12
lines changed

2 files changed

+31
-12
lines changed

ivmodels/tests/residual_prediction.py

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ def residual_prediction_test(
1111
X,
1212
y,
1313
C=None,
14+
robust=False,
1415
nonlinear_model=None,
1516
fit_intercept=True,
1617
train_fraction=None,
@@ -36,6 +37,8 @@ def residual_prediction_test(
3637
Outcomes.
3738
C: np.ndarray of dimension (n, mc) or None, optional, default = None
3839
Included exogenous regressors.
40+
robust: bool or string, optional, default = False
41+
Whether to use heteroskedasticity-robust standard errors.
3942
nonlinear_model: object, optional, default = None
4043
Object with a ``fit`` and ``predict`` method. If ``None``, uses an
4144
``sklearn.ensemble.RandomForestRegressor()``.
@@ -136,15 +139,22 @@ def residual_prediction_test(
136139

137140
XCb_proj = np.hstack([proj(np.hstack([Zb, Cb]), Xb), Cb])
138141
XCb = np.hstack([Xb, Cb])
139-
# pinv(X) = (X^T @ X)^(-1) @ X^T
140-
sigma_sq_hat = (
141-
np.mean((wb - np.linalg.pinv(XCb_proj).T @ XCb.T @ wb) ** 2 * residuals_b**2)
142-
- np.mean(wb * residuals_b) ** 2
143-
)
142+
143+
if robust:
144+
# pinv(X) = (X^T @ X)^(-1) @ X^T
145+
sigma_sq_hat = (
146+
np.mean(
147+
(wb - np.linalg.pinv(XCb_proj).T @ XCb.T @ wb) ** 2 * residuals_b**2
148+
)
149+
- np.mean(wb * residuals_b) ** 2
150+
)
151+
else:
152+
sigma_sq_hat = np.mean((wb - np.linalg.pinv(XCb_proj).T @ XCb.T @ wb) ** 2)
153+
sigma_sq_hat *= np.mean(residuals_b**2)
144154

145155
if sigma_sq_hat < gamma: # Pre-test for variance
146156
return -np.inf, 1
147157

148-
stat = wb.T @ residuals_b / np.sqrt(sigma_sq_hat) / np.sqrt(n)
158+
stat = wb.T @ residuals_b / np.sqrt(sigma_sq_hat) / np.sqrt(Xb.shape[0])
149159
p_value = 1 - scipy.stats.norm.cdf(stat)
150160
return stat, p_value

tests/tests/test_residual_prediction.py

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,17 @@
11
import numpy as np
22
import pytest
3+
import scipy.stats
34
from sklearn.ensemble import RandomForestRegressor
45

56
from ivmodels.tests import residual_prediction_test
67

78

9+
@pytest.mark.parametrize("robust", [False, True])
810
@pytest.mark.parametrize(
911
"n, k, mx, mc, fit_intercept",
10-
[(200, 3, 3, 1, True), (200, 3, 1, 1, False), (200, 15, 5, 5, False)],
12+
[(500, 3, 3, 1, True), (500, 3, 1, 1, False), (500, 15, 5, 5, True)],
1113
)
12-
def test_residual_prediction_test(n, k, mx, mc, fit_intercept):
14+
def test_residual_prediction_test(n, k, mx, mc, fit_intercept, robust):
1315
rng = np.random.default_rng(0)
1416

1517
Pi = rng.normal(size=(k, mx))
@@ -19,7 +21,7 @@ def test_residual_prediction_test(n, k, mx, mc, fit_intercept):
1921
Pi_CX = rng.normal(size=(mc, mx))
2022
Pi_Cy = rng.normal(size=(mc, 1))
2123

22-
n_seeds = 50
24+
n_seeds = 20
2325
statistics = np.zeros(n_seeds)
2426
p_values = np.zeros(n_seeds)
2527

@@ -29,20 +31,27 @@ def test_residual_prediction_test(n, k, mx, mc, fit_intercept):
2931
U = rng.normal(size=(n, mx + 1))
3032
X = Z @ Pi + U @ gamma + C @ Pi_CX + rng.normal(size=(n, mx))
3133
X[:, 0] += Z[:, 0] ** 2 # allow for nonlinearity Z -> X
32-
y = X @ beta + U[:, 0:1] + U[:, 0:1] ** 3 + C @ Pi_Cy + rng.normal(size=(n, 1))
34+
noise = rng.normal(size=(n, 1))
35+
if robust:
36+
noise *= Z[:, 0:1] ** 2
37+
y = X @ beta + U[:, 0:1] + np.sin(U[:, 0:1]) + C @ Pi_Cy + noise
3338

3439
statistics[idx], p_values[idx] = residual_prediction_test(
3540
Z=Z,
3641
X=X,
3742
y=y,
3843
C=C,
44+
robust=robust,
3945
nonlinear_model=RandomForestRegressor(n_estimators=20, random_state=0),
4046
fit_intercept=fit_intercept,
41-
train_fraction=0.6,
47+
train_fraction=0.4,
4248
seed=0,
4349
)
4450

45-
assert np.mean(p_values < 0.1) < 0.05
51+
assert (
52+
scipy.stats.kstest(p_values, scipy.stats.uniform(loc=0.0, scale=1.0).cdf).pvalue
53+
> 0.05
54+
)
4655

4756

4857
def test_residual_prediction_test_rejects():

0 commit comments

Comments (0)