Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions doubleml/double_ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -537,7 +537,7 @@ def fit(self, n_jobs_cv=None, store_predictions=True, external_predictions=None,
self._fit_sensitivity_elements(nuisance_predictions)

# aggregated parameter estimates and standard errors from repeated cross-fitting
self.coef, self.se = _aggregate_coefs_and_ses(self._all_coef, self._all_se, self._var_scaling_factors)
self.coef, self.se = _aggregate_coefs_and_ses(self._all_coef, self._all_se)

# validate sensitivity elements (e.g., re-estimate nu2 if negative)
self._validate_sensitivity_elements()
Expand Down Expand Up @@ -1392,7 +1392,7 @@ def _est_causal_pars_and_se(self):
self._all_se[self._i_treat, self._i_rep], self._var_scaling_factors[self._i_treat] = self._se_causal_pars()

# aggregated parameter estimates and standard errors from repeated cross-fitting
self.coef, self.se = _aggregate_coefs_and_ses(self._all_coef, self._all_se, self._var_scaling_factors)
self.coef, self.se = _aggregate_coefs_and_ses(self._all_coef, self._all_se)

# Score estimation and elements
@abstractmethod
Expand Down
8 changes: 4 additions & 4 deletions doubleml/double_ml_framework.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ def __add__(self, other):
# compute standard errors (Uses factor 1/n for scaling!)
sigma2_hat = np.divide(np.mean(np.square(scaled_psi), axis=0), var_scaling_factors.reshape(-1, 1))
all_ses = np.sqrt(sigma2_hat)
thetas, ses = _aggregate_coefs_and_ses(all_thetas, all_ses, var_scaling_factors)
thetas, ses = _aggregate_coefs_and_ses(all_thetas, all_ses)

doubleml_dict = {
"thetas": thetas,
Expand Down Expand Up @@ -358,7 +358,7 @@ def __sub__(self, other):
# compute standard errors
sigma2_hat = np.divide(np.mean(np.square(scaled_psi), axis=0), var_scaling_factors.reshape(-1, 1))
all_ses = np.sqrt(sigma2_hat)
thetas, ses = _aggregate_coefs_and_ses(all_thetas, all_ses, var_scaling_factors)
thetas, ses = _aggregate_coefs_and_ses(all_thetas, all_ses)

doubleml_dict = {
"thetas": thetas,
Expand Down Expand Up @@ -507,8 +507,8 @@ def _calc_sensitivity_analysis(self, cf_y, cf_d, rho, level):
all_sigma_upper[i_theta, i_rep] = np.sqrt(sigma2_upper_hat)

# aggregate coefs and ses over n_rep
theta_lower, sigma_lower = _aggregate_coefs_and_ses(all_theta_lower, all_sigma_lower, self._var_scaling_factors)
theta_upper, sigma_upper = _aggregate_coefs_and_ses(all_theta_upper, all_sigma_upper, self._var_scaling_factors)
theta_lower, sigma_lower = _aggregate_coefs_and_ses(all_theta_lower, all_sigma_lower)
theta_upper, sigma_upper = _aggregate_coefs_and_ses(all_theta_upper, all_sigma_upper)

# per repetition confidence intervals
quant = norm.ppf(level)
Expand Down
10 changes: 6 additions & 4 deletions doubleml/plm/tests/_utils_plr_manual.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ def fit_plr_multitreat(
g_params=None,
use_other_treat_as_covariate=True,
):
n_obs = len(y)
n_d = d.shape[1]

thetas = list()
Expand Down Expand Up @@ -62,7 +61,9 @@ def fit_plr_multitreat(
theta_vec = np.array([xx[i_d] for xx in thetas])
se_vec = np.array([xx[i_d] for xx in ses])
theta[i_d] = np.median(theta_vec)
se[i_d] = np.sqrt(np.median(np.power(se_vec, 2) * n_obs + np.power(theta_vec - theta[i_d], 2)) / n_obs)
upper_bound_vec = theta_vec + 1.96 * se_vec
upper_bound = np.median(upper_bound_vec)
se[i_d] = (upper_bound - theta[i_d]) / 1.96

res = {
"theta": theta,
Expand All @@ -78,7 +79,6 @@ def fit_plr_multitreat(


def fit_plr(y, x, d, learner_l, learner_m, learner_g, all_smpls, score, n_rep=1, l_params=None, m_params=None, g_params=None):
n_obs = len(y)

thetas = np.zeros(n_rep)
ses = np.zeros(n_rep)
Expand All @@ -95,7 +95,9 @@ def fit_plr(y, x, d, learner_l, learner_m, learner_g, all_smpls, score, n_rep=1,
all_g_hat.append(g_hat)

theta = np.median(thetas)
se = np.sqrt(np.median(np.power(ses, 2) * n_obs + np.power(thetas - theta, 2)) / n_obs)
upper_bound_vec = thetas + 1.96 * ses
upper_bound = np.median(upper_bound_vec)
se = (upper_bound - theta) / 1.96

res = {
"theta": theta,
Expand Down
1 change: 0 additions & 1 deletion doubleml/tests/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,6 @@ def generate_dml_dict(psi_a, psi_b):
thetas, ses = _aggregate_coefs_and_ses(
all_coefs=all_thetas,
all_ses=all_ses,
var_scaling_factors=var_scaling_factors,
)
scaled_psi = psi_b / np.mean(psi_a, axis=0)

Expand Down
4 changes: 2 additions & 2 deletions doubleml/tests/_utils_doubleml_sensitivity_manual.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ def doubleml_sensitivity_manual(sensitivity_elements, all_coefs, psi, psi_deriv,
all_sigma_lower = np.transpose(np.sqrt(np.divide(np.mean(np.square(psi_lower), axis=0), var_scaling_factor)))
all_sigma_upper = np.transpose(np.sqrt(np.divide(np.mean(np.square(psi_upper), axis=0), var_scaling_factor)))

theta_lower, sigma_lower = _aggregate_coefs_and_ses(all_theta_lower, all_sigma_lower, var_scaling_factor)
theta_upper, sigma_upper = _aggregate_coefs_and_ses(all_theta_upper, all_sigma_upper, var_scaling_factor)
theta_lower, sigma_lower = _aggregate_coefs_and_ses(all_theta_lower, all_sigma_lower)
theta_upper, sigma_upper = _aggregate_coefs_and_ses(all_theta_upper, all_sigma_upper)

quant = norm.ppf(level)

Expand Down
13 changes: 7 additions & 6 deletions doubleml/tests/test_framework_sensitivity.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import numpy as np
import pytest
from sklearn.linear_model import LinearRegression, LogisticRegression

Expand Down Expand Up @@ -121,19 +122,19 @@ def test_dml_framework_sensitivity_operations(dml_framework_sensitivity_fixture)
mul_zero_obj = dml_framework_sensitivity_fixture["dml_framework_obj_mul_zero_obj"]

for key in ["theta", "se", "ci"]:
assert add_obj.sensitivity_params[key]["upper"] == mul_obj.sensitivity_params[key]["upper"]
assert add_obj.sensitivity_params[key]["lower"] == mul_obj.sensitivity_params[key]["lower"]
assert np.allclose(add_obj.sensitivity_params[key]["upper"], mul_obj.sensitivity_params[key]["upper"])
assert np.allclose(add_obj.sensitivity_params[key]["lower"], mul_obj.sensitivity_params[key]["lower"])

assert mul_zero_obj.sensitivity_params[key]["upper"] == 0
assert mul_zero_obj.sensitivity_params[key]["lower"] == 0

assert add_obj.sensitivity_params["rv"] == mul_obj.sensitivity_params["rv"]
assert np.allclose(add_obj.sensitivity_params["rv"], mul_obj.sensitivity_params["rv"])
assert mul_zero_obj.sensitivity_params["rv"] > 0.99 # due to degenerated variance
assert mul_zero_obj.sensitivity_params["rva"] > 0.99 # due to degenerated variance

sub_obj = dml_framework_sensitivity_fixture["dml_framework_obj_sub_obj2"]
assert sub_obj.sensitivity_params["theta"]["upper"] == -1 * sub_obj.sensitivity_params["theta"]["lower"]
assert sub_obj.sensitivity_params["se"]["upper"] == sub_obj.sensitivity_params["se"]["lower"]
assert sub_obj.sensitivity_params["ci"]["upper"] == -1 * sub_obj.sensitivity_params["ci"]["lower"]
assert np.allclose(sub_obj.sensitivity_params["theta"]["upper"], -1 * sub_obj.sensitivity_params["theta"]["lower"])
assert np.allclose(sub_obj.sensitivity_params["se"]["upper"], sub_obj.sensitivity_params["se"]["lower"])
assert np.allclose(sub_obj.sensitivity_params["ci"]["upper"], -1 * sub_obj.sensitivity_params["ci"]["lower"])
assert sub_obj.sensitivity_params["rv"] < 0.01
assert sub_obj.sensitivity_params["rva"] < 0.01
8 changes: 4 additions & 4 deletions doubleml/tests/test_multiway_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,10 +121,10 @@ def dml_pliv_multiway_cluster_fixture(generate_data_iv, learner, score):
ses[i_rep] = se[0]

theta = np.median(thetas)
n_clusters1 = len(np.unique(obj_dml_cluster_data.cluster_vars[:, 0]))
n_clusters2 = len(np.unique(obj_dml_cluster_data.cluster_vars[:, 1]))
var_scaling_factor = min(n_clusters1, n_clusters2)
se = np.sqrt(np.median(np.power(ses, 2) * var_scaling_factor + np.power(thetas - theta, 2)) / var_scaling_factor)

all_upper_bounds = thetas + 1.96 * ses
upper_bound = np.median(all_upper_bounds, axis=0)
se = (upper_bound - theta) / 1.96

res_dict = {
"coef": dml_pliv_obj.coef,
Expand Down
19 changes: 8 additions & 11 deletions doubleml/utils/_estimation.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,20 +239,17 @@ def abs_ipw_score(theta):
return ipw_est


def _aggregate_coefs_and_ses(all_coefs, all_ses, var_scaling_factors):
if var_scaling_factors.shape == (all_coefs.shape[0],):
scaling_factors = np.repeat(var_scaling_factors[:, np.newaxis], all_coefs.shape[1], axis=1)
else:
scaling_factors = var_scaling_factors
def _aggregate_coefs_and_ses(all_coefs, all_ses):
# already expects equally scaled variances over all repetitions
# aggregation is done over dimension 1, such that the coefs and ses have to be of shape (n_coefs, n_rep)
coefs = np.median(all_coefs, 1)

coefs_deviations = np.square(all_coefs - coefs.reshape(-1, 1))
scaled_coef_deviations = np.divide(coefs_deviations, scaling_factors)
all_variances = np.square(all_ses) + scaled_coef_deviations

var = np.median(all_variances, 1)
ses = np.sqrt(var)
# construct the upper bounds & aggregate
critical_value = 1.96
all_upper_bounds = all_coefs + critical_value * all_ses
agg_upper_bounds = np.median(all_upper_bounds, axis=1)
# reverse to calculate the standard errors
ses = (agg_upper_bounds - coefs) / critical_value
return coefs, ses


Expand Down
24 changes: 13 additions & 11 deletions doubleml/utils/tests/test_var_est_and_aggregation.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import numpy as np
import pytest
from scipy.stats import norm

from doubleml.utils._estimation import _aggregate_coefs_and_ses, _var_est

Expand All @@ -14,19 +15,24 @@ def n_coefs(request):
return request.param


@pytest.fixture(scope="module", params=[0.9, 0.95, 0.975])
def level(request):
    # confidence level; the fixture consumer derives the critical value via norm.ppf(level)
    return request.param


@pytest.fixture(scope="module")
def test_var_est_and_aggr_fixture(n_rep, n_coefs):
def test_var_est_and_aggr_fixture(n_rep, n_coefs, level):
np.random.seed(42)

all_thetas = np.full((n_coefs, n_rep), np.nan)
all_ses = np.full((n_coefs, n_rep), np.nan)
expected_all_var = np.full((n_coefs, n_rep), np.nan)
expected_all_upper_bounds = np.full((n_coefs, n_rep), np.nan)
all_var_scaling_factors = np.full((n_coefs, n_rep), np.nan)

for i_coef in range(n_coefs):
n_obs = np.random.randint(100, 200)
for i_rep in range(n_rep):
psi = np.random.normal(size=(n_obs))
psi = np.random.normal(size=(n_obs), loc=i_coef, scale=i_coef + 1)
psi_deriv = np.ones((n_obs))

all_thetas[i_coef, i_rep] = np.mean(psi)
Expand All @@ -37,27 +43,23 @@ def test_var_est_and_aggr_fixture(n_rep, n_coefs):
all_var_scaling_factors[i_coef, i_rep] = var_scaling_factor

expected_theta = np.median(all_thetas, axis=1)
critical_value = norm.ppf(level)
for i_coef in range(n_coefs):
for i_rep in range(n_rep):
theta_deviation = np.square(all_thetas[i_coef, i_rep] - expected_theta[i_coef])
expected_all_var[i_coef, i_rep] = np.square(all_ses[i_coef, i_rep]) + np.divide(
theta_deviation, all_var_scaling_factors[i_coef, i_rep]
)
expected_all_upper_bounds[i_coef, i_rep] = all_thetas[i_coef, i_rep] + critical_value * all_ses[i_coef, i_rep]

expected_se = np.sqrt(np.median(expected_all_var, axis=1))
expected_upper_bounds = np.median(expected_all_upper_bounds, axis=1)
expected_se = (expected_upper_bounds - expected_theta) / critical_value

# without n_rep
theta, se = _aggregate_coefs_and_ses(
all_coefs=all_thetas,
all_ses=all_ses,
var_scaling_factors=all_var_scaling_factors[:, 0],
)

# with n_rep
theta_2, se_2 = _aggregate_coefs_and_ses(
all_coefs=all_thetas,
all_ses=all_ses,
var_scaling_factors=all_var_scaling_factors,
)

result_dict = {
Expand Down