Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions doubleml/double_ml.py
Original file line number Diff line number Diff line change
Expand Up @@ -537,7 +537,7 @@ def fit(self, n_jobs_cv=None, store_predictions=True, external_predictions=None,
self._fit_sensitivity_elements(nuisance_predictions)

# aggregated parameter estimates and standard errors from repeated cross-fitting
self.coef, self.se = _aggregate_coefs_and_ses(self._all_coef, self._all_se, self._var_scaling_factors)
self.coef, self.se = _aggregate_coefs_and_ses(self._all_coef, self._all_se)

# validate sensitivity elements (e.g., re-estimate nu2 if negative)
self._validate_sensitivity_elements()
Expand Down Expand Up @@ -1392,7 +1392,7 @@ def _est_causal_pars_and_se(self):
self._all_se[self._i_treat, self._i_rep], self._var_scaling_factors[self._i_treat] = self._se_causal_pars()

# aggregated parameter estimates and standard errors from repeated cross-fitting
self.coef, self.se = _aggregate_coefs_and_ses(self._all_coef, self._all_se, self._var_scaling_factors)
self.coef, self.se = _aggregate_coefs_and_ses(self._all_coef, self._all_se)

# Score estimation and elements
@abstractmethod
Expand Down
8 changes: 4 additions & 4 deletions doubleml/double_ml_framework.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ def __add__(self, other):
# compute standard errors (Uses factor 1/n for scaling!)
sigma2_hat = np.divide(np.mean(np.square(scaled_psi), axis=0), var_scaling_factors.reshape(-1, 1))
all_ses = np.sqrt(sigma2_hat)
thetas, ses = _aggregate_coefs_and_ses(all_thetas, all_ses, var_scaling_factors)
thetas, ses = _aggregate_coefs_and_ses(all_thetas, all_ses)

doubleml_dict = {
"thetas": thetas,
Expand Down Expand Up @@ -358,7 +358,7 @@ def __sub__(self, other):
# compute standard errors
sigma2_hat = np.divide(np.mean(np.square(scaled_psi), axis=0), var_scaling_factors.reshape(-1, 1))
all_ses = np.sqrt(sigma2_hat)
thetas, ses = _aggregate_coefs_and_ses(all_thetas, all_ses, var_scaling_factors)
thetas, ses = _aggregate_coefs_and_ses(all_thetas, all_ses)

doubleml_dict = {
"thetas": thetas,
Expand Down Expand Up @@ -507,8 +507,8 @@ def _calc_sensitivity_analysis(self, cf_y, cf_d, rho, level):
all_sigma_upper[i_theta, i_rep] = np.sqrt(sigma2_upper_hat)

# aggregate coefs and ses over n_rep
theta_lower, sigma_lower = _aggregate_coefs_and_ses(all_theta_lower, all_sigma_lower, self._var_scaling_factors)
theta_upper, sigma_upper = _aggregate_coefs_and_ses(all_theta_upper, all_sigma_upper, self._var_scaling_factors)
theta_lower, sigma_lower = _aggregate_coefs_and_ses(all_theta_lower, all_sigma_lower)
theta_upper, sigma_upper = _aggregate_coefs_and_ses(all_theta_upper, all_sigma_upper)

# per repetition confidence intervals
quant = norm.ppf(level)
Expand Down
10 changes: 6 additions & 4 deletions doubleml/plm/tests/_utils_plr_manual.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ def fit_plr_multitreat(
g_params=None,
use_other_treat_as_covariate=True,
):
n_obs = len(y)
n_d = d.shape[1]

thetas = list()
Expand Down Expand Up @@ -62,7 +61,9 @@ def fit_plr_multitreat(
theta_vec = np.array([xx[i_d] for xx in thetas])
se_vec = np.array([xx[i_d] for xx in ses])
theta[i_d] = np.median(theta_vec)
se[i_d] = np.sqrt(np.median(np.power(se_vec, 2) * n_obs + np.power(theta_vec - theta[i_d], 2)) / n_obs)
upper_bound_vec = theta_vec + 1.96 * se_vec
upper_bound = np.median(upper_bound_vec)
se[i_d] = (upper_bound - theta[i_d]) / 1.96

res = {
"theta": theta,
Expand All @@ -78,7 +79,6 @@ def fit_plr_multitreat(


def fit_plr(y, x, d, learner_l, learner_m, learner_g, all_smpls, score, n_rep=1, l_params=None, m_params=None, g_params=None):
n_obs = len(y)

thetas = np.zeros(n_rep)
ses = np.zeros(n_rep)
Expand All @@ -95,7 +95,9 @@ def fit_plr(y, x, d, learner_l, learner_m, learner_g, all_smpls, score, n_rep=1,
all_g_hat.append(g_hat)

theta = np.median(thetas)
se = np.sqrt(np.median(np.power(ses, 2) * n_obs + np.power(thetas - theta, 2)) / n_obs)
upper_bound_vec = thetas + 1.96 * ses
upper_bound = np.median(upper_bound_vec)
se = (upper_bound - theta) / 1.96

res = {
"theta": theta,
Expand Down
1 change: 0 additions & 1 deletion doubleml/tests/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,6 @@ def generate_dml_dict(psi_a, psi_b):
thetas, ses = _aggregate_coefs_and_ses(
all_coefs=all_thetas,
all_ses=all_ses,
var_scaling_factors=var_scaling_factors,
)
scaled_psi = psi_b / np.mean(psi_a, axis=0)

Expand Down
4 changes: 2 additions & 2 deletions doubleml/tests/_utils_doubleml_sensitivity_manual.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ def doubleml_sensitivity_manual(sensitivity_elements, all_coefs, psi, psi_deriv,
all_sigma_lower = np.transpose(np.sqrt(np.divide(np.mean(np.square(psi_lower), axis=0), var_scaling_factor)))
all_sigma_upper = np.transpose(np.sqrt(np.divide(np.mean(np.square(psi_upper), axis=0), var_scaling_factor)))

theta_lower, sigma_lower = _aggregate_coefs_and_ses(all_theta_lower, all_sigma_lower, var_scaling_factor)
theta_upper, sigma_upper = _aggregate_coefs_and_ses(all_theta_upper, all_sigma_upper, var_scaling_factor)
theta_lower, sigma_lower = _aggregate_coefs_and_ses(all_theta_lower, all_sigma_lower)
theta_upper, sigma_upper = _aggregate_coefs_and_ses(all_theta_upper, all_sigma_upper)

quant = norm.ppf(level)

Expand Down
13 changes: 7 additions & 6 deletions doubleml/tests/test_framework_sensitivity.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import numpy as np
import pytest
from sklearn.linear_model import LinearRegression, LogisticRegression

Expand Down Expand Up @@ -121,19 +122,19 @@ def test_dml_framework_sensitivity_operations(dml_framework_sensitivity_fixture)
mul_zero_obj = dml_framework_sensitivity_fixture["dml_framework_obj_mul_zero_obj"]

for key in ["theta", "se", "ci"]:
assert add_obj.sensitivity_params[key]["upper"] == mul_obj.sensitivity_params[key]["upper"]
assert add_obj.sensitivity_params[key]["lower"] == mul_obj.sensitivity_params[key]["lower"]
assert np.allclose(add_obj.sensitivity_params[key]["upper"], mul_obj.sensitivity_params[key]["upper"])
assert np.allclose(add_obj.sensitivity_params[key]["lower"], mul_obj.sensitivity_params[key]["lower"])

assert mul_zero_obj.sensitivity_params[key]["upper"] == 0
assert mul_zero_obj.sensitivity_params[key]["lower"] == 0

assert add_obj.sensitivity_params["rv"] == mul_obj.sensitivity_params["rv"]
assert np.allclose(add_obj.sensitivity_params["rv"], mul_obj.sensitivity_params["rv"])
assert mul_zero_obj.sensitivity_params["rv"] > 0.99 # due to degenerated variance
assert mul_zero_obj.sensitivity_params["rva"] > 0.99 # due to degenerated variance

sub_obj = dml_framework_sensitivity_fixture["dml_framework_obj_sub_obj2"]
assert sub_obj.sensitivity_params["theta"]["upper"] == -1 * sub_obj.sensitivity_params["theta"]["lower"]
assert sub_obj.sensitivity_params["se"]["upper"] == sub_obj.sensitivity_params["se"]["lower"]
assert sub_obj.sensitivity_params["ci"]["upper"] == -1 * sub_obj.sensitivity_params["ci"]["lower"]
assert np.allclose(sub_obj.sensitivity_params["theta"]["upper"], -1 * sub_obj.sensitivity_params["theta"]["lower"])
assert np.allclose(sub_obj.sensitivity_params["se"]["upper"], sub_obj.sensitivity_params["se"]["lower"])
assert np.allclose(sub_obj.sensitivity_params["ci"]["upper"], -1 * sub_obj.sensitivity_params["ci"]["lower"])
assert sub_obj.sensitivity_params["rv"] < 0.01
assert sub_obj.sensitivity_params["rva"] < 0.01
8 changes: 4 additions & 4 deletions doubleml/tests/test_multiway_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,10 +121,10 @@ def dml_pliv_multiway_cluster_fixture(generate_data_iv, learner, score):
ses[i_rep] = se[0]

theta = np.median(thetas)
n_clusters1 = len(np.unique(obj_dml_cluster_data.cluster_vars[:, 0]))
n_clusters2 = len(np.unique(obj_dml_cluster_data.cluster_vars[:, 1]))
var_scaling_factor = min(n_clusters1, n_clusters2)
se = np.sqrt(np.median(np.power(ses, 2) * var_scaling_factor + np.power(thetas - theta, 2)) / var_scaling_factor)

all_upper_bounds = thetas + 1.96 * ses
upper_bound = np.median(all_upper_bounds, axis=0)
se = (upper_bound - theta) / 1.96

res_dict = {
"coef": dml_pliv_obj.coef,
Expand Down
19 changes: 8 additions & 11 deletions doubleml/utils/_estimation.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,20 +239,17 @@ def abs_ipw_score(theta):
return ipw_est


def _aggregate_coefs_and_ses(all_coefs, all_ses, var_scaling_factors):
if var_scaling_factors.shape == (all_coefs.shape[0],):
scaling_factors = np.repeat(var_scaling_factors[:, np.newaxis], all_coefs.shape[1], axis=1)
else:
scaling_factors = var_scaling_factors
def _aggregate_coefs_and_ses(all_coefs, all_ses):
# already expects equally scaled variances over all repetitions
# aggregation is done over dimension 1, such that the coefs and ses have to be of shape (n_coefs, n_rep)
coefs = np.median(all_coefs, 1)

coefs_deviations = np.square(all_coefs - coefs.reshape(-1, 1))
scaled_coef_deviations = np.divide(coefs_deviations, scaling_factors)
all_variances = np.square(all_ses) + scaled_coef_deviations

var = np.median(all_variances, 1)
ses = np.sqrt(var)
# construct the upper bounds & aggregate
critical_value = 1.96
all_upper_bounds = all_coefs + critical_value * all_ses
agg_upper_bounds = np.median(all_upper_bounds, axis=1)
# reverse to calculate the standard errors
ses = (agg_upper_bounds - coefs) / critical_value
return coefs, ses


Expand Down
24 changes: 13 additions & 11 deletions doubleml/utils/tests/test_var_est_and_aggregation.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import numpy as np
import pytest
from scipy.stats import norm

from doubleml.utils._estimation import _aggregate_coefs_and_ses, _var_est

Expand All @@ -14,19 +15,24 @@ def n_coefs(request):
return request.param


@pytest.fixture(scope="module", params=[0.9, 0.95, 0.975])
def level(request):
    # confidence level; the fixture consumer derives the critical value via norm.ppf(level)
    return request.param


@pytest.fixture(scope="module")
def test_var_est_and_aggr_fixture(n_rep, n_coefs):
def test_var_est_and_aggr_fixture(n_rep, n_coefs, level):
np.random.seed(42)

all_thetas = np.full((n_coefs, n_rep), np.nan)
all_ses = np.full((n_coefs, n_rep), np.nan)
expected_all_var = np.full((n_coefs, n_rep), np.nan)
expected_all_upper_bounds = np.full((n_coefs, n_rep), np.nan)
all_var_scaling_factors = np.full((n_coefs, n_rep), np.nan)

for i_coef in range(n_coefs):
n_obs = np.random.randint(100, 200)
for i_rep in range(n_rep):
psi = np.random.normal(size=(n_obs))
psi = np.random.normal(size=(n_obs), loc=i_coef, scale=i_coef + 1)
psi_deriv = np.ones((n_obs))

all_thetas[i_coef, i_rep] = np.mean(psi)
Expand All @@ -37,27 +43,23 @@ def test_var_est_and_aggr_fixture(n_rep, n_coefs):
all_var_scaling_factors[i_coef, i_rep] = var_scaling_factor

expected_theta = np.median(all_thetas, axis=1)
critical_value = norm.ppf(level)
for i_coef in range(n_coefs):
for i_rep in range(n_rep):
theta_deviation = np.square(all_thetas[i_coef, i_rep] - expected_theta[i_coef])
expected_all_var[i_coef, i_rep] = np.square(all_ses[i_coef, i_rep]) + np.divide(
theta_deviation, all_var_scaling_factors[i_coef, i_rep]
)
expected_all_upper_bounds[i_coef, i_rep] = all_thetas[i_coef, i_rep] + critical_value * all_ses[i_coef, i_rep]

expected_se = np.sqrt(np.median(expected_all_var, axis=1))
expected_upper_bounds = np.median(expected_all_upper_bounds, axis=1)
expected_se = (expected_upper_bounds - expected_theta) / critical_value

# without n_rep
theta, se = _aggregate_coefs_and_ses(
all_coefs=all_thetas,
all_ses=all_ses,
var_scaling_factors=all_var_scaling_factors[:, 0],
)

# with n_rep
theta_2, se_2 = _aggregate_coefs_and_ses(
all_coefs=all_thetas,
all_ses=all_ses,
var_scaling_factors=all_var_scaling_factors,
)

result_dict = {
Expand Down