Merge pull request #386 from DoubleML/o-blp-multirep

OliverSchacht · web-flow · commit 03a572fa0963 · 2026-02-23T10:56:28.000+01:00
Enable CATEs and GATEs for multiple repetitions
diff --git a/doubleml/irm/apo.py b/doubleml/irm/apo.py
@@ -579,11 +579,9 @@ def capo(self, basis, is_gate=False, **kwargs):
         if self.score not in valid_score:
             raise ValueError("Invalid score " + self.score + ". " + "Valid score " + " or ".join(valid_score) + ".")
 
-        if self.n_rep != 1:
-            raise NotImplementedError("Only implemented for one repetition. " + f"Number of repetitions is {str(self.n_rep)}.")
-
         # define the orthogonal signal
-        orth_signal = self.psi_elements["psi_b"].reshape(-1)
+        orth_signal = np.squeeze(self.psi_elements["psi_b"], axis=2)
+
         # fit the best linear predictor
         model = DoubleMLBLP(orth_signal, basis=basis, is_gate=is_gate)
         model.fit(**kwargs)
diff --git a/doubleml/irm/irm.py b/doubleml/irm/irm.py
@@ -587,11 +587,9 @@ def cate(self, basis, is_gate=False, **kwargs):
         if self.score not in valid_score:
             raise ValueError("Invalid score " + self.score + ". " + "Valid score " + " or ".join(valid_score) + ".")
 
-        if self.n_rep != 1:
-            raise NotImplementedError("Only implemented for one repetition. " + f"Number of repetitions is {str(self.n_rep)}.")
-
         # define the orthogonal signal
-        orth_signal = self.psi_elements["psi_b"].reshape(-1)
+        orth_signal = np.squeeze(self.psi_elements["psi_b"], axis=2)
+
         # fit the best linear predictor
         model = DoubleMLBLP(orth_signal, basis=basis, is_gate=is_gate)
         model.fit(**kwargs)
diff --git a/doubleml/irm/tests/test_apo.py b/doubleml/irm/tests/test_apo.py
@@ -257,7 +257,7 @@ def test_dml_apo_capo_gapo(treatment_level, cov_type):
     capo = dml_obj.capo(random_basis, cov_type=cov_type)
     assert isinstance(capo, dml.utils.blp.DoubleMLBLP)
     assert isinstance(capo.confint(), pd.DataFrame)
-    assert capo.blp_model.cov_type == cov_type
+    assert capo.blp_model[0].cov_type == cov_type
 
     groups_1 = pd.DataFrame(
         np.column_stack([obj_dml_data.data["X1"] <= -1.0, obj_dml_data.data["X1"] > 0.2]), columns=["Group 1", "Group 2"]
@@ -268,7 +268,7 @@ def test_dml_apo_capo_gapo(treatment_level, cov_type):
     assert isinstance(gapo_1, dml.utils.blp.DoubleMLBLP)
     assert isinstance(gapo_1.confint(), pd.DataFrame)
     assert all(gapo_1.confint().index == groups_1.columns.to_list())
-    assert gapo_1.blp_model.cov_type == cov_type
+    assert gapo_1.blp_model[0].cov_type == cov_type
 
     np.random.seed(42)
     groups_2 = pd.DataFrame(np.random.choice(["1", "2"], n, p=[0.1, 0.9]))
@@ -278,4 +278,49 @@ def test_dml_apo_capo_gapo(treatment_level, cov_type):
     assert isinstance(gapo_2, dml.utils.blp.DoubleMLBLP)
     assert isinstance(gapo_2.confint(), pd.DataFrame)
     assert all(gapo_2.confint().index == ["Group_1", "Group_2"])
-    assert gapo_2.blp_model.cov_type == cov_type
+    assert gapo_2.blp_model[0].cov_type == cov_type
+
+
+@pytest.mark.ci
+def test_dml_apo_capo_gapo_multiple_rep(treatment_level, cov_type):
+    n = 120
+    np.random.seed(42)
+    obj_dml_data = make_irm_data(n_obs=n, dim_x=2)
+
+    ml_g = RandomForestRegressor(n_estimators=10, random_state=42)
+    ml_m = RandomForestClassifier(n_estimators=10, random_state=42)
+
+    dml_obj = dml.DoubleMLAPO(
+        obj_dml_data,
+        ml_m=ml_m,
+        ml_g=ml_g,
+        treatment_level=treatment_level,
+        ps_processor_config=PSProcessorConfig(clipping_threshold=0.05),
+        n_folds=3,
+        n_rep=2,
+    )
+
+    dml_obj.fit()
+
+    random_basis = pd.DataFrame(np.random.normal(0, 1, size=(n, 5)))
+    capo = dml_obj.capo(random_basis, cov_type=cov_type)
+    assert isinstance(capo, dml.utils.blp.DoubleMLBLP)
+    assert capo.n_rep == 2
+    assert isinstance(capo.blp_model, list)
+    assert len(capo.blp_model) == 2
+    assert capo.blp_model[0].cov_type == cov_type
+    assert capo.blp_model[1].cov_type == cov_type
+    assert capo.all_coef.shape == (random_basis.shape[1], 2)
+    assert capo.all_se.shape == (random_basis.shape[1], 2)
+    assert isinstance(capo.confint(), pd.DataFrame)
+    assert isinstance(capo.summary, pd.DataFrame)
+
+    x1 = obj_dml_data.data["X1"]
+    groups = pd.DataFrame({"Group 1": x1 <= x1.median(), "Group 2": x1 > x1.median()})
+    gapo = dml_obj.gapo(groups, cov_type=cov_type)
+    assert isinstance(gapo, dml.utils.blp.DoubleMLBLP)
+    assert gapo.n_rep == 2
+    assert gapo.all_coef.shape == (groups.shape[1], 2)
+    assert gapo.all_se.shape == (groups.shape[1], 2)
+    assert isinstance(gapo.confint(), pd.DataFrame)
+    assert all(gapo.confint().index == groups.columns.to_list())
diff --git a/doubleml/irm/tests/test_apo_exceptions.py b/doubleml/irm/tests/test_apo_exceptions.py
@@ -202,13 +202,6 @@ def test_apo_exception_capo_gapo():
     # reset the score
     dml_obj._score = "APO"
 
-    msg = "Only implemented for one repetition. Number of repetitions is 2."
-    with pytest.raises(NotImplementedError, match=msg):
-        dml_obj._n_rep = 2
-        dml_obj.capo(random_basis)
-    # reset the number of repetitions
-    dml_obj._n_rep = 1
-
     msg = "Groups must be of DataFrame type. Groups of type <class 'int'> was passed."
     with pytest.raises(TypeError, match=msg):
         _ = dml_obj.gapo(1)
diff --git a/doubleml/irm/tests/test_irm.py b/doubleml/irm/tests/test_irm.py
@@ -246,7 +246,7 @@ def test_dml_irm_cate_gate(cov_type):
     cate = dml_irm_obj.cate(random_basis, cov_type=cov_type)
     assert isinstance(cate, dml.utils.blp.DoubleMLBLP)
     assert isinstance(cate.confint(), pd.DataFrame)
-    assert cate.blp_model.cov_type == cov_type
+    assert cate.blp_model[0].cov_type == cov_type
 
     groups_1 = pd.DataFrame(
         np.column_stack([obj_dml_data.data["X1"] <= 0, obj_dml_data.data["X1"] > 0.2]), columns=["Group 1", "Group 2"]
@@ -257,7 +257,7 @@ def test_dml_irm_cate_gate(cov_type):
     assert isinstance(gate_1, dml.utils.blp.DoubleMLBLP)
     assert isinstance(gate_1.confint(), pd.DataFrame)
     assert all(gate_1.confint().index == groups_1.columns.to_list())
-    assert gate_1.blp_model.cov_type == cov_type
+    assert gate_1.blp_model[0].cov_type == cov_type
 
     np.random.seed(42)
     groups_2 = pd.DataFrame(np.random.choice(["1", "2"], n))
@@ -267,7 +267,50 @@ def test_dml_irm_cate_gate(cov_type):
     assert isinstance(gate_2, dml.utils.blp.DoubleMLBLP)
     assert isinstance(gate_2.confint(), pd.DataFrame)
     assert all(gate_2.confint().index == ["Group_1", "Group_2"])
-    assert gate_2.blp_model.cov_type == cov_type
+    assert gate_2.blp_model[0].cov_type == cov_type
+
+
+@pytest.mark.ci
+def test_dml_irm_cate_gate_multiple_rep(cov_type):
+    n = 120
+    np.random.seed(42)
+    obj_dml_data = make_irm_data(n_obs=n, dim_x=2)
+
+    ml_g = RandomForestRegressor(n_estimators=10, random_state=42)
+    ml_m = RandomForestClassifier(n_estimators=10, random_state=42)
+    ps_processor_config = PSProcessorConfig(clipping_threshold=0.05)
+    dml_irm_obj = dml.DoubleMLIRM(
+        obj_dml_data,
+        ml_m=ml_m,
+        ml_g=ml_g,
+        ps_processor_config=ps_processor_config,
+        n_folds=3,
+        n_rep=2,
+    )
+
+    dml_irm_obj.fit()
+    random_basis = pd.DataFrame(np.random.normal(0, 1, size=(n, 5)))
+    cate = dml_irm_obj.cate(random_basis, cov_type=cov_type)
+    assert isinstance(cate, dml.utils.blp.DoubleMLBLP)
+    assert cate.n_rep == 2
+    assert isinstance(cate.blp_model, list)
+    assert len(cate.blp_model) == 2
+    assert cate.blp_model[0].cov_type == cov_type
+    assert cate.blp_model[1].cov_type == cov_type
+    assert cate.all_coef.shape == (random_basis.shape[1], 2)
+    assert cate.all_se.shape == (random_basis.shape[1], 2)
+    assert isinstance(cate.confint(), pd.DataFrame)
+    assert isinstance(cate.summary, pd.DataFrame)
+
+    x1 = obj_dml_data.data["X1"]
+    groups = pd.DataFrame({"Group 1": x1 <= x1.median(), "Group 2": x1 > x1.median()})
+    gate = dml_irm_obj.gate(groups, cov_type=cov_type)
+    assert isinstance(gate, dml.utils.blp.DoubleMLBLP)
+    assert gate.n_rep == 2
+    assert gate.all_coef.shape == (groups.shape[1], 2)
+    assert gate.all_se.shape == (groups.shape[1], 2)
+    assert isinstance(gate.confint(), pd.DataFrame)
+    assert all(gate.confint().index == groups.columns.to_list())
 
 
 @pytest.fixture(scope="module", params=[1, 3])
diff --git a/doubleml/plm/plr.py b/doubleml/plm/plr.py
@@ -470,14 +470,12 @@ def cate(self, basis, is_gate=False, **kwargs):
             raise NotImplementedError(
                 "Only implemented for single treatment. " + f"Number of treatments is {str(self._dml_data.n_treat)}."
             )
-        if self.n_rep != 1:
-            raise NotImplementedError("Only implemented for one repetition. " + f"Number of repetitions is {str(self.n_rep)}.")
 
         Y_tilde, D_tilde = self._partial_out()
 
         D_basis = basis * D_tilde
         model = DoubleMLBLP(
-            orth_signal=Y_tilde.reshape(-1),
+            orth_signal=Y_tilde,
             basis=D_basis,
             is_gate=is_gate,
         )
diff --git a/doubleml/plm/tests/test_plr.py b/doubleml/plm/tests/test_plr.py
@@ -315,7 +315,7 @@ def test_dml_plr_cate_gate(score, cov_type):
     cate = dml_plr_obj.cate(random_basis, cov_type=cov_type)
     assert isinstance(cate, dml.DoubleMLBLP)
     assert isinstance(cate.confint(), pd.DataFrame)
-    assert cate.blp_model.cov_type == cov_type
+    assert cate.blp_model[0].cov_type == cov_type
 
     groups_1 = pd.DataFrame(
         np.column_stack([obj_dml_data.data["X1"] <= 0, obj_dml_data.data["X1"] > 0.2]), columns=["Group 1", "Group 2"]
@@ -326,7 +326,7 @@ def test_dml_plr_cate_gate(score, cov_type):
     assert isinstance(gate_1, dml.utils.blp.DoubleMLBLP)
     assert isinstance(gate_1.confint(), pd.DataFrame)
     assert all(gate_1.confint().index == groups_1.columns.tolist())
-    assert gate_1.blp_model.cov_type == cov_type
+    assert gate_1.blp_model[0].cov_type == cov_type
 
     np.random.seed(42)
     groups_2 = pd.DataFrame(np.random.choice(["1", "2"], n))
@@ -336,4 +336,46 @@ def test_dml_plr_cate_gate(score, cov_type):
     assert isinstance(gate_2, dml.utils.blp.DoubleMLBLP)
     assert isinstance(gate_2.confint(), pd.DataFrame)
     assert all(gate_2.confint().index == ["Group_1", "Group_2"])
-    assert gate_2.blp_model.cov_type == cov_type
+    assert gate_2.blp_model[0].cov_type == cov_type
+
+
+@pytest.mark.ci
+def test_dml_plr_cate_gate_multiple_rep(score, cov_type):
+    n = 120
+
+    np.random.seed(42)
+    obj_dml_data = dml.plm.datasets.make_plr_CCDDHNR2018(n_obs=n)
+    ml_l = LinearRegression()
+    ml_g = LinearRegression()
+    ml_m = LinearRegression()
+
+    if score == "partialling out":
+        dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, ml_l=ml_l, ml_m=ml_m, n_folds=3, n_rep=2, score=score)
+    else:
+        assert score == "IV-type"
+        dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, ml_l=ml_l, ml_m=ml_m, ml_g=ml_g, n_folds=3, n_rep=2, score=score)
+
+    dml_plr_obj.fit()
+
+    random_basis = pd.DataFrame(np.random.normal(0, 1, size=(n, 2)))
+    cate = dml_plr_obj.cate(random_basis, cov_type=cov_type)
+    assert isinstance(cate, dml.DoubleMLBLP)
+    assert cate.n_rep == 2
+    assert isinstance(cate.blp_model, list)
+    assert len(cate.blp_model) == 2
+    assert cate.blp_model[0].cov_type == cov_type
+    assert cate.blp_model[1].cov_type == cov_type
+    assert cate.all_coef.shape == (random_basis.shape[1], 2)
+    assert cate.all_se.shape == (random_basis.shape[1], 2)
+    assert isinstance(cate.confint(), pd.DataFrame)
+    assert isinstance(cate.summary, pd.DataFrame)
+
+    x1 = obj_dml_data.data["X1"]
+    groups = pd.DataFrame({"Group 1": x1 <= x1.median(), "Group 2": x1 > x1.median()})
+    gate = dml_plr_obj.gate(groups, cov_type=cov_type)
+    assert isinstance(gate, dml.DoubleMLBLP)
+    assert gate.n_rep == 2
+    assert gate.all_coef.shape == (groups.shape[1], 2)
+    assert gate.all_se.shape == (groups.shape[1], 2)
+    assert isinstance(gate.confint(), pd.DataFrame)
+    assert all(gate.confint().index == groups.columns.tolist())
diff --git a/doubleml/plm/tests/test_plr_binary_outcome.py b/doubleml/plm/tests/test_plr_binary_outcome.py
@@ -231,7 +231,7 @@ def test_dml_plr_binary_cate_gate(score, cov_type, generate_binary_data):
     cate = dml_plr_obj.cate(random_basis, cov_type=cov_type)
     assert isinstance(cate, dml.DoubleMLBLP)
     assert isinstance(cate.confint(), pd.DataFrame)
-    assert cate.blp_model.cov_type == cov_type
+    assert cate.blp_model[0].cov_type == cov_type
 
     groups_1 = pd.DataFrame(np.column_stack([data["X1"] <= 0, data["X1"] > 0.2]), columns=["Group 1", "Group 2"])
     msg = "At least one group effect is estimated with less than 6 observations."
@@ -240,4 +240,4 @@ def test_dml_plr_binary_cate_gate(score, cov_type, generate_binary_data):
     assert isinstance(gate_1, dml.utils.blp.DoubleMLBLP)
     assert isinstance(gate_1.confint(), pd.DataFrame)
     assert all(gate_1.confint().index == groups_1.columns.tolist())
-    assert gate_1.blp_model.cov_type == cov_type
+    assert gate_1.blp_model[0].cov_type == cov_type
diff --git a/doubleml/tests/test_exceptions.py b/doubleml/tests/test_exceptions.py
@@ -1387,10 +1387,9 @@ def test_doubleml_exception_gate():
         n_rep=2,
     )
     dml_irm_obj.fit()
-
-    msg = "Only implemented for one repetition. Number of repetitions is 2."
-    with pytest.raises(NotImplementedError, match=msg):
-        dml_irm_obj.gate(groups=groups)
+    msg = "Groups must be of DataFrame type. Groups of type <class 'int'> was passed."
+    with pytest.raises(TypeError, match=msg):
+        dml_irm_obj.gate(groups=2)
 
 
 @pytest.mark.ci
@@ -1419,17 +1418,17 @@ def test_doubleml_exception_cate():
         n_rep=2,
     )
     dml_irm_obj.fit()
-    msg = "Only implemented for one repetition. Number of repetitions is 2."
-    with pytest.raises(NotImplementedError, match=msg):
+    msg = "The basis must be of DataFrame type. Basis of type <class 'int'> was passed."
+    with pytest.raises(TypeError, match=msg):
         dml_irm_obj.cate(basis=2)
 
 
 @pytest.mark.ci
 def test_doubleml_exception_plr_cate():
     dml_plr_obj = DoubleMLPLR(dml_data, ml_l=Lasso(), ml_m=Lasso(), n_folds=2, n_rep=2)
     dml_plr_obj.fit()
-    msg = "Only implemented for one repetition. Number of repetitions is 2."
-    with pytest.raises(NotImplementedError, match=msg):
+    msg = "The basis must be of DataFrame type. Basis of type <class 'numpy.ndarray'> was passed."
+    with pytest.raises(TypeError, match=msg):
         dml_plr_obj.cate(basis=2)
 
     dml_plr_obj = DoubleMLPLR(dml_data, ml_l=Lasso(), ml_m=Lasso(), n_folds=2)
@@ -1460,6 +1459,12 @@ def test_doubleml_exception_plr_gate():
     with pytest.raises(TypeError, match=msg):
         dml_plr_obj.gate(groups=pd.DataFrame(np.random.normal(0, 1, size=(dml_data.n_obs, 3))))
 
+    dml_plr_obj = DoubleMLPLR(dml_data, ml_l=Lasso(), ml_m=Lasso(), n_folds=2, n_rep=2)
+    dml_plr_obj.fit()
+    msg = "Groups must be of DataFrame type. Groups of type <class 'int'> was passed."
+    with pytest.raises(TypeError, match=msg):
+        dml_plr_obj.gate(groups=2)
+
 
 @pytest.mark.ci
 def test_double_ml_exception_evaluate_learner():
diff --git a/doubleml/utils/blp.py b/doubleml/utils/blp.py
diff --git a/doubleml/utils/tests/test_blp.py b/doubleml/utils/tests/test_blp.py
diff --git a/pyproject.toml b/pyproject.toml

Original file line number	Diff line number	Diff line change
`@@ -470,14 +470,12 @@ def cate(self, basis, is_gate=False, **kwargs):`
`470`	`470`	`raise NotImplementedError(`
`471`	`471`	`"Only implemented for single treatment. " + f"Number of treatments is {str(self._dml_data.n_treat)}."`
`472`	`472`	`)`
`473`		`- if self.n_rep != 1:`
`474`		`- raise NotImplementedError("Only implemented for one repetition. " + f"Number of repetitions is {str(self.n_rep)}.")`
`475`	`473`
`476`	`474`	`Y_tilde, D_tilde = self._partial_out()`
`477`	`475`
`478`	`476`	`D_basis = basis * D_tilde`
`479`	`477`	`model = DoubleMLBLP(`
`480`		`- orth_signal=Y_tilde.reshape(-1),`
	`478`	`+ orth_signal=Y_tilde,`
`481`	`479`	`basis=D_basis,`
`482`	`480`	`is_gate=is_gate,`
`483`	`481`	`)`