Update estimator factories

lixfz · lixfz · commit d48c2e45a731 · 2023-01-03T18:34:08.000+08:00
diff --git a/tests/estimator_factory_test.py b/tests/estimator_factory_test.py
@@ -1,3 +1,5 @@
+import inspect
+
 import pytest
 
 from ylearn.estimator_model import ESTIMATOR_FACTORIES
@@ -33,7 +35,7 @@ def test_Xb_Yc(key):
 
 
 @if_policy_tree_ready
-@pytest.mark.parametrize('key', ['tree', ])
+@pytest.mark.parametrize('key', ['tree', 'grf'])
 def test_Xb_Yc_tree(key):
     data, test_data, outcome, treatment, adjustment, covariate = _dgp.generate_data_x1b_y1()
 
@@ -42,7 +44,12 @@ def test_Xb_Yc_tree(key):
                   adjustment=adjustment, covariate=covariate, random_state=123)
     assert est is not None
 
-    est.fit(data, outcome, treatment, adjustment=adjustment, covariate=covariate, n_jobs=1)
+    fit_options = {}
+    sig = inspect.signature(est.fit)
+    if 'n_jobs' in sig.parameters.keys():
+        fit_options['n_jobs'] = 1
+
+    est.fit(data, outcome, treatment, adjustment=adjustment, covariate=covariate, **fit_options)
     effect = est.estimate(test_data)
     assert effect.shape[0] == len(test_data)
 
@@ -62,3 +69,21 @@ def test_Xb_Yb(key):
     est.fit(data, outcome, treatment, adjustment=adjustment, covariate=covariate, n_jobs=1)
     effect = est.estimate(test_data)
     assert effect.shape[0] == len(test_data)
+
+
+@if_policy_tree_ready
+@pytest.mark.parametrize('key', ['tree', 'grf'])
+def test_Xb_Yb_tree(key):
+    """The 'tree' and 'grf' factories build, fit and estimate with both tasks 'binary'."""
+    train, test, outcome, treatment, adjustment, covariate = _dgp.generate_data_x1b_y1()
+    # Binarize the outcome in both frames around the training-set mean.
+    threshold = train[outcome].values.mean()
+    train[outcome] = (train[outcome] > threshold).astype('int')
+    test[outcome] = (test[outcome] > threshold).astype('int')
+
+    shared = dict(adjustment=adjustment, covariate=covariate)
+    build = ESTIMATOR_FACTORIES[key]()
+    estimator = build(train, outcome[0], treatment, 'binary', 'binary', random_state=123, **shared)
+    assert estimator is not None
+    estimator.fit(train, outcome, treatment, **shared)
+    assert estimator.estimate(test).shape[0] == len(test)
diff --git a/tests/why_test.py b/tests/why_test.py
@@ -172,7 +172,8 @@ def test_policy_interpreter_discrete_x2_yb_tlearner():
     data[outcome] = (data[outcome] > m).astype('int')
     test_data[outcome] = (test_data[outcome] > m).astype('int')
     # why = Why()
-    why = Why(estimator='ml', estimator_options=dict(learner='t', model='lr'))
+    # why = Why(estimator='ml', estimator_options=dict(learner='t', model='lr'))
+    why = Why(estimator='tlearner', estimator_options=dict(model='lr'))
     why.fit(data, outcome[0], treatment=treatment, adjustment=adjustment, covariate=covariate)
 
     pi = why.policy_interpreter(test_data)
@@ -189,7 +190,6 @@ def test_policy_interpreter_discrete_x2_yb_dml():
     data[outcome] = (data[outcome] > m).astype('int')
     test_data[outcome] = (test_data[outcome] > m).astype('int')
     why = Why(estimator='dml')
-    # why = Why(estimator='ml', estimator_options=dict(learner='t', model='lr'))
     why.fit(data, outcome[0], treatment=treatment, adjustment=adjustment, covariate=covariate)
 
     pi = why.policy_interpreter(test_data)
diff --git a/ylearn/estimator_model/_factory.py b/ylearn/estimator_model/_factory.py
@@ -157,6 +157,8 @@ def __call__(self, data, outcome, treatment, y_task, x_task,
         is_discrete_treatment = x_task if isinstance(x_task, bool) else x_task != const.TASK_REGRESSION
         is_discrete_outcome = y_task if isinstance(y_task, bool) else y_task != const.TASK_REGRESSION
 
+        assert is_discrete_treatment, 'SLearner support discrete treatment only.'
+
         return PermutedSLearner(
             model=self._model(data, task=y_task, estimator=self.model, random_state=random_state),
             is_discrete_outcome=is_discrete_outcome,
@@ -179,6 +181,8 @@ def __call__(self, data, outcome, treatment, y_task, x_task,
         is_discrete_treatment = x_task if isinstance(x_task, bool) else x_task != const.TASK_REGRESSION
         is_discrete_outcome = y_task if isinstance(y_task, bool) else y_task != const.TASK_REGRESSION
 
+        assert is_discrete_treatment, 'TLearner support discrete treatment only.'
+
         return PermutedTLearner(
             model=self._model(data, task=y_task, estimator=self.model, random_state=random_state),
             is_discrete_outcome=is_discrete_outcome,
@@ -202,6 +206,8 @@ def __call__(self, data, outcome, treatment, y_task, x_task,
         is_discrete_treatment = x_task if isinstance(x_task, bool) else x_task != const.TASK_REGRESSION
         is_discrete_outcome = y_task if isinstance(y_task, bool) else y_task != const.TASK_REGRESSION
 
+        assert is_discrete_treatment, 'XLearner support discrete treatment only.'
+
         if is_discrete_outcome:
             final_proba_model = self._model(
                 data, task=const.TASK_REGRESSION, estimator=self.final_proba_model, random_state=random_state)
@@ -229,12 +235,41 @@ def __call__(self, data, outcome, treatment, y_task, x_task,
                  adjustment=None, covariate=None, instrument=None, random_state=None):
         from ylearn.estimator_model._permuted import PermutedCausalTree
 
+        is_discrete_treatment = x_task if isinstance(x_task, bool) else x_task != const.TASK_REGRESSION
+        is_discrete_outcome = y_task if isinstance(y_task, bool) else y_task != const.TASK_REGRESSION
+
+        assert is_discrete_treatment, 'CausalTree support discrete treatment only.'
+
         options = self.options.copy()
         if random_state is not None:
             options['random_state'] = random_state
+        # options['is_discrete_outcome'] = is_discrete_outcome
+        # options['is_discrete_treatment'] = is_discrete_treatment
+
         return PermutedCausalTree(**options)
 
 
+@register()
+class GrfFactory(BaseEstimatorFactory):
+    """Factory producing ``GRForest`` estimators from stored keyword options."""
+
+    def __init__(self, **kwargs):
+        self.options = dict(kwargs)
+
+    def __call__(self, data, outcome, treatment, y_task, x_task,
+                 adjustment=None, covariate=None, instrument=None, random_state=None):
+        from ylearn.estimator_model._generalized_forest import GRForest
+
+        def _discrete(task):
+            # A bool flag is used as given; otherwise anything but regression is discrete.
+            return task if isinstance(task, bool) else task != const.TASK_REGRESSION
+
+        seed = {} if random_state is None else {'random_state': random_state}
+        flags = dict(is_discrete_outcome=_discrete(y_task), is_discrete_treatment=_discrete(x_task))
+        # Later mappings win, so the seed and computed flags override same-named stored options.
+        return GRForest(**{**self.options, **seed, **flags})
+
+
 @register()
 @register(name='bound')
 class ApproxBoundFactory(BaseEstimatorFactory):