Fixes to BITE, Frobenius, CODEC

ZmeiGorynych · ZmeiGorynych · commit fc33b98d35cf · 2025-01-19T11:29:22.000+01:00
diff --git a/causaltune/optimiser.py b/causaltune/optimiser.py
@@ -94,6 +94,7 @@ def __init__(
         test_size=None,
         num_samples=-1,
         propensity_model="dummy",
+        propensity_automl_estimators: Optional[List[str]] = None,
         outcome_model="nested",
         components_task="regression",
         components_verbose=0,
@@ -185,6 +186,7 @@ def __init__(
         self._settings["component_models"]["n_jobs"] = components_njobs
         self._settings["component_models"]["time_budget"] = components_time_budget
         self._settings["component_models"]["eval_method"] = "holdout"
+        self._settings["propensity_automl_estimators"] = propensity_automl_estimators
 
         if 0 < train_size < 1:
             component_test_size = 1 - train_size
@@ -224,9 +226,11 @@ def init_propensity_model(self, propensity_model: str):
         if propensity_model == "dummy":
             self.propensity_model = DummyClassifier(strategy="prior")
         elif propensity_model == "auto":
-            self.propensity_model = AutoML(
-                **{**self._settings["component_models"], "task": "classification"}
-            )
+            automl_args = {**self._settings["component_models"], "task": "classification"}
+            if self._settings["propensity_automl_estimators"]:
+                automl_args["estimator_list"] = self._settings["propensity_automl_estimators"]
+
+            self.propensity_model = AutoML(**automl_args)
         elif hasattr(propensity_model, "fit") and hasattr(propensity_model, "predict_proba"):
             self.propensity_model = propensity_model
         else:
diff --git a/causaltune/score/bite.py b/causaltune/score/bite.py
@@ -9,15 +9,20 @@ def bite(
     working_df: pd.DataFrame,
     treatment_name: str,
     outcome_name: str,
+    min_N: int = 10,
+    max_N: int = 1000,
+    num_N: int = 20,
     N_values: Optional[List[int]] = None,
+    clip_propensity: float = 0.05,
 ) -> float:
+    max_N = int(min(max_N, len(working_df) / 10))
     if N_values is None:
-        N_values = exponential_spacing(10, 100, 20)
+        N_values = exponential_spacing(min_N, max_N, num_N)
     # Calculate weights with clipping to avoid extremes
     working_df["weights"] = np.where(
         working_df[treatment_name] == 1,
-        1 / np.clip(working_df["propensity"], 0.05, 0.95),
-        1 / np.clip(1 - working_df["propensity"], 0.05, 0.95),
+        1 / np.clip(working_df["propensity"], clip_propensity, 1 - clip_propensity),
+        1 / np.clip(1 - working_df["propensity"], clip_propensity, 1 - clip_propensity),
     )
 
     kendall_tau_values = []
diff --git a/causaltune/score/scoring.py b/causaltune/score/scoring.py
@@ -14,17 +14,17 @@
 from causaltune.score.thompson import thompson_policy, extract_means_stds
 from causaltune.thirdparty.causalml import metrics
 from causaltune.score.erupt import ERUPT
-from causaltune.score.bite import bite
+from .bite import bite
 from causaltune.utils import treatment_values, psw_joint_weights
 
 import dcor
 
 from scipy.spatial import distance
 from sklearn.neighbors import NearestNeighbors
-
-
 from sklearn.preprocessing import StandardScaler
 
+logger = logging.getLogger(__name__)
+
 
 class DummyEstimator:
     def __init__(self, cate_estimate: np.ndarray, effect_intervals: Optional[np.ndarray] = None):
@@ -93,7 +93,7 @@ def __init__(
         Access methods and attributes via `CausalTune.scorer`.
 
         """
-
+        logger.info("Initializing Scorer")
         self.problem = problem
         self.multivalue = multivalue
         self.causal_model = copy.deepcopy(causal_model)
@@ -341,8 +341,8 @@ def frobenius_norm_score(
         # Normalize features
         select_cols = estimate.estimator._effect_modifier_names + ["yhat"]
         scaler = StandardScaler()
-        Y0X_1_normalized = scaler.fit_transform(Y0X_1[select_cols])
-        Y0X_0_normalized = scaler.transform(Y0X_0[select_cols])
+        Y0X_0_normalized = scaler.fit_transform(Y0X_0[select_cols])
+        Y0X_1_normalized = scaler.transform(Y0X_1[select_cols])
 
         # Calculate pairwise differences
         differences_xy = Y0X_1_normalized[:, np.newaxis, :] - Y0X_0_normalized[np.newaxis, :, :]
@@ -927,7 +927,7 @@ def codec_score(estimate: CausalEstimate, df: pd.DataFrame) -> float:
         if standard_deviations < 0.01:
             return np.inf
 
-        return Scorer.codec(Y, Z, X)
+        return abs(Scorer.codec(Y, Z, X))
 
     @staticmethod
     def auc_make_score(
@@ -945,7 +945,7 @@ def auc_make_score(
             float: area under the uplift curve
 
         """
-
+        logger.debug("running auuc_score")
         est = estimate.estimator
         new_df = pd.DataFrame()
         new_df["y"] = df[est._outcome_name]
diff --git a/notebooks/RunExperiments/cluster_config.yaml b/notebooks/RunExperiments/cluster_config.yaml
@@ -6,7 +6,7 @@ cluster_name: default
 
 # The maximum number of workers nodes to launch in addition to the head
 # node.
-max_workers: 9
+max_workers: 8
 
 # The autoscaler will scale up the cluster faster with higher upscaling speed.
 # E.g., if the task requires adding more nodes then autoscaler will gradually
@@ -93,7 +93,7 @@ available_node_types:
         min_workers: 1
         # The maximum number of worker nodes of this type to launch.
         # This takes precedence over min_workers.
-        max_workers: 9
+        max_workers: 8
         # The resources provided by this node type.
         resources: {"CPU": 2}
         # Provider-specific config for the head node, e.g. instance type. By default
@@ -161,7 +161,7 @@ initialization_commands: []
 
 # List of shell commands to run to set up nodes.
 setup_commands:
-    - pip install causaltune catboost ray[tune]
+    - pip install causaltune catboost ray[tune] flaml[blendsearch]
 
     # Note: if you're developing Ray, you probably want to create a Docker image that
     # has your Ray repo pre-cloned. Then, you can replace the pip installs
diff --git a/notebooks/RunExperiments/runners/experiment_plots.py b/notebooks/RunExperiments/runners/experiment_plots.py
@@ -70,7 +70,7 @@ def generate_plots(
         "bite": "BITE",
         "policy_risk": "Policy\nRisk",
         "energy_distance": "Energy\nDistance",
-        "psw_energy_distance": "PSW\nEnergy\nDistance",
+        "psw_energy_distance": "Energy\nDistance",
         "norm_erupt": "Normalized\nERUPT",
     }
 
diff --git a/notebooks/RunExperiments/runners/experiment_runner.py b/notebooks/RunExperiments/runners/experiment_runner.py
@@ -12,6 +12,9 @@
 from sklearn.model_selection import train_test_split
 
 
+sys.path.insert(0, os.getcwd())
+import causaltune  # noqa: E402
+
 from causaltune import CausalTune
 from causaltune.data_utils import CausalityDataset
 from causaltune.models.passthrough import passthrough_model
@@ -112,6 +115,7 @@ def run_experiment(
     estimators: List[str],
     dataset_path: str,
     use_ray: bool,
+    propensity_automl_estimators: Optional[List[str]] = None,
 ):
     # Process datasets
     data_sets = {}
@@ -125,6 +129,7 @@ def run_experiment(
         name = " ".join(parts[1:])
         file_path = f"{dataset_path}/{size}/{name}.pkl"
         data_sets[f"{size} {name}"] = load_dataset(file_path)
+    run_kind = dataset.split("_")[1]
 
     out_dir = f"../EXPERIMENT_RESULTS_{args.identifier}"
     os.makedirs(out_dir, exist_ok=True)
@@ -136,24 +141,22 @@ def run_experiment(
     already_running = False
     if use_ray:
         try:
-            runner = ray.get_actor("TaskRunner")
+            runner = ray.get_actor(f"TaskRunner {run_kind}")
             print("\n" * 4)
             print(
                 "!!! Found an existing detached TaskRunner. Will assume the tasks have already been submitted."
             )
             print(
-                "!!! If you want to re-run the experiments from scratch, "
-                'run ray.kill(ray.get_actor("TaskRunner", namespace="{}")) or recreate the cluster.'.format(
-                    RAY_NAMESPACE
-                )
+                "!!! If you want to re-run the experiments from scratch, "
+                f'run ray.kill(ray.get_actor("TaskRunner {run_kind}", namespace="{RAY_NAMESPACE}")) or recreate the cluster.'
             )
             print("\n" * 4)
             already_running = True
         except ValueError:
             print("Ray: no detached TaskRunner found, creating...")
             # This thing will be alive even if the host program exits
-            # Must be killed explicitly: ray.kill(ray.get_actor("TaskRunner"))
-            runner = TaskRunner.options(name="TaskRunner", lifetime="detached").remote()
+            # Must be killed explicitly: ray.kill(ray.get_actor(f"TaskRunner {run_kind}"))
+            runner = TaskRunner.options(name=f"TaskRunner {run_kind}", lifetime="detached").remote()
 
     out = []
     if not already_running:
@@ -190,6 +193,7 @@ def run_experiment(
                             args.components_time_budget,
                             out_fn,
                             estimators,
+                            propensity_automl_estimators,
                         )
                     )
                 else:
@@ -202,6 +206,7 @@ def run_experiment(
                         args.components_time_budget,
                         out_fn,
                         estimators,
+                        propensity_automl_estimators,
                     )
                     out.append(results)
 
@@ -238,6 +243,7 @@ def run_batch(
     estimators: List[str],
     dataset_path: str,
     use_ray: bool = False,
+    propensity_automl_estimators: Optional[List[str]] = None,
 ):
     args = parse_arguments()
     args.identifier = identifier
@@ -255,12 +261,19 @@ def run_batch(
         # Assuming we port-mapped already by running ray dashboard
         ray.init(
             "ray://localhost:10001",
-            runtime_env={"working_dir": ".", "pip": ["causaltune", "catboost", "ray[tune]"]},
+            runtime_env={
+                "working_dir": ".",
+                "pip": ["causaltune", "catboost", "ray[tune]", "flaml[blendsearch]"],
+            },
             namespace=RAY_NAMESPACE,
         )
 
     out_dir = run_experiment(
-        args, estimators=estimators, dataset_path=dataset_path, use_ray=use_ray
+        args,
+        estimators=estimators,
+        dataset_path=dataset_path,
+        use_ray=use_ray,
+        propensity_automl_estimators=propensity_automl_estimators,
     )
     return out_dir
 
@@ -275,8 +288,8 @@ class TaskRunner:
     def __init__(self):
         self.futures = {}
 
-    def remote_single_run(self, *args):
-        ref = remote_single_run.remote(*args)
+    def remote_single_run(self, *args, **kwargs):
+        ref = remote_single_run.remote(*args, **kwargs)
         self.futures[ref.hex()] = ref
         return ref.hex()
 
@@ -310,6 +323,7 @@ def single_run(
     components_time_budget: int,
     out_fn: str,
     estimators: List[str],
+    propensity_automl_estimators: Optional[List[str]] = None,
     outcome_model: str = "auto",
     i_run: int = 1,
 ):
@@ -342,6 +356,7 @@ def single_run(
             store_all_estimators=True,
             propensity_model=propensity_model,
             outcome_model=outcome_model,
+            propensity_automl_estimators=propensity_automl_estimators,
             use_ray=False,
         )
 
diff --git a/notebooks/RunExperiments/runners/kc_no_meta.py b/notebooks/RunExperiments/runners/kc_no_meta.py
@@ -0,0 +1,39 @@
+import os
+
+from experiment_runner import run_batch, get_estimator_list
+from experiment_plots import generate_plots
+
+identifier = "Egor_test"  # run label; ends up in the EXPERIMENT_RESULTS_<identifier> directory name
+kind = "KC"  # passed to get_estimator_list/run_batch; also the results sub-directory that gets plotted
+metrics = [  # metric names handed to run_batch
+    "erupt",
+    # "greedy_erupt",  # regular erupt was made probabilistic,
+    "policy_risk",  # NEW
+    "qini",
+    "auc",
+    "psw_energy_distance",
+    "frobenius_norm",  # NEW
+    "codec",  # NEW
+    "bite",  # NEW
+]
+estimators = get_estimator_list(kind, exclude_patterns=["SLearner", "TLearner", "XLearner"])
+ptt_estimators = [  # forwarded as propensity_automl_estimators (AutoML estimator_list for the propensity model)
+    "lgbm",
+    "lrl2",
+]
+
+use_ray = True  # submit runs through the detached Ray TaskRunner instead of running them in-process
+out_dir = run_batch(
+    identifier,
+    kind,
+    metrics,
+    estimators=estimators,
+    propensity_automl_estimators=ptt_estimators,
+    dataset_path=os.path.realpath("../RunDatasets"),
+    use_ray=use_ray,
+)
+# plot results
+# upper_bounds = {"MSE": 1e2, "policy_risk": 0.2}
+# lower_bounds = {"erupt": 0.06, "bite": 0.75}
+generate_plots(os.path.join(out_dir, kind))  # , upper_bounds, lower_bounds)
+print("yay!")
diff --git a/notebooks/RunExperiments/runners/rct.py b/notebooks/RunExperiments/runners/rct.py
@@ -18,7 +18,7 @@
     "bite",  # NEW
 ]
 estimators = get_estimator_list(kind)
-use_ray = False
+use_ray = True
 out_dir = run_batch(
     identifier,
     kind,
diff --git a/notebooks/RunExperiments/runners/rct_no_meta.py b/notebooks/RunExperiments/runners/rct_no_meta.py
@@ -19,7 +19,7 @@
 
 estimators = get_estimator_list(kind, exclude_patterns=["SLearner", "TLearner", "XLearner"])
 
-use_ray = False
+use_ray = True
 out_dir = run_batch(
     identifier,
     kind,

Original file line number	Diff line number	Diff line change
`@@ -70,7 +70,7 @@ def generate_plots(`
`70`	`70`	`"bite": "BITE",`
`71`	`71`	`"policy_risk": "Policy\nRisk",`
`72`	`72`	`"energy_distance": "Energy\nDistance",`
`73`		`- "psw_energy_distance": "PSW\nEnergy\nDistance",`
	`73`	`+ "psw_energy_distance": "Energy\nDistance",`
`74`	`74`	`"norm_erupt": "Normalized\nERUPT",`
`75`	`75`	`}`
`76`	`76`
Original file line number	Diff line number	Diff line change
`@@ -18,7 +18,7 @@`
`18`	`18`	`"bite", # NEW`
`19`	`19`	`]`
`20`	`20`	`estimators = get_estimator_list(kind)`
`21`		`-use_ray = False`
	`21`	`+use_ray = True`
`22`	`22`	`out_dir = run_batch(`
`23`	`23`	`identifier,`
`24`	`24`	`kind,`