Skip to content

Commit fc33b98

Browse files
committed
Fixes to BITE, Frobenius, CODEC
1 parent 06b9028 commit fc33b98

File tree

9 files changed

+94
-31
lines changed

9 files changed

+94
-31
lines changed

causaltune/optimiser.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ def __init__(
9494
test_size=None,
9595
num_samples=-1,
9696
propensity_model="dummy",
97+
propensity_automl_estimators: Optional[List[str]] = None,
9798
outcome_model="nested",
9899
components_task="regression",
99100
components_verbose=0,
@@ -185,6 +186,7 @@ def __init__(
185186
self._settings["component_models"]["n_jobs"] = components_njobs
186187
self._settings["component_models"]["time_budget"] = components_time_budget
187188
self._settings["component_models"]["eval_method"] = "holdout"
189+
self._settings["propensity_automl_estimators"] = propensity_automl_estimators
188190

189191
if 0 < train_size < 1:
190192
component_test_size = 1 - train_size
@@ -224,9 +226,11 @@ def init_propensity_model(self, propensity_model: str):
224226
if propensity_model == "dummy":
225227
self.propensity_model = DummyClassifier(strategy="prior")
226228
elif propensity_model == "auto":
227-
self.propensity_model = AutoML(
228-
**{**self._settings["component_models"], "task": "classification"}
229-
)
229+
automl_args = {**self._settings["component_models"], "task": "classification"}
230+
if self._settings["propensity_automl_estimators"]:
231+
automl_args["estimator_list"] = self._settings["propensity_automl_estimators"]
232+
233+
self.propensity_model = AutoML(**automl_args)
230234
elif hasattr(propensity_model, "fit") and hasattr(propensity_model, "predict_proba"):
231235
self.propensity_model = propensity_model
232236
else:

causaltune/score/bite.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,20 @@ def bite(
99
working_df: pd.DataFrame,
1010
treatment_name: str,
1111
outcome_name: str,
12+
min_N: int = 10,
13+
max_N: int = 1000,
14+
num_N: int = 20,
1215
N_values: Optional[List[int]] = None,
16+
clip_propensity: float = 0.05,
1317
) -> float:
18+
max_N = int(min(max_N, len(working_df) / 10))
1419
if N_values is None:
15-
N_values = exponential_spacing(10, 100, 20)
20+
N_values = exponential_spacing(min_N, max_N, num_N)
1621
# Calculate weights with clipping to avoid extremes
1722
working_df["weights"] = np.where(
1823
working_df[treatment_name] == 1,
19-
1 / np.clip(working_df["propensity"], 0.05, 0.95),
20-
1 / np.clip(1 - working_df["propensity"], 0.05, 0.95),
24+
1 / np.clip(working_df["propensity"], clip_propensity, 1 - clip_propensity),
25+
1 / np.clip(1 - working_df["propensity"], clip_propensity, 1 - clip_propensity),
2126
)
2227

2328
kendall_tau_values = []

causaltune/score/scoring.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,17 +14,17 @@
1414
from causaltune.score.thompson import thompson_policy, extract_means_stds
1515
from causaltune.thirdparty.causalml import metrics
1616
from causaltune.score.erupt import ERUPT
17-
from causaltune.score.bite import bite
17+
from .bite import bite
1818
from causaltune.utils import treatment_values, psw_joint_weights
1919

2020
import dcor
2121

2222
from scipy.spatial import distance
2323
from sklearn.neighbors import NearestNeighbors
24-
25-
2624
from sklearn.preprocessing import StandardScaler
2725

26+
logger = logging.getLogger(__name__)
27+
2828

2929
class DummyEstimator:
3030
def __init__(self, cate_estimate: np.ndarray, effect_intervals: Optional[np.ndarray] = None):
@@ -93,7 +93,7 @@ def __init__(
9393
Access methods and attributes via `CausalTune.scorer`.
9494
9595
"""
96-
96+
logger.info("Initializing Scorer")
9797
self.problem = problem
9898
self.multivalue = multivalue
9999
self.causal_model = copy.deepcopy(causal_model)
@@ -341,8 +341,8 @@ def frobenius_norm_score(
341341
# Normalize features
342342
select_cols = estimate.estimator._effect_modifier_names + ["yhat"]
343343
scaler = StandardScaler()
344-
Y0X_1_normalized = scaler.fit_transform(Y0X_1[select_cols])
345-
Y0X_0_normalized = scaler.transform(Y0X_0[select_cols])
344+
Y0X_0_normalized = scaler.fit_transform(Y0X_0[select_cols])
345+
Y0X_1_normalized = scaler.transform(Y0X_1[select_cols])
346346

347347
# Calculate pairwise differences
348348
differences_xy = Y0X_1_normalized[:, np.newaxis, :] - Y0X_0_normalized[np.newaxis, :, :]
@@ -927,7 +927,7 @@ def codec_score(estimate: CausalEstimate, df: pd.DataFrame) -> float:
927927
if standard_deviations < 0.01:
928928
return np.inf
929929

930-
return Scorer.codec(Y, Z, X)
930+
return abs(Scorer.codec(Y, Z, X))
931931

932932
@staticmethod
933933
def auc_make_score(
@@ -945,7 +945,7 @@ def auc_make_score(
945945
float: area under the uplift curve
946946
947947
"""
948-
948+
print("running auuc_score")
949949
est = estimate.estimator
950950
new_df = pd.DataFrame()
951951
new_df["y"] = df[est._outcome_name]

notebooks/RunExperiments/cluster_config.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ cluster_name: default
66

77
# The maximum number of worker nodes to launch in addition to the head
88
# node.
9-
max_workers: 9
9+
max_workers: 8
1010

1111
# The autoscaler will scale up the cluster faster with higher upscaling speed.
1212
# E.g., if the task requires adding more nodes then the autoscaler will gradually
@@ -93,7 +93,7 @@ available_node_types:
9393
min_workers: 1
9494
# The maximum number of worker nodes of this type to launch.
9595
# This takes precedence over min_workers.
96-
max_workers: 9
96+
max_workers: 8
9797
# The resources provided by this node type.
9898
resources: {"CPU": 2}
9999
# Provider-specific config for the head node, e.g. instance type. By default
@@ -161,7 +161,7 @@ initialization_commands: []
161161

162162
# List of shell commands to run to set up nodes.
163163
setup_commands:
164-
- pip install causaltune catboost ray[tune]
164+
- pip install causaltune catboost ray[tune] flaml[blendsearch]
165165

166166
# Note: if you're developing Ray, you probably want to create a Docker image that
167167
# has your Ray repo pre-cloned. Then, you can replace the pip installs

notebooks/RunExperiments/runners/experiment_plots.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ def generate_plots(
7070
"bite": "BITE",
7171
"policy_risk": "Policy\nRisk",
7272
"energy_distance": "Energy\nDistance",
73-
"psw_energy_distance": "PSW\nEnergy\nDistance",
73+
"psw_energy_distance": "Energy\nDistance",
7474
"norm_erupt": "Normalized\nERUPT",
7575
}
7676

notebooks/RunExperiments/runners/experiment_runner.py

Lines changed: 26 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@
1212
from sklearn.model_selection import train_test_split
1313

1414

15+
sys.path.insert(0, os.getcwd())
16+
import causaltune # noqa: E402
17+
1518
from causaltune import CausalTune
1619
from causaltune.data_utils import CausalityDataset
1720
from causaltune.models.passthrough import passthrough_model
@@ -112,6 +115,7 @@ def run_experiment(
112115
estimators: List[str],
113116
dataset_path: str,
114117
use_ray: bool,
118+
propensity_automl_estimators: Optional[List[str]] = None,
115119
):
116120
# Process datasets
117121
data_sets = {}
@@ -125,6 +129,7 @@ def run_experiment(
125129
name = " ".join(parts[1:])
126130
file_path = f"{dataset_path}/{size}/{name}.pkl"
127131
data_sets[f"{size} {name}"] = load_dataset(file_path)
132+
run_kind = dataset.split("_")[1]
128133

129134
out_dir = f"../EXPERIMENT_RESULTS_{args.identifier}"
130135
os.makedirs(out_dir, exist_ok=True)
@@ -136,24 +141,22 @@ def run_experiment(
136141
already_running = False
137142
if use_ray:
138143
try:
139-
runner = ray.get_actor("TaskRunner")
144+
runner = ray.get_actor(f"TaskRunner {run_kind}")
140145
print("\n" * 4)
141146
print(
142147
"!!! Found an existing detached TaskRunner. Will assume the tasks have already been submitted."
143148
)
144149
print(
145-
"!!! If you want to re-run the experiments from scratch, "
146-
'run ray.kill(ray.get_actor("TaskRunner", namespace="{}")) or recreate the cluster.'.format(
147-
RAY_NAMESPACE
148-
)
150+
f"!!! If you want to re-run the experiments from scratch, "
151+
f'run ray.kill(ray.get_actor("TaskRunner {run_kind}", namespace="{RAY_NAMESPACE}")) or recreate the cluster.'
149152
)
150153
print("\n" * 4)
151154
already_running = True
152155
except ValueError:
153156
print("Ray: no detached TaskRunner found, creating...")
154157
# This thing will be alive even if the host program exits
155-
# Must be killed explicitly: ray.kill(ray.get_actor("TaskRunner"))
156-
runner = TaskRunner.options(name="TaskRunner", lifetime="detached").remote()
158+
# Must be killed explicitly: ray.kill(ray.get_actor(f"TaskRunner {run_kind}"))
159+
runner = TaskRunner.options(name=f"TaskRunner {run_kind}", lifetime="detached").remote()
157160

158161
out = []
159162
if not already_running:
@@ -190,6 +193,7 @@ def run_experiment(
190193
args.components_time_budget,
191194
out_fn,
192195
estimators,
196+
propensity_automl_estimators,
193197
)
194198
)
195199
else:
@@ -202,6 +206,7 @@ def run_experiment(
202206
args.components_time_budget,
203207
out_fn,
204208
estimators,
209+
propensity_automl_estimators,
205210
)
206211
out.append(results)
207212

@@ -238,6 +243,7 @@ def run_batch(
238243
estimators: List[str],
239244
dataset_path: str,
240245
use_ray: bool = False,
246+
propensity_automl_estimators: Optional[List[str]] = None,
241247
):
242248
args = parse_arguments()
243249
args.identifier = identifier
@@ -255,12 +261,19 @@ def run_batch(
255261
# Assuming we port-mapped already by running ray dashboard
256262
ray.init(
257263
"ray://localhost:10001",
258-
runtime_env={"working_dir": ".", "pip": ["causaltune", "catboost", "ray[tune]"]},
264+
runtime_env={
265+
"working_dir": ".",
266+
"pip": ["causaltune", "catboost", "ray[tune]", "flaml[blendsearch]"],
267+
},
259268
namespace=RAY_NAMESPACE,
260269
)
261270

262271
out_dir = run_experiment(
263-
args, estimators=estimators, dataset_path=dataset_path, use_ray=use_ray
272+
args,
273+
estimators=estimators,
274+
dataset_path=dataset_path,
275+
use_ray=use_ray,
276+
propensity_automl_estimators=propensity_automl_estimators,
264277
)
265278
return out_dir
266279

@@ -275,8 +288,8 @@ class TaskRunner:
275288
def __init__(self):
276289
self.futures = {}
277290

278-
def remote_single_run(self, *args):
279-
ref = remote_single_run.remote(*args)
291+
def remote_single_run(self, *args, **kwargs):
292+
ref = remote_single_run.remote(*args, **kwargs)
280293
self.futures[ref.hex()] = ref
281294
return ref.hex()
282295

@@ -310,6 +323,7 @@ def single_run(
310323
components_time_budget: int,
311324
out_fn: str,
312325
estimators: List[str],
326+
propensity_automl_estimators: Optional[List[str]] = None,
313327
outcome_model: str = "auto",
314328
i_run: int = 1,
315329
):
@@ -342,6 +356,7 @@ def single_run(
342356
store_all_estimators=True,
343357
propensity_model=propensity_model,
344358
outcome_model=outcome_model,
359+
propensity_automl_estimators=propensity_automl_estimators,
345360
use_ray=False,
346361
)
347362

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
import os
2+
3+
from experiment_runner import run_batch, get_estimator_list
4+
from experiment_plots import generate_plots
5+
6+
identifier = "Egor_test"
7+
kind = "KC"
8+
metrics = [
9+
"erupt",
10+
# "greedy_erupt", # regular erupt was made probabilistic,
11+
"policy_risk", # NEW
12+
"qini",
13+
"auc",
14+
"psw_energy_distance",
15+
"frobenius_norm", # NEW
16+
"codec", # NEW
17+
"bite", # NEW
18+
]
19+
estimators = get_estimator_list(kind, exclude_patterns=["SLearner", "TLearner", "XLearner"])
20+
ptt_estimators = [
21+
"lgbm",
22+
"lrl2",
23+
]
24+
25+
use_ray = True
26+
out_dir = run_batch(
27+
identifier,
28+
kind,
29+
metrics,
30+
estimators=estimators,
31+
propensity_automl_estimators=ptt_estimators,
32+
dataset_path=os.path.realpath("../RunDatasets"),
33+
use_ray=use_ray,
34+
)
35+
# plot results
36+
# upper_bounds = {"MSE": 1e2, "policy_risk": 0.2}
37+
# lower_bounds = {"erupt": 0.06, "bite": 0.75}
38+
generate_plots(os.path.join(out_dir, kind)) # , upper_bounds, lower_bounds)
39+
print("yay!")

notebooks/RunExperiments/runners/rct.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
"bite", # NEW
1919
]
2020
estimators = get_estimator_list(kind)
21-
use_ray = False
21+
use_ray = True
2222
out_dir = run_batch(
2323
identifier,
2424
kind,

notebooks/RunExperiments/runners/rct_no_meta.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919

2020
estimators = get_estimator_list(kind, exclude_patterns=["SLearner", "TLearner", "XLearner"])
2121

22-
use_ray = False
22+
use_ray = True
2323
out_dir = run_batch(
2424
identifier,
2525
kind,

0 commit comments

Comments
 (0)