 
 from econml.inference import BootstrapInference
 
-from joblib import Parallel, delayed
-
 from causaltune.search.params import SimpleParamService
 from causaltune.score.scoring import Scorer, metrics_to_minimize
 from causaltune.utils import treatment_is_multivalue
 from causaltune.dataset_processor import CausalityDatasetProcessor
 from causaltune.models.passthrough import feature_filter
 
-# tune.run = run
-
 
 # Patched from sklearn.linear_model._base to adjust rtol and atol values
-def _check_precomputed_gram_matrix(
-    X, precompute, X_offset, X_scale, rtol=1e-4, atol=1e-2
-):
+def _check_precomputed_gram_matrix(X, precompute, X_offset, X_scale, rtol=1e-4, atol=1e-2):
     n_features = X.shape[1]
     f1 = n_features // 2
     f2 = min(f1 + 1, n_features - 1)
@@ -177,24 +171,17 @@ def __init__(
         self._settings["tuner"]["time_budget_s"] = time_budget
         self._settings["tuner"]["num_samples"] = num_samples
         self._settings["tuner"]["verbose"] = verbose
-        self._settings["tuner"][
-            "use_ray"
-        ] = use_ray  # requires ray to be installed via pip install flaml[ray]
         self._settings["tuner"]["resources_per_trial"] = (
             resources_per_trial if resources_per_trial is not None else {"cpu": 0.5}
         )
         self._settings["try_init_configs"] = try_init_configs
-        self._settings["include_experimental_estimators"] = (
-            include_experimental_estimators
-        )
+        self._settings["include_experimental_estimators"] = include_experimental_estimators
 
         # params for FLAML on component models:
         self._settings["component_models"] = {}
         self._settings["component_models"]["task"] = components_task
         self._settings["component_models"]["verbose"] = components_verbose
-        self._settings["component_models"][
-            "pred_time_limit"
-        ] = components_pred_time_limit
+        self._settings["component_models"]["pred_time_limit"] = components_pred_time_limit
         self._settings["component_models"]["n_jobs"] = components_njobs
         self._settings["component_models"]["time_budget"] = components_time_budget
         self._settings["component_models"]["eval_method"] = "holdout"
@@ -221,6 +208,7 @@ def __init__(
         self.causal_model = None
         self.identified_estimand = None
         self.problem = None
+        self.use_ray = use_ray
         # properties that are used to resume fits (warm start)
         self.resume_scores = []
         self.resume_cfg = []
@@ -239,9 +227,7 @@ def init_propensity_model(self, propensity_model: str):
             self.propensity_model = AutoML(
                 **{**self._settings["component_models"], "task": "classification"}
             )
-        elif hasattr(propensity_model, "fit") and hasattr(
-            propensity_model, "predict_proba"
-        ):
+        elif hasattr(propensity_model, "fit") and hasattr(propensity_model, "predict_proba"):
             self.propensity_model = propensity_model
         else:
             raise ValueError(
@@ -266,9 +252,7 @@ def init_outcome_model(self, outcome_model):
             # The current default behavior
             return self.auto_outcome_model()
         else:
-            raise ValueError(
-                'outcome_model valid values are None, "auto", or an estimator object'
-            )
+            raise ValueError('outcome_model valid values are None, "auto", or an estimator object')
 
     def auto_outcome_model(self):
         data = self.data
@@ -303,6 +287,7 @@ def fit(
         preprocess: bool = False,
         encoder_type: Optional[str] = None,
         encoder_outcome: Optional[str] = None,
+        use_ray: Optional[bool] = None,
     ):
         """Performs AutoML on list of causal inference estimators
         - If estimator has a search space specified in its parameters, HPO is performed on the whole model.
@@ -326,6 +311,9 @@ def fit(
         Returns:
             None
         """
+        if use_ray is not None:
+            self.use_ray = use_ray
+
         if outcome is None and isinstance(data, CausalityDataset):
             outcome = data.outcomes[0]
 
@@ -344,19 +332,15 @@ def fit(
         if preprocess:
             data = copy.deepcopy(data)
             self.dataset_processor = CausalityDatasetProcessor()
-            self.dataset_processor.fit(
-                data, encoder_type=encoder_type, outcome=encoder_outcome
-            )
+            self.dataset_processor.fit(data, encoder_type=encoder_type, outcome=encoder_outcome)
             data = self.dataset_processor.transform(data)
         else:
             self.dataset_processor = None
 
         self.data = data
         treatment_values = data.treatment_values
 
-        assert (
-            len(treatment_values) > 1
-        ), "Treatment must take at least 2 values, eg 0 and 1!"
+        assert len(treatment_values) > 1, "Treatment must take at least 2 values, eg 0 and 1!"
 
         self._control_value = treatment_values[0]
         self._treatment_values = list(treatment_values[1:])
@@ -378,8 +362,8 @@ def fit(
 
         self.init_propensity_model(self._settings["propensity_model"])
 
-        self.identified_estimand: IdentifiedEstimand = (
-            self.causal_model.identify_effect(proceed_when_unidentifiable=True)
+        self.identified_estimand: IdentifiedEstimand = self.causal_model.identify_effect(
+            proceed_when_unidentifiable=True
         )
 
         if bool(self.identified_estimand.estimands["iv"]) and bool(data.instruments):
@@ -450,9 +434,7 @@ def fit(
             and self._settings["tuner"]["num_samples"] == -1
         ):
             self._settings["tuner"]["time_budget_s"] = (
-                2.5
-                * len(self.estimator_list)
-                * self._settings["component_models"]["time_budget"]
+                2.5 * len(self.estimator_list) * self._settings["component_models"]["time_budget"]
             )
 
         cmtb = self._settings["component_models"]["time_budget"]
@@ -485,9 +467,7 @@ def fit(
         # )
         # )
 
-        search_space = self.cfg.search_space(
-            self.estimator_list, data_size=data.data.shape
-        )
+        search_space = self.cfg.search_space(self.estimator_list, data_size=data.data.shape)
         init_cfg = (
             self.cfg.default_configs(self.estimator_list, data_size=data.data.shape)
             if self._settings["try_init_configs"]
@@ -507,14 +487,12 @@ def fit(
                 self._tune_with_config,
                 search_space,
                 metric=self.metric,
+                # use_ray=self.use_ray,
                 cost_attr="evaluation_cost",
-                points_to_evaluate=(
-                    init_cfg if len(self.resume_cfg) == 0 else self.resume_cfg
-                ),
-                evaluated_rewards=(
-                    [] if len(self.resume_scores) == 0 else self.resume_scores
-                ),
+                points_to_evaluate=(init_cfg if len(self.resume_cfg) == 0 else self.resume_cfg),
+                evaluated_rewards=([] if len(self.resume_scores) == 0 else self.resume_scores),
                 mode=("min" if self.metric in metrics_to_minimize() else "max"),
+                # resources_per_trial= {"cpu": 1} if self.use_ray else None,
                 low_cost_partial_config={},
                 **self._settings["tuner"],
             )
@@ -529,12 +507,8 @@ def fit(
                 self._tune_with_config,
                 search_space,
                 metric=self.metric,
-                points_to_evaluate=(
-                    init_cfg if len(self.resume_cfg) == 0 else self.resume_cfg
-                ),
-                evaluated_rewards=(
-                    [] if len(self.resume_scores) == 0 else self.resume_scores
-                ),
+                points_to_evaluate=(init_cfg if len(self.resume_cfg) == 0 else self.resume_cfg),
+                evaluated_rewards=([] if len(self.resume_scores) == 0 else self.resume_scores),
                 mode=("min" if self.metric in metrics_to_minimize() else "max"),
                 low_cost_partial_config={},
                 **self._settings["tuner"],
@@ -568,18 +542,25 @@ def _tune_with_config(self, config: dict) -> dict:
         Returns:
             (dict): values of metrics after optimisation
         """
-        estimates = Parallel(n_jobs=2, backend="threading")(
-            delayed(self._estimate_effect)(config) for i in range(1)
-        )[0]
+        from causaltune.remote import remote_exec
+
+        if self.use_ray:
+            # flaml.tune handles the interaction with Ray itself
+            # estimates = self._estimate_effect(config)
+            estimates = remote_exec(CausalTune._estimate_effect, (self, config), self.use_ray)
+        else:
+            estimates = remote_exec(CausalTune._estimate_effect, (self, config), self.use_ray)
+
+        # Parallel(n_jobs=2, backend="threading")(
+        #     delayed(self._estimate_effect)(config) for i in range(1)
+        # ))[0]
 
         if "exception" not in estimates:
             est_name = estimates["estimator_name"]
             current_score = estimates[self.metric]
 
             estimates["optimization_score"] = current_score
-            estimates["evaluation_cost"] = (
-                1e8  # will be overwritten for successful runs
-            )
+            estimates["evaluation_cost"] = 1e8  # will be overwritten for successful runs
 
             # Initialize best_score if this is the first estimator for this name
             if est_name not in self._best_estimators:
@@ -611,22 +592,19 @@ def _tune_with_config(self, config: dict) -> dict:
611592 "codec" ,
612593 "policy_risk" ,
613594 ]:
614- is_better = (
615- np .isfinite ( current_score ) and current_score < best_score
616- ) or ( np . isinf ( best_score ) and np . isfinite ( current_score ))
595+ is_better = (np . isfinite ( current_score ) and current_score < best_score ) or (
596+ np .isinf ( best_score ) and np . isfinite ( current_score )
597+ )
617598 else :
618- is_better = (
619- np .isfinite ( current_score ) and current_score > best_score
620- ) or ( np . isinf ( best_score ) and np . isfinite ( current_score ))
599+ is_better = (np . isfinite ( current_score ) and current_score > best_score ) or (
600+ np .isinf ( best_score ) and np . isfinite ( current_score )
601+ )
621602
622603 # Store the estimator if we're storing all, if it's better, or if it's the first valid (non-inf) estimator
623604 if (
624605 self ._settings ["store_all" ]
625606 or is_better
626- or (
627- self ._best_estimators [est_name ][1 ] is None
628- and np .isfinite (current_score )
629- )
607+ or (self ._best_estimators [est_name ][1 ] is None and np .isfinite (current_score ))
630608 ):
631609 self ._best_estimators [est_name ] = (
632610 current_score ,
@@ -658,9 +636,7 @@ def _estimate_effect(self, config):
         # Do we need an object property for this, instead of a local var?
         self.estimator_name = config["estimator"]["estimator_name"]
         outcome_model = self.init_outcome_model(self._settings["outcome_model"])
-        method_params = self.cfg.method_params(
-            config, outcome_model, self.propensity_model
-        )
+        method_params = self.cfg.method_params(config, outcome_model, self.propensity_model)
 
         try:  #
             # This calls the causal model's estimate_effect method
@@ -697,9 +673,7 @@ def _estimate_effect(self, config):
             }
 
     def _compute_metrics(self, estimator, df: pd.DataFrame) -> dict:
-        return self.scorer.make_scores(
-            estimator, df, self.metrics_to_report, r_scorer=None
-        )
+        return self.scorer.make_scores(estimator, df, self.metrics_to_report, r_scorer=None)
 
     def score_dataset(self, df: pd.DataFrame, dataset_name: str):
         """
@@ -714,13 +688,9 @@ def score_dataset(self, df: pd.DataFrame, dataset_name: str):
714688 """
715689 for scr in self .scores .values ():
716690 if scr ["estimator" ] is None :
717- warnings .warn (
718- "Skipping scoring for estimator %s" , scr ["estimator_name" ]
719- )
691+ warnings .warn ("Skipping scoring for estimator %s" , scr ["estimator_name" ])
720692 else :
721- scr ["scores" ][dataset_name ] = self ._compute_metrics (
722- scr ["estimator" ], df
723- )
693+ scr ["scores" ][dataset_name ] = self ._compute_metrics (scr ["estimator" ], df )
724694
725695 @property
726696 def best_estimator (self ) -> str :
@@ -793,9 +763,7 @@ def effect(self, df, *args, **kwargs):
793763 """
794764 return self .model .effect (df , * args , ** kwargs )
795765
796- def predict (
797- self , cd : CausalityDataset , preprocess : Optional [bool ] = False , * args , ** kwargs
798- ):
766+ def predict (self , cd : CausalityDataset , preprocess : Optional [bool ] = False , * args , ** kwargs ):
799767 """Heterogeneous Treatment Effects for data CausalityDataset
800768
801769 Args:
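
The new `_tune_with_config` delegates effect estimation through `causaltune.remote.remote_exec`, whose implementation is not part of this diff. A minimal sketch of what such a dispatcher could look like is given below, purely for illustration: it assumes the third argument toggles between a plain local call and a Ray task, and that Ray is installed when that flag is true; the actual `causaltune.remote` module may differ.

# Hypothetical sketch only -- not the actual causaltune.remote implementation.
from typing import Any, Callable, Tuple


def remote_exec(func: Callable, args: Tuple, use_ray: bool = False) -> Any:
    """Run func(*args) either locally or as a Ray task (assumed contract)."""
    if not use_ray:
        # Local path: a direct call, no serialization overhead.
        return func(*args)

    import ray  # requires `pip install ray`

    if not ray.is_initialized():
        ray.init(ignore_reinit_error=True)

    # Submit the function as a Ray task and block until the result is ready.
    remote_fn = ray.remote(func)
    return ray.get(remote_fn.remote(*args))

Given the signature change above, the flag can be supplied either at construction time or per fit, e.g. `CausalTune(use_ray=True)` or `ct.fit(data=cd, outcome="y", use_ray=True)` (a hypothetical call pattern inferred from the parameters shown in this diff).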