77from typing import Optional
88
99import dill
10- import numpy as np
1110import pandas as pd
1211
1312from bluecast .ai .agents .data_analyst import DataAnalystAgent
@@ -116,14 +115,20 @@ def _apply_smart_sampling(self) -> None:
116115 target = self .context .target_col
117116 if target in df .columns and df [target ].nunique () <= 20 :
118117 # Stratified sampling for classification
119- sample_df = df .groupby (target , group_keys = False ).apply (
120- lambda x : x .sample (
121- n = min (len (x ), max (1 , int (max_rows * len (x ) / n_rows ))),
122- random_state = 42 ,
118+ sample_df = (
119+ df .groupby (target , group_keys = False )
120+ .apply (
121+ lambda x : x .sample (
122+ n = min (len (x ), max (1 , int (max_rows * len (x ) / n_rows ))),
123+ random_state = 42 ,
124+ )
123125 )
124- ).reset_index (drop = True )
126+ .reset_index (drop = True )
127+ )
125128 else :
126- sample_df = df .sample (n = max_rows , random_state = 42 ).reset_index (drop = True )
129+ sample_df = df .sample (n = max_rows , random_state = 42 ).reset_index (
130+ drop = True
131+ )
127132
128133 msg = (
129134 f"Dataset sampled: { n_rows } -> { len (sample_df )} rows "
@@ -167,7 +172,8 @@ def _save_checkpoint(self, step_name: str) -> None:
167172 with open (path , "wb" ) as f :
168173 dill .dump (self .context , f )
169174 self .context .log (
170- "Orchestrator" , f"Checkpoint saved after '{ step_name } '" ,
175+ "Orchestrator" ,
176+ f"Checkpoint saved after '{ step_name } '" ,
171177 event_type = "checkpoint" ,
172178 )
173179 if self .config .verbose :
@@ -202,8 +208,13 @@ def _load_checkpoint(self) -> bool:
202208
203209 # Re-attach context to all agents
204210 for agent in [
205- self .planner , self .analyst , self .engineer ,
206- self .builder , self .evaluator , self .researcher , self .reporter ,
211+ self .planner ,
212+ self .analyst ,
213+ self .engineer ,
214+ self .builder ,
215+ self .evaluator ,
216+ self .researcher ,
217+ self .reporter ,
207218 ]:
208219 agent .context = self .context
209220
@@ -233,7 +244,7 @@ def run(self) -> BlueCastAIResult:
233244 print ("BlueCastAI - Multi-Agent AutoML Pipeline" )
234245 print ("=" * 60 )
235246
236- resumed = self ._load_checkpoint ()
247+ self ._load_checkpoint ()
237248
238249 # --- Step 0: Smart sampling ---
239250 if not self ._is_step_done ("sampling" ):
@@ -273,7 +284,9 @@ def run(self) -> BlueCastAIResult:
273284
274285 # --- Step 5: Build-Evaluate-Improve loop ---
275286 if not self ._is_step_done ("build_loop" ):
276- max_iterations = plan .get ("max_iterations" , self .config .get_max_iterations ())
287+ max_iterations = plan .get (
288+ "max_iterations" , self .config .get_max_iterations ()
289+ )
277290 self ._step_build_loop (plan , max_iterations )
278291 self ._save_checkpoint ("build_loop" )
279292
@@ -324,8 +337,10 @@ def _step_plan(self) -> dict:
324337
325338 self .context .class_problem = plan .get ("class_problem" , "binary" )
326339 self .context .log (
327- "Orchestrator" , f"Plan: { json .dumps (plan , indent = 2 )} " ,
328- event_type = "plan" , metadata = {"plan" : plan },
340+ "Orchestrator" ,
341+ f"Plan: { json .dumps (plan , indent = 2 )} " ,
342+ event_type = "plan" ,
343+ metadata = {"plan" : plan },
329344 )
330345
331346 if self .config .verbose :
@@ -341,7 +356,11 @@ def _step_plan(self) -> dict:
341356 def _reconstruct_plan (self ) -> dict :
342357 """Reconstruct the plan from structured log metadata."""
343358 for entry in self .context .structured_log :
344- if entry .event_type == "plan" and entry .metadata and "plan" in entry .metadata :
359+ if (
360+ entry .event_type == "plan"
361+ and entry .metadata
362+ and "plan" in entry .metadata
363+ ):
345364 return entry .metadata ["plan" ]
346365 return self .planner ._default_plan ()
347366
@@ -362,7 +381,14 @@ def _step_analyze(self) -> None:
362381 )
363382 self .context .data_profile = {"summary" : result }
364383
365- for keyword in ["leakage" , "imbalance" , "missing" , "null" , "duplicate" , "constant" ]:
384+ for keyword in [
385+ "leakage" ,
386+ "imbalance" ,
387+ "missing" ,
388+ "null" ,
389+ "duplicate" ,
390+ "constant" ,
391+ ]:
366392 if keyword in result .lower ():
367393 self .context .data_warnings .append (
368394 f"Data analyst flagged: { keyword } detected"
@@ -373,7 +399,9 @@ def _step_feature_engineer(self, plan: dict) -> None:
373399 print ("\n Step 4: Engineering features..." )
374400
375401 hints = plan .get ("feature_engineering_hints" , [])
376- hint_text = "\n " .join (f"- { h } " for h in hints ) if hints else "Use your judgment."
402+ hint_text = (
403+ "\n " .join (f"- { h } " for h in hints ) if hints else "Use your judgment."
404+ )
377405
378406 task = (
379407 f"Create useful features for this { self .context .class_problem } problem.\n "
@@ -383,7 +411,11 @@ def _step_feature_engineer(self, plan: dict) -> None:
383411 self .engineer .run (task )
384412
385413 if self .context .engineered_df is not None and self .config .verbose :
386- orig_cols = len (self .context .df_train .columns )
414+ orig_cols = (
415+ len (self .context .df_train .columns )
416+ if self .context .df_train is not None
417+ else 0
418+ )
387419 new_cols = len (self .context .engineered_df .columns )
388420 print (f" Features: { orig_cols } -> { new_cols } columns" )
389421
@@ -398,7 +430,9 @@ def _step_build_loop(self, plan: dict, max_iterations: int) -> None:
398430 build_task = self ._create_build_task (plan , iteration )
399431 self .builder .run (build_task )
400432
401- latest_run = self .context .run_history [- 1 ] if self .context .run_history else None
433+ latest_run = (
434+ self .context .run_history [- 1 ] if self .context .run_history else None
435+ )
402436 if latest_run and self .config .verbose :
403437 status = "OK" if latest_run ["success" ] else "FAILED"
404438 print (f" Result [{ status } ]: { latest_run .get ('metrics' , {})} " )
0 commit comments