ThomasMeissnerDS
diff --git a/bluecast/ai/__init__.py b/bluecast/ai/__init__.py
Lines changed: 15 additions & 4 deletions
diff --git a/bluecast/ai/agents/base.py b/bluecast/ai/agents/base.py
Lines changed: 16 additions & 7 deletions
diff --git a/bluecast/ai/agents/feature_engineer.py b/bluecast/ai/agents/feature_engineer.py
Lines changed: 11 additions & 3 deletions
diff --git a/bluecast/ai/agents/pipeline_builder.py b/bluecast/ai/agents/pipeline_builder.py
Lines changed: 11 additions & 4 deletions
diff --git a/bluecast/ai/agents/reporter.py b/bluecast/ai/agents/reporter.py
Lines changed: 16 additions & 5 deletions
diff --git a/bluecast/ai/agents/researcher.py b/bluecast/ai/agents/researcher.py
Lines changed: 3 additions & 1 deletion
diff --git a/bluecast/ai/context.py b/bluecast/ai/context.py
Lines changed: 1 addition & 1 deletion
diff --git a/bluecast/ai/orchestrator.py b/bluecast/ai/orchestrator.py
Lines changed: 53 additions & 19 deletions
@@ -35,15 +35,26 @@ def _create_provider(config: AIConfig):
 
     if config.provider == "gemini":
         from bluecast.ai.providers.gemini import GeminiProvider
-        return GeminiProvider(api_key=config.api_key, model=model, temperature=config.temperature)
+
+        return GeminiProvider(
+            api_key=config.api_key, model=model, temperature=config.temperature
+        )
     elif config.provider == "openai":
         from bluecast.ai.providers.openai_provider import OpenAIProvider
-        return OpenAIProvider(api_key=config.api_key, model=model, temperature=config.temperature)
+
+        return OpenAIProvider(
+            api_key=config.api_key, model=model, temperature=config.temperature
+        )
     elif config.provider == "anthropic":
         from bluecast.ai.providers.anthropic_provider import AnthropicProvider
-        return AnthropicProvider(api_key=config.api_key, model=model, temperature=config.temperature)
+
+        return AnthropicProvider(
+            api_key=config.api_key, model=model, temperature=config.temperature
+        )
     else:
-        raise ValueError(f"Unknown provider: {config.provider}. Use 'gemini', 'openai', or 'anthropic'.")
+        raise ValueError(
+            f"Unknown provider: {config.provider}. Use 'gemini', 'openai', or 'anthropic'."
+        )
 
 
 class BlueCastAI:
 
@@ -6,7 +6,12 @@
 from typing import Any, Callable, Dict, List, Optional
 
 from bluecast.ai.context import SharedContext
-from bluecast.ai.providers.base import BaseLLMProvider, LLMResponse, Message, ToolDefinition
+from bluecast.ai.providers.base import (
+    BaseLLMProvider,
+    LLMResponse,
+    Message,
+    ToolDefinition,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -36,15 +41,15 @@ def __init__(
     @property
     @abstractmethod
     def name(self) -> str:
-        ...
+        pass
 
     @abstractmethod
     def system_prompt(self) -> str:
-        ...
+        pass
 
     @abstractmethod
     def get_tools(self) -> List[ToolDefinition]:
-        ...
+        pass
 
     def register_tool_impl(self, name: str, func: Callable) -> None:
         self._tool_implementations[name] = func
@@ -75,7 +80,9 @@ def run(self, task: str) -> str:
             print(f"  [{self.name}] Starting: {task[:80]}...")
 
         self.context.log(
-            self.name, task[:500], event_type="task",
+            self.name,
+            task[:500],
+            event_type="task",
             metadata={"full_task_length": len(task)},
         )
 
@@ -87,7 +94,7 @@ def run(self, task: str) -> str:
         tools = self.get_tools()
         response: Optional[LLMResponse] = None
 
-        for iteration in range(MAX_TOOL_ITERATIONS):
+        for _iteration in range(MAX_TOOL_ITERATIONS):
             response = self.llm.chat(messages, tools=tools if tools else None)
 
             if response.has_tool_calls:
@@ -145,7 +152,9 @@ def run(self, task: str) -> str:
 
         final_text = response.text if response else "Agent reached max tool iterations."
         self.context.log(
-            self.name, final_text[:300], event_type="error",
+            self.name,
+            final_text[:300],
+            event_type="error",
             metadata={"reason": "max_iterations_reached"},
         )
         return final_text
@@ -1,6 +1,5 @@
 """Feature engineer agent: creates new features based on data analysis."""
 
-import json
 from typing import List
 
 from bluecast.ai.agents.base import BaseAgent
@@ -19,8 +18,15 @@ def __init__(self, *args, **kwargs):
     def _create_feature_wrapper(self, feature_code: str, description: str = "", **kw):
         if self.context.engineered_df is not None:
             df = self.context.engineered_df
-        else:
+        elif self.context.df_train is not None:
             df = self.context.df_train.copy()
+        else:
+            return {
+                "success": False,
+                "new_columns": [],
+                "shape": [],
+                "error": "No training data available.",
+            }
 
         result = tool_create_feature(df, feature_code)
         if result["success"]:
@@ -40,7 +46,9 @@ def system_prompt(self) -> str:
         profile = self.context.data_profile or "Not yet profiled."
         hints = ""
         if self.context.data_warnings:
-            hints = "\nData warnings:\n" + "\n".join(f"- {w}" for w in self.context.data_warnings)
+            hints = "\nData warnings:\n" + "\n".join(
+                f"- {w}" for w in self.context.data_warnings
+            )
 
         return f"""You are a feature engineer for the BlueCast AutoML framework.
 
 
@@ -1,6 +1,5 @@
 """Pipeline builder agent: generates and runs BlueCast pipelines."""
 
-import json
 from typing import List
 
 from bluecast.ai.agents.base import BaseAgent
@@ -17,7 +16,11 @@ def __init__(self, *args, **kwargs):
         )
 
     def _build_wrapper(self, **config):
-        df = self.context.engineered_df if self.context.engineered_df is not None else self.context.df_train
+        df = (
+            self.context.engineered_df
+            if self.context.engineered_df is not None
+            else self.context.df_train
+        )
         result = tool_build_and_run_pipeline(df, self.context.target_col, config)
 
         run_record = {
@@ -77,7 +80,9 @@ def _generate_pipeline_code(self, config: dict) -> None:
 
         strategy = config.get("ensemble_strategy", "mean")
         if config.get("use_cv", True):
-            lines.append(f"ensemble_config = EnsembleConfig(ensemble_strategy=\"{strategy}\")")
+            lines.append(
+                f'ensemble_config = EnsembleConfig(ensemble_strategy="{strategy}")'
+            )
             lines.append("")
 
         lines.append("pipeline = BlueCastAuto(")
@@ -88,7 +93,9 @@ def _generate_pipeline_code(self, config: dict) -> None:
             lines.append("    ensemble_config=ensemble_config,")
         lines.append(")")
         lines.append("")
-        lines.append(f"pipeline.fit_eval(df_train, target_col=\"{self.context.target_col}\")")
+        lines.append(
+            f'pipeline.fit_eval(df_train, target_col="{self.context.target_col}")'
+        )
 
         self.context.pipeline_code = "\n".join(lines)
 
 
@@ -67,13 +67,20 @@ def build_report_task(self) -> str:
             )
 
         if self.context.data_profile:
-            sections.append(f"\nData profile:\n{json.dumps(self.context.data_profile, indent=2, default=str)[:3000]}")
+            sections.append(
+                f"\nData profile:\n{json.dumps(self.context.data_profile, indent=2, default=str)[:3000]}"
+            )
 
         if self.context.data_warnings:
-            sections.append("\nWarnings:\n" + "\n".join(f"- {w}" for w in self.context.data_warnings))
+            sections.append(
+                "\nWarnings:\n"
+                + "\n".join(f"- {w}" for w in self.context.data_warnings)
+            )
 
         if self.context.feature_engineering_code:
-            sections.append(f"\nFeature engineering code:\n```python\n{self.context.feature_engineering_code}\n```")
+            sections.append(
+                f"\nFeature engineering code:\n```python\n{self.context.feature_engineering_code}\n```"
+            )
         else:
             sections.append("\nNo feature engineering was applied.")
 
@@ -90,10 +97,14 @@ def build_report_task(self) -> str:
             sections.append(f"\nBest metrics: {self.context.best_metrics}")
 
         if self.context.web_research:
-            sections.append(f"\nWeb research findings:\n{self.context.web_research[:1000]}")
+            sections.append(
+                f"\nWeb research findings:\n{self.context.web_research[:1000]}"
+            )
 
         if self.context.pipeline_code:
-            sections.append(f"\nGenerated pipeline code:\n```python\n{self.context.pipeline_code}\n```")
+            sections.append(
+                f"\nGenerated pipeline code:\n```python\n{self.context.pipeline_code}\n```"
+            )
 
         # Include a selection of the structured log
         log_entries = self.context.structured_log[-30:]
 
@@ -10,7 +10,9 @@
 class ResearcherAgent(BaseAgent):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        self.register_tool_impl("web_search", lambda query, **kw: tool_web_search(query))
+        self.register_tool_impl(
+            "web_search", lambda query, **kw: tool_web_search(query)
+        )
 
     @property
     def name(self) -> str:
 
@@ -116,7 +116,7 @@ def get_data_summary(self) -> str:
         df = self.get_working_df()
         lines = []
 
-        if self.was_sampled:
+        if self.was_sampled and self.original_shape is not None:
             lines.append(
                 f"NOTE: Working on a stratified sample of {df.shape[0]} rows "
                 f"(original: {self.original_shape[0]} rows x {self.original_shape[1]} cols). "
 
@@ -7,7 +7,6 @@
 from typing import Optional
 
 import dill
-import numpy as np
 import pandas as pd
 
 from bluecast.ai.agents.data_analyst import DataAnalystAgent
@@ -116,14 +115,20 @@ def _apply_smart_sampling(self) -> None:
             target = self.context.target_col
             if target in df.columns and df[target].nunique() <= 20:
                 # Stratified sampling for classification
-                sample_df = df.groupby(target, group_keys=False).apply(
-                    lambda x: x.sample(
-                        n=min(len(x), max(1, int(max_rows * len(x) / n_rows))),
-                        random_state=42,
+                sample_df = (
+                    df.groupby(target, group_keys=False)
+                    .apply(
+                        lambda x: x.sample(
+                            n=min(len(x), max(1, int(max_rows * len(x) / n_rows))),
+                            random_state=42,
+                        )
                     )
-                ).reset_index(drop=True)
+                    .reset_index(drop=True)
+                )
             else:
-                sample_df = df.sample(n=max_rows, random_state=42).reset_index(drop=True)
+                sample_df = df.sample(n=max_rows, random_state=42).reset_index(
+                    drop=True
+                )
 
             msg = (
                 f"Dataset sampled: {n_rows} -> {len(sample_df)} rows "
@@ -167,7 +172,8 @@ def _save_checkpoint(self, step_name: str) -> None:
             with open(path, "wb") as f:
                 dill.dump(self.context, f)
             self.context.log(
-                "Orchestrator", f"Checkpoint saved after '{step_name}'",
+                "Orchestrator",
+                f"Checkpoint saved after '{step_name}'",
                 event_type="checkpoint",
             )
             if self.config.verbose:
@@ -202,8 +208,13 @@ def _load_checkpoint(self) -> bool:
 
             # Re-attach context to all agents
             for agent in [
-                self.planner, self.analyst, self.engineer,
-                self.builder, self.evaluator, self.researcher, self.reporter,
+                self.planner,
+                self.analyst,
+                self.engineer,
+                self.builder,
+                self.evaluator,
+                self.researcher,
+                self.reporter,
             ]:
                 agent.context = self.context
 
@@ -233,7 +244,7 @@ def run(self) -> BlueCastAIResult:
             print("BlueCastAI - Multi-Agent AutoML Pipeline")
             print("=" * 60)
 
-        resumed = self._load_checkpoint()
+        self._load_checkpoint()
 
         # --- Step 0: Smart sampling ---
         if not self._is_step_done("sampling"):
@@ -273,7 +284,9 @@ def run(self) -> BlueCastAIResult:
 
         # --- Step 5: Build-Evaluate-Improve loop ---
         if not self._is_step_done("build_loop"):
-            max_iterations = plan.get("max_iterations", self.config.get_max_iterations())
+            max_iterations = plan.get(
+                "max_iterations", self.config.get_max_iterations()
+            )
             self._step_build_loop(plan, max_iterations)
             self._save_checkpoint("build_loop")
 
@@ -324,8 +337,10 @@ def _step_plan(self) -> dict:
 
         self.context.class_problem = plan.get("class_problem", "binary")
         self.context.log(
-            "Orchestrator", f"Plan: {json.dumps(plan, indent=2)}",
-            event_type="plan", metadata={"plan": plan},
+            "Orchestrator",
+            f"Plan: {json.dumps(plan, indent=2)}",
+            event_type="plan",
+            metadata={"plan": plan},
         )
 
         if self.config.verbose:
@@ -341,7 +356,11 @@ def _step_plan(self) -> dict:
     def _reconstruct_plan(self) -> dict:
         """Reconstruct the plan from structured log metadata."""
         for entry in self.context.structured_log:
-            if entry.event_type == "plan" and entry.metadata and "plan" in entry.metadata:
+            if (
+                entry.event_type == "plan"
+                and entry.metadata
+                and "plan" in entry.metadata
+            ):
                 return entry.metadata["plan"]
         return self.planner._default_plan()
 
@@ -362,7 +381,14 @@ def _step_analyze(self) -> None:
         )
         self.context.data_profile = {"summary": result}
 
-        for keyword in ["leakage", "imbalance", "missing", "null", "duplicate", "constant"]:
+        for keyword in [
+            "leakage",
+            "imbalance",
+            "missing",
+            "null",
+            "duplicate",
+            "constant",
+        ]:
             if keyword in result.lower():
                 self.context.data_warnings.append(
                     f"Data analyst flagged: {keyword} detected"
@@ -373,7 +399,9 @@ def _step_feature_engineer(self, plan: dict) -> None:
             print("\nStep 4: Engineering features...")
 
         hints = plan.get("feature_engineering_hints", [])
-        hint_text = "\n".join(f"- {h}" for h in hints) if hints else "Use your judgment."
+        hint_text = (
+            "\n".join(f"- {h}" for h in hints) if hints else "Use your judgment."
+        )
 
         task = (
             f"Create useful features for this {self.context.class_problem} problem.\n"
@@ -383,7 +411,11 @@ def _step_feature_engineer(self, plan: dict) -> None:
         self.engineer.run(task)
 
         if self.context.engineered_df is not None and self.config.verbose:
-            orig_cols = len(self.context.df_train.columns)
+            orig_cols = (
+                len(self.context.df_train.columns)
+                if self.context.df_train is not None
+                else 0
+            )
             new_cols = len(self.context.engineered_df.columns)
             print(f"  Features: {orig_cols} -> {new_cols} columns")
 
@@ -398,7 +430,9 @@ def _step_build_loop(self, plan: dict, max_iterations: int) -> None:
             build_task = self._create_build_task(plan, iteration)
             self.builder.run(build_task)
 
-            latest_run = self.context.run_history[-1] if self.context.run_history else None
+            latest_run = (
+                self.context.run_history[-1] if self.context.run_history else None
+            )
             if latest_run and self.config.verbose:
                 status = "OK" if latest_run["success"] else "FAILED"
                 print(f"    Result [{status}]: {latest_run.get('metrics', {})}")