Tool timeout, custom deployment cleanup, DAEnv from_task update (#17)

ludomitch · web-flow · commit 5d70e7d8deec · 2025-05-06T23:41:14.000-07:00
diff --git a/src/fhda/Dockerfile.custom_deployment b/src/fhda/Dockerfile.custom_deployment
@@ -1,5 +1,5 @@
 # syntax=docker/dockerfile:1.4
-FROM python:3.12-slim AS base
+FROM python:3.12-slim@sha256:bae1a061b657f403aaacb1069a7f67d91f7ef5725ab17ca36abc5f1b2797ff92 AS base
 
 WORKDIR /app
 ENV PYTHONUNBUFFERED=1
@@ -120,6 +120,12 @@ RUN --mount=type=cache,target=/app/miniconda/pkgs \
 
 ENV UV_COMPILE_BYTECODE=1
 ENV UV_LINK_MODE=copy
+# Trim the conda tree (package caches, bytecode, bundled test dirs, *.a archives, JS source maps); -prune keeps find from descending into directories it is about to delete.
+RUN mamba clean --all -y && \
+    rm -rf /app/miniconda/pkgs/* && \
+    find /app/miniconda -type d \( -name __pycache__ -o -name tests -o -name '*.tests' -o -name 'test' \) -prune -exec rm -rf {} + && \
+    find /app/miniconda -type f -name '*.a' -delete && \
+    find /app/miniconda -type f -name '*.js.map' -delete
 
 FROM base AS builder
 
diff --git a/src/fhda/data_analysis_env.py b/src/fhda/data_analysis_env.py
@@ -1,4 +1,3 @@
-import hashlib
 import logging
 import shutil
 from typing import Any, cast
@@ -16,7 +15,7 @@
 from futurehouse_client import FutureHouseClient
 
 from .notebook_env import NBEnvironment
-from .utils import NBLanguage, MultipleChoiceQuestion, nb_to_html
+from .utils import NBLanguage, MultipleChoiceQuestion
 from . import prompts
 from . import config as cfg
 
@@ -150,7 +149,7 @@ def export_frame(self) -> Frame:
                 "done": self.state.done,
                 "total_reward": self.state.total_reward,
                 "nb_state": self.state.nb,
-                "nb_state_html": nb_to_html(self.state.nb),
+                # "nb_state_html": nb_to_html(self.state.nb), # temporarily disabled
                 "nb_runtime_errors": self.state.notebook_runtime_errors,
             },
             info={
@@ -168,6 +167,8 @@ def from_task(
         cls,
         task: str,
         gcs_artifact_path: str | None = None,
+        trajectory_id: str | None = None,
+        user_id: str | None = None,
         environment_config: dict[str, Any] | None = None,
     ) -> "DataAnalysisEnv":
         """
@@ -178,9 +179,11 @@ def from_task(
             gcs_artifact_path: The path to the GCS artifact – required for evaluation on crow jobs
             environment_config: A JSON string of environment configuration
         """
-        logger.info("User task: %s", task[:100])
+        logger.info("User task: %s", task[:50])
         logger.info("GCS artifact path: %s", gcs_artifact_path)
         logger.info("environment_config: %s", environment_config)
+        logger.info("trajectory_id: %s", trajectory_id)
+        logger.info("user_id: %s", user_id)
         # Track cost of running the environment
         enable_cost_tracking()
         if (
@@ -190,6 +193,22 @@ def from_task(
                 "Running crow jobs without gcs_artifact_path is not supported"
             )
 
+        if user_id is None:
+            user_id = "default_user"
+        if trajectory_id is None:
+            trajectory_id = f"{gcs_artifact_path}-{time.time()}"
+
+        # Always create a new directory for the trajectory
+        trajectory_path = (
+            cfg.DATA_STORAGE_PATH / "user_trajectories" / user_id / trajectory_id
+        )
+        logger.info("Trajectory path: %s", trajectory_path)
+        trajectory_path.mkdir(parents=True, exist_ok=True)
+        for item in (cfg.DATA_STORAGE_PATH / gcs_artifact_path).iterdir():
+            if item.is_file():
+                shutil.copy2(item, trajectory_path)
+            elif item.is_dir():
+                shutil.copytree(item, trajectory_path / item.name, dirs_exist_ok=True)
         if environment_config:
             kwargs = {
                 k: v
@@ -200,39 +219,27 @@ def from_task(
             kwargs = {}
             environment_config = {}
         logger.info("Filtered kwargs: %s", kwargs)
-        task_hash = hashlib.sha256(task.encode()).hexdigest()
-        if environment_config.get("eval", False):
-            logger.info("Eval mode is True")
-            # Create a temporary directory in GCP mounted storage volume
-            trajectory_path = cfg.DATA_STORAGE_PATH / f"{task_hash}-{time.time()}"
-            trajectory_path.mkdir(parents=True, exist_ok=True)
-            for item in (cfg.DATA_STORAGE_PATH / gcs_artifact_path).iterdir():
-                if item.is_file():
-                    shutil.copy2(item, trajectory_path)
-                elif item.is_dir():
-                    shutil.copytree(
-                        item, trajectory_path / item.name, dirs_exist_ok=True
-                    )
-        else:
-            logger.info("Eval mode is False")
-            # Use the GCP folder created when uploading the data via the platform
-            trajectory_path = cfg.DATA_STORAGE_PATH / gcs_artifact_path
-            # Augment incoming user query with CoT instructions
+
+        language = getattr(NBLanguage, environment_config.get("language", "PYTHON"))
+        # Overwrite the language in the kwargs with NBLanguage enum
+        kwargs["language"] = language
+        logger.info("Language: %s", language.name)
+
+        if not environment_config.get("eval", False):
+            logger.info(
+                "Platform job detected, augmenting user query with CoT instructions"
+            )
+            # If running via the platform, augment incoming user query with CoT instructions
             task = (
-                f"Here is the user query to address:\n"
+                f"{prompts.CHAIN_OF_THOUGHT_AGNOSTIC.format(language=kwargs.get('language', 'PYTHON'))}\n"
+                f"{prompts.GENERAL_NOTEBOOK_GUIDELINES.format(language=kwargs.get('language', 'PYTHON'))}"
+                f"Here is the research question to address:\n"
                 f"<query>\n"
                 f"{task}\n"
                 f"</query>\n"
-                f"{prompts.CHAIN_OF_THOUGHT_AGNOSTIC.format(language=kwargs.get('language', 'PYTHON'))}\n"
-                f"{prompts.GENERAL_NOTEBOOK_GUIDELINES.format(language=kwargs.get('language', 'PYTHON'))}"
             )
-        logger.info("Trajectory path: %s", trajectory_path)
         nb_path = trajectory_path / NBEnvironment.NOTEBOOK_NAME
         logger.info("NB path: %s", nb_path)
-        language = getattr(NBLanguage, environment_config.get("language", "PYTHON"))
-        # Overwrite the language in the kwargs with NBLanguage enum
-        kwargs["language"] = language
-        logger.info("Language: %s", language.name)
 
         if trajectory_path.exists():
             files = list(trajectory_path.iterdir())
@@ -245,12 +252,14 @@ def from_task(
             raise ValueError(f"Trajectory path does not exist: {trajectory_path}")
 
         return cls(
-            problem_id=f"data-analysis-task-{task_hash}",
+            problem_id=f"data-analysis-task-{trajectory_id}",
             problem=task,
             eval_mode=EvalAnswerMode.LLM,
             nb_path=nb_path,
             work_dir=trajectory_path,
-            system_prompt=prompts.CAPSULE_SYSTEM_PROMPT_QUERY,
+            system_prompt=environment_config.get(
+                "system_prompt", prompts.CAPSULE_SYSTEM_PROMPT_QUERY
+            ),
             use_tmp_work_dir=False,
             **kwargs,
         )
diff --git a/src/fhda/notebook_env.py b/src/fhda/notebook_env.py
@@ -6,6 +6,7 @@
 from pathlib import Path
 from tempfile import mkdtemp
 from typing import Any, ClassVar, Self, cast
+import asyncio
 
 import aiodocker
 import nbformat
@@ -334,11 +335,17 @@ async def _run_notebook_docker(self) -> str:
 
     async def _run_notebook_local(self, cell_idx: int | None = None) -> str:
         """Run notebook using local kernel."""
-        client = self.state.kernel_manager.client()
-        client.start_channels()
-        error_messages = await utils.nbformat_run_notebook(
-            cells=self.state.cells, client=client, cell_idx=cell_idx
-        )
+        try:
+            async with asyncio.timeout(self.EXEC_TIMEOUT):
+                client = self.state.kernel_manager.client()
+                client.start_channels()
+                error_messages = await utils.nbformat_run_notebook(
+                    cells=self.state.cells, client=client, cell_idx=cell_idx
+                )
+        except TimeoutError as err:
+            raise TimeoutError(
+                f"Notebook execution timed out after {self.EXEC_TIMEOUT} seconds"
+            ) from err
         if error_messages:
             self.state.notebook_runtime_errors.extend(error_messages)
         self.state.save_nb()
diff --git a/src/fhda/prompts.py b/src/fhda/prompts.py
@@ -18,9 +18,10 @@
 """
 
 CAPSULE_SYSTEM_PROMPT_QUERY = """
-You are an expert data scientist.
-Your task is to create a comprehensive Jupyter notebook named 'notebook.ipynb' that thoroughly analyzes data to answer a user query
-The notebook should contain all necessary artifacts (plots, tables, print outputs, code commentary) to fully answer the query.
+You are an expert bioinformatician and seasoned biological data scientist.
+Your task is to create a comprehensive Jupyter notebook named 'notebook.ipynb' that analyzes data to answer a user query.
+The notebook should contain all necessary artifacts (plots, tables, print outputs) to fully answer these questions.
+Take your time to think through the question and the data before writing any code, explore the data rigorously and defend your conclusions rigorously.
 """
 
 # Guidelines for R code output optimization
@@ -59,6 +60,7 @@
 - If you need to install packages, use pip or mamba.
 - All cells are by default {language} cells. Use {language} or bash tools for all analysis.
 - You can use bash cells by adding %%bash to the first line of the cell or running a subprocess.
+- You can only create code cells, no markdown cells.
 """
 
 
diff --git a/src/scripts/deploy.py b/src/scripts/deploy.py
@@ -12,7 +12,7 @@
 )
 from futurehouse_client.models.app import TaskQueuesConfig
 
-HIGH = False
+HIGH = True
 ENVIRONMENT = "DEV"
 
 ENV_VARS = {
@@ -85,8 +85,9 @@ def rename_dockerfile(path: Path, new_name: str):
         auth_type=AuthType.API_KEY,
         api_key=os.environ[f"CROW_API_KEY_{ENV_VARS['STAGE']}"],
     )
-
-    if not HIGH:
+    if HIGH:
+        print("Using custom deployment Dockerfile")
+    else:
         dockerfile_path = Path("src/fhda/Dockerfile.custom_deployment")
         rename_dockerfile(dockerfile_path, "Dockerfile_skip.custom_deployment")