Skip to content

Commit b76ad46

Browse files
authored
Fix custom deployment (#8)
1 parent 1f4f7fc commit b76ad46

File tree

7 files changed

+31
-27
lines changed

7 files changed

+31
-27
lines changed

pyproject.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ dependencies = [
1010
"aiodocker==0.24.0",
1111
"fhaviary[server]==0.18.1",
1212
"fhlmi==0.26.0",
13+
"fh-llm-client==0.0.11", # TODO deprecate this
1314
"ldp==0.23.0",
1415
"pandas==2.2.3",
1516
"numpy==2.2.3",
@@ -22,7 +23,8 @@ dependencies = [
2223
"google-auth==2.38.0",
2324
"google-cloud-storage==3.0.0",
2425
"google-cloud-secret-manager==2.23.0",
25-
"crow-client>=0.3.14",
26+
"crow-client>=0.3.14", # TODO deprecate this
27+
"futurehouse-client", # TODO pin this
2628
"jupyter==1.1.1",
2729
"nbconvert==7.16.6",
2830
"notebook==7.3.2",

src/fhda/Dockerfile.custom_deployment

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ ENV PYTHONPATH="/app/miniconda/lib/python3.12/site-packages:${PYTHONPATH:-}"
3131
RUN pip3 install --no-cache-dir uv==0.5.21
3232
RUN conda install -c conda-forge mamba -y
3333

34-
3534
# Install R and kernels in the crow_env environment
3635
RUN mamba install -c conda-forge -y \
3736
r-base=4.3.3 \
@@ -91,6 +90,7 @@ RUN mamba install -c conda-forge -c bioconda -y \
9190
gseapy=1.1.4 \
9291
blast=2.16.0 \
9392
clipkit=2.3.0 \
93+
clustalo=1.2.4 \
9494
fastqc=0.12.1 \
9595
iqtree=2.3.6 \
9696
mafft=7.526 \
@@ -116,7 +116,7 @@ FROM base AS builder
116116

117117
ARG MODULE_NAME
118118
ARG USE_INTERNAL_DEPS
119-
ARG USE_GIT_CROW_CLIENT
119+
ARG USE_GIT_FUTUREHOUSE_CLIENT
120120

121121

122122
RUN mkdir -p ~/.ssh && \
@@ -150,12 +150,12 @@ RUN --mount=type=cache,target=/root/.cache/uv \
150150
else \
151151
echo 'Skipping aviary_internal install'; \
152152
fi && \
153-
if [ "$USE_GIT_CROW_CLIENT" = "true" ]; then \
153+
if [ "$USE_GIT_FUTUREHOUSE_CLIENT" = "true" ]; then \
154154
git clone git@github.com:Future-House/crow-ecosystem.git /app/crow-ecosystem && \
155-
cd /app/crow-ecosystem/packages/crow-client && \
155+
cd /app/crow-ecosystem/packages/futurehouse-client && \
156156
uv pip install --system -e .; \
157157
else \
158-
uv pip install --system crow-client; \
158+
uv pip install --system futurehouse-client; \
159159
fi
160160

161161
WORKDIR /app/${MODULE_NAME}
@@ -174,6 +174,11 @@ RUN --mount=type=ssh \
174174
RUN find /app -type l -delete && \
175175
rm -rf /app/.git
176176

177+
# Fix futurehouse_client imports by creating a symlink if we're using the git version
178+
RUN if [ "$USE_GIT_FUTUREHOUSE_CLIENT" = "true" ]; then \
179+
ln -sf /app/crow-ecosystem/packages/futurehouse-client/futurehouse_client /app/miniconda/lib/python3.12/site-packages/futurehouse_client; \
180+
fi
181+
177182
FROM base AS runtime
178183

179184
COPY --from=builder /app/ /app/

src/fhda/data_analysis_env.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ def from_task(
117117
gcs_artifact_path: The path to the GCS artifact – required for evaluation on crow jobs
118118
environment_config: A JSON string of environment configuration
119119
"""
120-
logger.info("User task: %s", task)
120+
logger.info("User task: %s", task[:100])
121121
logger.info("GCS artifact path: %s", gcs_artifact_path)
122122
logger.info("environment_config: %s", environment_config)
123123
# Track cost of running the environment
@@ -137,9 +137,10 @@ def from_task(
137137
}
138138
else:
139139
kwargs = {}
140+
environment_config = {}
140141
logger.info("Filtered kwargs: %s", kwargs)
141142
task_hash = hashlib.sha256(task.encode()).hexdigest()
142-
if kwargs.get("eval", False):
143+
if environment_config.get("eval", False):
143144
logger.info("Eval mode is True")
144145
# Create a temporary directory in GCP mounted storage volume
145146
trajectory_path = cfg.DATA_STORAGE_PATH / f"{task_hash}-{time.time()}"

src/fhda/prompts.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,8 @@
7070
- Ensure each cell executes successfully before moving to the next.
7171
- Assume you already have the packages you need installed and only install new ones if you receive errors.
7272
- If you need to install packages, use pip.
73-
- All cells are by default Python cells. Use python for all analysis.
73+
- All cells are by default Python cells. Use python or bash tools for all analysis.
74+
- You can use bash cells by adding %%bash to the first line of the cell or running a subprocess.
7475
"""
7576

7677
GENERAL_NOTEBOOK_GUIDELINES_R = """
@@ -223,7 +224,7 @@
223224
224225
4. Execute Analysis Plan:
225226
<analysis_planning>
226-
- For each step in your analysis plan, list the Python functions and libraries you'll use.
227+
- For each step in your analysis plan, list the Python or bash functions and libraries you'll use.
227228
- Think about how to structure your code for readability and efficiency.
228229
- Plan how to document your code with clear comments.
229230
- Consider how to present results clearly, using tables or visualizations where appropriate.

src/scripts/deploy.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ def rename_dockerfile(path: Path, new_name: str):
6060
if __name__ == "__main__":
6161
client = CrowClient(
6262
# stage=Stage.from_string(os.environ.get("CROW_ENV", ENV_VARS["STAGE"])),
63-
stage=Stage.from_string(os.environ.get("CROW_ENV", "LOCAL")),
63+
stage=Stage.from_string(os.environ.get("CROW_ENV", "PROD")),
6464
organization="FutureHouse",
6565
auth_type=AuthType.API_KEY,
6666
api_key=os.environ[f"CROW_API_KEY_{ENV_VARS['STAGE']}"],

src/scripts/platform_eval.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77
import pandas as pd
88
import logging
99
from pathlib import Path
10-
from crow_client import CrowClient
11-
from crow_client.models import AuthType, Stage, JobResponse
10+
from crow_client import CrowClient, JobResponseVerbose
11+
from crow_client.models import AuthType, Stage
1212
from aviary.utils import MultipleChoiceQuestion, eval_answer, EvalAnswerMode
1313

1414

@@ -77,7 +77,7 @@ async def fetch_jobs_batch(
7777
List of fetched jobs
7878
"""
7979

80-
async def get_job_async(job_id: str) -> JobResponse:
80+
async def get_job_async(job_id: str) -> JobResponseVerbose:
8181
return await asyncio.to_thread(
8282
client.get_job, job_id, False, True
8383
) # False for history, True for verbose
@@ -327,7 +327,7 @@ async def main(
327327
parser.add_argument(
328328
"--job-file-path",
329329
type=str,
330-
default="local/bixbench_runs/baseline-3.7-single-cell-run2-20250325-065452.json",
330+
default="local/bixbench_runs/bb50k_v2-20250412-094827.json",
331331
help="Path to Job data file with all the job IDs",
332332
)
333333
parser.add_argument(
@@ -337,7 +337,7 @@ async def main(
337337
help="Path to save evaluation results",
338338
)
339339
parser.add_argument(
340-
"--batch-size", type=int, default=50, help="Batch size for job requests"
340+
"--batch-size", type=int, default=200, help="Batch size for job requests"
341341
)
342342
parser.add_argument(
343343
"--api-key",

src/scripts/platform_run_jobs.py

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@
1717
logger = logging.getLogger(__name__)
1818

1919
ENV = "PROD"
20-
JOB_NAME = "job-futurehouse-data-analysis-crow"
21-
CROW_STAGE = getattr(Stage, "LOCAL") # TODO: Change to ENV
20+
JOB_NAME = "job-futurehouse-data-analysis-crow-high"
21+
CROW_STAGE = getattr(Stage, ENV)
2222
API_KEY = os.environ.get(f"CROW_API_KEY_{ENV}")
2323
DATASET_NAME = "bb50k"
2424
if DATASET_NAME == "bixbench":
@@ -142,19 +142,14 @@ async def load_bb50k_data(
142142
open_question: bool = True,
143143
) -> list[dict[str, Any]]:
144144
"""Load the BixBench dataset."""
145-
data = json.load(
146-
open(
147-
"local/bb50k/ngs_analysis_rna_seq_dge_dataset_0_qa_metadata_questions_20250404_210834.json"
148-
)
149-
)
150-
data = data["questions"]
145+
data = json.load(open("local/bb50k/single_dataset_per_wf.json"))
151146
processed_data = []
152147
for i in data:
153148
processed_data.append(
154149
{
155-
"data_folder": GCS_ARTIFACT_PATH + "dataset0",
150+
"data_folder": f"{GCS_ARTIFACT_PATH}/{i['workflow']}/{i['dataset'].replace('dataset_', '')}",
156151
"short_id": i["qa_id"],
157-
"categories": i["generator_class"],
152+
"generator_class": i["generator_class"],
158153
"uuid": i["qa_id"],
159154
"domain": i["domain"],
160155
"workflow": i["workflow"],
@@ -248,7 +243,7 @@ async def main():
248243
raise ValueError(f"Dataset {DATASET_NAME} not supported")
249244

250245
if MINI_MODE:
251-
data = data[:5]
246+
data = data[:2]
252247

253248
jobs = await submit_jobs(data)
254249
await save_results(jobs, RESULTS_FILE)

0 commit comments

Comments (0)