Future-House · ludomitch · Apr 17, 2025 · Apr 17, 2025
diff --git a/src/fhda/Dockerfile.pinned b/src/fhda/Dockerfile.pinned
@@ -81,13 +81,13 @@ COPY kernel_requirements.txt .
 RUN mamba install -c conda-forge --file kernel_requirements.txt -y
 
 # Install pip packages
-RUN pip install aiodocker ldp==0.23.0 fhaviary[server]==0.18.1 crow-client==0.3.6
+RUN pip install aiodocker ldp==0.26.0 fhaviary[server]==0.19.0 futurehouse-client==0.3.14
 
 # Certain tools are not easily installable via conda. A common practice for
 # bioinformaticians is to use udocker to run certain heavy duty omics processing
 # tools in an isolated environment
-RUN udocker --allow-root install && \
-    udocker --allow-root pull ezlabgva/busco:v5.8.0_cv1
+# RUN udocker --allow-root install && \
+#     udocker --allow-root pull ezlabgva/busco:v5.8.0_cv1
 
 WORKDIR /workspace
 

diff --git a/src/fhda/config.py b/src/fhda/config.py
@@ -2,7 +2,6 @@
 from pathlib import Path
 
 USE_DOCKER = bool(os.getenv("USE_DOCKER", "true").lower() == "true")
-USE_R = bool(os.getenv("USE_R", "false").lower() == "true")
 NB_ENVIRONMENT_DOCKER_IMAGE = os.getenv(
     "NB_ENVIRONMENT_DOCKER_IMAGE", "futurehouse/bixbench:aviary-notebook-env"
 )

diff --git a/src/fhda/data_analysis_env.py b/src/fhda/data_analysis_env.py
@@ -162,13 +162,16 @@ def from_task(
                 f"<query>\n"
                 f"{task}\n"
                 f"</query>\n"
-                f"{prompts.CHAIN_OF_THOUGHT_AGNOSTIC}\n"
-                f"{prompts.GENERAL_NOTEBOOK_GUIDELINES}"
+                f"{prompts.CHAIN_OF_THOUGHT_AGNOSTIC.format(language=kwargs.get('language', 'PYTHON'))}\n"
+                f"{prompts.GENERAL_NOTEBOOK_GUIDELINES.format(language=kwargs.get('language', 'PYTHON'))}"
             )
         logger.info("Trajectory path: %s", trajectory_path)
         nb_path = trajectory_path / NBEnvironment.NOTEBOOK_NAME
         logger.info("NB path: %s", nb_path)
-        language = NBLanguage.PYTHON  # In future, this should be a hyperparameter
+        language = getattr(NBLanguage, environment_config.get("language", "PYTHON"))
+        # Overwrite any language in kwargs with the resolved NBLanguage member, which is forwarded via **kwargs
+        kwargs["language"] = language
+        logger.info("Language: %s", language.name)
         if language == NBLanguage.R:
             task += f"\n{prompts.R_OUTPUT_RECOMMENDATION_PROMPT}"
 
@@ -188,7 +191,6 @@ def from_task(
             eval_mode=EvalAnswerMode.LLM,
             nb_path=nb_path,
             work_dir=trajectory_path,
-            language=language,
             system_prompt=prompts.CAPSULE_SYSTEM_PROMPT_QUERY,
             use_tmp_work_dir=False,
             **kwargs,

diff --git a/src/fhda/notebook_env.py b/src/fhda/notebook_env.py
@@ -49,9 +49,6 @@ def __init__(
             self.reload_nb()
         else:
             self.nb = nbformat.v4.new_notebook()
-            if cfg.USE_R:
-                # Add initial cell with rpy2 extension load
-                nbformat.v4.new_code_cell(source="%load_ext rpy2.ipython")
             self.nb.metadata.kernelspec = self.language.make_kernelspec()
         self.notebook_runtime_errors: list[str] = []
 

diff --git a/src/fhda/prompts.py b/src/fhda/prompts.py
@@ -54,38 +54,11 @@
 - Check dataframe shapes before printing. Use head() for large dataframes.
 - Ensure each cell executes successfully before moving to the next.
 - Assume you already have the packages you need installed and only install new ones if you receive errors.
-- If you need to install packages, use mamba or conda.
-IMPORTANT: R vs Python vs bash
-- You can use either Python, R or bash cells to complete the analysis.
-- All cells are by default Python cells. However, you can use both bash and R cells by adding %%bash or %%R to the first line of the cell.
-- The first cell has already been loaded with %load_ext rpy2.ipython so you can use %%R cells from the second cell onwards
-"""
-
-# General notebook guidelines
-GENERAL_NOTEBOOK_GUIDELINES_PYTHON = """
-General Guidelines:
-- Write small to medium-sized cells for easier debugging.
-- Edit existing cells by their index number when fixing bugs, rather than creating new ones.
-- Check dataframe shapes before printing. Use head() for large dataframes.
-- Ensure each cell executes successfully before moving to the next.
-- Assume you already have the packages you need installed and only install new ones if you receive errors.
-- If you need to install packages, use pip.
-- All cells are by default Python cells. Use python or bash tools for all analysis.
+- If you need to install packages, use pip or mamba.
+- All cells are by default {language} cells. Use {language} or bash tools for all analysis.
 - You can use bash cells by adding %%bash to the first line of the cell or running a subprocess.
 """
 
-GENERAL_NOTEBOOK_GUIDELINES_R = """
-General Guidelines:
-- Write small to medium-sized cells for easier debugging.
-- Edit existing cells by their index number when fixing bugs, rather than creating new ones.
-- Check dataframe shapes before printing. Use head() for large dataframes.
-- Ensure each cell executes successfully before moving to the next.
-- Assume you already have the packages you need installed and only install new ones if you receive errors.
-- If you need to install packages, use mamba or conda.
-IMPORTANT: Use R cells for all analysis.
-- All cells are by default R cells.
-"""
-
 
 AVOID_IMAGES = """
 AVOID USING PLOTS/IMAGES. USE TABLES AND PRINT OUTPUTS INSTEAD AS MUCH AS POSSIBLE.
@@ -139,68 +112,7 @@
 2. Load Data and Perform Descriptive Statistics:
 <analysis_planning>
 - Identify which data files are most relevant to resolving the task. List these files.
-- Plan how to load these files efficiently in R or Python.
-- List the specific descriptive statistics you plan to use (e.g., summary(), str(), head()).
-- Consider potential issues like missing data or unexpected formats. How will you handle each?
-- Plan how to present this information clearly in the notebook.
-- Write down key statistics you expect to see and how you'll interpret them.
-- Consider potential data quality issues and how you'll address them.
-</analysis_planning>
-Execute your plan to load data and perform descriptive statistics.
-
-3. Develop Analysis Plan:
-<analysis_planning>
-- Break down each task into testable components. List these components.
-- For each component, list appropriate statistical tests or visualizations.
-- Consider alternative approaches for each component and justify your choices.
-- Identify potential confounding factors and how to address them.
-- Plan the sequence of your analysis steps, explaining the rationale for each.
-- Consider how this analysis plan will be documented in the notebook.
-- List potential statistical assumptions for your chosen methods and how you'll test them.
-- Think about how your analysis plan addresses your original task.
-</analysis_planning>
-Write out your analysis plan as comments in the notebook.
-
-4. Execute Analysis Plan:
-<analysis_planning>
-- For each step in your analysis plan, list the R, Python or bash functions and libraries you'll use.
-- Think about how to structure your code for readability and efficiency.
-- Plan how to document your code with clear comments.
-- Consider how to present results clearly, using tables or visualizations where appropriate.
-- Ensure that all outputs are clearly labeled and explained in the context of the task.
-- Plan how you'll interpret each result in relation to the original task.
-- Consider potential unexpected results and how you'll handle them.
-</analysis_planning>
-Execute your analysis plan, creating new cells as needed.
-
-5. Conclude and Submit Answer:
-<thought_process>
-- Reflect on how your results relate to the original task.
-- Consider any limitations or uncertainties in your analysis.
-- Plan a concise summary of your findings.
-- Think about how to phrase your conclusion as clear statements.
-- Ensure that the notebook contains all necessary information for another model to derive these answers.
-- Consider any additional insights or patterns you've noticed during the analysis.
-- Think about potential follow-up questions or areas for further investigation.
-</thought_process>
-"""
-
-CHAIN_OF_THOUGHT_AGNOSTIC_PYTHON = """
-Follow these steps to create your notebook, using chain-of-thought reasoning at each stage:
-
-1. List Directory Contents:
-<analysis_planning>
-- Consider how to use the list_workdir tool to recursively list the directory contents.
-- Think about how to organize and present this information clearly in the notebook.
-- List potential challenges in interpreting the directory structure.
-- Consider how the directory structure might inform your approach to the analysis.
-</analysis_planning>
-Place the output of the list_workdir tool inside <directory_contents> tags.
-
-2. Load Data and Perform Descriptive Statistics:
-<analysis_planning>
-- Identify which data files are most relevant to resolving the task. List these files.
-- Plan how to load these files efficiently in Python.
+- Plan how to load these files efficiently in {language}.
 - List the specific descriptive statistics you plan to use (e.g., summary(), str(), head()).
 - Consider potential issues like missing data or unexpected formats. How will you handle each?
 - Plan how to present this information clearly in the notebook.
@@ -224,7 +136,7 @@
 
 4. Execute Analysis Plan:
 <analysis_planning>
-- For each step in your analysis plan, list the Python or bash functions and libraries you'll use.
+- For each step in your analysis plan, list the {language} or bash functions and libraries you'll use.
 - Think about how to structure your code for readability and efficiency.
 - Plan how to document your code with clear comments.
 - Consider how to present results clearly, using tables or visualizations where appropriate.

diff --git a/src/scripts/deploy.py b/src/scripts/deploy.py
@@ -17,7 +17,6 @@
 ENV_VARS = {
     "OPENAI_API_KEY": os.environ["OPENAI_API_KEY"],
     "ANTHROPIC_API_KEY": os.environ["ANTHROPIC_API_KEY"],
-    "USE_R": "false",
     "USE_DOCKER": "false",
     "STAGE": "PROD",
 }

diff --git a/src/scripts/platform_run_jobs.py b/src/scripts/platform_run_jobs.py
@@ -31,6 +31,7 @@
     SUBMIT_ANSWER_PROMPT = prompts.SUBMIT_ANSWER_SINGLE
 else:
     raise ValueError(f"Dataset {DATASET_NAME} not supported")
+NB_LANGUAGE = "PYTHON"
 MODEL = "claude-3-7-sonnet-latest"
 TEMPERATURE = 1
 NUM_RETRIES = 3
@@ -68,9 +69,9 @@ async def prepare_job(capsule: dict[str, Any]) -> JobRequest:
             {formatted_question}
             </query>
 
-            {prompts.CHAIN_OF_THOUGHT_AGNOSTIC_PYTHON}
+            {prompts.CHAIN_OF_THOUGHT_AGNOSTIC.format(language=NB_LANGUAGE)}
             {SUBMIT_ANSWER_PROMPT}
-            {prompts.GENERAL_NOTEBOOK_GUIDELINES_PYTHON}"""
+            {prompts.GENERAL_NOTEBOOK_GUIDELINES.format(language=NB_LANGUAGE)}"""
 
     if AVOID_IMAGES:
         task += prompts.AVOID_IMAGES
@@ -95,7 +96,11 @@ async def prepare_job(capsule: dict[str, Any]) -> JobRequest:
             agent=agent,
             max_steps=MAX_STEPS,
             upload_id=capsule["data_folder"],
-            environment_config={"run_notebook_on_edit": False, "eval": True},
+            environment_config={
+                "run_notebook_on_edit": False,
+                "eval": True,
+                "language": NB_LANGUAGE,
+            },
         ),
     )
     return job_data

diff --git a/tutorial/example.ipynb b/tutorial/example.ipynb
@@ -35,21 +35,14 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# ENVIRONMENT CONFIGURATION\n",
-    "\n",
-    "# Set your API keys\n",
-    "os.environ[\"ANTHROPIC_API_KEY\"] = \"\"\n",
-    "os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
-    "# If using docker, be sure to pull the image from docker hub first\n",
-    "# docker pull futurehouse/bixbench:aviary-notebook-env\n",
-    "# This image includes many bioinformatics and data science packages\n",
-    "cfg.USE_DOCKER = False\n",
-    "\n",
-    "\n",
-    "def setup_data_analysis_env(query: str, dataset_folder: Path):\n",
+    "def setup_data_analysis_env(\n",
+    "    query: str, dataset_folder: Path, language: NBLanguage = NBLanguage.PYTHON\n",
+    "):\n",
     "    # Hash the task to get a unique identifier\n",
     "    task_hash = hashlib.sha256(query.encode()).hexdigest()\n",
-    "    trajectory_path = Path(\"tmp_results_dir\") / f\"{task_hash}-{time.time()}\"\n",
+    "    trajectory_path = (\n",
+    "        Path(os.path.abspath(\"tmp_results_dir\")) / f\"{task_hash}-{time.time()}\"\n",
+    "    )\n",
     "    trajectory_path.mkdir(parents=True, exist_ok=True)\n",
     "    nb_path = trajectory_path / NBEnvironment.NOTEBOOK_NAME\n",
     "    # Copy task data to trajectory path\n",
@@ -58,7 +51,6 @@
     "            shutil.copy2(item, trajectory_path)\n",
     "        elif item.is_dir():\n",
     "            shutil.copytree(item, trajectory_path / item.name, dirs_exist_ok=True)\n",
-    "\n",
     "    # Augment incoming task with CoT instructions\n",
     "    augmented_task = f\"\"\"\\\n",
     "    Here is the user query to address:\n",
@@ -68,11 +60,9 @@
     "    {query}\n",
     "    </query>\n",
     "\n",
-    "    {prompts.CHAIN_OF_THOUGHT_AGNOSTIC}\n",
-    "    {prompts.GENERAL_NOTEBOOK_GUIDELINES}\"\"\"\n",
+    "    {prompts.CHAIN_OF_THOUGHT_AGNOSTIC.format(language=language.name)}\n",
+    "    {prompts.GENERAL_NOTEBOOK_GUIDELINES.format(language=language.name)}\"\"\"\n",
     "\n",
-    "    # This can be R or PYTHON in Docker or with a local kernel if you have R installed\n",
-    "    language = NBLanguage.PYTHON\n",
     "    if language == NBLanguage.R:\n",
     "        augmented_task += f\"\\n{prompts.R_OUTPUT_RECOMMENDATION_PROMPT}\"\n",
     "\n",
@@ -85,11 +75,32 @@
     "        language=language,\n",
     "        system_prompt=prompts.CAPSULE_SYSTEM_PROMPT_QUERY,\n",
     "        use_tmp_work_dir=False,\n",
-    "        # run_notebook_on_edit=False,\n",
+    "        run_notebook_on_edit=True if cfg.USE_DOCKER else False,\n",
     "    )\n",
     "    return dae"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ENVIRONMENT CONFIGURATION\n",
+    "\n",
+    "# Set your API keys\n",
+    "os.environ[\"ANTHROPIC_API_KEY\"] = \"\"\n",
+    "# os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
+    "# If using docker, be sure to pull the image from docker hub first\n",
+    "# docker pull futurehouse/bixbench:aviary-notebook-env\n",
+    "# This image includes many bioinformatics and data science packages\n",
+    "cfg.USE_DOCKER = False\n",
+    "# Notebook language: NBLanguage.PYTHON or NBLanguage.R (R requires Docker or a local kernel with R installed)\n",
+    "LANGUAGE = NBLanguage.R\n",
+    "MAX_STEPS = 3\n",
+    "MODEL_NAME = \"claude-3-7-sonnet-latest\""
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -106,16 +117,16 @@
     "\n",
     "dataset_folder = Path(\"dataset\")\n",
     "query = \"Analyze the dataset and give me an in depth analysis using pretty plots. I am particularly interested in crows.\"\n",
-    "environment = setup_data_analysis_env(query, dataset_folder)\n",
+    "environment = setup_data_analysis_env(query, dataset_folder, LANGUAGE)\n",
     "\n",
     "agent = AgentConfig(\n",
     "    agent_type=\"ReActAgent\",\n",
     "    agent_kwargs={\n",
     "        \"llm_model\": {\n",
     "            \"parallel_tool_calls\": False,\n",
-    "            \"num_retries\": 5,\n",
+    "            \"num_retries\": 3,\n",
     "            \"temperature\": 1.0,\n",
-    "            \"name\": \"claude-3-7-sonnet-latest\",\n",
+    "            \"name\": MODEL_NAME,\n",
     "        },\n",
     "        \"hide_old_env_states\": True,\n",
     "    },\n",
@@ -125,7 +136,9 @@
     "rollout = RolloutManager(agent=agent)\n",
     "\n",
     "# You can see the notebook updating live in the tmp_results_dir folder\n",
-    "result = await rollout.sample_trajectories(environments=[environment], max_steps=3)\n",
+    "result = await rollout.sample_trajectories(\n",
+    "    environments=[environment], max_steps=MAX_STEPS\n",
+    ")\n",
     "\n",
     "print(\"Trajectory completed! Final notebook available at: \\n\", environment.nb_path)\n",
     "print(f\"Final agent answer:\\n{environment.state.answer}\")"
@@ -207,9 +220,11 @@
     "\n",
     "# CONFIGURATION\n",
     "CROW_STAGE = Stage.PROD\n",
-    "API_KEY = os.environ.get(\"CROW_API_KEY_PROD\")\n",
+    "API_KEY = \"\"\n",
     "JOB_NAME = \"job-futurehouse-data-analysis-crow-high\"\n",
     "MAX_STEPS = 25\n",
+    "LANGUAGE = \"R\"\n",
+    "DATA_GCS_LOCATION = \"bixbench_data/CapsuleFolder-1d54e4a7-8b0f-4224-bd31-efcfded0d46c\"\n",
     "\n",
     "\n",
     "client = FutureHouseClient(\n",
@@ -227,21 +242,30 @@
     "Make a discovery using this dataset.\n",
     "</query>\n",
     "\n",
-    "{prompts.CHAIN_OF_THOUGHT_AGNOSTIC}\n",
-    "{prompts.GENERAL_NOTEBOOK_GUIDELINES}\"\"\"\n",
+    "{prompts.CHAIN_OF_THOUGHT_AGNOSTIC.format(language=LANGUAGE)}\n",
+    "{prompts.GENERAL_NOTEBOOK_GUIDELINES.format(language=LANGUAGE)}\"\"\"\n",
     "\n",
     "job_data = TaskRequest(\n",
     "    name=JOB_NAME,\n",
     "    query=task,\n",
     "    runtime_config=RuntimeConfig(\n",
     "        max_steps=MAX_STEPS,\n",
-    "        upload_id=\"bixbench_data/CapsuleFolder-1d54e4a7-8b0f-4224-bd31-efcfded0d46c\",  # This is just an example dataset\n",
-    "        environment_config={\"run_notebook_on_edit\": False, \"eval\": True},\n",
+    "        upload_id=DATA_GCS_LOCATION,  # This is just an example dataset\n",
+    "        environment_config={\n",
+    "            \"run_notebook_on_edit\": False,\n",
+    "            \"eval\": True,\n",
+    "            \"language\": LANGUAGE,\n",
+    "        },\n",
+    "        # timeout=600,\n",
     "    ),\n",
     ")\n",
     "job_id = client.create_task(job_data)\n",
-    "while client.get_task(job_id).status != \"success\":\n",
+    "status = \"in progress\"\n",
+    "while status == \"in progress\":\n",
+    "    print(\"Waiting for task to complete... checking again in 15 seconds\")\n",
     "    time.sleep(15)\n",
+    "    status = client.get_task(job_id).status\n",
+    "\n",
     "job_result = client.get_task(job_id, verbose=True)\n",
     "answer = job_result.environment_frame[\"state\"][\"state\"][\"answer\"]\n",
     "print(\n",