diff --git a/src/fhda/Dockerfile.pinned b/src/fhda/Dockerfile.pinned
index 64aa8e9..1add015 100644
--- a/src/fhda/Dockerfile.pinned
+++ b/src/fhda/Dockerfile.pinned
@@ -81,13 +81,13 @@ COPY kernel_requirements.txt .
RUN mamba install -c conda-forge --file kernel_requirements.txt -y
# Install pip packages
-RUN pip install aiodocker ldp==0.23.0 fhaviary[server]==0.18.1 crow-client==0.3.6
+RUN pip install aiodocker ldp==0.26.0 fhaviary[server]==0.19.0 futurehouse-client==0.3.14
# Certain tools are not easily installable via conda. A common practice for
# bioinformaticians is to use udocker to run certain heavy duty omics processing
# tools in an isolated environment
-RUN udocker --allow-root install && \
- udocker --allow-root pull ezlabgva/busco:v5.8.0_cv1
+# RUN udocker --allow-root install && \
+# udocker --allow-root pull ezlabgva/busco:v5.8.0_cv1
WORKDIR /workspace
diff --git a/src/fhda/config.py b/src/fhda/config.py
index 4b0226b..0c62055 100644
--- a/src/fhda/config.py
+++ b/src/fhda/config.py
@@ -2,7 +2,6 @@
from pathlib import Path
USE_DOCKER = bool(os.getenv("USE_DOCKER", "true").lower() == "true")
-USE_R = bool(os.getenv("USE_R", "false").lower() == "true")
NB_ENVIRONMENT_DOCKER_IMAGE = os.getenv(
"NB_ENVIRONMENT_DOCKER_IMAGE", "futurehouse/bixbench:aviary-notebook-env"
)
diff --git a/src/fhda/data_analysis_env.py b/src/fhda/data_analysis_env.py
index f74cb2d..4b71dfd 100644
--- a/src/fhda/data_analysis_env.py
+++ b/src/fhda/data_analysis_env.py
@@ -162,13 +162,16 @@ def from_task(
f"\n"
f"{task}\n"
f"\n"
- f"{prompts.CHAIN_OF_THOUGHT_AGNOSTIC}\n"
- f"{prompts.GENERAL_NOTEBOOK_GUIDELINES}"
+ f"{prompts.CHAIN_OF_THOUGHT_AGNOSTIC.format(language=kwargs.get('language', 'PYTHON'))}\n"
+ f"{prompts.GENERAL_NOTEBOOK_GUIDELINES.format(language=kwargs.get('language', 'PYTHON'))}"
)
logger.info("Trajectory path: %s", trajectory_path)
nb_path = trajectory_path / NBEnvironment.NOTEBOOK_NAME
logger.info("NB path: %s", nb_path)
- language = NBLanguage.PYTHON # In future, this should be a hyperparameter
+ language = getattr(NBLanguage, environment_config.get("language", "PYTHON"))
+ # Overwrite kwargs["language"] with the NBLanguage enum member; it is now passed on only via **kwargs below
+ kwargs["language"] = language
+ logger.info("Language: %s", language.name)
if language == NBLanguage.R:
task += f"\n{prompts.R_OUTPUT_RECOMMENDATION_PROMPT}"
@@ -188,7 +191,6 @@ def from_task(
eval_mode=EvalAnswerMode.LLM,
nb_path=nb_path,
work_dir=trajectory_path,
- language=language,
system_prompt=prompts.CAPSULE_SYSTEM_PROMPT_QUERY,
use_tmp_work_dir=False,
**kwargs,
diff --git a/src/fhda/notebook_env.py b/src/fhda/notebook_env.py
index 081ee30..43865f0 100644
--- a/src/fhda/notebook_env.py
+++ b/src/fhda/notebook_env.py
@@ -49,9 +49,6 @@ def __init__(
self.reload_nb()
else:
self.nb = nbformat.v4.new_notebook()
- if cfg.USE_R:
- # Add initial cell with rpy2 extension load
- nbformat.v4.new_code_cell(source="%load_ext rpy2.ipython")
self.nb.metadata.kernelspec = self.language.make_kernelspec()
self.notebook_runtime_errors: list[str] = []
diff --git a/src/fhda/prompts.py b/src/fhda/prompts.py
index 349dcdb..f0ed2b5 100644
--- a/src/fhda/prompts.py
+++ b/src/fhda/prompts.py
@@ -54,38 +54,11 @@
- Check dataframe shapes before printing. Use head() for large dataframes.
- Ensure each cell executes successfully before moving to the next.
- Assume you already have the packages you need installed and only install new ones if you receive errors.
-- If you need to install packages, use mamba or conda.
-IMPORTANT: R vs Python vs bash
-- You can use either Python, R or bash cells to complete the analysis.
-- All cells are by default Python cells. However, you can use both bash and R cells by adding %%bash or %%R to the first line of the cell.
-- The first cell has already been loaded with %load_ext rpy2.ipython so you can use %%R cells from the second cell onwards
-"""
-
-# General notebook guidelines
-GENERAL_NOTEBOOK_GUIDELINES_PYTHON = """
-General Guidelines:
-- Write small to medium-sized cells for easier debugging.
-- Edit existing cells by their index number when fixing bugs, rather than creating new ones.
-- Check dataframe shapes before printing. Use head() for large dataframes.
-- Ensure each cell executes successfully before moving to the next.
-- Assume you already have the packages you need installed and only install new ones if you receive errors.
-- If you need to install packages, use pip.
-- All cells are by default Python cells. Use python or bash tools for all analysis.
+- If you need to install packages, use pip or mamba.
+- All cells are by default {language} cells. Use {language} or bash tools for all analysis.
- You can use bash cells by adding %%bash to the first line of the cell or running a subprocess.
"""
-GENERAL_NOTEBOOK_GUIDELINES_R = """
-General Guidelines:
-- Write small to medium-sized cells for easier debugging.
-- Edit existing cells by their index number when fixing bugs, rather than creating new ones.
-- Check dataframe shapes before printing. Use head() for large dataframes.
-- Ensure each cell executes successfully before moving to the next.
-- Assume you already have the packages you need installed and only install new ones if you receive errors.
-- If you need to install packages, use mamba or conda.
-IMPORTANT: Use R cells for all analysis.
-- All cells are by default R cells.
-"""
-
AVOID_IMAGES = """
AVOID USING PLOTS/IMAGES. USE TABLES AND PRINT OUTPUTS INSTEAD AS MUCH AS POSSIBLE.
@@ -139,68 +112,7 @@
2. Load Data and Perform Descriptive Statistics:
- Identify which data files are most relevant to resolving the task. List these files.
-- Plan how to load these files efficiently in R or Python.
-- List the specific descriptive statistics you plan to use (e.g., summary(), str(), head()).
-- Consider potential issues like missing data or unexpected formats. How will you handle each?
-- Plan how to present this information clearly in the notebook.
-- Write down key statistics you expect to see and how you'll interpret them.
-- Consider potential data quality issues and how you'll address them.
-
-Execute your plan to load data and perform descriptive statistics.
-
-3. Develop Analysis Plan:
-
-- Break down each task into testable components. List these components.
-- For each component, list appropriate statistical tests or visualizations.
-- Consider alternative approaches for each component and justify your choices.
-- Identify potential confounding factors and how to address them.
-- Plan the sequence of your analysis steps, explaining the rationale for each.
-- Consider how this analysis plan will be documented in the notebook.
-- List potential statistical assumptions for your chosen methods and how you'll test them.
-- Think about how your analysis plan addresses your original task.
-
-Write out your analysis plan as comments in the notebook.
-
-4. Execute Analysis Plan:
-
-- For each step in your analysis plan, list the R, Python or bash functions and libraries you'll use.
-- Think about how to structure your code for readability and efficiency.
-- Plan how to document your code with clear comments.
-- Consider how to present results clearly, using tables or visualizations where appropriate.
-- Ensure that all outputs are clearly labeled and explained in the context of the task.
-- Plan how you'll interpret each result in relation to the original task.
-- Consider potential unexpected results and how you'll handle them.
-
-Execute your analysis plan, creating new cells as needed.
-
-5. Conclude and Submit Answer:
-
-- Reflect on how your results relate to the original task.
-- Consider any limitations or uncertainties in your analysis.
-- Plan a concise summary of your findings.
-- Think about how to phrase your conclusion as clear statements.
-- Ensure that the notebook contains all necessary information for another model to derive these answers.
-- Consider any additional insights or patterns you've noticed during the analysis.
-- Think about potential follow-up questions or areas for further investigation.
-
-"""
-
-CHAIN_OF_THOUGHT_AGNOSTIC_PYTHON = """
-Follow these steps to create your notebook, using chain-of-thought reasoning at each stage:
-
-1. List Directory Contents:
-
-- Consider how to use the list_workdir tool to recursively list the directory contents.
-- Think about how to organize and present this information clearly in the notebook.
-- List potential challenges in interpreting the directory structure.
-- Consider how the directory structure might inform your approach to the analysis.
-
-Place the output of the list_workdir tool inside tags.
-
-2. Load Data and Perform Descriptive Statistics:
-
-- Identify which data files are most relevant to resolving the task. List these files.
-- Plan how to load these files efficiently in Python.
+- Plan how to load these files efficiently in {language}.
- List the specific descriptive statistics you plan to use (e.g., summary(), str(), head()).
- Consider potential issues like missing data or unexpected formats. How will you handle each?
- Plan how to present this information clearly in the notebook.
@@ -224,7 +136,7 @@
4. Execute Analysis Plan:
-- For each step in your analysis plan, list the Python or bash functions and libraries you'll use.
+- For each step in your analysis plan, list the {language} or bash functions and libraries you'll use.
- Think about how to structure your code for readability and efficiency.
- Plan how to document your code with clear comments.
- Consider how to present results clearly, using tables or visualizations where appropriate.
diff --git a/src/scripts/deploy.py b/src/scripts/deploy.py
index 7ec5626..88981c9 100644
--- a/src/scripts/deploy.py
+++ b/src/scripts/deploy.py
@@ -17,7 +17,6 @@
ENV_VARS = {
"OPENAI_API_KEY": os.environ["OPENAI_API_KEY"],
"ANTHROPIC_API_KEY": os.environ["ANTHROPIC_API_KEY"],
- "USE_R": "false",
"USE_DOCKER": "false",
"STAGE": "PROD",
}
diff --git a/src/scripts/platform_run_jobs.py b/src/scripts/platform_run_jobs.py
index b7ebca1..cfa2d89 100644
--- a/src/scripts/platform_run_jobs.py
+++ b/src/scripts/platform_run_jobs.py
@@ -31,6 +31,7 @@
SUBMIT_ANSWER_PROMPT = prompts.SUBMIT_ANSWER_SINGLE
else:
raise ValueError(f"Dataset {DATASET_NAME} not supported")
+NB_LANGUAGE = "PYTHON"
MODEL = "claude-3-7-sonnet-latest"
TEMPERATURE = 1
NUM_RETRIES = 3
@@ -68,9 +69,9 @@ async def prepare_job(capsule: dict[str, Any]) -> JobRequest:
{formatted_question}
- {prompts.CHAIN_OF_THOUGHT_AGNOSTIC_PYTHON}
+ {prompts.CHAIN_OF_THOUGHT_AGNOSTIC.format(language=NB_LANGUAGE)}
{SUBMIT_ANSWER_PROMPT}
- {prompts.GENERAL_NOTEBOOK_GUIDELINES_PYTHON}"""
+ {prompts.GENERAL_NOTEBOOK_GUIDELINES.format(language=NB_LANGUAGE)}"""
if AVOID_IMAGES:
task += prompts.AVOID_IMAGES
@@ -95,7 +96,11 @@ async def prepare_job(capsule: dict[str, Any]) -> JobRequest:
agent=agent,
max_steps=MAX_STEPS,
upload_id=capsule["data_folder"],
- environment_config={"run_notebook_on_edit": False, "eval": True},
+ environment_config={
+ "run_notebook_on_edit": False,
+ "eval": True,
+ "language": NB_LANGUAGE,
+ },
),
)
return job_data
diff --git a/tutorial/example.ipynb b/tutorial/example.ipynb
index f5e38d0..701b4f8 100644
--- a/tutorial/example.ipynb
+++ b/tutorial/example.ipynb
@@ -35,21 +35,14 @@
"metadata": {},
"outputs": [],
"source": [
- "# ENVIRONMENT CONFIGURATION\n",
- "\n",
- "# Set your API keys\n",
- "os.environ[\"ANTHROPIC_API_KEY\"] = \"\"\n",
- "os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
- "# If using docker, be sure to pull the image from docker hub first\n",
- "# docker pull futurehouse/bixbench:aviary-notebook-env\n",
- "# This image includes many bioinformatics and data science packages\n",
- "cfg.USE_DOCKER = False\n",
- "\n",
- "\n",
- "def setup_data_analysis_env(query: str, dataset_folder: Path):\n",
+ "def setup_data_analysis_env(\n",
+ " query: str, dataset_folder: Path, language: NBLanguage = NBLanguage.PYTHON\n",
+ "):\n",
" # Hash the task to get a unique identifier\n",
" task_hash = hashlib.sha256(query.encode()).hexdigest()\n",
- " trajectory_path = Path(\"tmp_results_dir\") / f\"{task_hash}-{time.time()}\"\n",
+ " trajectory_path = (\n",
+ " Path(os.path.abspath(\"tmp_results_dir\")) / f\"{task_hash}-{time.time()}\"\n",
+ " )\n",
" trajectory_path.mkdir(parents=True, exist_ok=True)\n",
" nb_path = trajectory_path / NBEnvironment.NOTEBOOK_NAME\n",
" # Copy task data to trajectory path\n",
@@ -58,7 +51,6 @@
" shutil.copy2(item, trajectory_path)\n",
" elif item.is_dir():\n",
" shutil.copytree(item, trajectory_path / item.name, dirs_exist_ok=True)\n",
- "\n",
" # Augment incoming task with CoT instructions\n",
" augmented_task = f\"\"\"\\\n",
" Here is the user query to address:\n",
@@ -68,11 +60,9 @@
" {query}\n",
" \n",
"\n",
- " {prompts.CHAIN_OF_THOUGHT_AGNOSTIC}\n",
- " {prompts.GENERAL_NOTEBOOK_GUIDELINES}\"\"\"\n",
+ " {prompts.CHAIN_OF_THOUGHT_AGNOSTIC.format(language=language.name)}\n",
+ " {prompts.GENERAL_NOTEBOOK_GUIDELINES.format(language=language.name)}\"\"\"\n",
"\n",
- " # This can be R or PYTHON in Docker or with a local kernel if you have R installed\n",
- " language = NBLanguage.PYTHON\n",
" if language == NBLanguage.R:\n",
" augmented_task += f\"\\n{prompts.R_OUTPUT_RECOMMENDATION_PROMPT}\"\n",
"\n",
@@ -85,11 +75,32 @@
" language=language,\n",
" system_prompt=prompts.CAPSULE_SYSTEM_PROMPT_QUERY,\n",
" use_tmp_work_dir=False,\n",
- " # run_notebook_on_edit=False,\n",
+ " run_notebook_on_edit=True if cfg.USE_DOCKER else False,\n",
" )\n",
" return dae"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# ENVIRONMENT CONFIGURATION\n",
+ "\n",
+ "# Set your API keys\n",
+ "os.environ[\"ANTHROPIC_API_KEY\"] = \"\"\n",
+ "# os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
+ "# If using docker, be sure to pull the image from docker hub first\n",
+ "# docker pull futurehouse/bixbench:aviary-notebook-env\n",
+ "# This image includes many bioinformatics and data science packages\n",
+ "cfg.USE_DOCKER = False\n",
+ "# LANGUAGE may be NBLanguage.R or NBLanguage.PYTHON, run either in Docker or with a local kernel (R requires a local R installation)\n",
+ "LANGUAGE = NBLanguage.R\n",
+ "MAX_STEPS = 3\n",
+ "MODEL_NAME = \"claude-3-7-sonnet-latest\""
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
@@ -106,16 +117,16 @@
"\n",
"dataset_folder = Path(\"dataset\")\n",
"query = \"Analyze the dataset and give me an in depth analysis using pretty plots. I am particularly interested in crows.\"\n",
- "environment = setup_data_analysis_env(query, dataset_folder)\n",
+ "environment = setup_data_analysis_env(query, dataset_folder, LANGUAGE)\n",
"\n",
"agent = AgentConfig(\n",
" agent_type=\"ReActAgent\",\n",
" agent_kwargs={\n",
" \"llm_model\": {\n",
" \"parallel_tool_calls\": False,\n",
- " \"num_retries\": 5,\n",
+ " \"num_retries\": 3,\n",
" \"temperature\": 1.0,\n",
- " \"name\": \"claude-3-7-sonnet-latest\",\n",
+ " \"name\": MODEL_NAME,\n",
" },\n",
" \"hide_old_env_states\": True,\n",
" },\n",
@@ -125,7 +136,9 @@
"rollout = RolloutManager(agent=agent)\n",
"\n",
"# You can see the notebook updating live in the tmp_results_dir folder\n",
- "result = await rollout.sample_trajectories(environments=[environment], max_steps=3)\n",
+ "result = await rollout.sample_trajectories(\n",
+ " environments=[environment], max_steps=MAX_STEPS\n",
+ ")\n",
"\n",
"print(\"Trajectory completed! Final notebook available at: \\n\", environment.nb_path)\n",
"print(f\"Final agent answer:\\n{environment.state.answer}\")"
@@ -207,9 +220,11 @@
"\n",
"# CONFIGURATION\n",
"CROW_STAGE = Stage.PROD\n",
- "API_KEY = os.environ.get(\"CROW_API_KEY_PROD\")\n",
+ "API_KEY = \"\"\n",
"JOB_NAME = \"job-futurehouse-data-analysis-crow-high\"\n",
"MAX_STEPS = 25\n",
+ "LANGUAGE = \"R\"\n",
+ "DATA_GCS_LOCATION = \"bixbench_data/CapsuleFolder-1d54e4a7-8b0f-4224-bd31-efcfded0d46c\"\n",
"\n",
"\n",
"client = FutureHouseClient(\n",
@@ -227,21 +242,30 @@
"Make a discovery using this dataset.\n",
"\n",
"\n",
- "{prompts.CHAIN_OF_THOUGHT_AGNOSTIC}\n",
- "{prompts.GENERAL_NOTEBOOK_GUIDELINES}\"\"\"\n",
+ "{prompts.CHAIN_OF_THOUGHT_AGNOSTIC.format(language=LANGUAGE)}\n",
+ "{prompts.GENERAL_NOTEBOOK_GUIDELINES.format(language=LANGUAGE)}\"\"\"\n",
"\n",
"job_data = TaskRequest(\n",
" name=JOB_NAME,\n",
" query=task,\n",
" runtime_config=RuntimeConfig(\n",
" max_steps=MAX_STEPS,\n",
- " upload_id=\"bixbench_data/CapsuleFolder-1d54e4a7-8b0f-4224-bd31-efcfded0d46c\", # This is just an example dataset\n",
- " environment_config={\"run_notebook_on_edit\": False, \"eval\": True},\n",
+ " upload_id=DATA_GCS_LOCATION, # This is just an example dataset\n",
+ " environment_config={\n",
+ " \"run_notebook_on_edit\": False,\n",
+ " \"eval\": True,\n",
+ " \"language\": LANGUAGE,\n",
+ " },\n",
+ " # timeout=600,\n",
" ),\n",
")\n",
"job_id = client.create_task(job_data)\n",
- "while client.get_task(job_id).status != \"success\":\n",
+ "status = \"in progress\"\n",
+ "while status == \"in progress\":\n",
+ " print(\"Waiting for task to complete... checking again in 15 seconds\")\n",
" time.sleep(15)\n",
+ " status = client.get_task(job_id).status\n",
+ "\n",
"job_result = client.get_task(job_id, verbose=True)\n",
"answer = job_result.environment_frame[\"state\"][\"state\"][\"answer\"]\n",
"print(\n",