Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/fhda/Dockerfile.pinned
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,13 @@ COPY kernel_requirements.txt .
RUN mamba install -c conda-forge --file kernel_requirements.txt -y

# Install pip packages
RUN pip install aiodocker ldp==0.23.0 fhaviary[server]==0.18.1 crow-client==0.3.6
RUN pip install aiodocker ldp==0.26.0 fhaviary[server]==0.19.0 futurehouse-client==0.3.14

# Certain tools are not easily installable via conda. A common practice among
# bioinformaticians is to use udocker to run certain heavy-duty omics processing
# tools in an isolated environment.
RUN udocker --allow-root install && \
udocker --allow-root pull ezlabgva/busco:v5.8.0_cv1
# RUN udocker --allow-root install && \
# udocker --allow-root pull ezlabgva/busco:v5.8.0_cv1

WORKDIR /workspace

Expand Down
1 change: 0 additions & 1 deletion src/fhda/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from pathlib import Path

USE_DOCKER = bool(os.getenv("USE_DOCKER", "true").lower() == "true")
USE_R = bool(os.getenv("USE_R", "false").lower() == "true")
NB_ENVIRONMENT_DOCKER_IMAGE = os.getenv(
"NB_ENVIRONMENT_DOCKER_IMAGE", "futurehouse/bixbench:aviary-notebook-env"
)
Expand Down
10 changes: 6 additions & 4 deletions src/fhda/data_analysis_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,13 +162,16 @@ def from_task(
f"<query>\n"
f"{task}\n"
f"</query>\n"
f"{prompts.CHAIN_OF_THOUGHT_AGNOSTIC}\n"
f"{prompts.GENERAL_NOTEBOOK_GUIDELINES}"
f"{prompts.CHAIN_OF_THOUGHT_AGNOSTIC.format(language=kwargs.get('language', 'PYTHON'))}\n"
f"{prompts.GENERAL_NOTEBOOK_GUIDELINES.format(language=kwargs.get('language', 'PYTHON'))}"
)
logger.info("Trajectory path: %s", trajectory_path)
nb_path = trajectory_path / NBEnvironment.NOTEBOOK_NAME
logger.info("NB path: %s", nb_path)
language = NBLanguage.PYTHON # In future, this should be a hyperparameter
language = getattr(NBLanguage, environment_config.get("language", "PYTHON"))
# Overwrite the language entry in kwargs with the corresponding NBLanguage enum member
kwargs["language"] = language
logger.info("Language: %s", language.name)
if language == NBLanguage.R:
task += f"\n{prompts.R_OUTPUT_RECOMMENDATION_PROMPT}"

Expand All @@ -188,7 +191,6 @@ def from_task(
eval_mode=EvalAnswerMode.LLM,
nb_path=nb_path,
work_dir=trajectory_path,
language=language,
system_prompt=prompts.CAPSULE_SYSTEM_PROMPT_QUERY,
use_tmp_work_dir=False,
**kwargs,
Expand Down
3 changes: 0 additions & 3 deletions src/fhda/notebook_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,6 @@ def __init__(
self.reload_nb()
else:
self.nb = nbformat.v4.new_notebook()
if cfg.USE_R:
# Add initial cell with rpy2 extension load
nbformat.v4.new_code_cell(source="%load_ext rpy2.ipython")
self.nb.metadata.kernelspec = self.language.make_kernelspec()
self.notebook_runtime_errors: list[str] = []

Expand Down
96 changes: 4 additions & 92 deletions src/fhda/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,38 +54,11 @@
- Check dataframe shapes before printing. Use head() for large dataframes.
- Ensure each cell executes successfully before moving to the next.
- Assume you already have the packages you need installed and only install new ones if you receive errors.
- If you need to install packages, use mamba or conda.
IMPORTANT: R vs Python vs bash
- You can use either Python, R or bash cells to complete the analysis.
- All cells are by default Python cells. However, you can use both bash and R cells by adding %%bash or %%R to the first line of the cell.
- The first cell has already been loaded with %load_ext rpy2.ipython so you can use %%R cells from the second cell onwards
"""

# General notebook guidelines
GENERAL_NOTEBOOK_GUIDELINES_PYTHON = """
General Guidelines:
- Write small to medium-sized cells for easier debugging.
- Edit existing cells by their index number when fixing bugs, rather than creating new ones.
- Check dataframe shapes before printing. Use head() for large dataframes.
- Ensure each cell executes successfully before moving to the next.
- Assume you already have the packages you need installed and only install new ones if you receive errors.
- If you need to install packages, use pip.
- All cells are by default Python cells. Use python or bash tools for all analysis.
- If you need to install packages, use pip or mamba.
- All cells are by default {language} cells. Use {language} or bash tools for all analysis.
- You can use bash cells by adding %%bash to the first line of the cell or running a subprocess.
"""

GENERAL_NOTEBOOK_GUIDELINES_R = """
General Guidelines:
- Write small to medium-sized cells for easier debugging.
- Edit existing cells by their index number when fixing bugs, rather than creating new ones.
- Check dataframe shapes before printing. Use head() for large dataframes.
- Ensure each cell executes successfully before moving to the next.
- Assume you already have the packages you need installed and only install new ones if you receive errors.
- If you need to install packages, use mamba or conda.
IMPORTANT: Use R cells for all analysis.
- All cells are by default R cells.
"""


AVOID_IMAGES = """
AVOID USING PLOTS/IMAGES. USE TABLES AND PRINT OUTPUTS INSTEAD AS MUCH AS POSSIBLE.
Expand Down Expand Up @@ -139,68 +112,7 @@
2. Load Data and Perform Descriptive Statistics:
<analysis_planning>
- Identify which data files are most relevant to resolving the task. List these files.
- Plan how to load these files efficiently in R or Python.
- List the specific descriptive statistics you plan to use (e.g., summary(), str(), head()).
- Consider potential issues like missing data or unexpected formats. How will you handle each?
- Plan how to present this information clearly in the notebook.
- Write down key statistics you expect to see and how you'll interpret them.
- Consider potential data quality issues and how you'll address them.
</analysis_planning>
Execute your plan to load data and perform descriptive statistics.

3. Develop Analysis Plan:
<analysis_planning>
- Break down each task into testable components. List these components.
- For each component, list appropriate statistical tests or visualizations.
- Consider alternative approaches for each component and justify your choices.
- Identify potential confounding factors and how to address them.
- Plan the sequence of your analysis steps, explaining the rationale for each.
- Consider how this analysis plan will be documented in the notebook.
- List potential statistical assumptions for your chosen methods and how you'll test them.
- Think about how your analysis plan addresses your original task.
</analysis_planning>
Write out your analysis plan as comments in the notebook.

4. Execute Analysis Plan:
<analysis_planning>
- For each step in your analysis plan, list the R, Python or bash functions and libraries you'll use.
- Think about how to structure your code for readability and efficiency.
- Plan how to document your code with clear comments.
- Consider how to present results clearly, using tables or visualizations where appropriate.
- Ensure that all outputs are clearly labeled and explained in the context of the task.
- Plan how you'll interpret each result in relation to the original task.
- Consider potential unexpected results and how you'll handle them.
</analysis_planning>
Execute your analysis plan, creating new cells as needed.

5. Conclude and Submit Answer:
<thought_process>
- Reflect on how your results relate to the original task.
- Consider any limitations or uncertainties in your analysis.
- Plan a concise summary of your findings.
- Think about how to phrase your conclusion as clear statements.
- Ensure that the notebook contains all necessary information for another model to derive these answers.
- Consider any additional insights or patterns you've noticed during the analysis.
- Think about potential follow-up questions or areas for further investigation.
</thought_process>
"""

CHAIN_OF_THOUGHT_AGNOSTIC_PYTHON = """
Follow these steps to create your notebook, using chain-of-thought reasoning at each stage:

1. List Directory Contents:
<analysis_planning>
- Consider how to use the list_workdir tool to recursively list the directory contents.
- Think about how to organize and present this information clearly in the notebook.
- List potential challenges in interpreting the directory structure.
- Consider how the directory structure might inform your approach to the analysis.
</analysis_planning>
Place the output of the list_workdir tool inside <directory_contents> tags.

2. Load Data and Perform Descriptive Statistics:
<analysis_planning>
- Identify which data files are most relevant to resolving the task. List these files.
- Plan how to load these files efficiently in Python.
- Plan how to load these files efficiently in {language}.
- List the specific descriptive statistics you plan to use (e.g., summary(), str(), head()).
- Consider potential issues like missing data or unexpected formats. How will you handle each?
- Plan how to present this information clearly in the notebook.
Expand All @@ -224,7 +136,7 @@

4. Execute Analysis Plan:
<analysis_planning>
- For each step in your analysis plan, list the Python or bash functions and libraries you'll use.
- For each step in your analysis plan, list the {language} or bash functions and libraries you'll use.
- Think about how to structure your code for readability and efficiency.
- Plan how to document your code with clear comments.
- Consider how to present results clearly, using tables or visualizations where appropriate.
Expand Down
1 change: 0 additions & 1 deletion src/scripts/deploy.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
ENV_VARS = {
"OPENAI_API_KEY": os.environ["OPENAI_API_KEY"],
"ANTHROPIC_API_KEY": os.environ["ANTHROPIC_API_KEY"],
"USE_R": "false",
"USE_DOCKER": "false",
"STAGE": "PROD",
}
Expand Down
11 changes: 8 additions & 3 deletions src/scripts/platform_run_jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
SUBMIT_ANSWER_PROMPT = prompts.SUBMIT_ANSWER_SINGLE
else:
raise ValueError(f"Dataset {DATASET_NAME} not supported")
NB_LANGUAGE = "PYTHON"
MODEL = "claude-3-7-sonnet-latest"
TEMPERATURE = 1
NUM_RETRIES = 3
Expand Down Expand Up @@ -68,9 +69,9 @@ async def prepare_job(capsule: dict[str, Any]) -> JobRequest:
{formatted_question}
</query>

{prompts.CHAIN_OF_THOUGHT_AGNOSTIC_PYTHON}
{prompts.CHAIN_OF_THOUGHT_AGNOSTIC.format(language=NB_LANGUAGE)}
{SUBMIT_ANSWER_PROMPT}
{prompts.GENERAL_NOTEBOOK_GUIDELINES_PYTHON}"""
{prompts.GENERAL_NOTEBOOK_GUIDELINES.format(language=NB_LANGUAGE)}"""

if AVOID_IMAGES:
task += prompts.AVOID_IMAGES
Expand All @@ -95,7 +96,11 @@ async def prepare_job(capsule: dict[str, Any]) -> JobRequest:
agent=agent,
max_steps=MAX_STEPS,
upload_id=capsule["data_folder"],
environment_config={"run_notebook_on_edit": False, "eval": True},
environment_config={
"run_notebook_on_edit": False,
"eval": True,
"language": NB_LANGUAGE,
},
),
)
return job_data
Expand Down
82 changes: 53 additions & 29 deletions tutorial/example.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -35,21 +35,14 @@
"metadata": {},
"outputs": [],
"source": [
"# ENVIRONMENT CONFIGURATION\n",
"\n",
"# Set your API keys\n",
"os.environ[\"ANTHROPIC_API_KEY\"] = \"\"\n",
"os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"# If using docker, be sure to pull the image from docker hub first\n",
"# docker pull futurehouse/bixbench:aviary-notebook-env\n",
"# This image includes many bioinformatics and data science packages\n",
"cfg.USE_DOCKER = False\n",
"\n",
"\n",
"def setup_data_analysis_env(query: str, dataset_folder: Path):\n",
"def setup_data_analysis_env(\n",
" query: str, dataset_folder: Path, language: NBLanguage = NBLanguage.PYTHON\n",
"):\n",
" # Hash the task to get a unique identifier\n",
" task_hash = hashlib.sha256(query.encode()).hexdigest()\n",
" trajectory_path = Path(\"tmp_results_dir\") / f\"{task_hash}-{time.time()}\"\n",
" trajectory_path = (\n",
" Path(os.path.abspath(\"tmp_results_dir\")) / f\"{task_hash}-{time.time()}\"\n",
" )\n",
" trajectory_path.mkdir(parents=True, exist_ok=True)\n",
" nb_path = trajectory_path / NBEnvironment.NOTEBOOK_NAME\n",
" # Copy task data to trajectory path\n",
Expand All @@ -58,7 +51,6 @@
" shutil.copy2(item, trajectory_path)\n",
" elif item.is_dir():\n",
" shutil.copytree(item, trajectory_path / item.name, dirs_exist_ok=True)\n",
"\n",
" # Augment incoming task with CoT instructions\n",
" augmented_task = f\"\"\"\\\n",
" Here is the user query to address:\n",
Expand All @@ -68,11 +60,9 @@
" {query}\n",
" </query>\n",
"\n",
" {prompts.CHAIN_OF_THOUGHT_AGNOSTIC}\n",
" {prompts.GENERAL_NOTEBOOK_GUIDELINES}\"\"\"\n",
" {prompts.CHAIN_OF_THOUGHT_AGNOSTIC.format(language=language.name)}\n",
" {prompts.GENERAL_NOTEBOOK_GUIDELINES.format(language=language.name)}\"\"\"\n",
"\n",
" # This can be R or PYTHON in Docker or with a local kernel if you have R installed\n",
" language = NBLanguage.PYTHON\n",
" if language == NBLanguage.R:\n",
" augmented_task += f\"\\n{prompts.R_OUTPUT_RECOMMENDATION_PROMPT}\"\n",
"\n",
Expand All @@ -85,11 +75,32 @@
" language=language,\n",
" system_prompt=prompts.CAPSULE_SYSTEM_PROMPT_QUERY,\n",
" use_tmp_work_dir=False,\n",
" # run_notebook_on_edit=False,\n",
" run_notebook_on_edit=True if cfg.USE_DOCKER else False,\n",
" )\n",
" return dae"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# ENVIRONMENT CONFIGURATION\n",
"\n",
"# Set your API keys\n",
"os.environ[\"ANTHROPIC_API_KEY\"] = \"\"\n",
"# os.environ[\"OPENAI_API_KEY\"] = \"\"\n",
"# If using docker, be sure to pull the image from docker hub first\n",
"# docker pull futurehouse/bixbench:aviary-notebook-env\n",
"# This image includes many bioinformatics and data science packages\n",
"cfg.USE_DOCKER = False\n",
"# This can be R or PYTHON in Docker or with a local kernel if you have R installed\n",
"LANGUAGE = NBLanguage.R\n",
"MAX_STEPS = 3\n",
"MODEL_NAME = \"claude-3-7-sonnet-latest\""
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -106,16 +117,16 @@
"\n",
"dataset_folder = Path(\"dataset\")\n",
"query = \"Analyze the dataset and give me an in depth analysis using pretty plots. I am particularly interested in crows.\"\n",
"environment = setup_data_analysis_env(query, dataset_folder)\n",
"environment = setup_data_analysis_env(query, dataset_folder, LANGUAGE)\n",
"\n",
"agent = AgentConfig(\n",
" agent_type=\"ReActAgent\",\n",
" agent_kwargs={\n",
" \"llm_model\": {\n",
" \"parallel_tool_calls\": False,\n",
" \"num_retries\": 5,\n",
" \"num_retries\": 3,\n",
" \"temperature\": 1.0,\n",
" \"name\": \"claude-3-7-sonnet-latest\",\n",
" \"name\": MODEL_NAME,\n",
" },\n",
" \"hide_old_env_states\": True,\n",
" },\n",
Expand All @@ -125,7 +136,9 @@
"rollout = RolloutManager(agent=agent)\n",
"\n",
"# You can see the notebook updating live in the tmp_results_dir folder\n",
"result = await rollout.sample_trajectories(environments=[environment], max_steps=3)\n",
"result = await rollout.sample_trajectories(\n",
" environments=[environment], max_steps=MAX_STEPS\n",
")\n",
"\n",
"print(\"Trajectory completed! Final notebook available at: \\n\", environment.nb_path)\n",
"print(f\"Final agent answer:\\n{environment.state.answer}\")"
Expand Down Expand Up @@ -207,9 +220,11 @@
"\n",
"# CONFIGURATION\n",
"CROW_STAGE = Stage.PROD\n",
"API_KEY = os.environ.get(\"CROW_API_KEY_PROD\")\n",
"API_KEY = \"\"\n",
"JOB_NAME = \"job-futurehouse-data-analysis-crow-high\"\n",
"MAX_STEPS = 25\n",
"LANGUAGE = \"R\"\n",
"DATA_GCS_LOCATION = \"bixbench_data/CapsuleFolder-1d54e4a7-8b0f-4224-bd31-efcfded0d46c\"\n",
"\n",
"\n",
"client = FutureHouseClient(\n",
Expand All @@ -227,21 +242,30 @@
"Make a discovery using this dataset.\n",
"</query>\n",
"\n",
"{prompts.CHAIN_OF_THOUGHT_AGNOSTIC}\n",
"{prompts.GENERAL_NOTEBOOK_GUIDELINES}\"\"\"\n",
"{prompts.CHAIN_OF_THOUGHT_AGNOSTIC.format(language=LANGUAGE)}\n",
"{prompts.GENERAL_NOTEBOOK_GUIDELINES.format(language=LANGUAGE)}\"\"\"\n",
"\n",
"job_data = TaskRequest(\n",
" name=JOB_NAME,\n",
" query=task,\n",
" runtime_config=RuntimeConfig(\n",
" max_steps=MAX_STEPS,\n",
" upload_id=\"bixbench_data/CapsuleFolder-1d54e4a7-8b0f-4224-bd31-efcfded0d46c\", # This is just an example dataset\n",
" environment_config={\"run_notebook_on_edit\": False, \"eval\": True},\n",
" upload_id=DATA_GCS_LOCATION, # This is just an example dataset\n",
" environment_config={\n",
" \"run_notebook_on_edit\": False,\n",
" \"eval\": True,\n",
" \"language\": LANGUAGE,\n",
" },\n",
" # timeout=600,\n",
" ),\n",
")\n",
"job_id = client.create_task(job_data)\n",
"while client.get_task(job_id).status != \"success\":\n",
"status = \"in progress\"\n",
"while status == \"in progress\":\n",
" print(\"Waiting for task to complete... checking again in 15 seconds\")\n",
" time.sleep(15)\n",
" status = client.get_task(job_id).status\n",
"\n",
"job_result = client.get_task(job_id, verbose=True)\n",
"answer = job_result.environment_frame[\"state\"][\"state\"][\"answer\"]\n",
"print(\n",
Expand Down