diff --git a/src/fhda/Dockerfile.pinned b/src/fhda/Dockerfile.pinned index 64aa8e9..1add015 100644 --- a/src/fhda/Dockerfile.pinned +++ b/src/fhda/Dockerfile.pinned @@ -81,13 +81,13 @@ COPY kernel_requirements.txt . RUN mamba install -c conda-forge --file kernel_requirements.txt -y # Install pip packages -RUN pip install aiodocker ldp==0.23.0 fhaviary[server]==0.18.1 crow-client==0.3.6 +RUN pip install aiodocker ldp==0.26.0 fhaviary[server]==0.19.0 futurehouse-client==0.3.14 # Certain tools are not easily installable via conda. A common practice for # bioinformaticians is to use udocker to run certain heavy duty omics processing # tools in an isolated environment -RUN udocker --allow-root install && \ - udocker --allow-root pull ezlabgva/busco:v5.8.0_cv1 +# RUN udocker --allow-root install && \ +# udocker --allow-root pull ezlabgva/busco:v5.8.0_cv1 WORKDIR /workspace diff --git a/src/fhda/config.py b/src/fhda/config.py index 4b0226b..0c62055 100644 --- a/src/fhda/config.py +++ b/src/fhda/config.py @@ -2,7 +2,6 @@ from pathlib import Path USE_DOCKER = bool(os.getenv("USE_DOCKER", "true").lower() == "true") -USE_R = bool(os.getenv("USE_R", "false").lower() == "true") NB_ENVIRONMENT_DOCKER_IMAGE = os.getenv( "NB_ENVIRONMENT_DOCKER_IMAGE", "futurehouse/bixbench:aviary-notebook-env" ) diff --git a/src/fhda/data_analysis_env.py b/src/fhda/data_analysis_env.py index f74cb2d..4b71dfd 100644 --- a/src/fhda/data_analysis_env.py +++ b/src/fhda/data_analysis_env.py @@ -162,13 +162,16 @@ def from_task( f"\n" f"{task}\n" f"\n" - f"{prompts.CHAIN_OF_THOUGHT_AGNOSTIC}\n" - f"{prompts.GENERAL_NOTEBOOK_GUIDELINES}" + f"{prompts.CHAIN_OF_THOUGHT_AGNOSTIC.format(language=kwargs.get('language', 'PYTHON'))}\n" + f"{prompts.GENERAL_NOTEBOOK_GUIDELINES.format(language=kwargs.get('language', 'PYTHON'))}" ) logger.info("Trajectory path: %s", trajectory_path) nb_path = trajectory_path / NBEnvironment.NOTEBOOK_NAME logger.info("NB path: %s", nb_path) - language = NBLanguage.PYTHON # In future, this should be a hyperparameter + language = getattr(NBLanguage, environment_config.get("language", "PYTHON")) + # Overwrite the language in the kwargs with NBLanguage enum + kwargs["language"] = language + logger.info("Language: %s", language.name) if language == NBLanguage.R: task += f"\n{prompts.R_OUTPUT_RECOMMENDATION_PROMPT}" @@ -188,7 +191,6 @@ def from_task( eval_mode=EvalAnswerMode.LLM, nb_path=nb_path, work_dir=trajectory_path, - language=language, system_prompt=prompts.CAPSULE_SYSTEM_PROMPT_QUERY, use_tmp_work_dir=False, **kwargs, diff --git a/src/fhda/notebook_env.py b/src/fhda/notebook_env.py index 081ee30..43865f0 100644 --- a/src/fhda/notebook_env.py +++ b/src/fhda/notebook_env.py @@ -49,9 +49,6 @@ def __init__( self.reload_nb() else: self.nb = nbformat.v4.new_notebook() - if cfg.USE_R: - # Add initial cell with rpy2 extension load - nbformat.v4.new_code_cell(source="%load_ext rpy2.ipython") self.nb.metadata.kernelspec = self.language.make_kernelspec() self.notebook_runtime_errors: list[str] = [] diff --git a/src/fhda/prompts.py b/src/fhda/prompts.py index 349dcdb..f0ed2b5 100644 --- a/src/fhda/prompts.py +++ b/src/fhda/prompts.py @@ -54,38 +54,11 @@ - Check dataframe shapes before printing. Use head() for large dataframes. - Ensure each cell executes successfully before moving to the next. - Assume you already have the packages you need installed and only install new ones if you receive errors. -- If you need to install packages, use mamba or conda. -IMPORTANT: R vs Python vs bash -- You can use either Python, R or bash cells to complete the analysis. -- All cells are by default Python cells. However, you can use both bash and R cells by adding %%bash or %%R to the first line of the cell. -- The first cell has already been loaded with %load_ext rpy2.ipython so you can use %%R cells from the second cell onwards -""" - -# General notebook guidelines -GENERAL_NOTEBOOK_GUIDELINES_PYTHON = """ -General Guidelines: -- Write small to medium-sized cells for easier debugging. -- Edit existing cells by their index number when fixing bugs, rather than creating new ones. -- Check dataframe shapes before printing. Use head() for large dataframes. -- Ensure each cell executes successfully before moving to the next. -- Assume you already have the packages you need installed and only install new ones if you receive errors. -- If you need to install packages, use pip. -- All cells are by default Python cells. Use python or bash tools for all analysis. +- If you need to install packages, use pip or mamba. +- All cells are by default {language} cells. Use {language} or bash tools for all analysis. - You can use bash cells by adding %%bash to the first line of the cell or running a subprocess. """ -GENERAL_NOTEBOOK_GUIDELINES_R = """ -General Guidelines: -- Write small to medium-sized cells for easier debugging. -- Edit existing cells by their index number when fixing bugs, rather than creating new ones. -- Check dataframe shapes before printing. Use head() for large dataframes. -- Ensure each cell executes successfully before moving to the next. -- Assume you already have the packages you need installed and only install new ones if you receive errors. -- If you need to install packages, use mamba or conda. -IMPORTANT: Use R cells for all analysis. -- All cells are by default R cells. -""" - AVOID_IMAGES = """ AVOID USING PLOTS/IMAGES. USE TABLES AND PRINT OUTPUTS INSTEAD AS MUCH AS POSSIBLE. @@ -139,68 +112,7 @@ 2. Load Data and Perform Descriptive Statistics: - Identify which data files are most relevant to resolving the task. List these files. -- Plan how to load these files efficiently in R or Python. -- List the specific descriptive statistics you plan to use (e.g., summary(), str(), head()). -- Consider potential issues like missing data or unexpected formats. How will you handle each? -- Plan how to present this information clearly in the notebook. -- Write down key statistics you expect to see and how you'll interpret them. -- Consider potential data quality issues and how you'll address them. - -Execute your plan to load data and perform descriptive statistics. - -3. Develop Analysis Plan: - -- Break down each task into testable components. List these components. -- For each component, list appropriate statistical tests or visualizations. -- Consider alternative approaches for each component and justify your choices. -- Identify potential confounding factors and how to address them. -- Plan the sequence of your analysis steps, explaining the rationale for each. -- Consider how this analysis plan will be documented in the notebook. -- List potential statistical assumptions for your chosen methods and how you'll test them. -- Think about how your analysis plan addresses your original task. - -Write out your analysis plan as comments in the notebook. - -4. Execute Analysis Plan: - -- For each step in your analysis plan, list the R, Python or bash functions and libraries you'll use. -- Think about how to structure your code for readability and efficiency. -- Plan how to document your code with clear comments. -- Consider how to present results clearly, using tables or visualizations where appropriate. -- Ensure that all outputs are clearly labeled and explained in the context of the task. -- Plan how you'll interpret each result in relation to the original task. -- Consider potential unexpected results and how you'll handle them. - -Execute your analysis plan, creating new cells as needed. - -5. Conclude and Submit Answer: - -- Reflect on how your results relate to the original task. -- Consider any limitations or uncertainties in your analysis. -- Plan a concise summary of your findings. -- Think about how to phrase your conclusion as clear statements. -- Ensure that the notebook contains all necessary information for another model to derive these answers. -- Consider any additional insights or patterns you've noticed during the analysis. -- Think about potential follow-up questions or areas for further investigation. - -""" - -CHAIN_OF_THOUGHT_AGNOSTIC_PYTHON = """ -Follow these steps to create your notebook, using chain-of-thought reasoning at each stage: - -1. List Directory Contents: - -- Consider how to use the list_workdir tool to recursively list the directory contents. -- Think about how to organize and present this information clearly in the notebook. -- List potential challenges in interpreting the directory structure. -- Consider how the directory structure might inform your approach to the analysis. - -Place the output of the list_workdir tool inside tags. - -2. Load Data and Perform Descriptive Statistics: - -- Identify which data files are most relevant to resolving the task. List these files. -- Plan how to load these files efficiently in Python. +- Plan how to load these files efficiently in {language}. - List the specific descriptive statistics you plan to use (e.g., summary(), str(), head()). - Consider potential issues like missing data or unexpected formats. How will you handle each? - Plan how to present this information clearly in the notebook. @@ -224,7 +136,7 @@ 4. Execute Analysis Plan: -- For each step in your analysis plan, list the Python or bash functions and libraries you'll use. +- For each step in your analysis plan, list the {language} or bash functions and libraries you'll use. - Think about how to structure your code for readability and efficiency. - Plan how to document your code with clear comments. - Consider how to present results clearly, using tables or visualizations where appropriate. diff --git a/src/scripts/deploy.py b/src/scripts/deploy.py index 7ec5626..88981c9 100644 --- a/src/scripts/deploy.py +++ b/src/scripts/deploy.py @@ -17,7 +17,6 @@ ENV_VARS = { "OPENAI_API_KEY": os.environ["OPENAI_API_KEY"], "ANTHROPIC_API_KEY": os.environ["ANTHROPIC_API_KEY"], - "USE_R": "false", "USE_DOCKER": "false", "STAGE": "PROD", } diff --git a/src/scripts/platform_run_jobs.py b/src/scripts/platform_run_jobs.py index b7ebca1..cfa2d89 100644 --- a/src/scripts/platform_run_jobs.py +++ b/src/scripts/platform_run_jobs.py @@ -31,6 +31,7 @@ SUBMIT_ANSWER_PROMPT = prompts.SUBMIT_ANSWER_SINGLE else: raise ValueError(f"Dataset {DATASET_NAME} not supported") +NB_LANGUAGE = "PYTHON" MODEL = "claude-3-7-sonnet-latest" TEMPERATURE = 1 NUM_RETRIES = 3 @@ -68,9 +69,9 @@ async def prepare_job(capsule: dict[str, Any]) -> JobRequest: {formatted_question} - {prompts.CHAIN_OF_THOUGHT_AGNOSTIC_PYTHON} + {prompts.CHAIN_OF_THOUGHT_AGNOSTIC.format(language=NB_LANGUAGE)} {SUBMIT_ANSWER_PROMPT} - {prompts.GENERAL_NOTEBOOK_GUIDELINES_PYTHON}""" + {prompts.GENERAL_NOTEBOOK_GUIDELINES.format(language=NB_LANGUAGE)}""" if AVOID_IMAGES: task += prompts.AVOID_IMAGES @@ -95,7 +96,11 @@ async def prepare_job(capsule: dict[str, Any]) -> JobRequest: agent=agent, max_steps=MAX_STEPS, upload_id=capsule["data_folder"], - environment_config={"run_notebook_on_edit": False, "eval": True}, + environment_config={ + "run_notebook_on_edit": False, + "eval": True, + "language": NB_LANGUAGE, + }, ), ) return job_data diff --git a/tutorial/example.ipynb b/tutorial/example.ipynb index f5e38d0..701b4f8 100644 --- a/tutorial/example.ipynb +++ b/tutorial/example.ipynb @@ -35,21 +35,14 @@ "metadata": {}, "outputs": [], "source": [ - "# ENVIRONMENT CONFIGURATION\n", - "\n", - "# Set your API keys\n", - "os.environ[\"ANTHROPIC_API_KEY\"] = \"\"\n", - "os.environ[\"OPENAI_API_KEY\"] = \"\"\n", - "# If using docker, be sure to pull the image from docker hub first\n", - "# docker pull futurehouse/bixbench:aviary-notebook-env\n", - "# This image includes many bioinformatics and data science packages\n", - "cfg.USE_DOCKER = False\n", - "\n", - "\n", - "def setup_data_analysis_env(query: str, dataset_folder: Path):\n", + "def setup_data_analysis_env(\n", + " query: str, dataset_folder: Path, language: NBLanguage = NBLanguage.PYTHON\n", + "):\n", " # Hash the task to get a unique identifier\n", " task_hash = hashlib.sha256(query.encode()).hexdigest()\n", - " trajectory_path = Path(\"tmp_results_dir\") / f\"{task_hash}-{time.time()}\"\n", + " trajectory_path = (\n", + " Path(os.path.abspath(\"tmp_results_dir\")) / f\"{task_hash}-{time.time()}\"\n", + " )\n", " trajectory_path.mkdir(parents=True, exist_ok=True)\n", " nb_path = trajectory_path / NBEnvironment.NOTEBOOK_NAME\n", " # Copy task data to trajectory path\n", @@ -58,7 +51,6 @@ " shutil.copy2(item, trajectory_path)\n", " elif item.is_dir():\n", " shutil.copytree(item, trajectory_path / item.name, dirs_exist_ok=True)\n", - "\n", " # Augment incoming task with CoT instructions\n", " augmented_task = f\"\"\"\\\n", " Here is the user query to address:\n", @@ -68,11 +60,9 @@ " {query}\n", " \n", "\n", - " {prompts.CHAIN_OF_THOUGHT_AGNOSTIC}\n", - " {prompts.GENERAL_NOTEBOOK_GUIDELINES}\"\"\"\n", + " {prompts.CHAIN_OF_THOUGHT_AGNOSTIC.format(language=language.name)}\n", + " {prompts.GENERAL_NOTEBOOK_GUIDELINES.format(language=language.name)}\"\"\"\n", "\n", - " # This can be R or PYTHON in Docker or with a local kernel if you have R installed\n", - " language = NBLanguage.PYTHON\n", " if language == NBLanguage.R:\n", " augmented_task += f\"\\n{prompts.R_OUTPUT_RECOMMENDATION_PROMPT}\"\n", "\n", @@ -85,11 +75,32 @@ " language=language,\n", " system_prompt=prompts.CAPSULE_SYSTEM_PROMPT_QUERY,\n", " use_tmp_work_dir=False,\n", - " # run_notebook_on_edit=False,\n", + " run_notebook_on_edit=True if cfg.USE_DOCKER else False,\n", " )\n", " return dae" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# ENVIRONMENT CONFIGURATION\n", + "\n", + "# Set your API keys\n", + "os.environ[\"ANTHROPIC_API_KEY\"] = \"\"\n", + "# os.environ[\"OPENAI_API_KEY\"] = \"\"\n", + "# If using docker, be sure to pull the image from docker hub first\n", + "# docker pull futurehouse/bixbench:aviary-notebook-env\n", + "# This image includes many bioinformatics and data science packages\n", + "cfg.USE_DOCKER = False\n", + "# This can be R or PYTHON in Docker or with a local kernel if you have R installed\n", + "LANGUAGE = NBLanguage.R\n", + "MAX_STEPS = 3\n", + "MODEL_NAME = \"claude-3-7-sonnet-latest\"" + ] + }, { "cell_type": "code", "execution_count": null, @@ -106,16 +117,16 @@ "\n", "dataset_folder = Path(\"dataset\")\n", "query = \"Analyze the dataset and give me an in depth analysis using pretty plots. I am particularly interested in crows.\"\n", - "environment = setup_data_analysis_env(query, dataset_folder)\n", + "environment = setup_data_analysis_env(query, dataset_folder, LANGUAGE)\n", "\n", "agent = AgentConfig(\n", " agent_type=\"ReActAgent\",\n", " agent_kwargs={\n", " \"llm_model\": {\n", " \"parallel_tool_calls\": False,\n", - " \"num_retries\": 5,\n", + " \"num_retries\": 3,\n", " \"temperature\": 1.0,\n", - " \"name\": \"claude-3-7-sonnet-latest\",\n", + " \"name\": MODEL_NAME,\n", " },\n", " \"hide_old_env_states\": True,\n", " },\n", @@ -125,7 +136,9 @@ "rollout = RolloutManager(agent=agent)\n", "\n", "# You can see the notebook updating live in the tmp_results_dir folder\n", - "result = await rollout.sample_trajectories(environments=[environment], max_steps=3)\n", + "result = await rollout.sample_trajectories(\n", + " environments=[environment], max_steps=MAX_STEPS\n", + ")\n", "\n", "print(\"Trajectory completed! Final notebook available at: \\n\", environment.nb_path)\n", "print(f\"Final agent answer:\\n{environment.state.answer}\")" @@ -207,9 +220,11 @@ "\n", "# CONFIGURATION\n", "CROW_STAGE = Stage.PROD\n", - "API_KEY = os.environ.get(\"CROW_API_KEY_PROD\")\n", + "API_KEY = \"\"\n", "JOB_NAME = \"job-futurehouse-data-analysis-crow-high\"\n", "MAX_STEPS = 25\n", + "LANGUAGE = \"R\"\n", + "DATA_GCS_LOCATION = \"bixbench_data/CapsuleFolder-1d54e4a7-8b0f-4224-bd31-efcfded0d46c\"\n", "\n", "\n", "client = FutureHouseClient(\n", @@ -227,21 +242,30 @@ "Make a discovery using this dataset.\n", "\n", "\n", - "{prompts.CHAIN_OF_THOUGHT_AGNOSTIC}\n", - "{prompts.GENERAL_NOTEBOOK_GUIDELINES}\"\"\"\n", + "{prompts.CHAIN_OF_THOUGHT_AGNOSTIC.format(language=LANGUAGE)}\n", + "{prompts.GENERAL_NOTEBOOK_GUIDELINES.format(language=LANGUAGE)}\"\"\"\n", "\n", "job_data = TaskRequest(\n", " name=JOB_NAME,\n", " query=task,\n", " runtime_config=RuntimeConfig(\n", " max_steps=MAX_STEPS,\n", - " upload_id=\"bixbench_data/CapsuleFolder-1d54e4a7-8b0f-4224-bd31-efcfded0d46c\", # This is just an example dataset\n", - " environment_config={\"run_notebook_on_edit\": False, \"eval\": True},\n", + " upload_id=DATA_GCS_LOCATION, # This is just an example dataset\n", + " environment_config={\n", + " \"run_notebook_on_edit\": False,\n", + " \"eval\": True,\n", + " \"language\": LANGUAGE,\n", + " },\n", + " # timeout=600,\n", " ),\n", ")\n", "job_id = client.create_task(job_data)\n", - "while client.get_task(job_id).status != \"success\":\n", + "status = \"in progress\"\n", + "while status == \"in progress\":\n", + " print(\"Waiting for task to complete... checking again in 15 seconds\")\n", " time.sleep(15)\n", + " status = client.get_task(job_id).status\n", + "\n", "job_result = client.get_task(job_id, verbose=True)\n", "answer = job_result.environment_frame[\"state\"][\"state\"][\"answer\"]\n", "print(\n",