From f87e033e9de9af81d73319add32d26bc0604bce1 Mon Sep 17 00:00:00 2001 From: Ludovico Mitchener Date: Tue, 6 May 2025 23:29:34 -0700 Subject: [PATCH 1/4] Tool timeout, custom deployment cleanup, DAEnv from_task update --- src/fhda/Dockerfile.custom_deployment | 8 ++- src/fhda/data_analysis_env.py | 73 +++++++++++++++------------ src/fhda/notebook_env.py | 17 +++++-- src/fhda/prompts.py | 8 +-- src/scripts/deploy.py | 7 +-- 5 files changed, 69 insertions(+), 44 deletions(-) diff --git a/src/fhda/Dockerfile.custom_deployment b/src/fhda/Dockerfile.custom_deployment index 161b3e0..3914fef 100644 --- a/src/fhda/Dockerfile.custom_deployment +++ b/src/fhda/Dockerfile.custom_deployment @@ -1,5 +1,5 @@ # syntax=docker/dockerfile:1.4 -FROM python:3.12-slim AS base +FROM python:3.12-slim@sha256:bae1a061b657f403aaacb1069a7f67d91f7ef5725ab17ca36abc5f1b2797ff92 AS base WORKDIR /app ENV PYTHONUNBUFFERED=1 @@ -120,6 +120,12 @@ RUN --mount=type=cache,target=/app/miniconda/pkgs \ ENV UV_COMPILE_BYTECODE=1 ENV UV_LINK_MODE=copy +RUN mamba clean -all -y && \ + rm -rf /app/miniconda/pkgs/* && \ + find /app/miniconda \( -type d -name __pycache__ -o -type d -name tests -o -type d -name '*.tests' -o -type d -name 'test' \) -exec rm -rf {} + && \ + find /app/miniconda -type f -name '*.a' -delete && \ + find /app/miniconda -type f -name '*.js.map' -delete + FROM base AS builder diff --git a/src/fhda/data_analysis_env.py b/src/fhda/data_analysis_env.py index 09ea07f..e827b40 100644 --- a/src/fhda/data_analysis_env.py +++ b/src/fhda/data_analysis_env.py @@ -1,4 +1,3 @@ -import hashlib import logging import shutil from typing import Any, cast @@ -16,7 +15,7 @@ from futurehouse_client import FutureHouseClient from .notebook_env import NBEnvironment -from .utils import NBLanguage, MultipleChoiceQuestion, nb_to_html +from .utils import NBLanguage, MultipleChoiceQuestion from . import prompts from . 
import config as cfg @@ -150,7 +149,7 @@ def export_frame(self) -> Frame: "done": self.state.done, "total_reward": self.state.total_reward, "nb_state": self.state.nb, - "nb_state_html": nb_to_html(self.state.nb), + # "nb_state_html": nb_to_html(self.state.nb), # temporarily disabled "nb_runtime_errors": self.state.notebook_runtime_errors, }, info={ @@ -168,6 +167,8 @@ def from_task( cls, task: str, gcs_artifact_path: str | None = None, + trajectory_id: str | None = None, + user_id: str | None = None, environment_config: dict[str, Any] | None = None, ) -> "DataAnalysisEnv": """ @@ -178,9 +179,11 @@ def from_task( gcs_artifact_path: The path to the GCS artifact – required for evaluation on crow jobs environment_config: A JSON string of environment configuration """ - logger.info("User task: %s", task[:100]) + logger.info("User task: %s", task[:50]) logger.info("GCS artifact path: %s", gcs_artifact_path) logger.info("environment_config: %s", environment_config) + logger.info("trajectory_id: %s", trajectory_id) + logger.info("user_id: %s", user_id) # Track cost of running the environment enable_cost_tracking() if ( @@ -190,6 +193,22 @@ def from_task( "Running crow jobs without gcs_artifact_path is not supported" ) + if user_id is None: + user_id = "default_user" + if trajectory_id is None: + trajectory_id = f"{gcs_artifact_path}-{time.time()}" + + # Always create a new directory for the trajectory + trajectory_path = ( + cfg.DATA_STORAGE_PATH / "user_trajectories" / user_id / trajectory_id + ) + logger.info("Trajectory path: %s", trajectory_path) + trajectory_path.mkdir(parents=True, exist_ok=True) + for item in (cfg.DATA_STORAGE_PATH / gcs_artifact_path).iterdir(): + if item.is_file(): + shutil.copy2(item, trajectory_path) + elif item.is_dir(): + shutil.copytree(item, trajectory_path / item.name, dirs_exist_ok=True) if environment_config: kwargs = { k: v @@ -200,39 +219,27 @@ def from_task( kwargs = {} environment_config = {} logger.info("Filtered kwargs: %s", kwargs) 
- task_hash = hashlib.sha256(task.encode()).hexdigest() - if environment_config.get("eval", False): - logger.info("Eval mode is True") - # Create a temporary directory in GCP mounted storage volume - trajectory_path = cfg.DATA_STORAGE_PATH / f"{task_hash}-{time.time()}" - trajectory_path.mkdir(parents=True, exist_ok=True) - for item in (cfg.DATA_STORAGE_PATH / gcs_artifact_path).iterdir(): - if item.is_file(): - shutil.copy2(item, trajectory_path) - elif item.is_dir(): - shutil.copytree( - item, trajectory_path / item.name, dirs_exist_ok=True - ) - else: - logger.info("Eval mode is False") - # Use the GCP folder created when uploading the data via the platform - trajectory_path = cfg.DATA_STORAGE_PATH / gcs_artifact_path - # Augment incoming user query with CoT instructions + + language = getattr(NBLanguage, environment_config.get("language", "PYTHON")) + # Overwrite the language in the kwargs with NBLanguage enum + kwargs["language"] = language + logger.info("Language: %s", language.name) + + if not environment_config.get("eval", False): + logger.info( + "Platform job detected, augmenting user query with CoT instructions" + ) + # If running via the platform, augment incoming user query with CoT instructions task = ( - f"Here is the user query to address:\n" + f"{prompts.CHAIN_OF_THOUGHT_AGNOSTIC.format(language=kwargs.get('language', 'PYTHON'))}\n" + f"{prompts.GENERAL_NOTEBOOK_GUIDELINES.format(language=kwargs.get('language', 'PYTHON'))}" + f"Here is the research question to address:\n" f"\n" f"{task}\n" f"\n" - f"{prompts.CHAIN_OF_THOUGHT_AGNOSTIC.format(language=kwargs.get('language', 'PYTHON'))}\n" - f"{prompts.GENERAL_NOTEBOOK_GUIDELINES.format(language=kwargs.get('language', 'PYTHON'))}" ) - logger.info("Trajectory path: %s", trajectory_path) nb_path = trajectory_path / NBEnvironment.NOTEBOOK_NAME logger.info("NB path: %s", nb_path) - language = getattr(NBLanguage, environment_config.get("language", "PYTHON")) - # Overwrite the language in the kwargs with 
NBLanguage enum - kwargs["language"] = language - logger.info("Language: %s", language.name) if trajectory_path.exists(): files = list(trajectory_path.iterdir()) @@ -245,12 +252,14 @@ def from_task( raise ValueError(f"Trajectory path does not exist: {trajectory_path}") return cls( - problem_id=f"data-analysis-task-{task_hash}", + problem_id=f"data-analysis-task-{trajectory_id}", problem=task, eval_mode=EvalAnswerMode.LLM, nb_path=nb_path, work_dir=trajectory_path, - system_prompt=prompts.CAPSULE_SYSTEM_PROMPT_QUERY, + system_prompt=environment_config.get( + "system_prompt", prompts.CAPSULE_SYSTEM_PROMPT_QUERY + ), use_tmp_work_dir=False, **kwargs, ) diff --git a/src/fhda/notebook_env.py b/src/fhda/notebook_env.py index b50c38e..f555138 100644 --- a/src/fhda/notebook_env.py +++ b/src/fhda/notebook_env.py @@ -6,6 +6,7 @@ from pathlib import Path from tempfile import mkdtemp from typing import Any, ClassVar, Self, cast +import asyncio import aiodocker import nbformat @@ -334,11 +335,17 @@ async def _run_notebook_docker(self) -> str: async def _run_notebook_local(self, cell_idx: int | None = None) -> str: """Run notebook using local kernel.""" - client = self.state.kernel_manager.client() - client.start_channels() - error_messages = await utils.nbformat_run_notebook( - cells=self.state.cells, client=client, cell_idx=cell_idx - ) + try: + async with asyncio.timeout(self.EXEC_TIMEOUT): + client = self.state.kernel_manager.client() + client.start_channels() + error_messages = await utils.nbformat_run_notebook( + cells=self.state.cells, client=client, cell_idx=cell_idx + ) + except TimeoutError as err: + raise TimeoutError( + f"Notebook execution timed out after {self.EXEC_TIMEOUT} seconds" + ) from err if error_messages: self.state.notebook_runtime_errors.extend(error_messages) self.state.save_nb() diff --git a/src/fhda/prompts.py b/src/fhda/prompts.py index ec84e35..5523faa 100644 --- a/src/fhda/prompts.py +++ b/src/fhda/prompts.py @@ -18,9 +18,10 @@ """ 
CAPSULE_SYSTEM_PROMPT_QUERY = """ -You are an expert data scientist. -Your task is to create a comprehensive Jupyter notebook named 'notebook.ipynb' that thoroughly analyzes data to answer a user query -The notebook should contain all necessary artifacts (plots, tables, print outputs, code commentary) to fully answer the query. +You are an expert bioinformatician and seasoned biological data scientist. +Your task is to create a comprehensive Jupyter notebook named 'notebook.ipynb' that analyzes data to answer a user query. +The notebook should contain all necessary artifacts (plots, tables, print outputs) to fully answer these questions. +Take your time to think through the question and the data before writing any code, explore the data rigorously and defend your conclusions rigorously. """ # Guidelines for R code output optimization @@ -59,6 +60,7 @@ - If you need to install packages, use pip or mamba. - All cells are by default {language} cells. Use {language} or bash tools for all analysis. - You can use bash cells by adding %%bash to the first line of the cell or running a subprocess. +- You can only create code cells, no markdown cells. 
""" diff --git a/src/scripts/deploy.py b/src/scripts/deploy.py index f26aeb4..68e1b98 100644 --- a/src/scripts/deploy.py +++ b/src/scripts/deploy.py @@ -12,7 +12,7 @@ ) from futurehouse_client.models.app import TaskQueuesConfig -HIGH = False +HIGH = True ENVIRONMENT = "DEV" ENV_VARS = { @@ -85,8 +85,9 @@ def rename_dockerfile(path: Path, new_name: str): auth_type=AuthType.API_KEY, api_key=os.environ[f"CROW_API_KEY_{ENV_VARS['STAGE']}"], ) - - if not HIGH: + if HIGH: + print("Using custom deployment Dockerfile") + else: dockerfile_path = Path("src/fhda/Dockerfile.custom_deployment") rename_dockerfile(dockerfile_path, "Dockerfile_skip.custom_deployment") From 71a7e642a97ad47b0e06f2dff7b61577f89b44b1 Mon Sep 17 00:00:00 2001 From: Ludovico Mitchener Date: Wed, 7 May 2025 22:52:38 -0700 Subject: [PATCH 2/4] Update deployment script and from_task updates --- pyproject.toml | 9 +- src/fhda/config.py | 2 +- src/fhda/data_analysis_env.py | 32 ++-- src/fhda/prompts.py | 7 +- src/scripts/deploy.py | 95 ++++++++++-- tutorial/platform_api.ipynb | 161 ++++++++++++-------- uv.lock | 267 ++-------------------------------- 7 files changed, 217 insertions(+), 356 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 0e29413..a7e0d83 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,20 +13,15 @@ dependencies = [ "pandas==2.2.3", "numpy==2.2.3", "matplotlib==3.10.0", - "scipy==1.15.2", - "seaborn==0.13.2", - "scikit-learn==1.6.1", - "statsmodels==0.14.4", "aiofiles==24.1.0", "google-auth==2.38.0", "google-cloud-storage==3.0.0", "google-cloud-secret-manager==2.23.0", - "futurehouse-client==0.3.15.dev71", + "futurehouse-client==0.3.18.dev25", "jupyter==1.1.1", "nbconvert==7.16.6", "notebook==7.3.2", - "nbformat==5.10.4", - "pydeseq2==0.5.0" + "nbformat==5.10.4" ] description = "Data analysis crow" name = "fhda" diff --git a/src/fhda/config.py b/src/fhda/config.py index a183223..7508480 100644 --- a/src/fhda/config.py +++ b/src/fhda/config.py @@ -25,4 +25,4 @@ # 
FutureHosue client config ENVIRONMENT = os.getenv("ENVIRONMENT", "prod") CROW_STAGE = getattr(Stage, ENVIRONMENT.upper(), Stage.PROD) -PLATFORM_API_KEY = os.getenv("CROW_API_KEY") +PLATFORM_API_KEY = os.getenv("CROW_API_KEY", None) diff --git a/src/fhda/data_analysis_env.py b/src/fhda/data_analysis_env.py index e827b40..0d70f7a 100644 --- a/src/fhda/data_analysis_env.py +++ b/src/fhda/data_analysis_env.py @@ -96,6 +96,10 @@ async def query_literature(self, query: str) -> str: Args: query: The scientific question to answer """ + + if cfg.PLATFORM_API_KEY is None: + raise Exception("Platform API key is not set") + logger.info("Running PQA query") client = FutureHouseClient( stage=cfg.CROW_STAGE, @@ -197,27 +201,33 @@ def from_task( user_id = "default_user" if trajectory_id is None: trajectory_id = f"{gcs_artifact_path}-{time.time()}" - + if environment_config: + kwargs = { + k: v + for k, v in environment_config.items() + if k in cfg.VALID_FROM_TASK_KWARGS + } + else: + kwargs = {} + environment_config = {} # Always create a new directory for the trajectory trajectory_path = ( cfg.DATA_STORAGE_PATH / "user_trajectories" / user_id / trajectory_id ) + if environment_config.get("gcs_override", False): + data_path = cfg.DATA_STORAGE_PATH / gcs_artifact_path + else: + data_path = ( + cfg.DATA_STORAGE_PATH / "user_data" / user_id / gcs_artifact_path + ) logger.info("Trajectory path: %s", trajectory_path) + logger.info("Data path: %s", data_path) trajectory_path.mkdir(parents=True, exist_ok=True) - for item in (cfg.DATA_STORAGE_PATH / gcs_artifact_path).iterdir(): + for item in data_path.iterdir(): if item.is_file(): shutil.copy2(item, trajectory_path) elif item.is_dir(): shutil.copytree(item, trajectory_path / item.name, dirs_exist_ok=True) - if environment_config: - kwargs = { - k: v - for k, v in environment_config.items() - if k in cfg.VALID_FROM_TASK_KWARGS - } - else: - kwargs = {} - environment_config = {} logger.info("Filtered kwargs: %s", kwargs) language = 
getattr(NBLanguage, environment_config.get("language", "PYTHON")) diff --git a/src/fhda/prompts.py b/src/fhda/prompts.py index 5523faa..aa864f9 100644 --- a/src/fhda/prompts.py +++ b/src/fhda/prompts.py @@ -40,12 +40,7 @@ variable_name <- read_excel(".csv", col_names = FALSE, .name_repair = "minimal") ``` -3. When printing dataframes, always wrap them in print() statements: - ```r - print(head(dataframe)) - ``` - -4. Very important: always use the tidyverse package where possible. +3. Very important: always use the tidyverse package where possible. """ diff --git a/src/scripts/deploy.py b/src/scripts/deploy.py index 68e1b98..bad9496 100644 --- a/src/scripts/deploy.py +++ b/src/scripts/deploy.py @@ -12,12 +12,12 @@ ) from futurehouse_client.models.app import TaskQueuesConfig -HIGH = True +HIGH = False ENVIRONMENT = "DEV" ENV_VARS = { - "OPENAI_API_KEY": os.environ["OPENAI_API_KEY"], - "ANTHROPIC_API_KEY": os.environ["ANTHROPIC_API_KEY"], + # "OPENAI_API_KEY": os.environ["OPENAI_API_KEY"], + # "ANTHROPIC_API_KEY": os.environ["ANTHROPIC_API_KEY"], "USE_DOCKER": "false", "STAGE": ENVIRONMENT, "ENVIRONMENT": ENVIRONMENT, @@ -36,17 +36,85 @@ TEMPERATURE = 1 NUM_RETRIES = 3 -agent = AgentConfig( - agent_type="ReActAgent", - agent_kwargs={ +# agent = AgentConfig( +# agent_type="ReActAgent", +# agent_kwargs={ +# "llm_model": { +# "name": MODEL, +# "temperature": TEMPERATURE, +# "num_retries": NUM_RETRIES, +# }, +# "hide_old_env_states": True, +# }, +# ) + +AGENT_MODEL_LIST = [ + { + "model_name": "anthropic/claude-3-7-sonnet-20250219", + "litellm_params": { + "model": "anthropic/claude-3-7-sonnet-20250219", + "api_key": os.environ["ANTHROPIC_API_KEY"], + }, + }, + { + "model_name": "openai/gpt-4.1-2025-04-14", + "litellm_params": { + "model": "openai/gpt-4.1-2025-04-14", + "api_key": os.environ["OPENAI_API_KEY"], + }, + }, + { + "model_name": "anthropic/claude-3-5-sonnet-20241022", + "litellm_params": { + "model": "anthropic/claude-3-5-sonnet-20241022", + "api_key": 
os.environ["ANTHROPIC_API_KEY"], + }, + }, + { + "model_name": "openai/gpt-4o-2024-11-20", + "litellm_params": { + "model": "openai/gpt-4o-2024-11-20", + "api_key": os.environ["OPENAI_API_KEY"], + }, + }, +] + +AGENT_ROUTER_KWARGS = { + "set_verbose": True, + # fallback in list order if the main key fails + "fallbacks": [ + { + "openai/gpt-4.1-2025-04-14": [ + "anthropic/claude-3-7-sonnet-20250219", + "anthropic/claude-3-5-sonnet-20241022", + "openai/gpt-4o-2024-11-20", + ] + } + ], +} + +AGENT_CONFIG = { + "agent_type": "ReActAgent", + "agent_kwargs": { "llm_model": { - "name": MODEL, - "temperature": TEMPERATURE, - "num_retries": NUM_RETRIES, + "name": "anthropic/claude-3-7-sonnet-20250219", + "config": { + "model_list": AGENT_MODEL_LIST, + "router_kwargs": AGENT_ROUTER_KWARGS, + "fallbacks": [ + { + "openai/gpt-4.1-2025-04-14": [ + "anthropic/claude-3-7-sonnet-20250219", + "anthropic/claude-3-5-sonnet-20241022", + "openai/gpt-4o-2024-11-20", + ] + } + ], + }, }, "hide_old_env_states": True, }, -) +} CROWS_TO_DEPLOY = [ JobDeploymentConfig( @@ -55,8 +123,7 @@ name="data-analysis-crow-high" if HIGH else "data-analysis-crow", environment="src.fhda.data_analysis_env.DataAnalysisEnv", environment_variables=ENV_VARS, - # agent="ldp.agent.ReActAgent", - agent=agent, + agent=AgentConfig(**AGENT_CONFIG), # type: ignore container_config=CONTAINER_CONFIG, force=True, frame_paths=frame_paths, @@ -79,8 +146,8 @@ def rename_dockerfile(path: Path, new_name: str): if __name__ == "__main__": client = FutureHouseClient( - # stage=Stage.from_string(os.environ.get("CROW_ENV", ENV_VARS["STAGE"])), - stage=Stage.from_string(os.environ.get("CROW_ENV", "LOCAL")), + stage=Stage.from_string(os.environ.get("CROW_ENV", ENV_VARS["STAGE"])), + # stage=Stage.from_string(os.environ.get("CROW_ENV", "LOCAL")), organization="FutureHouse", auth_type=AuthType.API_KEY, api_key=os.environ[f"CROW_API_KEY_{ENV_VARS['STAGE']}"], diff --git a/tutorial/platform_api.ipynb b/tutorial/platform_api.ipynb 
index 6a963c7..c8f0eb5 100644 --- a/tutorial/platform_api.ipynb +++ b/tutorial/platform_api.ipynb @@ -6,87 +6,125 @@ "metadata": {}, "outputs": [], "source": [ - "# PLATFORM ROLLOUT\n", "import os\n", "import time\n", - "import json\n", + "\n", "from futurehouse_client import FutureHouseClient\n", "from futurehouse_client.models import Stage, TaskRequest, RuntimeConfig\n", "from futurehouse_client.models.app import AuthType\n", - "import fhda.prompts as prompts\n", - "from ldp.agent import AgentConfig\n", - "\n", - "# CONFIGURATION\n", - "CROW_STAGE = Stage.PROD # Don't change this\n", - "API_KEY = \"\" # Add your API key here\n", - "JOB_NAME = \"job-futurehouse-data-analysis-crow-high\" # Don't change this\n", - "MAX_STEPS = 30 # You can change this to impose a limit on the number of steps\n", - "LANGUAGE = \"R\" # Choose between \"R\" and \"PYTHON\"\n", - "DATA_GCS_LOCATION = \"eda/flow0\" # This is the location of the dataset on GCS – ask someone from FutureHouse to upload new datasets\n", - "MODEL_NAME = \"claude-3-7-sonnet-latest\" # Feel free to use any Litellm supported model\n", - "TEMPERATURE = 1.0 # Feel free to try different model temperatures\n", + "import fhda.prompts as prompts" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ + "# Instantiate the FutureHouse client with your API key\n", + "FH_API_KEY = \"\" # Add your API key here\n", + "JOB_NAME = \"job-futurehouse-data-analysis-crow\" # Don't change this\n", + "UPLOAD_ID = (\n", + " \"finch_tutorial\" # This is the folder name of the dataset you uploaded to GCS\n", + ")\n", "\n", + "client = FutureHouseClient(\n", + " stage=Stage.DEV,\n", + " auth_type=AuthType.API_KEY,\n", + " api_key=FH_API_KEY,\n", + ")" ] }, { 
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Load your dataset – note you only have to do this once\n", + "client.upload_file(JOB_NAME, file_path=\"dataset\", upload_id=UPLOAD_ID)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Check what files were uploaded to your gcs folder\n", + "client.list_files(JOB_NAME, upload_id=UPLOAD_ID)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Define your task\n", "# Here is where you can update the prompt. As shown below, by default we use CoT prompting,\n", "# but it is not necessary and we encourage users to experiment with different prompting strategies.\n", - "query = \"\"\"\n", - "Make a discovery using this dataset\n", - "\"\"\"\n", - "\n", - "task = f\"\"\"\\\n", - "Here is the user query to address:\n", - "\n", - "\n", - "{query}\n", - "\n", - "{prompts.CHAIN_OF_THOUGHT_AGNOSTIC.format(language=LANGUAGE)}\n", - "{prompts.GENERAL_NOTEBOOK_GUIDELINES.format(language=LANGUAGE)}\"\"\"\n", - "\n", - "# This is extra R prompting to avoid long R output blocks\n", - "if LANGUAGE == \"R\":\n", - " task += f\"\\n{prompts.R_OUTPUT_RECOMMENDATION_PROMPT}\"\n", - "\n", + "LANGUAGE = \"PYTHON\" # Choose between \"R\" and \"PYTHON\"\n", + "MAX_STEPS = 30 # You can change this to impose a limit on the number of steps the agent can take\n", + "query = \"Make a short notebook with visualizations exploring the dataset.\"\n", "\n", - "# You shouldn't have to change anything below here\n", - "client = FutureHouseClient(\n", - " stage=CROW_STAGE,\n", - " auth_type=AuthType.API_KEY,\n", - " api_key=API_KEY,\n", + "task = (\n", + " f\"{prompts.CHAIN_OF_THOUGHT_AGNOSTIC.format(language=LANGUAGE)}\\n\"\n", + " f\"{prompts.GENERAL_NOTEBOOK_GUIDELINES.format(language=LANGUAGE)}\"\n", + " f\"Here is the research question to address:\\n\"\n", + " 
f\"\\n\"\n", + " f\"{query}\\n\"\n", + " f\"\\n\"\n", ")\n", "\n", + "# This is extra R prompting to avoid long R output blocks – also feel free to discard this\n", + "if LANGUAGE == \"R\":\n", + " task += f\"\\n{prompts.R_OUTPUT_RECOMMENDATION_PROMPT}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# This is how to create a task – you shouldn't need to change anything here\n", "job_data = TaskRequest(\n", " name=JOB_NAME,\n", " query=task,\n", " runtime_config=RuntimeConfig(\n", " max_steps=MAX_STEPS,\n", - " upload_id=DATA_GCS_LOCATION, # This is just an example dataset\n", + " upload_id=UPLOAD_ID,\n", " environment_config={\n", - " \"run_notebook_on_edit\": False,\n", " \"eval\": True, # DO NOT CHANGE THIS\n", " \"language\": LANGUAGE,\n", " },\n", - " agent=AgentConfig(\n", - " agent_type=\"ReActAgent\",\n", - " agent_kwargs={\n", - " \"llm_model\": {\"name\": MODEL_NAME, \"temperature\": TEMPERATURE},\n", - " },\n", - " ),\n", " ),\n", ")\n", - "job_id = client.create_task(job_data)\n", + "trajectory_id = client.create_task(job_data)\n", + "print(\n", + " f\"Task running on platform, you can view progress live at:https://platform.futurehouse.org/trajectories/{trajectory_id}\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Jobs take on average 3-10 minutes to complete\n", "status = \"in progress\"\n", "while status in [\"in progress\", \"queued\"]:\n", - " print(\"Waiting for task to complete... 
checking again in 30 seconds\")\n", " time.sleep(15)\n", - " status = client.get_task(job_id).status\n", + " status = client.get_task(trajectory_id).status\n", "\n", "if status == \"failed\":\n", " raise Exception(\"Task failed\")\n", "\n", - "job_result = client.get_task(job_id, verbose=True)\n", + "job_result = client.get_task(trajectory_id, verbose=True)\n", "answer = job_result.environment_frame[\"state\"][\"state\"][\"answer\"]\n", - "print(\n", - " f\"Task completed, the full analysis is available at:https://platform.futurehouse.org/trajectories/{job_id}\\n Agent answer: {answer}\"\n", - ")" + "print(f\"The agent's answer to your research question is: \\n{answer}\")" ] }, { @@ -95,16 +133,19 @@ "metadata": {}, "outputs": [], "source": [ - "# You can also view the notebook locally by saving it to a directory of your choice\n", - "# Define the path where you want to save the notebook\n", - "notebook_path = \"output/analysis_notebook.ipynb\"\n", + "# In addition to viewing the notebook and reasoning trace via the platform,\n", + "# you can also list the files in the trajectory directory and download any files you need\n", + "print(client.list_files(JOB_NAME, trajectory_id=trajectory_id))\n", "\n", - "os.makedirs(os.path.dirname(notebook_path), exist_ok=True)\n", - "notebook_content = job_result.environment_frame[\"state\"][\"state\"][\"nb_state\"]\n", - "with open(notebook_path, \"w\") as f:\n", - " json.dump(notebook_content, f, indent=2)\n", - "\n", - "print(f\"Notebook saved to {os.path.abspath(notebook_path)}\")" + "destination_path = \"output/notebook.ipynb\"\n", + "file_path = \"notebook.ipynb\"\n", + "client.download_file(\n", + " JOB_NAME,\n", + " trajectory_id=trajectory_id,\n", + " file_path=file_path,\n", + " destination_path=destination_path,\n", + ")\n", + "print(f\"Notebook saved to {os.path.abspath(destination_path)}\")" ] } ], diff --git a/uv.lock b/uv.lock index 5f7fd86..5883556 100644 --- a/uv.lock +++ b/uv.lock @@ -96,24 +96,6 @@ wheels = [ { 
url = "https://files.pythonhosted.org/packages/ec/6a/bc7e17a3e87a2985d3e8f4da4cd0f481060eb78fb08596c42be62c90a4d9/aiosignal-1.3.2-py2.py3-none-any.whl", hash = "sha256:45cde58e409a301715980c2b01d0c28bdde3770d8290b5eb2173759d9acb31a5", size = 7597 }, ] -[[package]] -name = "anndata" -version = "0.11.4" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "array-api-compat" }, - { name = "h5py" }, - { name = "natsort" }, - { name = "numpy" }, - { name = "packaging" }, - { name = "pandas" }, - { name = "scipy" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/6e/bb/895fa2e9f8cd6d1c058aa90759da715037d0f11e23713e692537555549d7/anndata-0.11.4.tar.gz", hash = "sha256:4ce08d09d2ccb5f37d32790363bbcc7fc1b79863842296ae4badfaf48c736e24", size = 541143 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/0a/4b/ab615fea52e34579d5c6c7dba86b4f9d7f3cdb6a170b348ec49f34cf4355/anndata-0.11.4-py3-none-any.whl", hash = "sha256:fefebb1480316dfa5a23924aa9f74781d447484421bb0c788b0b2ca5e3b339d2", size = 144472 }, -] - [[package]] name = "annotated-types" version = "0.7.0" @@ -179,15 +161,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5a/e4/bf8034d25edaa495da3c8a3405627d2e35758e44ff6eaa7948092646fdcc/argon2_cffi_bindings-21.2.0-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:e415e3f62c8d124ee16018e491a009937f8cf7ebf5eb430ffc5de21b900dad93", size = 53104 }, ] -[[package]] -name = "array-api-compat" -version = "1.11.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/6c/1e/d04312a19a67744298b7546885149488b8afbb965dfe693aa4964bb60586/array_api_compat-1.11.2.tar.gz", hash = "sha256:a3b7f7b6af18f4c42e79423b1b2479798998b6a74355069d77a01a5282755b5d", size = 50776 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/9f/d8/3388c7da49f522e51ab2f919797db28782216cadc9ecc9976160302cfcd6/array_api_compat-1.11.2-py3-none-any.whl", hash = 
"sha256:b1d0059714a4153b3ae37c989e47b07418f727be5b22908dd3cf9d19bdc2c547", size = 53149 }, -] - [[package]] name = "arrow" version = "1.3.0" @@ -716,11 +689,6 @@ dependencies = [ { name = "notebook" }, { name = "numpy" }, { name = "pandas" }, - { name = "pydeseq2" }, - { name = "scikit-learn" }, - { name = "scipy" }, - { name = "seaborn" }, - { name = "statsmodels" }, ] [package.optional-dependencies] @@ -740,14 +708,14 @@ requires-dist = [ { name = "aiodocker", specifier = "==0.24.0" }, { name = "aiofiles", specifier = "==24.1.0" }, { name = "black", marker = "extra == 'dev'" }, - { name = "fhaviary", extras = ["server"] }, - { name = "futurehouse-client", specifier = ">=0.3.14" }, + { name = "fhaviary", extras = ["server"], specifier = "==0.19.0" }, + { name = "futurehouse-client", specifier = "==0.3.18.dev25" }, { name = "google-auth", specifier = "==2.38.0" }, { name = "google-cloud-secret-manager", specifier = "==2.23.0" }, { name = "google-cloud-storage", specifier = "==3.0.0" }, { name = "isort", marker = "extra == 'dev'" }, { name = "jupyter", specifier = "==1.1.1" }, - { name = "ldp", specifier = ">=0.26.0" }, + { name = "ldp", specifier = "==0.26.0" }, { name = "matplotlib", specifier = "==3.10.0" }, { name = "mypy", marker = "extra == 'dev'" }, { name = "nbconvert", specifier = "==7.16.6" }, @@ -756,15 +724,10 @@ requires-dist = [ { name = "numpy", specifier = "==2.2.3" }, { name = "pandas", specifier = "==2.2.3" }, { name = "pre-commit", marker = "extra == 'dev'" }, - { name = "pydeseq2", specifier = "==0.5.0" }, { name = "pytest", marker = "extra == 'dev'" }, { name = "pytest-asyncio", marker = "extra == 'dev'" }, { name = "pytest-cov", marker = "extra == 'dev'" }, { name = "ruff", marker = "extra == 'dev'" }, - { name = "scikit-learn", specifier = "==1.6.1" }, - { name = "scipy", specifier = "==1.15.2" }, - { name = "seaborn", specifier = "==0.13.2" }, - { name = "statsmodels", specifier = "==0.14.4" }, ] [[package]] @@ -818,37 +781,6 @@ wheels = [ { 
url = "https://files.pythonhosted.org/packages/bf/ff/44934a031ce5a39125415eb405b9efb76fe7f9586b75291d66ae5cbfc4e6/fonttools-4.56.0-py3-none-any.whl", hash = "sha256:1088182f68c303b50ca4dc0c82d42083d176cba37af1937e1a976a31149d4d14", size = 1089800 }, ] -[[package]] -name = "formulaic" -version = "1.1.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "interface-meta" }, - { name = "numpy" }, - { name = "pandas" }, - { name = "scipy" }, - { name = "typing-extensions" }, - { name = "wrapt" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/5c/30/03b5e3bb62374db3f665ca3020fdfc4304e98ceeaaa9dcd7a47a6b574ebf/formulaic-1.1.1.tar.gz", hash = "sha256:ddf80e4bef976dd99698aa27512015276c7b86c314b601ae6fd360c7741b7231", size = 652602 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d2/c2/a34097e53efe70a538ae97574ff9e9866e60fc1c792c19da5fd6b56ce7b5/formulaic-1.1.1-py3-none-any.whl", hash = "sha256:bbb7e38f99e4bcdc62cb0a6a818ad33b370b4e98e9e4f0b276561448482c8268", size = 115718 }, -] - -[[package]] -name = "formulaic-contrasts" -version = "1.0.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "formulaic" }, - { name = "pandas" }, - { name = "session-info" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/28/e6/4850976c248746062cfaa08628b3ec5ba3dfcab3d6ecd0d3886c36c04681/formulaic_contrasts-1.0.0.tar.gz", hash = "sha256:0a575a810bf1fba28938259d86a3ae2ae90cb9826fca84b9409085170862f701", size = 123794 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/40/7b/639411281256c84e8111bf6cb9676c44dbf5d8ad4cb042f4359b7e7b9e74/formulaic_contrasts-1.0.0-py3-none-any.whl", hash = "sha256:e1220d315cf446bdec9385375ca4da43896e4ba68114ebea1b2a37efa5d097f5", size = 10054 }, -] - [[package]] name = "fqdn" version = "1.5.1" @@ -908,7 +840,7 @@ wheels = [ [[package]] name = "futurehouse-client" -version = "0.3.14" +version = "0.3.18.dev25" source = { registry = 
"https://pypi.org/simple" } dependencies = [ { name = "cloudpickle" }, @@ -916,13 +848,15 @@ dependencies = [ { name = "fhaviary" }, { name = "httpx" }, { name = "ldp" }, + { name = "litellm" }, { name = "pydantic" }, { name = "python-dotenv" }, { name = "tenacity" }, + { name = "tqdm" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/64/94/291b3fb8e6de5d7758890dd17c2527fa6ffb0d0cc0826a11bfba3745662a/futurehouse_client-0.3.14.tar.gz", hash = "sha256:e2739e37a7624dcf65c11996f8b3d2cd2e8ed34cff16d72e0f4ce176a9c88a00", size = 136841 } +sdist = { url = "https://files.pythonhosted.org/packages/33/17/cc8bc7b5aa174d10d451a100681dc88ca4718eeb3058e0b023f0939d4358/futurehouse_client-0.3.18.dev25.tar.gz", hash = "sha256:0685836fe273999fe226204334d19d95a823a32e2beca54b1326f8161006f973", size = 145006 } wheels = [ - { url = "https://files.pythonhosted.org/packages/3c/b0/2e3b641964c623bfefe9fbc65e58e4340b32ba56df096efb2f6339d50612/futurehouse_client-0.3.14-py3-none-any.whl", hash = "sha256:f485ced945134e3dcc5770f1429f744d068189639d34cd715de53150680d4bf8", size = 26102 }, + { url = "https://files.pythonhosted.org/packages/e9/39/af2cebaaaf4255973b5b380728593bbf65ee60f10433c79dd532b94a27dc/futurehouse_client-0.3.18.dev25-py3-none-any.whl", hash = "sha256:eab284abb067edf1069c889a2d4b1e6628f926b30459ed3ab4fb80084a28ff9e", size = 31545 }, ] [[package]] @@ -1112,27 +1046,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/95/04/ff642e65ad6b90db43e668d70ffb6736436c7ce41fcc549f4e9472234127/h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761", size = 58259 }, ] -[[package]] -name = "h5py" -version = "3.13.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "numpy" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/03/2e/a22d6a8bfa6f8be33e7febd985680fba531562795f0a9077ed1eb047bfb0/h5py-3.13.0.tar.gz", hash = 
"sha256:1870e46518720023da85d0895a1960ff2ce398c5671eac3b1a41ec696b7105c3", size = 414876 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d8/20/438f6366ba4ded80eadb38f8927f5e2cd6d2e087179552f20ae3dbcd5d5b/h5py-3.13.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:477c58307b6b9a2509c59c57811afb9f598aedede24a67da808262dfa0ee37b4", size = 3384442 }, - { url = "https://files.pythonhosted.org/packages/10/13/cc1cb7231399617d9951233eb12fddd396ff5d4f7f057ee5d2b1ca0ee7e7/h5py-3.13.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:57c4c74f627c616f02b7aec608a8c706fe08cb5b0ba7c08555a4eb1dde20805a", size = 2917567 }, - { url = "https://files.pythonhosted.org/packages/9e/d9/aed99e1c858dc698489f916eeb7c07513bc864885d28ab3689d572ba0ea0/h5py-3.13.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:357e6dc20b101a805ccfd0024731fbaf6e8718c18c09baf3b5e4e9d198d13fca", size = 4669544 }, - { url = "https://files.pythonhosted.org/packages/a7/da/3c137006ff5f0433f0fb076b1ebe4a7bf7b5ee1e8811b5486af98b500dd5/h5py-3.13.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d6f13f9b5ce549448c01e4dfe08ea8d1772e6078799af2c1c8d09e941230a90d", size = 4932139 }, - { url = "https://files.pythonhosted.org/packages/25/61/d897952629cae131c19d4c41b2521e7dd6382f2d7177c87615c2e6dced1a/h5py-3.13.0-cp312-cp312-win_amd64.whl", hash = "sha256:21daf38171753899b5905f3d82c99b0b1ec2cbbe282a037cad431feb620e62ec", size = 2954179 }, - { url = "https://files.pythonhosted.org/packages/60/43/f276f27921919a9144074320ce4ca40882fc67b3cfee81c3f5c7df083e97/h5py-3.13.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e520ec76de00943dd017c8ea3f354fa1d2f542eac994811943a8faedf2a7d5cb", size = 3358040 }, - { url = "https://files.pythonhosted.org/packages/1b/86/ad4a4cf781b08d4572be8bbdd8f108bb97b266a14835c640dc43dafc0729/h5py-3.13.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e79d8368cd9295045956bfb436656bea3f915beaa11d342e9f79f129f5178763", size 
= 2892766 }, - { url = "https://files.pythonhosted.org/packages/69/84/4c6367d6b58deaf0fa84999ec819e7578eee96cea6cbd613640d0625ed5e/h5py-3.13.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:56dd172d862e850823c4af02dc4ddbc308f042b85472ffdaca67f1598dff4a57", size = 4664255 }, - { url = "https://files.pythonhosted.org/packages/fd/41/bc2df86b72965775f6d621e0ee269a5f3ac23e8f870abf519de9c7d93b4d/h5py-3.13.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:be949b46b7388074c5acae017fbbe3e5ba303fd9daaa52157fdfef30bbdacadd", size = 4927580 }, - { url = "https://files.pythonhosted.org/packages/97/34/165b87ea55184770a0c1fcdb7e017199974ad2e271451fd045cfe35f3add/h5py-3.13.0-cp313-cp313-win_amd64.whl", hash = "sha256:4f97ecde7ac6513b21cd95efdfc38dc6d19f96f6ca6f2a30550e94e551458e0a", size = 2940890 }, -] - [[package]] name = "httpcore" version = "1.0.7" @@ -1218,15 +1131,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374", size = 5892 }, ] -[[package]] -name = "interface-meta" -version = "1.3.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/4d/75/10526292b332f3479c246750a96f6ec11a28e297839a9c25583b2aadc119/interface_meta-1.3.0.tar.gz", hash = "sha256:8a4493f8bdb73fb9655dcd5115bc897e207319e36c8835f39c516a2d7e9d79a1", size = 15007 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/02/3f/a6ec28c88e2d8e54d32598a1e0b5208a4baa72a8e7f6e241beab5731eb9d/interface_meta-1.3.0-py3-none-any.whl", hash = "sha256:de35dc5241431886e709e20a14d6597ed07c9f1e8b4bfcffde2190ca5b700ee8", size = 14854 }, -] - [[package]] name = "ipykernel" version = "6.29.5" @@ -1367,15 +1271,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/91/61/c80ef80ed8a0a21158e289ef70dac01e351d929a1c30cb0f49be60772547/jiter-0.8.2-cp313-cp313t-win_amd64.whl", hash = "sha256:3ac9f578c46f22405ff7f8b1f5848fb753cc4b8377fbec8470a7dc3997ca7566", size = 202374 }, ] -[[package]] -name = "joblib" -version = "1.4.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/64/33/60135848598c076ce4b231e1b1895170f45fbcaeaa2c9d5e38b04db70c35/joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e", size = 2116621 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/91/29/df4b9b42f2be0b623cbd5e2140cafcaa2bef0759a00b7b70104dcfe2fb51/joblib-1.4.2-py3-none-any.whl", hash = "sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6", size = 301817 }, -] - [[package]] name = "json5" version = "0.10.0" @@ -1723,7 +1618,7 @@ wheels = [ [[package]] name = "litellm" -version = "1.66.1" +version = "1.67.4.post1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohttp" }, @@ -1738,10 +1633,7 @@ dependencies = [ { name = "tiktoken" }, { name = "tokenizers" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c1/21/12562c37310254456afdd277454dac4d14b8b40796216e8a438a9e1c5e86/litellm-1.66.1.tar.gz", hash = "sha256:98f7add913e5eae2131dd412ee27532d9a309defd9dbb64f6c6c42ea8a2af068", size = 7203211 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e9/33/fdc4615ca621940406e3b0b303e900bc2868cfcd8c62c4a6f5e7d2f6a56c/litellm-1.66.1-py3-none-any.whl", hash = "sha256:1f601fea3f086c1d2d91be60b9db115082a2f3a697e4e0def72f8b9c777c7232", size = 7559553 }, -] +sdist = { url = "https://files.pythonhosted.org/packages/4d/89/bacf75633dd43d6c5536380fb652c4af25046c29f5c6e5fdb4e8fe5af505/litellm-1.67.4.post1.tar.gz", hash = "sha256:057f2505f82d8c3f83d705c375b0d1931de998b13e239a6b06e16ee351fda648", size = 7243930 } [[package]] name = "markupsafe" @@ 
-1912,15 +1804,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2a/e2/5d3f6ada4297caebe1a2add3b126fe800c96f56dbe5d1988a2cbe0b267aa/mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d", size = 4695 }, ] -[[package]] -name = "natsort" -version = "8.4.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e2/a9/a0c57aee75f77794adaf35322f8b6404cbd0f89ad45c87197a937764b7d0/natsort-8.4.0.tar.gz", hash = "sha256:45312c4a0e5507593da193dedd04abb1469253b601ecaf63445ad80f0a1ea581", size = 76575 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ef/82/7a9d0550484a62c6da82858ee9419f3dd1ccc9aa1c26a1e43da3ecd20b0d/natsort-8.4.0-py3-none-any.whl", hash = "sha256:4732914fb471f56b5cce04d7bae6f164a592c7712e1c85f9ef585e197299521c", size = 38268 }, -] - [[package]] name = "nbclient" version = "0.10.2" @@ -2175,18 +2058,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191 }, ] -[[package]] -name = "patsy" -version = "1.0.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "numpy" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/d1/81/74f6a65b848ffd16c18f920620ce999fe45fe27f01ab3911260ce4ed85e4/patsy-1.0.1.tar.gz", hash = "sha256:e786a9391eec818c054e359b737bbce692f051aee4c661f4141cc88fb459c0c4", size = 396010 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/87/2b/b50d3d08ea0fc419c183a84210571eba005328efa62b6b98bc28e9ead32a/patsy-1.0.1-py2.py3-none-any.whl", hash = "sha256:751fb38f9e97e62312e921a1954b81e1bb2bcda4f5eeabaf94db251ee791509c", size = 232923 }, -] - [[package]] name = "pexpect" version = "4.9.0" @@ -2475,25 +2346,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/df/c3/b15fb833926d91d982fde29c0624c9f225da743c7af801dace0d4e187e71/pydantic_core-2.27.1-cp313-none-win_arm64.whl", hash = "sha256:45cf8588c066860b623cd11c4ba687f8d7175d5f7ef65f7129df8a394c502de5", size = 1882983 }, ] -[[package]] -name = "pydeseq2" -version = "0.5.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "anndata" }, - { name = "formulaic" }, - { name = "formulaic-contrasts" }, - { name = "matplotlib" }, - { name = "numpy" }, - { name = "pandas" }, - { name = "scikit-learn" }, - { name = "scipy" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/a1/f4/ec769167b69e4e9267e23563cc60282d1b44ed687609304642cbfb9c1ebc/pydeseq2-0.5.0.tar.gz", hash = "sha256:dec59cee6163c3dc5a333008afc52700c3678e8ebe08583724611110dd8db5bc", size = 51597 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c6/19/1d442931450c877474b3caf859323b59c841f5125aca4a9bbac2482e5f43/pydeseq2-0.5.0-py3-none-any.whl", hash = "sha256:6cd89bd3bdf48ec62cc0ffca8ca92a4ad59b2ea751be22b1f594ce2a53a58372", size = 46698 }, -] - [[package]] name = "pygments" version = "2.19.1" @@ -2869,34 +2721,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e8/a8/d71f44b93e3aa86ae232af1f2126ca7b95c0f515ec135462b3e1f351441c/ruff-0.9.6-py3-none-win_arm64.whl", hash = "sha256:0e2bb706a2be7ddfea4a4af918562fdc1bcb16df255e5fa595bbd800ce322a5a", size = 10177499 }, ] -[[package]] -name = "scikit-learn" -version = "1.6.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "joblib" }, - { name = "numpy" }, - { name = "scipy" }, - { name = "threadpoolctl" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/9e/a5/4ae3b3a0755f7b35a280ac90b28817d1f380318973cff14075ab41ef50d9/scikit_learn-1.6.1.tar.gz", hash = "sha256:b4fc2525eca2c69a59260f583c56a7557c6ccdf8deafdba6e060f94c1c59738e", size = 7068312 } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/0a/18/c797c9b8c10380d05616db3bfb48e2a3358c767affd0857d56c2eb501caa/scikit_learn-1.6.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:926f207c804104677af4857b2c609940b743d04c4c35ce0ddc8ff4f053cddc1b", size = 12104516 }, - { url = "https://files.pythonhosted.org/packages/c4/b7/2e35f8e289ab70108f8cbb2e7a2208f0575dc704749721286519dcf35f6f/scikit_learn-1.6.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:2c2cae262064e6a9b77eee1c8e768fc46aa0b8338c6a8297b9b6759720ec0ff2", size = 11167837 }, - { url = "https://files.pythonhosted.org/packages/a4/f6/ff7beaeb644bcad72bcfd5a03ff36d32ee4e53a8b29a639f11bcb65d06cd/scikit_learn-1.6.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1061b7c028a8663fb9a1a1baf9317b64a257fcb036dae5c8752b2abef31d136f", size = 12253728 }, - { url = "https://files.pythonhosted.org/packages/29/7a/8bce8968883e9465de20be15542f4c7e221952441727c4dad24d534c6d99/scikit_learn-1.6.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e69fab4ebfc9c9b580a7a80111b43d214ab06250f8a7ef590a4edf72464dd86", size = 13147700 }, - { url = "https://files.pythonhosted.org/packages/62/27/585859e72e117fe861c2079bcba35591a84f801e21bc1ab85bce6ce60305/scikit_learn-1.6.1-cp312-cp312-win_amd64.whl", hash = "sha256:70b1d7e85b1c96383f872a519b3375f92f14731e279a7b4c6cfd650cf5dffc52", size = 11110613 }, - { url = "https://files.pythonhosted.org/packages/2e/59/8eb1872ca87009bdcdb7f3cdc679ad557b992c12f4b61f9250659e592c63/scikit_learn-1.6.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2ffa1e9e25b3d93990e74a4be2c2fc61ee5af85811562f1288d5d055880c4322", size = 12010001 }, - { url = "https://files.pythonhosted.org/packages/9d/05/f2fc4effc5b32e525408524c982c468c29d22f828834f0625c5ef3d601be/scikit_learn-1.6.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:dc5cf3d68c5a20ad6d571584c0750ec641cc46aeef1c1507be51300e6003a7e1", size = 11096360 }, - { url = 
"https://files.pythonhosted.org/packages/c8/e4/4195d52cf4f113573fb8ebc44ed5a81bd511a92c0228889125fac2f4c3d1/scikit_learn-1.6.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c06beb2e839ecc641366000ca84f3cf6fa9faa1777e29cf0c04be6e4d096a348", size = 12209004 }, - { url = "https://files.pythonhosted.org/packages/94/be/47e16cdd1e7fcf97d95b3cb08bde1abb13e627861af427a3651fcb80b517/scikit_learn-1.6.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8ca8cb270fee8f1f76fa9bfd5c3507d60c6438bbee5687f81042e2bb98e5a97", size = 13171776 }, - { url = "https://files.pythonhosted.org/packages/34/b0/ca92b90859070a1487827dbc672f998da95ce83edce1270fc23f96f1f61a/scikit_learn-1.6.1-cp313-cp313-win_amd64.whl", hash = "sha256:7a1c43c8ec9fde528d664d947dc4c0789be4077a3647f232869f41d9bf50e0fb", size = 11071865 }, - { url = "https://files.pythonhosted.org/packages/12/ae/993b0fb24a356e71e9a894e42b8a9eec528d4c70217353a1cd7a48bc25d4/scikit_learn-1.6.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a17c1dea1d56dcda2fac315712f3651a1fea86565b64b48fa1bc090249cbf236", size = 11955804 }, - { url = "https://files.pythonhosted.org/packages/d6/54/32fa2ee591af44507eac86406fa6bba968d1eb22831494470d0a2e4a1eb1/scikit_learn-1.6.1-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:6a7aa5f9908f0f28f4edaa6963c0a6183f1911e63a69aa03782f0d924c830a35", size = 11100530 }, - { url = "https://files.pythonhosted.org/packages/3f/58/55856da1adec655bdce77b502e94a267bf40a8c0b89f8622837f89503b5a/scikit_learn-1.6.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0650e730afb87402baa88afbf31c07b84c98272622aaba002559b614600ca691", size = 12433852 }, - { url = "https://files.pythonhosted.org/packages/ff/4f/c83853af13901a574f8f13b645467285a48940f185b690936bb700a50863/scikit_learn-1.6.1-cp313-cp313t-win_amd64.whl", hash = "sha256:3f59fe08dc03ea158605170eb52b22a105f238a5d512c4470ddeca71feae8e5f", size = 11337256 }, -] - [[package]] name = 
"scipy" version = "1.15.2" @@ -2935,20 +2759,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0a/c8/b3f566db71461cabd4b2d5b39bcc24a7e1c119535c8361f81426be39bb47/scipy-1.15.2-cp313-cp313t-win_amd64.whl", hash = "sha256:fe8a9eb875d430d81755472c5ba75e84acc980e4a8f6204d402849234d3017db", size = 40477705 }, ] -[[package]] -name = "seaborn" -version = "0.13.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "matplotlib" }, - { name = "numpy" }, - { name = "pandas" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/86/59/a451d7420a77ab0b98f7affa3a1d78a313d2f7281a57afb1a34bae8ab412/seaborn-0.13.2.tar.gz", hash = "sha256:93e60a40988f4d65e9f4885df477e2fdaff6b73a9ded434c1ab356dd57eefff7", size = 1457696 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/83/11/00d3c3dfc25ad54e731d91449895a79e4bf2384dc3ac01809010ba88f6d5/seaborn-0.13.2-py3-none-any.whl", hash = "sha256:636f8336facf092165e27924f223d3c62ca560b1f2bb5dff7ab7fad265361987", size = 294914 }, -] - [[package]] name = "send2trash" version = "1.8.3" @@ -2958,18 +2768,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/40/b0/4562db6223154aa4e22f939003cb92514c79f3d4dccca3444253fd17f902/Send2Trash-1.8.3-py3-none-any.whl", hash = "sha256:0c31227e0bd08961c7665474a3d1ef7193929fedda4233843689baa056be46c9", size = 18072 }, ] -[[package]] -name = "session-info" -version = "1.0.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "stdlib-list" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/f5/dc/4a0c85aee2034be368d3ca293a563128122dde6db6e1bc9ca9ef3472c731/session_info-1.0.1.tar.gz", hash = "sha256:d71950d5a8ce7f7f7d5e86aa208c148c4e50b5440b77d5544d422b48e4f3ed41", size = 24663 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c5/c4/f6b7c0ec5241a2bde90c7ba1eca6ba44f8488bcedafe9072c79593015ec0/session_info-1.0.1-py3-none-any.whl", hash = 
"sha256:451d191e51816070b9f21a6ff3f6eb5d6015ae2738e8db63ac4e6398260a5838", size = 9119 }, -] - [[package]] name = "setuptools" version = "75.8.0" @@ -3074,42 +2872,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d9/61/f2b52e107b1fc8944b33ef56bf6ac4ebbe16d91b94d2b87ce013bf63fb84/starlette-0.45.3-py3-none-any.whl", hash = "sha256:dfb6d332576f136ec740296c7e8bb8c8a7125044e7c6da30744718880cdd059d", size = 71507 }, ] -[[package]] -name = "statsmodels" -version = "0.14.4" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "numpy" }, - { name = "packaging" }, - { name = "pandas" }, - { name = "patsy" }, - { name = "scipy" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/1f/3b/963a015dd8ea17e10c7b0e2f14d7c4daec903baf60a017e756b57953a4bf/statsmodels-0.14.4.tar.gz", hash = "sha256:5d69e0f39060dc72c067f9bb6e8033b6dccdb0bae101d76a7ef0bcc94e898b67", size = 20354802 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f5/99/654fd41a9024643ee70b239e5ebc987bf98ce9fc2693bd550bee58136564/statsmodels-0.14.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:5221dba7424cf4f2561b22e9081de85f5bb871228581124a0d1b572708545199", size = 10220508 }, - { url = "https://files.pythonhosted.org/packages/67/d8/ac30cf4cf97adaa48548be57e7cf02e894f31b45fd55bf9213358d9781c9/statsmodels-0.14.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:17672b30c6b98afe2b095591e32d1d66d4372f2651428e433f16a3667f19eabb", size = 9912317 }, - { url = "https://files.pythonhosted.org/packages/e0/77/2440d551eaf27f9c1d3650e13b3821a35ad5b21d3a19f62fb302af9203e8/statsmodels-0.14.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ab5e6312213b8cfb9dca93dd46a0f4dccb856541f91d3306227c3d92f7659245", size = 10301662 }, - { url = "https://files.pythonhosted.org/packages/fa/e1/60a652f18996a40a7410aeb7eb476c18da8a39792c7effe67f06883e9852/statsmodels-0.14.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:4bbb150620b53133d6cd1c5d14c28a4f85701e6c781d9b689b53681effaa655f", size = 10741763 }, - { url = "https://files.pythonhosted.org/packages/81/0c/2453eec3ac25e300847d9ed97f41156de145e507391ecb5ac989e111e525/statsmodels-0.14.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:bb695c2025d122a101c2aca66d2b78813c321b60d3a7c86bb8ec4467bb53b0f9", size = 10879534 }, - { url = "https://files.pythonhosted.org/packages/59/9a/e466a1b887a1441141e52dbcc98152f013d85076576da6eed2357f2016ae/statsmodels-0.14.4-cp312-cp312-win_amd64.whl", hash = "sha256:7f7917a51766b4e074da283c507a25048ad29a18e527207883d73535e0dc6184", size = 9823866 }, - { url = "https://files.pythonhosted.org/packages/31/f8/2662e6a101315ad336f75168fa9bac71f913ebcb92a6be84031d84a0f21f/statsmodels-0.14.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b5a24f5d2c22852d807d2b42daf3a61740820b28d8381daaf59dcb7055bf1a79", size = 10186886 }, - { url = "https://files.pythonhosted.org/packages/fa/c0/ee6e8ed35fc1ca9c7538c592f4974547bf72274bc98db1ae4a6e87481a83/statsmodels-0.14.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:df4f7864606fa843d7e7c0e6af288f034a2160dba14e6ccc09020a3cf67cb092", size = 9880066 }, - { url = "https://files.pythonhosted.org/packages/d1/97/3380ca6d8fd66cfb3d12941e472642f26e781a311c355a4e97aab2ed0216/statsmodels-0.14.4-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:91341cbde9e8bea5fb419a76e09114e221567d03f34ca26e6d67ae2c27d8fe3c", size = 10283521 }, - { url = "https://files.pythonhosted.org/packages/fe/2a/55c5b5c5e5124a202ea3fe0bcdbdeceaf91b4ec6164b8434acb9dd97409c/statsmodels-0.14.4-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1322286a7bfdde2790bf72d29698a1b76c20b8423a55bdcd0d457969d0041f72", size = 10723228 }, - { url = "https://files.pythonhosted.org/packages/4f/76/67747e49dc758daae06f33aad8247b718cd7d224f091d2cd552681215bb2/statsmodels-0.14.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = 
"sha256:e31b95ac603415887c9f0d344cb523889cf779bc52d68e27e2d23c358958fec7", size = 10859503 }, - { url = "https://files.pythonhosted.org/packages/1d/eb/cb8b01f5edf8f135eb3d0553d159db113a35b2948d0e51eeb735e7ae09ea/statsmodels-0.14.4-cp313-cp313-win_amd64.whl", hash = "sha256:81030108d27aecc7995cac05aa280cf8c6025f6a6119894eef648997936c2dd0", size = 9817574 }, -] - -[[package]] -name = "stdlib-list" -version = "0.11.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/5d/09/8d5c564931ae23bef17420a6c72618463a59222ca4291a7dd88de8a0d490/stdlib_list-0.11.1.tar.gz", hash = "sha256:95ebd1d73da9333bba03ccc097f5bac05e3aa03e6822a0c0290f87e1047f1857", size = 60442 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/88/c7/4102536de33c19d090ed2b04e90e7452e2e3dc653cf3323208034eaaca27/stdlib_list-0.11.1-py3-none-any.whl", hash = "sha256:9029ea5e3dfde8cd4294cfd4d1797be56a67fc4693c606181730148c3fd1da29", size = 83620 }, -] - [[package]] name = "tenacity" version = "9.0.0" @@ -3133,15 +2895,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6a/9e/2064975477fdc887e47ad42157e214526dcad8f317a948dee17e1659a62f/terminado-0.18.1-py3-none-any.whl", hash = "sha256:a4468e1b37bb318f8a86514f65814e1afc977cf29b3992a4500d9dd305dcceb0", size = 14154 }, ] -[[package]] -name = "threadpoolctl" -version = "3.5.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/bd/55/b5148dcbf72f5cde221f8bfe3b6a540da7aa1842f6b491ad979a6c8b84af/threadpoolctl-3.5.0.tar.gz", hash = "sha256:082433502dd922bf738de0d8bcc4fdcbf0979ff44c42bd40f5af8a282f6fa107", size = 41936 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/4b/2c/ffbf7a134b9ab11a67b0cf0726453cedd9c5043a4fe7a35d1cefa9a1bcfb/threadpoolctl-3.5.0-py3-none-any.whl", hash = "sha256:56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467", size = 18414 }, -] - [[package]] name = "tiktoken" version = 
"0.9.0" From 8ab9263d80effda457d2de22e2d126f50bf05c65 Mon Sep 17 00:00:00 2001 From: Ludovico Mitchener Date: Wed, 7 May 2025 23:02:13 -0700 Subject: [PATCH 3/4] update --- tutorial/platform_api.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tutorial/platform_api.ipynb b/tutorial/platform_api.ipynb index c8f0eb5..4ee6cb3 100644 --- a/tutorial/platform_api.ipynb +++ b/tutorial/platform_api.ipynb @@ -22,7 +22,7 @@ "outputs": [], "source": [ "# Instantiate the FutureHouse client with your API key\n", - "FH_API_KEY = \"d6X5e7zz8E29Y+FtYCTBHg.platformv01.eyJqdGkiOiJmZTkzNmFjNS03YzliLTRjNGQtYWY0My0xY2YzYzMzODhiMGMiLCJzdWIiOiIxTEtRWE55M2dRYkRKR0hiWFNWT3NjYmpVTTYzIiwiaWF0IjoxNzQzMjE1NzcyfQ.BDLneLT1zuvVDjK9U4rDgqzp09MzuCb+V3Nkdkde9lQ\" # Add your API key here\n", + "FH_API_KEY = \"\" # Add your API key here\n", "JOB_NAME = \"job-futurehouse-data-analysis-crow\" # Don't change this\n", "UPLOAD_ID = (\n", " \"finch_tutorial\" # This is the folder name of the dataset you uploaded to GCS\n", From 4c8321695a81b36bbb19b11d6b0dc184db0b2b21 Mon Sep 17 00:00:00 2001 From: Ludovico Mitchener Date: Wed, 7 May 2025 23:46:25 -0700 Subject: [PATCH 4/4] Remove stage from tutorial --- tutorial/platform_api.ipynb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tutorial/platform_api.ipynb b/tutorial/platform_api.ipynb index 4ee6cb3..df715d9 100644 --- a/tutorial/platform_api.ipynb +++ b/tutorial/platform_api.ipynb @@ -10,7 +10,7 @@ "import time\n", "\n", "from futurehouse_client import FutureHouseClient\n", - "from futurehouse_client.models import Stage, TaskRequest, RuntimeConfig\n", + "from futurehouse_client.models import TaskRequest, RuntimeConfig\n", "from futurehouse_client.models.app import AuthType\n", "import fhda.prompts as prompts" ] @@ -29,7 +29,6 @@ ")\n", "\n", "client = FutureHouseClient(\n", - " stage=Stage.DEV,\n", " auth_type=AuthType.API_KEY,\n", " api_key=FH_API_KEY,\n", ")"