Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 2 additions & 7 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,20 +13,15 @@ dependencies = [
"pandas==2.2.3",
"numpy==2.2.3",
"matplotlib==3.10.0",
"scipy==1.15.2",
"seaborn==0.13.2",
"scikit-learn==1.6.1",
"statsmodels==0.14.4",
"aiofiles==24.1.0",
"google-auth==2.38.0",
"google-cloud-storage==3.0.0",
"google-cloud-secret-manager==2.23.0",
"futurehouse-client==0.3.15.dev71",
"futurehouse-client==0.3.18.dev25",
"jupyter==1.1.1",
"nbconvert==7.16.6",
"notebook==7.3.2",
"nbformat==5.10.4",
"pydeseq2==0.5.0"
"nbformat==5.10.4"
]
description = "Data analysis crow"
name = "fhda"
Expand Down
2 changes: 1 addition & 1 deletion src/fhda/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@
# FutureHouse client config
ENVIRONMENT = os.getenv("ENVIRONMENT", "prod")
CROW_STAGE = getattr(Stage, ENVIRONMENT.upper(), Stage.PROD)
PLATFORM_API_KEY = os.getenv("CROW_API_KEY")
PLATFORM_API_KEY = os.getenv("CROW_API_KEY", None)
32 changes: 21 additions & 11 deletions src/fhda/data_analysis_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,10 @@ async def query_literature(self, query: str) -> str:
Args:
query: The scientific question to answer
"""

if cfg.PLATFORM_API_KEY is None:
raise Exception("Platform API key is not set")

logger.info("Running PQA query")
client = FutureHouseClient(
stage=cfg.CROW_STAGE,
Expand Down Expand Up @@ -197,27 +201,33 @@ def from_task(
user_id = "default_user"
if trajectory_id is None:
trajectory_id = f"{gcs_artifact_path}-{time.time()}"

if environment_config:
kwargs = {
k: v
for k, v in environment_config.items()
if k in cfg.VALID_FROM_TASK_KWARGS
}
else:
kwargs = {}
environment_config = {}
# Always create a new directory for the trajectory
trajectory_path = (
cfg.DATA_STORAGE_PATH / "user_trajectories" / user_id / trajectory_id
)
if environment_config.get("gcs_override", False):
data_path = cfg.DATA_STORAGE_PATH / gcs_artifact_path
else:
data_path = (
cfg.DATA_STORAGE_PATH / "user_data" / user_id / gcs_artifact_path
)
logger.info("Trajectory path: %s", trajectory_path)
logger.info("Data path: %s", data_path)
trajectory_path.mkdir(parents=True, exist_ok=True)
for item in (cfg.DATA_STORAGE_PATH / gcs_artifact_path).iterdir():
for item in data_path.iterdir():
if item.is_file():
shutil.copy2(item, trajectory_path)
elif item.is_dir():
shutil.copytree(item, trajectory_path / item.name, dirs_exist_ok=True)
if environment_config:
kwargs = {
k: v
for k, v in environment_config.items()
if k in cfg.VALID_FROM_TASK_KWARGS
}
else:
kwargs = {}
environment_config = {}
logger.info("Filtered kwargs: %s", kwargs)

language = getattr(NBLanguage, environment_config.get("language", "PYTHON"))
Expand Down
7 changes: 1 addition & 6 deletions src/fhda/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,7 @@
variable_name <- read_excel("<fpath>.csv", col_names = FALSE, .name_repair = "minimal")
```

3. When printing dataframes, always wrap them in print() statements:
```r
print(head(dataframe))
```

4. Very important: always use the tidyverse package where possible.
3. Very important: always use the tidyverse package where possible.
"""


Expand Down
95 changes: 81 additions & 14 deletions src/scripts/deploy.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,12 @@
)
from futurehouse_client.models.app import TaskQueuesConfig

HIGH = True
HIGH = False
ENVIRONMENT = "DEV"

ENV_VARS = {
"OPENAI_API_KEY": os.environ["OPENAI_API_KEY"],
"ANTHROPIC_API_KEY": os.environ["ANTHROPIC_API_KEY"],
# "OPENAI_API_KEY": os.environ["OPENAI_API_KEY"],
# "ANTHROPIC_API_KEY": os.environ["ANTHROPIC_API_KEY"],
"USE_DOCKER": "false",
"STAGE": ENVIRONMENT,
"ENVIRONMENT": ENVIRONMENT,
Expand All @@ -36,17 +36,85 @@
TEMPERATURE = 1
NUM_RETRIES = 3

agent = AgentConfig(
agent_type="ReActAgent",
agent_kwargs={
# agent = AgentConfig(
# agent_type="ReActAgent",
# agent_kwargs={
# "llm_model": {
# "name": MODEL,
# "temperature": TEMPERATURE,
# "num_retries": NUM_RETRIES,
# },
# "hide_old_env_states": True,
# },
# )

AGENT_MODEL_LIST = [
{
"model_name": "anthropic/claude-3-7-sonnet-20250219",
"litellm_params": {
"model": "anthropic/claude-3-7-sonnet-20250219",
"api_key": os.environ["ANTHROPIC_API_KEY"],
},
},
{
"model_name": "openai/gpt-4.1-2025-04-14",
"litellm_params": {
"model": "openai/gpt-4.1-2025-04-14",
"api_key": os.environ["OPENAI_API_KEY"],
},
},
{
"model_name": "anthropic/claude-3-5-sonnet-20241022",
"litellm_params": {
"model": "anthropic/claude-3-5-sonnet-20241022",
"api_key": os.environ["ANTHROPIC_API_KEY"],
},
},
{
"model_name": "openai/gpt-4o-2024-11-20",
"litellm_params": {
"model": "openai/gpt-4o-2024-11-20",
"api_key": os.environ["OPENAI_API_KEY"],
},
},
]

AGENT_ROUTER_KWARGS = {
"set_verbose": True,
# Fallbacks: if the model named by the dict key fails, try the listed models in list order
"fallbacks": [
{
"openai/gpt-4.1-2025-04-14": [
"anthropic/claude-3-7-sonnet-20250219",
"anthropic/claude-3-5-sonnet-20241022",
"openai/gpt-4o-2024-11-20",
]
}
],
}

AGENT_CONFIG = {
"agent_type": "ReActAgent",
"agent_kwargs": {
"llm_model": {
"name": MODEL,
"temperature": TEMPERATURE,
"num_retries": NUM_RETRIES,
"name": "anthropic/claude-3-7-sonnet-20250219",
"config": {
"model_list": AGENT_MODEL_LIST,
"router_kwargs": AGENT_ROUTER_KWARGS,
"fallbacks": [
{
"openai/gpt-4.1-2025-04-14": [
"anthropic/claude-3-7-sonnet-20250219",
"anthropic/claude-3-5-sonnet-20241022",
"openai/gpt-4o-2024-11-20",
]
}
],
},
},
"hide_old_env_states": True,
},
)
}

CROWS_TO_DEPLOY = [
JobDeploymentConfig(
Expand All @@ -55,8 +123,7 @@
name="data-analysis-crow-high" if HIGH else "data-analysis-crow",
environment="src.fhda.data_analysis_env.DataAnalysisEnv",
environment_variables=ENV_VARS,
# agent="ldp.agent.ReActAgent",
agent=agent,
agent=AgentConfig(**AGENT_CONFIG), # type: ignore
container_config=CONTAINER_CONFIG,
force=True,
frame_paths=frame_paths,
Expand All @@ -79,8 +146,8 @@ def rename_dockerfile(path: Path, new_name: str):

if __name__ == "__main__":
client = FutureHouseClient(
# stage=Stage.from_string(os.environ.get("CROW_ENV", ENV_VARS["STAGE"])),
stage=Stage.from_string(os.environ.get("CROW_ENV", "LOCAL")),
stage=Stage.from_string(os.environ.get("CROW_ENV", ENV_VARS["STAGE"])),
# stage=Stage.from_string(os.environ.get("CROW_ENV", "LOCAL")),
organization="FutureHouse",
auth_type=AuthType.API_KEY,
api_key=os.environ[f"CROW_API_KEY_{ENV_VARS['STAGE']}"],
Expand Down
Loading