Skip to content

Commit 33ebfda

Browse files
authored
Misc changes (#18)
1 parent 5d70e7d commit 33ebfda

File tree

7 files changed

+217
-357
lines changed

7 files changed

+217
-357
lines changed

pyproject.toml

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,20 +13,15 @@ dependencies = [
1313
"pandas==2.2.3",
1414
"numpy==2.2.3",
1515
"matplotlib==3.10.0",
16-
"scipy==1.15.2",
17-
"seaborn==0.13.2",
18-
"scikit-learn==1.6.1",
19-
"statsmodels==0.14.4",
2016
"aiofiles==24.1.0",
2117
"google-auth==2.38.0",
2218
"google-cloud-storage==3.0.0",
2319
"google-cloud-secret-manager==2.23.0",
24-
"futurehouse-client==0.3.15.dev71",
20+
"futurehouse-client==0.3.18.dev25",
2521
"jupyter==1.1.1",
2622
"nbconvert==7.16.6",
2723
"notebook==7.3.2",
28-
"nbformat==5.10.4",
29-
"pydeseq2==0.5.0"
24+
"nbformat==5.10.4"
3025
]
3126
description = "Data analysis crow"
3227
name = "fhda"

src/fhda/config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,4 +25,4 @@
2525
# FutureHouse client config
2626
ENVIRONMENT = os.getenv("ENVIRONMENT", "prod")
2727
CROW_STAGE = getattr(Stage, ENVIRONMENT.upper(), Stage.PROD)
28-
PLATFORM_API_KEY = os.getenv("CROW_API_KEY")
28+
PLATFORM_API_KEY = os.getenv("CROW_API_KEY", None)

src/fhda/data_analysis_env.py

Lines changed: 21 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,10 @@ async def query_literature(self, query: str) -> str:
9696
Args:
9797
query: The scientific question to answer
9898
"""
99+
100+
if cfg.PLATFORM_API_KEY is None:
101+
raise Exception("Platform API key is not set")
102+
99103
logger.info("Running PQA query")
100104
client = FutureHouseClient(
101105
stage=cfg.CROW_STAGE,
@@ -197,27 +201,33 @@ def from_task(
197201
user_id = "default_user"
198202
if trajectory_id is None:
199203
trajectory_id = f"{gcs_artifact_path}-{time.time()}"
200-
204+
if environment_config:
205+
kwargs = {
206+
k: v
207+
for k, v in environment_config.items()
208+
if k in cfg.VALID_FROM_TASK_KWARGS
209+
}
210+
else:
211+
kwargs = {}
212+
environment_config = {}
201213
# Always create a new directory for the trajectory
202214
trajectory_path = (
203215
cfg.DATA_STORAGE_PATH / "user_trajectories" / user_id / trajectory_id
204216
)
217+
if environment_config.get("gcs_override", False):
218+
data_path = cfg.DATA_STORAGE_PATH / gcs_artifact_path
219+
else:
220+
data_path = (
221+
cfg.DATA_STORAGE_PATH / "user_data" / user_id / gcs_artifact_path
222+
)
205223
logger.info("Trajectory path: %s", trajectory_path)
224+
logger.info("Data path: %s", data_path)
206225
trajectory_path.mkdir(parents=True, exist_ok=True)
207-
for item in (cfg.DATA_STORAGE_PATH / gcs_artifact_path).iterdir():
226+
for item in data_path.iterdir():
208227
if item.is_file():
209228
shutil.copy2(item, trajectory_path)
210229
elif item.is_dir():
211230
shutil.copytree(item, trajectory_path / item.name, dirs_exist_ok=True)
212-
if environment_config:
213-
kwargs = {
214-
k: v
215-
for k, v in environment_config.items()
216-
if k in cfg.VALID_FROM_TASK_KWARGS
217-
}
218-
else:
219-
kwargs = {}
220-
environment_config = {}
221231
logger.info("Filtered kwargs: %s", kwargs)
222232

223233
language = getattr(NBLanguage, environment_config.get("language", "PYTHON"))

src/fhda/prompts.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -40,12 +40,7 @@
4040
variable_name <- read_excel("<fpath>.csv", col_names = FALSE, .name_repair = "minimal")
4141
```
4242
43-
3. When printing dataframes, always wrap them in print() statements:
44-
```r
45-
print(head(dataframe))
46-
```
47-
48-
4. Very important: always use the tidyverse package where possible.
43+
3. Very important: always use the tidyverse package where possible.
4944
"""
5045

5146

src/scripts/deploy.py

Lines changed: 81 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,12 @@
1212
)
1313
from futurehouse_client.models.app import TaskQueuesConfig
1414

15-
HIGH = True
15+
HIGH = False
1616
ENVIRONMENT = "DEV"
1717

1818
ENV_VARS = {
19-
"OPENAI_API_KEY": os.environ["OPENAI_API_KEY"],
20-
"ANTHROPIC_API_KEY": os.environ["ANTHROPIC_API_KEY"],
19+
# "OPENAI_API_KEY": os.environ["OPENAI_API_KEY"],
20+
# "ANTHROPIC_API_KEY": os.environ["ANTHROPIC_API_KEY"],
2121
"USE_DOCKER": "false",
2222
"STAGE": ENVIRONMENT,
2323
"ENVIRONMENT": ENVIRONMENT,
@@ -36,17 +36,85 @@
3636
TEMPERATURE = 1
3737
NUM_RETRIES = 3
3838

39-
agent = AgentConfig(
40-
agent_type="ReActAgent",
41-
agent_kwargs={
39+
# agent = AgentConfig(
40+
# agent_type="ReActAgent",
41+
# agent_kwargs={
42+
# "llm_model": {
43+
# "name": MODEL,
44+
# "temperature": TEMPERATURE,
45+
# "num_retries": NUM_RETRIES,
46+
# },
47+
# "hide_old_env_states": True,
48+
# },
49+
# )
50+
51+
AGENT_MODEL_LIST = [
52+
{
53+
"model_name": "anthropic/claude-3-7-sonnet-20250219",
54+
"litellm_params": {
55+
"model": "anthropic/claude-3-7-sonnet-20250219",
56+
"api_key": os.environ["ANTHROPIC_API_KEY"],
57+
},
58+
},
59+
{
60+
"model_name": "openai/gpt-4.1-2025-04-14",
61+
"litellm_params": {
62+
"model": "openai/gpt-4.1-2025-04-14",
63+
"api_key": os.environ["OPENAI_API_KEY"],
64+
},
65+
},
66+
{
67+
"model_name": "anthropic/claude-3-5-sonnet-20241022",
68+
"litellm_params": {
69+
"model": "anthropic/claude-3-5-sonnet-20241022",
70+
"api_key": os.environ["ANTHROPIC_API_KEY"],
71+
},
72+
},
73+
{
74+
"model_name": "openai/gpt-4o-2024-11-20",
75+
"litellm_params": {
76+
"model": "openai/gpt-4o-2024-11-20",
77+
"api_key": os.environ["OPENAI_API_KEY"],
78+
},
79+
},
80+
]
81+
82+
AGENT_ROUTER_KWARGS = {
83+
"set_verbose": True,
84+
# if the model named by the dict key fails, fall back to the listed models in order
85+
"fallbacks": [
86+
{
87+
"openai/gpt-4.1-2025-04-14": [
88+
"anthropic/claude-3-7-sonnet-20250219",
89+
"anthropic/claude-3-5-sonnet-20241022",
90+
"openai/gpt-4o-2024-11-20",
91+
]
92+
}
93+
],
94+
}
95+
96+
AGENT_CONFIG = {
97+
"agent_type": "ReActAgent",
98+
"agent_kwargs": {
4299
"llm_model": {
43-
"name": MODEL,
44-
"temperature": TEMPERATURE,
45-
"num_retries": NUM_RETRIES,
100+
"name": "anthropic/claude-3-7-sonnet-20250219",
101+
"config": {
102+
"model_list": AGENT_MODEL_LIST,
103+
"router_kwargs": AGENT_ROUTER_KWARGS,
104+
"fallbacks": [
105+
{
106+
"openai/gpt-4.1-2025-04-14": [
107+
"anthropic/claude-3-7-sonnet-20250219",
108+
"anthropic/claude-3-5-sonnet-20241022",
109+
"openai/gpt-4o-2024-11-20",
110+
]
111+
}
112+
],
113+
},
46114
},
47115
"hide_old_env_states": True,
48116
},
49-
)
117+
}
50118

51119
CROWS_TO_DEPLOY = [
52120
JobDeploymentConfig(
@@ -55,8 +123,7 @@
55123
name="data-analysis-crow-high" if HIGH else "data-analysis-crow",
56124
environment="src.fhda.data_analysis_env.DataAnalysisEnv",
57125
environment_variables=ENV_VARS,
58-
# agent="ldp.agent.ReActAgent",
59-
agent=agent,
126+
agent=AgentConfig(**AGENT_CONFIG), # type: ignore
60127
container_config=CONTAINER_CONFIG,
61128
force=True,
62129
frame_paths=frame_paths,
@@ -79,8 +146,8 @@ def rename_dockerfile(path: Path, new_name: str):
79146

80147
if __name__ == "__main__":
81148
client = FutureHouseClient(
82-
# stage=Stage.from_string(os.environ.get("CROW_ENV", ENV_VARS["STAGE"])),
83-
stage=Stage.from_string(os.environ.get("CROW_ENV", "LOCAL")),
149+
stage=Stage.from_string(os.environ.get("CROW_ENV", ENV_VARS["STAGE"])),
150+
# stage=Stage.from_string(os.environ.get("CROW_ENV", "LOCAL")),
84151
organization="FutureHouse",
85152
auth_type=AuthType.API_KEY,
86153
api_key=os.environ[f"CROW_API_KEY_{ENV_VARS['STAGE']}"],

0 commit comments

Comments
 (0)