Skip to content

Commit ec8be75

Browse files
committed
working gaia agent
1 parent 76958ee commit ec8be75

File tree

5 files changed

+91
-17
lines changed

5 files changed

+91
-17
lines changed

conf/gaia_agent.yaml

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
1+
defaults:
2+
3+
- _self_
4+
15
_target_: tapeagents.agent.Agent
2-
name : web_agent
6+
name : gaia_agent
37
max_iterations: 2
4-
llms:
5-
default: ${llm}
68
templates:
79
system_prompt: |
810
You are an expert AI Agent trained to assist users with complex information processing tasks.
@@ -31,18 +33,18 @@ templates:
3133
nodes:
3234
- _target_: tapeagents.nodes.StandardNode
3335
name: plan
34-
system_prompt: ${agent.templates.system_prompt}
36+
system_prompt: ${templates.system_prompt}
3537
guidance: |
3638
Write a concise multi-step plan explaining which steps should be performed to find the answer for the given task.
3739
Remember that you can use web search, browser, python code execution and access the youtube videos to reach your goals.
3840
Be specific about how each step should be performed. Only describe the intended actions here, do not perform them yet.
3941
Consider that next steps may depend on results of previous steps, so include conditional branching using "if" statements where needed.
40-
${agent.templates.thought_format}
41-
steps_prompt: ${agent.templates.allowed_tools}
42+
${templates.thought_format}
43+
steps_prompt: ${templates.allowed_tools}
4244

4345
- _target_: tapeagents.nodes.StandardNode
4446
name: facts_survey
45-
system_prompt: ${agent.templates.system_prompt}
47+
system_prompt: ${templates.system_prompt}
4648
guidance: |
4749
Before we begin executing the plan, please answer the following pre-survey.
4850
Here is the pre-survey:
@@ -51,19 +53,19 @@ nodes:
5153
3. Please list any facts that may need to be derived (e.g., via logical deduction, simulation, or computation)
5254
4. Please list any facts that are recalled from memory, hunches, well-reasoned guesses, etc.
5355
When answering this survey, keep in mind that "facts" will typically be specific names, dates, statistics, etc.
54-
${agent.templates.thought_format}
55-
steps_prompt: ${agent.templates.allowed_tools}
56+
${templates.thought_format}
57+
steps_prompt: ${templates.allowed_tools}
5658

5759
- _target_: tapeagents.nodes.StandardNode
5860
name: act
59-
system_prompt: ${agent.templates.system_prompt}
61+
system_prompt: ${templates.system_prompt}
6062
guidance: |
6163
Produce single next step. If the answer is ready, produce gaia_answer_action.
62-
${agent.templates.format}
63-
steps_prompt: ${agent.templates.allowed_steps}
64+
${templates.format}
65+
steps_prompt: ${templates.allowed_steps}
6466
steps:
6567
- tapeagents.steps.ReasoningThought
66-
- examples.gaia_agent.steps.ExtractedFacts
67-
- examples.gaia_agent.steps.GaiaAnswer
68+
- agentlab.benchmarks.gaia.ExtractedFacts
69+
- agentlab.benchmarks.gaia.GaiaAnswer
6870
use_known_actions: true
6971
next_node: act

conf/llm/gpt4o.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
_target_: tapeagents.llms.LiteLLM
2+
model_name: gpt-4o-2024-08-06
3+
use_cache: false
4+
context_size: 128000
5+
parameters:
6+
temperature: 0.2

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,3 +26,4 @@ matplotlib
2626
ray[default]
2727
python-slugify
2828
pillow
29+
gymnasium>=0.27

src/agentlab/agents/tapeagent.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,11 @@
1515

1616
@dataclass
1717
class TapeAgentArgs(AgentArgs):
18-
config_name: str
18+
agent_name: str
1919

2020
def make_agent(self) -> bgym.Agent:
21-
with hydra.initialize(config_path="./conf"):
22-
config = hydra.compose(config_name=self.config_name)
21+
with hydra.initialize(config_path="../../../conf"):
22+
config = hydra.compose(config_name=self.agent_name)
2323
agent: Agent = hydra.utils.instantiate(config)
2424
return TapeAgent(agent=agent, tape=Tape(steps=[]))
2525

@@ -28,6 +28,11 @@ class TapeAgent(bgym.Agent):
2828
agent: Agent
2929
tape: Tape
3030

31+
def __init__(self, agent: Agent, tape: Tape):
32+
super().__init__()
33+
self.agent = agent
34+
self.tape = tape
35+
3136
def obs_preprocessor(self, obs: dict) -> Any:
3237
logger.info(f"Preprocessing observation: {obs}")
3338
return obs

src/agentlab/benchmarks/gaia.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
import os
2+
import shutil
23
from typing import Any, Literal
34

45
import bgym
56
import datasets
7+
from pydantic import Field
8+
from tapeagents.core import Observation, StopStep, Thought
69
from tapeagents.environment import ContainerExecutor
710
from tapeagents.tools.browser import Browser
811
from tapeagents.tools.code_executor import CodeExecutor
@@ -68,3 +71,60 @@ def init_code_sandbox(self) -> None:
6871
stop_container=False,
6972
no_deps=True,
7073
)
74+
75+
76+
class ExtractedFacts(Thought):
    """
    Thought that contains the list of facts extracted from the document
    """

    # NOTE(review): the docstring above is reproduced verbatim on purpose; it
    # may be surfaced in the step schema presented to the model - confirm
    # before rewording it.

    # Literal tag pinned to a single allowed value for this step type.
    kind: Literal["extracted_facts_thought"] = "extracted_facts_thought"

    # No default is supplied, so a value must be provided. The annotation
    # accepts a list of strings, a string-keyed mapping, or a plain string.
    extracted_facts: list[str] | dict[str, Any] | str = Field(description="facts extracted from the observation")
85+
86+
87+
class GaiaQuestion(Observation):
    # Observation holding the task text and, optionally, the path of a local
    # copy of the task's attachment. Documented with comments rather than a
    # class docstring so that any schema derived from the class is unchanged.
    kind: Literal["question"] = "question"
    content: str
    filename: str | None = None

    @classmethod
    def from_task(cls, question: dict) -> "GaiaQuestion":
        """Build a question observation from a task record.

        When the record names an attachment, the file is copied to
        ``/tmp/<basename>`` and the observation points at that copy.

        Args:
            question: Task record. ``"Question"`` is required. ``"file_name"``
                is optional; an absent or empty value means no attachment.

        Returns:
            A new instance whose ``content`` is the question text and whose
            ``filename`` is the copied attachment path, or ``None``.

        Raises:
            KeyError: If ``"Question"`` is missing from the record.
            OSError: If the attachment cannot be copied.
            FileNotFoundError: If the copy is not present after copying.
        """
        question_prompt = question["Question"]

        # `.get` tolerates records that carry no "file_name" key at all.
        attachment = question.get("file_name")
        if not attachment:
            return cls(content=question_prompt, filename=None)

        # NOTE(review): attachments sharing a basename overwrite each other in
        # /tmp - confirm this is acceptable for concurrent tasks.
        tmp_fname = f"/tmp/{os.path.basename(attachment)}"
        shutil.copyfile(attachment, tmp_fname)

        # Explicit check instead of `assert`, which is removed under `-O`.
        if not os.path.exists(tmp_fname):
            raise FileNotFoundError(f"Attachment copy is missing: {tmp_fname}")

        return cls(content=question_prompt, filename=tmp_fname)
103+
104+
105+
class GaiaAnswer(StopStep):
    """
    Action that indicates the agent has finished the plan and contains the answer or description of failure.
    The answer should use already determined facts without additional conversion!
    Your final answer should be a number OR as few words as possible OR a comma-separated list of numbers and/or strings.
    ADDITIONALLY, your final answer MUST follow any formatting instructions specified in the original question (e.g., alphabetization, sequencing, units, rounding, decimal places, etc.)
    If asked for a number, express it numerically, don't use commas, do not add anything after the number, don't include units such as $ or percent signs unless specified otherwise in the question.
    If asked for a string, don't use articles or abbreviations (e.g. for cities), unless specified otherwise. Don't output any final sentence punctuation such as '.', '!', or '?'.
    If asked for a comma-separated list, apply the above rules depending on whether the elements are numbers or strings.
    If unable to determine the final answer, output an empty string.
    """

    # NOTE(review): the docstring above reads as instructions addressed to the
    # model and is reproduced verbatim - treat it as runtime text, not as
    # ordinary documentation.

    # Literal tag pinned to a single allowed value for this step type.
    kind: Literal["gaia_answer_action"] = "gaia_answer_action"

    # Every field below is declared without a default, so each must be given.
    success: bool = Field(description="True if the task was successful, False otherwise")
    overview: str = Field(
        description="List of steps performed to answer the question. If the task was not successful, includes the reason for failure"
    )
    answer_unit: str = Field(description="Unit of measurement for the answer, if applicable; otherwise an empty string")
    # Typed as Any: the annotation does not constrain the answer's type.
    answer: Any = Field(description="Short final answer")
    long_answer: str = Field(description="Detailed final answer not restricted by format rules")

0 commit comments

Comments
 (0)