Skip to content

Commit e98a0c2

Browse files
committed
render attached gaia task files into steps
1 parent 3fd383d commit e98a0c2

File tree

1 file changed

+68
-13
lines changed

1 file changed

+68
-13
lines changed

src/agentlab/benchmarks/gaia.py

Lines changed: 68 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,15 @@
99
from typing import Any, Literal
1010

1111
import datasets
12+
from pdf2image import convert_from_path
1213
from pydantic import Field
1314
from tapeagents.core import Action, Observation, Step, StopStep, Thought
1415
from tapeagents.environment import ContainerExecutor, StatefulTool, Tool
1516
from tapeagents.steps import ImageObservation
1617
from tapeagents.tools.browser import Browser
1718
from tapeagents.tools.code_executor import CodeExecutor
1819
from tapeagents.tools.media_reader import VideoReader
20+
from tapeagents.tools.simple_browser import SimpleTextBrowser
1921
from tapeagents.tools.web_search import WebSearch
2022

2123
from agentlab.benchmarks.abstract_env import AbstractBenchmark, AbstractEnvArgs
@@ -39,11 +41,7 @@ def reset(self, seed=None) -> tuple[list[Observation], dict]:
3941
Reset the state of all the tools and prepare initial observations from the task again
4042
"""
4143
super().reset()
42-
question = GaiaQuestion.from_task(self.task)
43-
steps: list[Observation] = [question]
44-
if image_obs := with_image(question):
45-
steps.append(image_obs)
46-
return steps, {}
44+
return task_to_observations(self.task), {}
4745

4846
def calculate_reward(self, action: Action) -> float:
4947
if isinstance(action, GaiaAnswer):
@@ -148,24 +146,81 @@ class GaiaQuestion(Observation):
148146
filename: str | None = None
149147

150148
@classmethod
def from_task(cls, question: dict, files_dir: str = "/tmp/gaia_files"):
    """Build a question observation from a GAIA task record.

    When the task has an attached file, the file is copied into ``files_dir``
    (keeping its base name) and the path of the copy is stored in ``filename``.

    Args:
        question: Task record. ``"Question"`` holds the prompt text and
            ``"file_path"`` the path of the attached file; an empty or
            missing ``"file_path"`` means there is no attachment.
        files_dir: Directory that receives the copy. Created if missing.

    Returns:
        An instance of ``cls`` whose ``content`` is the prompt and whose
        ``filename`` is the path of the copied attachment, or ``None``.

    Raises:
        KeyError: If the record has no ``"Question"`` entry.
        OSError: If the directory cannot be created or the copy fails.
    """
    os.makedirs(files_dir, exist_ok=True)
    question_prompt = question["Question"]
    filename = None
    # .get(): a record without the key is treated like one with an empty path.
    source_path = question.get("file_path")
    if source_path:
        filename = os.path.join(files_dir, os.path.basename(source_path))
        shutil.copyfile(source_path, filename)
        # Explicit check instead of `assert`, which is stripped under `python -O`.
        if not os.path.exists(filename):
            raise FileNotFoundError(f"Attachment was not copied to {filename}")
    return cls(content=question_prompt, filename=filename)
161160

162161

163-
def with_image(question: GaiaQuestion) -> ImageObservation | None:
    """Return an image observation for a question whose attachment is an image.

    Args:
        question: Question observation; its ``filename`` is the path of the
            attached file, or ``None`` when there is no attachment.

    Returns:
        An ``ImageObservation`` pointing at the attachment when its extension
        is png/jpg/jpeg, otherwise ``None``.
    """
    image_extensions = (".png", ".jpg", ".jpeg")
    attachment = question.filename
    # Compare case-insensitively so attachments such as "photo.PNG" are not skipped.
    if not attachment or not attachment.lower().endswith(image_extensions):
        return None
    return ImageObservation(
        image_path=attachment,
        image_caption="Attached image",
    )
162+
def _read_document_text(browser, path: str) -> str:
    """Return the text the browser extracts from ``path``, or "" if extraction fails."""
    try:
        return browser.get_whole_document(path)
    except Exception as e:
        # Best effort: an unreadable attachment must not abort the whole task.
        logger.exception(f"Failed to read document: {e}")
        return ""


def task_to_observations(task: dict, max_doc_length: int = 8000) -> list[Observation]:
    """Turn a GAIA task record into the initial list of observations.

    The first observation is always the question. Depending on the attached
    file, its text is extended and extra observations are appended:

    * no attachment: the question alone is returned;
    * zip archive: unpacked next to the archive; each member is listed with
      its path and, when short enough, its extracted text;
    * png/jpg/jpeg: one image observation is appended;
    * pdf: the first pages are appended as image observations; the extracted
      text is added only when the document has more than 3 pages;
    * anything else: the extracted text is added when it is non-empty and
      fits within ``max_doc_length``.

    ``question.filename`` is kept only for pdf and other document
    attachments; for archives and images it is reset to ``None``.

    Args:
        task: Task record as accepted by ``GaiaQuestion.from_task``.
        max_doc_length: Maximum number of characters of a rendered document
            block (header included) that may be inlined into the question.

    Returns:
        The question observation followed by any image observations.
    """
    question = GaiaQuestion.from_task(task)
    if not question.filename:
        return [question]

    browser = SimpleTextBrowser()
    filename: str = question.filename
    question.filename = None
    steps: list[Observation] = []
    # splitext only looks at the last path component, so a dot in a directory
    # name or a file without any extension cannot break the split.
    name, ext = os.path.splitext(filename)
    ext = ext.lstrip(".").lower()
    if ext == "zip":
        folder_name = name
        os.makedirs(folder_name, exist_ok=True)
        shutil.unpack_archive(filename, folder_name)
        entries = []
        # Sorted so the numbering is stable across runs and file systems.
        for i, file in enumerate(sorted(os.listdir(folder_name)), start=1):
            file_path = os.path.join(folder_name, file)
            content = _read_document_text(browser, file_path)
            file_text = f"{i}. {file}. Content:\n{content}\n\n"
            if not content or len(file_text) > max_doc_length:
                # Unreadable or oversized member: list its path only.
                file_text = ""
            entries.append(f"{file_text}{i}. Path to the '{file}': {file_path}\n")
        document_text = "\n\nArchive contains the following files:\n" + "".join(entries)
    elif ext in ("png", "jpg", "jpeg"):
        steps.append(ImageObservation(image_path=filename, image_caption="Attached image"))
        document_text = ""
    else:
        attach_doc_text = True
        if ext == "pdf":
            images, total_pages = pdf_to_images(filename)
            if total_pages <= 3:
                # Short PDFs are fully covered by the page images.
                attach_doc_text = False
            for i, img_path in enumerate(images):
                steps.append(ImageObservation(image_path=img_path, image_caption=f"PDF page {i+1}"))
        if attach_doc_text:
            content = _read_document_text(browser, filename)
            kind = f"{ext.upper()} " if ext else ""
            document_text = f"\n\nAttached {kind}file content:\n{content}\n"
            if not content or len(document_text) > max_doc_length:
                document_text = ""
        else:
            document_text = "\nDocument pages attached as images below"
        # Documents keep their path on the question so it stays available downstream.
        question.filename = filename
    question.content += document_text
    return [question, *steps]
211+
212+
213+
def pdf_to_images(filename: str, n_pages: int = 3) -> tuple[list[str], int]:
    """Render the first pages of a PDF to PNG files stored next to it.

    Page ``k`` (1-based) of ``doc.pdf`` is written to ``doc_k.png``. Files
    that already exist are reused, and nothing is rendered when every
    requested page is already on disk.

    Args:
        filename: Path of the PDF file.
        n_pages: Maximum number of leading pages to render.

    Returns:
        A tuple ``(image_paths, total_pages)``: the PNG paths of at most
        ``n_pages`` leading pages, and the page count of the whole document.
    """
    # Local import: pdf2image is already a dependency of this module, but only
    # convert_from_path is imported at file level.
    from pdf2image import pdfinfo_from_path

    # splitext instead of filename[:-4]: do not assume a 4-character extension.
    stem, _ = os.path.splitext(filename)
    total_pages = int(pdfinfo_from_path(filename)["Pages"])
    wanted = min(max(n_pages, 0), total_pages)
    page_paths = [f"{stem}_{page_index}.png" for page_index in range(1, wanted + 1)]

    # Render only the pages that are returned, and only if some are missing;
    # rasterising a whole long PDF just to count its pages is wasteful.
    if wanted and not all(os.path.exists(path) for path in page_paths):
        rendered = convert_from_path(filename, first_page=1, last_page=wanted)
        for path, image in zip(page_paths, rendered):
            if not os.path.exists(path):
                image.save(path)
    return page_paths, total_pages
169224

170225

171226
class GaiaAnswer(StopStep):

0 commit comments

Comments
 (0)