99from typing import Any , Literal
1010
1111import datasets
12+ from pdf2image import convert_from_path
1213from pydantic import Field
1314from tapeagents .core import Action , Observation , Step , StopStep , Thought
1415from tapeagents .environment import ContainerExecutor , StatefulTool , Tool
1516from tapeagents .steps import ImageObservation
1617from tapeagents .tools .browser import Browser
1718from tapeagents .tools .code_executor import CodeExecutor
1819from tapeagents .tools .media_reader import VideoReader
20+ from tapeagents .tools .simple_browser import SimpleTextBrowser
1921from tapeagents .tools .web_search import WebSearch
2022
2123from agentlab .benchmarks .abstract_env import AbstractBenchmark , AbstractEnvArgs
@@ -39,11 +41,7 @@ def reset(self, seed=None) -> tuple[list[Observation], dict]:
3941 Reset the state of all the tools and prepare initial observations from the task again
4042 """
4143 super ().reset ()
42- question = GaiaQuestion .from_task (self .task )
43- steps : list [Observation ] = [question ]
44- if image_obs := with_image (question ):
45- steps .append (image_obs )
46- return steps , {}
44+ return task_to_observations (self .task ), {}
4745
4846 def calculate_reward (self , action : Action ) -> float :
4947 if isinstance (action , GaiaAnswer ):
@@ -148,24 +146,81 @@ class GaiaQuestion(Observation):
148146 filename : str | None = None
149147
@classmethod
def from_task(cls, question: dict, files_dir: str = "/tmp/gaia_files"):
    """Build a question observation from a raw GAIA task record.

    Args:
        question: Task record. ``question["Question"]`` is used as the
            observation content; an optional ``"file_path"`` entry points
            to an attachment that accompanies the question.
        files_dir: Directory that receives a copy of the attachment. It is
            created if it does not exist.

    Returns:
        An instance of ``cls`` whose ``content`` is the question text and
        whose ``filename`` is the path of the copied attachment, or ``None``
        when the task has no attachment.

    Raises:
        KeyError: If the record has no ``"Question"`` entry.
        OSError: If the attachment cannot be copied into ``files_dir``.
    """
    os.makedirs(files_dir, exist_ok=True)
    question_prompt = question["Question"]
    filename = None
    # A missing or empty "file_path" both mean "no attachment".
    source_path = question.get("file_path")
    if source_path:
        local_copy = os.path.join(files_dir, os.path.basename(source_path))
        # copyfile raises on failure, so no post-copy existence check is
        # needed (an assert would be stripped under ``python -O`` anyway).
        shutil.copyfile(source_path, local_copy)
        filename = local_copy
    return cls(content=question_prompt, filename=filename)
161160
162161
163- def with_image (question : GaiaQuestion ) -> ImageObservation | None :
164- if question .filename and question .filename .endswith ((".png" , ".jpg" , ".jpeg" )):
165- return ImageObservation (
166- image_path = question .filename ,
167- image_caption = "Attached image" ,
168- )
def task_to_observations(
    task: dict, max_doc_length: int = 8000, max_pdf_pages: int = 3
) -> list[Observation]:
    """Convert a GAIA task into the initial list of observations.

    The question always comes first. An attached file is handled by type:

    * ``zip``: the archive is unpacked next to the file and every top-level
      entry is listed in the question text, with its content when it is
      readable and short enough, and always with its path.
    * ``png`` / ``jpg`` / ``jpeg``: the image is attached as an
      ``ImageObservation``.
    * anything else: the document text is appended to the question when it
      is non-empty and fits ``max_doc_length``. For PDFs the first pages are
      additionally attached as images, and when the whole PDF fits in
      ``max_pdf_pages`` the images replace the text.

    Args:
        task: Raw task record, passed to ``GaiaQuestion.from_task``.
        max_doc_length: Maximum number of characters of document text that
            may be inlined into the question.
        max_pdf_pages: Maximum number of PDF pages rendered as images; PDFs
            with at most this many pages are provided as images only.

    Returns:
        The question observation followed by any image observations.
    """
    question = GaiaQuestion.from_task(task)
    if not question.filename:
        return [question]

    # The browser is only needed once we know there is a file to read.
    browser = SimpleTextBrowser()
    filename: str = question.filename
    question.filename = None
    steps: list[Observation] = []
    # splitext tolerates names without an extension (ext becomes "").
    name, dotted_ext = os.path.splitext(filename)
    ext = dotted_ext.lstrip(".").lower()
    if ext == "zip":
        folder_name = name
        os.makedirs(folder_name, exist_ok=True)
        shutil.unpack_archive(filename, folder_name)
        parts = ["\n\nArchive contains the following files:\n"]
        # Sort so that the numbering is stable across runs and platforms.
        for i, file in enumerate(sorted(os.listdir(folder_name))):
            file_path = os.path.join(folder_name, file)
            try:
                content = browser.get_whole_document(file_path)
            except Exception as e:
                # Best effort: an unreadable entry is still listed by path.
                logger.exception(f"Failed to read archived file {file_path}: {e}")
                content = ""
            file_text = f"{i + 1}. {file}. Content:\n{content}\n\n"
            if not content or len(file_text) > max_doc_length:
                file_text = ""
            # Trailing newline keeps the path from running into the next entry.
            file_text += f"{i + 1}. Path to the '{file}': {file_path}\n"
            parts.append(file_text)
        document_text = "".join(parts)
    elif ext in ("png", "jpg", "jpeg"):
        steps.append(ImageObservation(image_path=filename, image_caption="Attached image"))
        document_text = ""
    else:
        attach_doc_text = True
        if ext == "pdf":
            images, total_pages = pdf_to_images(filename, n_pages=max_pdf_pages)
            # Short PDFs are fully covered by the page images, so skip the text.
            # A PDF that rendered no pages falls back to text extraction.
            if 0 < total_pages <= max_pdf_pages:
                attach_doc_text = False
            for i, img_path in enumerate(images):
                steps.append(ImageObservation(image_path=img_path, image_caption=f"PDF page {i + 1}"))
        if attach_doc_text:
            try:
                content = browser.get_whole_document(filename)
            except Exception as e:
                logger.exception(f"Failed to read document: {e}")
                content = ""
            document_text = f"\n\nAttached {ext.upper()} file content:\n{content}\n"
            if not content or len(document_text) > max_doc_length:
                document_text = ""
        else:
            document_text = "\nDocument pages attached as images below"
        # Documents keep their path on the question; archives and images do not.
        question.filename = filename
    question.content += document_text
    return [question] + steps
211+
212+
def pdf_to_images(filename: str, n_pages: int = 3) -> tuple[list[str], int]:
    """Render the pages of a PDF to PNG files stored next to it.

    Page ``k`` (1-based) is written to ``<filename without extension>_<k>.png``.
    A page whose PNG already exists on disk is not written again.

    Args:
        filename: Path to the PDF file.
        n_pages: Maximum number of page image paths to return.

    Returns:
        A tuple ``(paths, total_pages)``: the paths of the first ``n_pages``
        page images and the total number of pages in the document.
    """
    # splitext instead of a fixed [:-4] slice, so the stem is right for any suffix length.
    base, _ = os.path.splitext(filename)
    images: list[str] = []
    for page_index, image in enumerate(convert_from_path(filename), start=1):
        page_fname = f"{base}_{page_index}.png"
        if not os.path.exists(page_fname):
            image.save(page_fname)
        images.append(page_fname)
    return images[:n_pages], len(images)
169224
170225
171226class GaiaAnswer (StopStep ):
0 commit comments