1+ import json
12import re
23import ast
34import time
910from rich .console import Group
1011from rich .text import Text
1112
12- from smolagents .agents import CodeAgent , handle_agent_output_types , AgentError
13+ from smolagents .agents import CodeAgent , handle_agent_output_types , AgentError , ActionOutput , RunResult
1314from smolagents .local_python_executor import fix_final_answer_code
1415from smolagents .memory import ActionStep , PlanningStep , FinalAnswerStep , ToolCall , TaskStep , SystemPromptStep
15- from smolagents .models import ChatMessage
16- from smolagents .monitoring import LogLevel
17- from smolagents .utils import AgentExecutionError , AgentGenerationError , truncate_content
16+ from smolagents .models import ChatMessage , CODEAGENT_RESPONSE_FORMAT
17+ from smolagents .monitoring import LogLevel , Timing , YELLOW_HEX , TokenUsage
18+ from smolagents .utils import AgentExecutionError , AgentGenerationError , truncate_content , AgentMaxStepsError , \
19+ extract_code_from_text
1820
1921from ..utils .observer import MessageObserver , ProcessType
2022from jinja2 import Template , StrictUndefined
@@ -125,13 +127,17 @@ def _step_stream(self, memory_step: ActionStep) -> Generator[Any]:
125127
126128 # Add new step in logs
127129 memory_step .model_input_messages = input_messages
130+ stop_sequences = ["<END_CODE>" , "Observation:" , "Calling tools:" , "<END_CODE" ]
131+
128132 try :
129- additional_args = {
130- "grammar" : self .grammar } if self .grammar is not None else {}
133+ additional_args : dict [str , Any ] = {}
134+ if self ._use_structured_outputs_internally :
135+ additional_args ["response_format" ] = CODEAGENT_RESPONSE_FORMAT
131136 chat_message : ChatMessage = self .model (input_messages ,
132- stop_sequences = [ "<END_CODE>" , "Observation:" , "Calling tools:" , "<END_CODE" ] , ** additional_args )
137+ stop_sequences = stop_sequences , ** additional_args )
133138 memory_step .model_output_message = chat_message
134139 model_output = chat_message .content
140+ memory_step .token_usage = chat_message .token_usage
135141 memory_step .model_output = model_output
136142
137143 self .logger .log_markdown (
@@ -145,7 +151,13 @@ def _step_stream(self, memory_step: ActionStep) -> Generator[Any]:
145151
146152 # Parse
147153 try :
148- code_action = fix_final_answer_code (parse_code_blobs (model_output ))
154+ if self ._use_structured_outputs_internally :
155+ code_action = json .loads (model_output )["code" ]
156+ code_action = extract_code_from_text (code_action , self .code_block_tags ) or code_action
157+ else :
158+ code_action = parse_code_blobs (model_output )
159+ code_action = fix_final_answer_code (code_action )
160+ memory_step .code_action = code_action
149161 # Record parsing results
150162 self .observer .add_message (
151163 self .agent_name , ProcessType .PARSE , code_action )
@@ -155,26 +167,29 @@ def _step_stream(self, memory_step: ActionStep) -> Generator[Any]:
155167 content = model_output , title = "AGENT FINAL ANSWER" , level = LogLevel .INFO )
156168 raise FinalAnswerError ()
157169
158- memory_step .tool_calls = [
159- ToolCall (name = "python_interpreter" , arguments = code_action , id = f"call_{ len (self .memory .steps )} " , )]
170+ tool_call = ToolCall (
171+ name = "python_interpreter" ,
172+ arguments = code_action ,
173+ id = f"call_{ len (self .memory .steps )} " ,
174+ )
175+ memory_step .tool_calls = [tool_call ]
160176
161177 # Execute
162178 self .logger .log_code (title = "Executing parsed code:" ,
163179 content = code_action , level = LogLevel .INFO )
164- is_final_answer = False
165180 try :
166- output , execution_logs , is_final_answer = self .python_executor (
167- code_action )
168-
181+ code_output = self .python_executor (code_action )
169182 execution_outputs_console = []
170- if len (execution_logs ) > 0 :
183+ if len (code_output . logs ) > 0 :
171184 # Record execution results
172185 self .observer .add_message (
173- self .agent_name , ProcessType .EXECUTION_LOGS , f"{ execution_logs } " )
186+ self .agent_name , ProcessType .EXECUTION_LOGS , f"{ code_output . logs } " )
174187
175188 execution_outputs_console += [
176- Text ("Execution logs:" , style = "bold" ), Text (execution_logs ), ]
177- observation = "Execution logs:\n " + execution_logs
189+ Text ("Execution logs:" , style = "bold" ),
190+ Text (code_output .logs ),
191+ ]
192+ observation = "Execution logs:\n " + code_output .logs
178193 except Exception as e :
179194 if hasattr (self .python_executor , "state" ) and "_print_outputs" in self .python_executor .state :
180195 execution_logs = str (
@@ -196,20 +211,24 @@ def _step_stream(self, memory_step: ActionStep) -> Generator[Any]:
196211 level = LogLevel .INFO , )
197212 raise AgentExecutionError (error_msg , self .logger )
198213
199- truncated_output = truncate_content (str (output ))
200- if output is not None :
214+ truncated_output = None
215+ if code_output is not None and code_output .output is not None :
216+ truncated_output = truncate_content (str (code_output .output ))
201217 observation += "Last output from code snippet:\n " + truncated_output
202218 memory_step .observations = observation
203219
204- execution_outputs_console += [
205- Text (f"{ ('Out - Final answer' if is_final_answer else 'Out' )} : { truncated_output } " ,
206- style = ("bold #d4b702" if is_final_answer else "" ), ), ]
220+ if not code_output .is_final_answer and truncated_output is not None :
221+ execution_outputs_console += [
222+ Text (
223+ f"Out: { truncated_output } " ,
224+ ),
225+ ]
207226 self .logger .log (Group (* execution_outputs_console ), level = LogLevel .INFO )
208- memory_step .action_output = output
209- yield output if is_final_answer else None
227+ memory_step .action_output = code_output . output
228+ yield ActionOutput ( output = code_output . output , is_final_answer = code_output . is_final_answer )
210229
211230 def run (self , task : str , stream : bool = False , reset : bool = True , images : Optional [List [str ]] = None ,
212- additional_args : Optional [Dict ] = None , max_steps : Optional [int ] = None , ):
231+ additional_args : Optional [Dict ] = None , max_steps : Optional [int ] = None , return_full_result : bool | None = None ):
213232 """
214233 Run the agent for the given task.
215234
@@ -220,6 +239,8 @@ def run(self, task: str, stream: bool = False, reset: bool = True, images: Optio
220239 images (`list[str]`, *optional*): Paths to image(s).
221240 additional_args (`dict`, *optional*): Any other variables that you want to pass to the agent run, for instance images or dataframes. Give them clear names!
222241 max_steps (`int`, *optional*): Maximum number of steps the agent can take to solve the task. if not provided, will use the agent's default value.
242+ return_full_result (`bool`, *optional*): Whether to return the full [`RunResult`] object or just the final answer output.
243+ If `None` (default), the agent's `self.return_full_result` setting is used.
223244
224245 Example:
225246 ```py
@@ -236,7 +257,6 @@ def run(self, task: str, stream: bool = False, reset: bool = True, images: Optio
236257You have been provided with these additional arguments, that you can access using the keys as variables in your python code:
237258{ str (additional_args )} ."""
238259
239- self .system_prompt = self .initialize_system_prompt ()
240260 self .memory .system_prompt = SystemPromptStep (
241261 system_prompt = self .system_prompt )
242262 if reset :
@@ -261,8 +281,47 @@ def run(self, task: str, stream: bool = False, reset: bool = True, images: Optio
261281 if stream :
262282 # The steps are returned as they are executed through a generator to iterate on.
263283 return self ._run_stream (task = self .task , max_steps = max_steps , images = images )
284+ run_start_time = time .time ()
285+ steps = list (self ._run_stream (task = self .task , max_steps = max_steps , images = images ))
286+
264287 # Outputs are returned only at the end. We only look at the last step.
265- return list (self ._run_stream (task = self .task , max_steps = max_steps , images = images ))[- 1 ].final_answer
288+ assert isinstance (steps [- 1 ], FinalAnswerStep )
289+ output = steps [- 1 ].output
290+
291+ return_full_result = return_full_result if return_full_result is not None else self .return_full_result
292+ if return_full_result :
293+ total_input_tokens = 0
294+ total_output_tokens = 0
295+ correct_token_usage = True
296+ for step in self .memory .steps :
297+ if isinstance (step , (ActionStep , PlanningStep )):
298+ if step .token_usage is None :
299+ correct_token_usage = False
300+ break
301+ else :
302+ total_input_tokens += step .token_usage .input_tokens
303+ total_output_tokens += step .token_usage .output_tokens
304+ if correct_token_usage :
305+ token_usage = TokenUsage (input_tokens = total_input_tokens , output_tokens = total_output_tokens )
306+ else :
307+ token_usage = None
308+
309+ if self .memory .steps and isinstance (getattr (self .memory .steps [- 1 ], "error" , None ), AgentMaxStepsError ):
310+ state = "max_steps_error"
311+ else :
312+ state = "success"
313+
314+ step_dicts = self .memory .get_full_steps ()
315+
316+ return RunResult (
317+ output = output ,
318+ token_usage = token_usage ,
319+ steps = step_dicts ,
320+ timing = Timing (start_time = run_start_time , end_time = time .time ()),
321+ state = state ,
322+ )
323+
324+ return output
266325
267326 def __call__ (self , task : str , ** kwargs ):
268327 """Adds additional prompting for the managed agent, runs it, and wraps the output.
@@ -271,7 +330,11 @@ def __call__(self, task: str, **kwargs):
271330 full_task = Template (self .prompt_templates ["managed_agent" ]["task" ], undefined = StrictUndefined ).render ({
272331 "name" : self .name , "task" : task , ** self .state
273332 })
274- report = self .run (full_task , ** kwargs )
333+ result = self .run (full_task , ** kwargs )
334+ if isinstance (result , RunResult ):
335+ report = result .output
336+ else :
337+ report = result
275338
276339 # When a sub-agent finishes running, return a marker
277340 try :
@@ -286,7 +349,7 @@ def __call__(self, task: str, **kwargs):
286349 if self .provide_run_summary :
287350 answer += "\n \n For more detail, find below a summary of this agent's work:\n <summary_of_work>\n "
288351 for message in self .write_memory_to_messages (summary_mode = True ):
289- content = message [ " content" ]
352+ content = message . content
290353 answer += "\n " + truncate_content (str (content )) + "\n ---"
291354 answer += "\n </summary_of_work>"
292355 return answer
@@ -295,37 +358,52 @@ def _run_stream(
295358 self , task : str , max_steps : int , images : list ["PIL.Image.Image" ] | None = None
296359 ) -> Generator [ActionStep | PlanningStep | FinalAnswerStep ]:
297360 final_answer = None
361+ action_step = None
298362 self .step_number = 1
299- while final_answer is None and self .step_number <= max_steps and not self .stop_event .is_set ():
363+ returned_final_answer = False
364+ while not returned_final_answer and self .step_number <= max_steps and not self .stop_event .is_set ():
300365 step_start_time = time .time ()
301366
302367 action_step = ActionStep (
303- step_number = self .step_number , start_time = step_start_time , observations_images = images
368+ step_number = self .step_number , timing = Timing ( start_time = step_start_time ) , observations_images = images
304369 )
305370 try :
306- for el in self ._execute_step (action_step ):
307- yield el
308- final_answer = el
371+ for output in self ._step_stream (action_step ):
372+ yield output
373+
374+ if isinstance (output , ActionOutput ) and output .is_final_answer :
375+ final_answer = output .output
376+ self .logger .log (
377+ Text (f"Final answer: { final_answer } " , style = f"bold { YELLOW_HEX } " ),
378+ level = LogLevel .INFO ,
379+ )
380+
381+ if self .final_answer_checks :
382+ self ._validate_final_answer (final_answer )
383+ returned_final_answer = True
384+ action_step .is_final_answer = True
385+
309386 except FinalAnswerError :
310387 # When the model does not output code, directly treat the large model content as the final answer
311388 final_answer = action_step .model_output
312389 if isinstance (final_answer , str ):
313390 final_answer = convert_code_format (final_answer )
391+ returned_final_answer = True
392+ action_step .is_final_answer = True
314393
315394 except AgentError as e :
316395 action_step .error = e
317396
318397 finally :
319- self ._finalize_step (action_step , step_start_time )
398+ self ._finalize_step (action_step )
320399 self .memory .steps .append (action_step )
321400 yield action_step
322401 self .step_number += 1
323402
324403 if self .stop_event .is_set ():
325404 final_answer = "<user_break>"
326405
327- if final_answer is None and self .step_number == max_steps + 1 :
328- final_answer = self ._handle_max_steps_reached (
329- task , images , step_start_time )
406+ if not returned_final_answer and self .step_number == max_steps + 1 :
407+ final_answer = self ._handle_max_steps_reached (task )
330408 yield action_step
331409 yield FinalAnswerStep (handle_agent_output_types (final_answer ))
0 commit comments