Skip to content

Commit af63567

Browse files
seanzhougooglecopybara-github
authored andcommitted
feat: Support both output_schema and tools at the same time in LlmAgent
1. Allow developers to specify output schema and tools together. 2. If both are specified, do the following: 2.1 Do not set output schema on the model config 2.2 Add a special tool called set_model_response(result) 2.3 `result` has the same schema as the requested output_schema 2.4 Instruct the model to use set_model_response() to output its final result, rather than output text directly. 2.5 When the set_model_response() is called, ADK will extract its content and put it in a text part, so the client would treat it as the model response. PiperOrigin-RevId: 792686011
1 parent b4ce3b1 commit af63567

File tree

10 files changed

+1098
-14
lines changed

10 files changed

+1098
-14
lines changed

src/google/adk/agents/llm_agent.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -499,12 +499,6 @@ def __check_output_schema(self):
499499
' sub_agents must be empty to disable agent transfer.'
500500
)
501501

502-
if self.tools:
503-
raise ValueError(
504-
f'Invalid config for agent {self.name}: if output_schema is set,'
505-
' tools must be empty'
506-
)
507-
508502
@field_validator('generate_content_config', mode='after')
509503
@classmethod
510504
def __validate_generate_content_config(
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""Handles output schema when tools are also present."""
16+
17+
from __future__ import annotations
18+
19+
import json
20+
from typing import AsyncGenerator
21+
22+
from typing_extensions import override
23+
24+
from ...agents.invocation_context import InvocationContext
25+
from ...events.event import Event
26+
from ...models.llm_request import LlmRequest
27+
from ...tools.set_model_response_tool import SetModelResponseTool
28+
from ._base_llm_processor import BaseLlmRequestProcessor
29+
30+
31+
class _OutputSchemaRequestProcessor(BaseLlmRequestProcessor):
  """Request processor enabling output_schema alongside other tools.

  When an LlmAgent declares both an output schema and tools, the schema is
  not set on the model config (see basic.py). Instead this processor
  registers a synthetic `set_model_response` tool and instructs the model to
  deliver its final structured answer through that tool.
  """

  @override
  async def run_async(
      self, invocation_context: InvocationContext, llm_request: LlmRequest
  ) -> AsyncGenerator[Event, None]:
    from ...agents.llm_agent import LlmAgent

    agent = invocation_context.agent
    # Only applies to LlmAgents that declare both an output schema and tools.
    if (
        not isinstance(agent, LlmAgent)
        or not agent.output_schema
        or not agent.tools
    ):
      return

    # Register the synthetic tool that captures the structured response.
    llm_request.append_tools([SetModelResponseTool(agent.output_schema)])

    # Tell the model to finish via set_model_response instead of plain text.
    instruction = (
        'IMPORTANT: You have access to other tools, but you must provide '
        'your final response using the set_model_response tool with the '
        'required structured format. After using any other tools needed '
        'to complete the task, always call set_model_response with your '
        'final answer in the specified schema format.'
    )
    llm_request.append_instructions([instruction])

    return
    yield  # Unreachable; makes this coroutine an async generator.
64+
65+
66+
def create_final_model_response_event(
    invocation_context: InvocationContext, json_response: str
) -> Event:
  """Wrap set_model_response JSON output as a normal model response event.

  Args:
    invocation_context: The invocation context.
    json_response: The JSON response from set_model_response tool.

  Returns:
    A new Event that looks like a normal model response.
  """
  from google.genai import types

  # Package the JSON as a single text part so downstream consumers treat it
  # exactly like text the model emitted itself.
  response_content = types.Content(
      role='model', parts=[types.Part(text=json_response)]
  )
  event = Event(author=invocation_context.agent.name)
  event.content = response_content
  return event
86+
87+
88+
def get_structured_model_response(function_response_event: Event) -> str | None:
  """Check if function response contains set_model_response and extract JSON.

  Args:
    function_response_event: The function response event to check.

  Returns:
    JSON response string if set_model_response was called, None otherwise.
  """
  if not function_response_event:
    return None

  # Fetch once instead of calling get_function_responses() twice as the
  # original did (once for the guard, once for the loop).
  function_responses = function_response_event.get_function_responses()
  if not function_responses:
    return None

  for func_response in function_responses:
    if func_response.name == 'set_model_response':
      # The tool returns a validated dict; serialize it so it can be placed
      # in a text part of the final model response event.
      return json.dumps(func_response.response)

  return None
109+
110+
111+
# Export the processors
112+
request_processor = _OutputSchemaRequestProcessor()

src/google/adk/flows/llm_flows/base_llm_flow.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
from websockets.exceptions import ConnectionClosed
2929
from websockets.exceptions import ConnectionClosedOK
3030

31+
from . import _output_schema_processor
3132
from . import functions
3233
from ...agents.base_agent import BaseAgent
3334
from ...agents.callback_context import CallbackContext
@@ -500,8 +501,21 @@ async def _postprocess_live(
500501
function_response_event = await functions.handle_function_calls_live(
501502
invocation_context, model_response_event, llm_request.tools_dict
502503
)
504+
# Always yield the function response event first
503505
yield function_response_event
504506

507+
# Check if this is a set_model_response function response
508+
if json_response := _output_schema_processor.get_structured_model_response(
509+
function_response_event
510+
):
511+
# Create and yield a final model response event
512+
final_event = (
513+
_output_schema_processor.create_final_model_response_event(
514+
invocation_context, json_response
515+
)
516+
)
517+
yield final_event
518+
505519
transfer_to_agent = function_response_event.actions.transfer_to_agent
506520
if transfer_to_agent:
507521
agent_to_run = self._get_agent_to_run(
@@ -532,7 +546,20 @@ async def _postprocess_handle_function_calls_async(
532546
if auth_event:
533547
yield auth_event
534548

549+
# Always yield the function response event first
535550
yield function_response_event
551+
552+
# Check if this is a set_model_response function response
553+
if json_response := _output_schema_processor.get_structured_model_response(
554+
function_response_event
555+
):
556+
# Create and yield a final model response event
557+
final_event = (
558+
_output_schema_processor.create_final_model_response_event(
559+
invocation_context, json_response
560+
)
561+
)
562+
yield final_event
536563
transfer_to_agent = function_response_event.actions.transfer_to_agent
537564
if transfer_to_agent:
538565
agent_to_run = self._get_agent_to_run(

src/google/adk/flows/llm_flows/basic.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,11 @@ async def run_async(
5050
if agent.generate_content_config
5151
else types.GenerateContentConfig()
5252
)
53-
if agent.output_schema:
53+
# Only set output_schema if no tools are specified. As of now, models don't
54+
# support output_schema and tools together, so we have a workaround to support
55+
# both output_schema and tools at the same time; see
56+
# _output_schema_processor.py for details.
57+
if agent.output_schema and not agent.tools:
5458
llm_request.set_output_schema(agent.output_schema)
5559

5660
llm_request.live_connect_config.response_modalities = (

src/google/adk/flows/llm_flows/single_flow.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,13 @@
1414

1515
"""Implementation of single flow."""
1616

17+
from __future__ import annotations
18+
1719
import logging
1820

1921
from . import _code_execution
2022
from . import _nl_planning
23+
from . import _output_schema_processor
2124
from . import basic
2225
from . import contents
2326
from . import identity
@@ -50,6 +53,9 @@ def __init__(self):
5053
# Code execution should be after the contents as it mutates the contents
5154
# to optimize data files.
5255
_code_execution.request_processor,
56+
# The output schema processor adds a system instruction and the
57+
# set_model_response tool when both output_schema and tools are present.
58+
_output_schema_processor.request_processor,
5359
]
5460
self.response_processors += [
5561
_nl_planning.response_processor,
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""Tool for setting model response when using output_schema with other tools."""
16+
17+
from __future__ import annotations
18+
19+
from typing import Any
20+
from typing import Optional
21+
22+
from google.genai import types
23+
from pydantic import BaseModel
24+
from typing_extensions import override
25+
26+
from ._automatic_function_calling_util import build_function_declaration
27+
from .base_tool import BaseTool
28+
from .tool_context import ToolContext
29+
30+
MODEL_JSON_RESPONSE_KEY = 'temp:__adk_model_response__'
31+
32+
33+
class SetModelResponseTool(BaseTool):
  """Internal tool used for output schema workaround.

  This tool allows the model to set its final response when output_schema
  is configured alongside other tools. The model should use this tool to
  provide its final structured response instead of outputting text directly.
  """

  def __init__(self, output_schema: type[BaseModel]):
    """Initialize the tool with the expected output schema.

    Args:
      output_schema: The pydantic model class defining the expected output
        structure.
    """
    # Kept so run_async can validate the model's arguments against it.
    self.output_schema = output_schema

    # Create a function that matches the output schema
    def set_model_response() -> str:
      """Set your final response using the required output schema.

      Use this tool to provide your final structured answer instead
      of outputting text directly.
      """
      # This body is never executed on the response path: run_async below
      # validates and returns the call arguments directly. The function
      # exists for its name, docstring, and (patched) signature only.
      return 'Response set successfully.'

    # Add the schema fields as parameters to the function dynamically
    import inspect

    schema_fields = output_schema.model_fields
    params = []
    for field_name, field_info in schema_fields.items():
      param = inspect.Parameter(
          field_name,
          inspect.Parameter.KEYWORD_ONLY,
          annotation=field_info.annotation,
      )
      params.append(param)

    # Create new signature with schema parameters
    # Presumably build_function_declaration introspects __signature__, so
    # patching it makes the declared tool parameters mirror the schema's
    # fields — TODO confirm against _automatic_function_calling_util.
    new_sig = inspect.Signature(parameters=params)
    setattr(set_model_response, '__signature__', new_sig)

    self.func = set_model_response

    # The inner function's name and docstring become the tool's name and
    # description as surfaced to the model.
    super().__init__(
        name=self.func.__name__,
        description=self.func.__doc__.strip() if self.func.__doc__ else '',
    )

  @override
  def _get_declaration(self) -> Optional[types.FunctionDeclaration]:
    """Gets the OpenAPI specification of this tool."""
    # Declaration is derived from the dynamically-signed function built in
    # __init__, so its parameters match the output schema's fields.
    function_decl = types.FunctionDeclaration.model_validate(
        build_function_declaration(
            func=self.func,
            ignore_params=[],
            variant=self._api_variant,
        )
    )
    return function_decl

  @override
  async def run_async(
      self, *, args: dict[str, Any], tool_context: ToolContext  # pylint: disable=unused-argument
  ) -> dict[str, Any]:
    """Process the model's response and return the validated dict.

    Args:
      args: The structured response data matching the output schema.
      tool_context: Tool execution context.

    Returns:
      The validated response as dict.

    Raises:
      pydantic.ValidationError: If args do not conform to output_schema.
    """
    # Validate the input matches the expected schema
    validated_response = self.output_schema.model_validate(args)

    # Return the validated dict directly
    return validated_response.model_dump()

tests/unittests/agents/test_llm_agent_fields.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -201,19 +201,18 @@ class Schema(BaseModel):
201201
)
202202

203203

204-
def test_output_schema_with_tools_will_throw():
204+
def test_output_schema_with_tools_will_not_throw():
  class Schema(BaseModel):
    pass

  def _a_tool():
    pass

  # Constructing an agent with both output_schema and tools must succeed.
  agent = LlmAgent(
      name='test_agent',
      output_schema=Schema,
      tools=[_a_tool],
  )
217216

218217

219218
def test_before_model_callback():

0 commit comments

Comments
 (0)