Skip to content

Commit 01279ad

Browse files
ds-sebastianchwilczynski and ds-sebastianchwilczynski
authored
feat: support run_streaming for Agent (#650)
Co-authored-by: ds-sebastianchwilczynski <[email protected]>
1 parent ffceb1c commit 01279ad

File tree

8 files changed

+263
-159
lines changed

8 files changed

+263
-159
lines changed

examples/agents/a2a/agent_orchestrator.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ async def execute_agent_task(self, task_index: int) -> str:
140140

141141
tool_calls = None
142142
if result.tool_calls:
143-
tool_calls = [{"name": tc.name, "arguments": tc.arguments, "output": tc.output} for tc in result.tool_calls]
143+
tool_calls = [{"name": tc.name, "arguments": tc.arguments, "output": tc.result} for tc in result.tool_calls]
144144

145145
return json.dumps(
146146
{

packages/ragbits-agents/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
- Update Agent run method docstring (#565)
88
- Fix AgentResult typing (#600)
99
- Support history handling in Agent (#648)
10+
- Support run_streaming in Agent (#650)
1011
- Support A2A protocol (#649)
1112

1213
## 1.0.0 (2025-06-04)

packages/ragbits-agents/src/ragbits/agents/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
1-
from ragbits.agents._main import Agent, AgentOptions, AgentResult, ToolCallResult
1+
from ragbits.agents._main import Agent, AgentOptions, AgentResult, AgentResultStreaming, ToolCallResult
22
from ragbits.agents.types import QuestionAnswerAgent, QuestionAnswerPromptInput, QuestionAnswerPromptOutput
33

44
__all__ = [
55
"Agent",
66
"AgentOptions",
77
"AgentResult",
8+
"AgentResultStreaming",
89
"QuestionAnswerAgent",
910
"QuestionAnswerPromptInput",
1011
"QuestionAnswerPromptOutput",

packages/ragbits-agents/src/ragbits/agents/_main.py

Lines changed: 158 additions & 115 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,23 @@
1-
from collections.abc import Callable
1+
from collections.abc import AsyncGenerator, AsyncIterator, Callable
22
from copy import deepcopy
33
from dataclasses import dataclass
44
from inspect import getdoc, iscoroutinefunction
5-
from types import ModuleType
5+
from types import ModuleType, SimpleNamespace
66
from typing import Any, ClassVar, Generic, cast, overload
77

88
from a2a.types import AgentCapabilities, AgentCard, AgentSkill
99

1010
from ragbits import agents
1111
from ragbits.agents.exceptions import (
1212
AgentInvalidPromptInputError,
13+
AgentToolExecutionError,
1314
AgentToolNotAvailableError,
1415
AgentToolNotSupportedError,
1516
)
1617
from ragbits.core.audit.traces import trace
17-
from ragbits.core.llms.base import LLM, LLMClientOptionsT, LLMResponseWithMetadata
18+
from ragbits.core.llms.base import LLM, LLMClientOptionsT, LLMResponseWithMetadata, ToolCall
1819
from ragbits.core.options import Options
19-
from ragbits.core.prompt.base import ChatFormat, SimplePrompt
20+
from ragbits.core.prompt.base import BasePrompt, ChatFormat, SimplePrompt
2021
from ragbits.core.prompt.prompt import Prompt, PromptInputT, PromptOutputT
2122
from ragbits.core.types import NOT_GIVEN, NotGiven
2223
from ragbits.core.utils.config_handling import ConfigurableComponent
@@ -28,9 +29,10 @@ class ToolCallResult:
2829
Result of the tool call.
2930
"""
3031

32+
id: str
3133
name: str
3234
arguments: dict
33-
output: Any
35+
result: Any
3436

3537

3638
@dataclass
@@ -58,11 +60,58 @@ class AgentOptions(Options, Generic[LLMClientOptionsT]):
5860
"""The options for the LLM."""
5961

6062

63+
class AgentResultStreaming(AsyncIterator[str | ToolCall | ToolCallResult]):
64+
"""
65+
An async iterator that will collect all yielded items by LLM.generate_streaming(). This object is returned
66+
by `run_streaming`. It can be used in an `async for` loop to process items as they arrive. After the loop completes,
67+
all items are available under the same names as in AgentResult class.
68+
"""
69+
70+
def __init__(self, generator: AsyncGenerator[str | ToolCall | ToolCallResult | SimpleNamespace | BasePrompt]):
71+
self._generator = generator
72+
self.content: str = ""
73+
self.tool_calls: list[ToolCallResult] | None = None
74+
self.metadata: dict = {}
75+
self.history: ChatFormat
76+
77+
def __aiter__(self) -> AsyncIterator[str | ToolCall | ToolCallResult]:
78+
return self
79+
80+
async def __anext__(self) -> str | ToolCall | ToolCallResult:
81+
try:
82+
item = await self._generator.__anext__()
83+
match item:
84+
case str():
85+
self.content += item
86+
case ToolCall():
87+
pass
88+
case ToolCallResult():
89+
if self.tool_calls is None:
90+
self.tool_calls = []
91+
self.tool_calls.append(item)
92+
case BasePrompt():
93+
item.add_assistant_message(self.content)
94+
self.history = item.chat
95+
item = await self._generator.__anext__()
96+
item = cast(SimpleNamespace, item)
97+
item.result = {
98+
"content": self.content,
99+
"metadata": self.metadata,
100+
"tool_calls": self.tool_calls,
101+
}
102+
raise StopAsyncIteration
103+
case _:
104+
raise ValueError(f"Unexpected item: {item}")
105+
return item
106+
except StopAsyncIteration:
107+
raise
108+
109+
61110
class Agent(
62111
ConfigurableComponent[AgentOptions[LLMClientOptionsT]], Generic[LLMClientOptionsT, PromptInputT, PromptOutputT]
63112
):
64113
"""
65-
Agent class that orchestrates the LLM and the prompt.
114+
Agent class that orchestrates the LLM and the prompt, and can call tools.
66115
67116
Current implementation is highly experimental, and the API is subject to change.
68117
"""
@@ -107,7 +156,7 @@ def __init__(
107156
@overload
108157
async def run(
109158
self: "Agent[LLMClientOptionsT, None, PromptOutputT]",
110-
input: str,
159+
input: str | None = None,
111160
options: AgentOptions[LLMClientOptionsT] | None = None,
112161
) -> AgentResult[PromptOutputT]: ...
113162

@@ -118,26 +167,18 @@ async def run(
118167
options: AgentOptions[LLMClientOptionsT] | None = None,
119168
) -> AgentResult[PromptOutputT]: ...
120169

121-
@overload
122170
async def run(
123-
self: "Agent[LLMClientOptionsT, None, PromptOutputT]",
124-
options: AgentOptions[LLMClientOptionsT] | None = None,
125-
) -> AgentResult[PromptOutputT]: ...
126-
127-
async def run(self, *args: Any, **kwargs: Any) -> AgentResult[PromptOutputT]:
171+
self, input: str | PromptInputT | None = None, options: AgentOptions[LLMClientOptionsT] | None = None
172+
) -> AgentResult[PromptOutputT]:
128173
"""
129174
Run the agent. The method is experimental, inputs and outputs may change in the future.
130175
131176
Args:
132-
*args: Positional arguments corresponding to the overload signatures.
133-
- If provided, the first positional argument is interpreted as `input`.
134-
- If a second positional argument is provided, it is interpreted as `options`.
135-
**kwargs: Keyword arguments corresponding to the overload signatures.
136-
- `input`: The input for the agent run. Can be:
137-
- str: A string input that will be used as user message.
138-
- PromptInputT: Structured input for use with structured prompt classes.
139-
- None: No input. Only valid when a string prompt was provided during initialization.
140-
- `options`: The options for the agent run.
177+
input: The input for the agent run. Can be:
178+
- str: A string input that will be used as user message.
179+
- PromptInputT: Structured input for use with structured prompt classes.
180+
- None: No input. Only valid when a string prompt was provided during initialization.
181+
options: The options for the agent run.
141182
142183
Returns:
143184
The result of the agent run.
@@ -147,8 +188,7 @@ async def run(self, *args: Any, **kwargs: Any) -> AgentResult[PromptOutputT]:
147188
AgentToolNotAvailableError: If the selected tool is not available.
148189
AgentInvalidPromptInputError: If the prompt/input combination is invalid.
149190
"""
150-
input = cast(PromptInputT, args[0] if args else kwargs.get("input"))
151-
options = args[1] if len(args) > 1 else kwargs.get("options")
191+
input = cast(PromptInputT, input)
152192

153193
merged_options = (self.default_options | options) if options else self.default_options
154194
llm_options = merged_options.llm_options or None
@@ -170,29 +210,10 @@ async def run(self, *args: Any, **kwargs: Any) -> AgentResult[PromptOutputT]:
170210
break
171211

172212
for tool_call in response.tool_calls:
173-
if tool_call.type != "function":
174-
raise AgentToolNotSupportedError(tool_call.type)
175-
176-
if tool_call.name not in self.tools_mapping:
177-
raise AgentToolNotAvailableError(tool_call.name)
178-
179-
tool = self.tools_mapping[tool_call.name]
180-
tool_output = (
181-
await tool(**tool_call.arguments) if iscoroutinefunction(tool) else tool(**tool_call.arguments)
182-
)
183-
tool_calls.append(
184-
ToolCallResult(
185-
name=tool_call.name,
186-
arguments=tool_call.arguments,
187-
output=tool_output,
188-
)
189-
)
190-
prompt_with_history = prompt_with_history.add_tool_use_message(
191-
id=tool_call.id,
192-
name=tool_call.name,
193-
arguments=tool_call.arguments,
194-
result=tool_output,
195-
)
213+
result = await self._execute_tool(tool_call)
214+
tool_calls.append(result)
215+
216+
prompt_with_history = prompt_with_history.add_tool_use_message(**result.__dict__)
196217

197218
outputs.result = {
198219
"content": response.content,
@@ -212,10 +233,76 @@ async def run(self, *args: Any, **kwargs: Any) -> AgentResult[PromptOutputT]:
212233
history=prompt_with_history.chat,
213234
)
214235

236+
@overload
237+
def run_streaming(
238+
self: "Agent[LLMClientOptionsT, None, PromptOutputT]",
239+
input: str | None = None,
240+
options: AgentOptions[LLMClientOptionsT] | None = None,
241+
) -> AgentResultStreaming: ...
242+
243+
@overload
244+
def run_streaming(
245+
self: "Agent[LLMClientOptionsT, PromptInputT, PromptOutputT]",
246+
input: PromptInputT,
247+
options: AgentOptions[LLMClientOptionsT] | None = None,
248+
) -> AgentResultStreaming: ...
249+
250+
def run_streaming(
251+
self, input: str | PromptInputT | None = None, options: AgentOptions[LLMClientOptionsT] | None = None
252+
) -> AgentResultStreaming:
253+
"""
254+
This method returns an `AgentResultStreaming` object that can be asynchronously
255+
iterated over. After the loop completes, all items are available under the same names as in AgentResult class.
256+
257+
Args:
258+
input: The input for the agent run.
259+
options: The options for the agent run.
260+
261+
Returns:
262+
An `AgentResultStreaming` object for iteration and collection.
263+
264+
Raises:
265+
AgentToolNotSupportedError: If the selected tool type is not supported.
266+
AgentToolNotAvailableError: If the selected tool is not available.
267+
AgentInvalidPromptInputError: If the prompt/input combination is invalid.
268+
"""
269+
generator = self._stream_internal(input, options)
270+
return AgentResultStreaming(generator)
271+
272+
async def _stream_internal(
273+
self, input: str | PromptInputT | None = None, options: AgentOptions[LLMClientOptionsT] | None = None
274+
) -> AsyncGenerator[str | ToolCall | ToolCallResult | SimpleNamespace | BasePrompt]:
275+
input = cast(PromptInputT, input)
276+
merged_options = (self.default_options | options) if options else self.default_options
277+
llm_options = merged_options.llm_options or None
278+
279+
prompt_with_history = self._get_prompt_with_history(input)
280+
with trace(input=input, options=merged_options) as outputs:
281+
while True:
282+
returned_tool_call = False
283+
async for chunk in self.llm.generate_streaming(
284+
prompt=prompt_with_history,
285+
tools=list(self.tools_mapping.values()),
286+
options=llm_options,
287+
):
288+
yield chunk
289+
290+
if isinstance(chunk, ToolCall):
291+
result = await self._execute_tool(chunk)
292+
yield result
293+
prompt_with_history = prompt_with_history.add_tool_use_message(**result.__dict__)
294+
returned_tool_call = True
295+
296+
if not returned_tool_call:
297+
break
298+
yield prompt_with_history
299+
if self.keep_history:
300+
self.history = prompt_with_history.chat
301+
yield outputs
302+
215303
def _get_prompt_with_history(self, input: PromptInputT) -> SimplePrompt | Prompt[PromptInputT, PromptOutputT]:
216304
curr_history = deepcopy(self.history)
217305
if isinstance(self.prompt, type) and issubclass(self.prompt, Prompt):
218-
# If we had actual instance we could just run add_user_message here
219306
if self.keep_history:
220307
self.prompt = self.prompt(input, curr_history)
221308
return self.prompt
@@ -248,6 +335,29 @@ def _get_prompt_with_history(self, input: PromptInputT) -> SimplePrompt | Prompt
248335

249336
return SimplePrompt(curr_history)
250337

338+
async def _execute_tool(self, tool_call: ToolCall) -> ToolCallResult:
339+
if tool_call.type != "function":
340+
raise AgentToolNotSupportedError(tool_call.type)
341+
342+
if tool_call.name not in self.tools_mapping:
343+
raise AgentToolNotAvailableError(tool_call.name)
344+
345+
tool = self.tools_mapping[tool_call.name]
346+
347+
try:
348+
tool_output = (
349+
await tool(**tool_call.arguments) if iscoroutinefunction(tool) else tool(**tool_call.arguments)
350+
)
351+
except Exception as e:
352+
raise AgentToolExecutionError(tool_call.name, e) from e
353+
354+
return ToolCallResult(
355+
id=tool_call.id,
356+
name=tool_call.name,
357+
arguments=tool_call.arguments,
358+
result=tool_output,
359+
)
360+
251361
def get_agent_card(
252362
self,
253363
name: str,
@@ -307,70 +417,3 @@ def _extract_agent_skill(func: Callable) -> AgentSkill:
307417
"""
308418
doc = getdoc(func) or ""
309419
return AgentSkill(name=func.__name__.replace("_", " ").title(), id=func.__name__, description=doc, tags=[])
310-
311-
# TODO: implement run_streaming method according to the comment - https://github.com/deepsense-ai/ragbits/pull/623#issuecomment-2970514478
312-
# @overload
313-
# def run_streaming(
314-
# self: "Agent[LLMClientOptionsT, PromptInputT, str]",
315-
# input: PromptInputT,
316-
# options: AgentOptions[LLMClientOptionsT] | None = None,
317-
# ) -> AsyncGenerator[str | ToolCall, None]: ...
318-
319-
# @overload
320-
# def run_streaming(
321-
# self: "Agent[LLMClientOptionsT, None, str]",
322-
# options: AgentOptions[LLMClientOptionsT] | None = None,
323-
# ) -> AsyncGenerator[str | ToolCall, None]: ...
324-
325-
# async def run_streaming(self, *args: Any, **kwargs: Any) -> AsyncGenerator[str | ToolCall, None]: # noqa: D417
326-
# """
327-
# Run the agent. The method is experimental, inputs and outputs may change in the future.
328-
329-
# Args:
330-
# input: The input for the agent run.
331-
# options: The options for the agent run.
332-
333-
# Yields:
334-
# Response text chunks or tool calls from the Agent.
335-
# """
336-
# input = cast(PromptInputT, args[0] if args else kwargs.get("input"))
337-
# options = args[1] if len(args) > 1 else kwargs.get("options")
338-
339-
# merged_options = (self.default_options | options) if options else self.default_options
340-
# tools = merged_options.tools or None
341-
# llm_options = merged_options.llm_options or None
342-
343-
# prompt = self.prompt(input)
344-
# tools_mapping = {} if not tools else {f.__name__: f for f in tools}
345-
346-
# while True:
347-
# returned_tool_call = False
348-
# async for chunk in self.llm.generate_streaming(
349-
# prompt=prompt,
350-
# tools=tools, # type: ignore
351-
# options=llm_options,
352-
# ):
353-
# yield chunk
354-
355-
# if isinstance(chunk, ToolCall):
356-
# if chunk.type != "function":
357-
# raise AgentToolNotSupportedError(chunk.type)
358-
359-
# if chunk.name not in tools_mapping:
360-
# raise AgentToolNotAvailableError(chunk.name)
361-
362-
# tool = tools_mapping[chunk.name]
363-
# tool_output = (
364-
# await tool(**chunk.arguments) if iscoroutinefunction(tool) else tool(**chunk.arguments)
365-
# )
366-
367-
# prompt = prompt.add_tool_use_message(
368-
# tool_call_id=chunk.id,
369-
# tool_name=chunk.name,
370-
# tool_arguments=chunk.arguments,
371-
# tool_call_result=tool_output,
372-
# )
373-
# returned_tool_call = True
374-
375-
# if not returned_tool_call:
376-
# break

0 commit comments

Comments
 (0)