feat(chat-agent): add citation and trajectory extensions (#1000)

aleskalfas · web-flow · commit d0248d835043 · 2025-08-04T13:18:58.000+02:00
Signed-off-by: Aleš Kalfas <kalfas.ales@gmail.com>
diff --git a/agents/official/beeai-framework/chat/pyproject.toml b/agents/official/beeai-framework/chat/pyproject.toml
@@ -7,7 +7,7 @@ authors = [
 ]
 requires-python = ">=3.13,<4"
 dependencies = [
-    "beeai-framework[duckduckgo,wikipedia]~=0.1.31",
+    "beeai-framework[duckduckgo,wikipedia]~=0.1.34",
     "beeai-sdk",
     "openinference-instrumentation-beeai>=0.1.6",
     "pydantic-settings>=2.9.0",
diff --git a/agents/official/beeai-framework/chat/src/chat/agent.py b/agents/official/beeai-framework/chat/src/chat/agent.py
@@ -2,11 +2,13 @@
 # SPDX-License-Identifier: Apache-2.0
 import logging
 import os
+from typing import Annotated
 import uuid
 from collections import defaultdict
 from textwrap import dedent
 
 from a2a.types import (
+    AgentCapabilities,
     AgentSkill,
     Artifact,
     FilePart,
@@ -21,6 +23,7 @@
 from beeai_framework.agents.experimental.events import (
     RequirementAgentSuccessEvent,
 )
+from beeai_framework.agents.experimental.utils._tool import FinalAnswerTool
 from beeai_framework.backend.types import ChatModelParameters
 from beeai_framework.memory import UnconstrainedMemory
 from beeai_framework.middleware.trajectory import GlobalTrajectoryMiddleware
@@ -29,10 +32,19 @@
 from beeai_framework.tools.search.wikipedia import WikipediaTool
 from beeai_framework.tools.weather.openmeteo import OpenMeteoTool
 
-from beeai_sdk.a2a.extensions import AgentDetail, AgentDetailTool
+from beeai_sdk.a2a.extensions import (
+    AgentDetail,
+    AgentDetailTool,
+    CitationExtensionServer,
+    CitationExtensionSpec,
+    TrajectoryExtensionServer,
+    TrajectoryExtensionSpec,
+)
 from beeai_sdk.a2a.types import AgentMessage
 from beeai_sdk.server import Server
 from beeai_sdk.server.context import Context
+from chat.helpers.citations import extract_citations
+from chat.helpers.trajectory import TrajectoryContent
 from openinference.instrumentation.beeai import BeeAIInstrumentor
 
 from chat.tools.files.file_creator import FileCreatorTool, FileCreatorToolOutput
@@ -51,9 +63,12 @@
 
 BeeAIInstrumentor().instrument()
 ## TODO: https://github.com/phoenixframework/phoenix/issues/6224
-logging.getLogger("opentelemetry.exporter.otlp.proto.http._log_exporter").setLevel(logging.CRITICAL)
-logging.getLogger("opentelemetry.exporter.otlp.proto.http.metric_exporter").setLevel(logging.CRITICAL)
-
+logging.getLogger("opentelemetry.exporter.otlp.proto.http._log_exporter").setLevel(
+    logging.CRITICAL
+)
+logging.getLogger("opentelemetry.exporter.otlp.proto.http.metric_exporter").setLevel(
+    logging.CRITICAL
+)
 
 logger = logging.getLogger(__name__)
 
@@ -76,9 +91,17 @@
         ui_type="chat",
         user_greeting="How can I help you?",
         tools=[
-            AgentDetailTool(name="Web Search (DuckDuckGo)", description="Retrieves real-time search results."),
-            AgentDetailTool(name="Wikipedia Search", description="Fetches summaries from Wikipedia."),
-            AgentDetailTool(name="Weather Information (OpenMeteo)", description="Provides real-time weather updates."),
+            AgentDetailTool(
+                name="Web Search (DuckDuckGo)",
+                description="Retrieves real-time search results.",
+            ),
+            AgentDetailTool(
+                name="Wikipedia Search", description="Fetches summaries from Wikipedia."
+            ),
+            AgentDetailTool(
+                name="Weather Information (OpenMeteo)",
+                description="Provides real-time weather updates.",
+            ),
         ],
         framework="BeeAI",
     ),
@@ -114,23 +137,44 @@
                 """
             ),
             tags=["chat"],
-            examples=["Please find a room in LA, CA, April 15, 2025, checkout date is april 18, 2 adults"],
+            examples=[
+                "Please find a room in LA, CA, April 15, 2025, checkout date is april 18, 2 adults"
+            ],
         )
     ],
 )
-async def chat(message: Message, context: Context):
+async def chat(
+    message: Message,
+    context: Context,
+    trajectory: Annotated[TrajectoryExtensionServer, TrajectoryExtensionSpec()],
+    citation: Annotated[CitationExtensionServer, CitationExtensionSpec()],
+):
     """
     The agent is an AI-powered conversational system with memory, supporting real-time search, Wikipedia lookups,
     and weather updates through integrated tools.
     """
-    extracted_files = await extract_files(history=messages[context.context_id], incoming_message=message)
+    extracted_files = await extract_files(
+        history=messages[context.context_id], incoming_message=message
+    )
     input = to_framework_message(message)
 
     # Configure tools
     file_reader_tool_class = create_file_reader_tool_class(
         extracted_files
     )  # Dynamically created tool input schema based on real provided files ensures that small LLMs can't hallucinate the input
 
+    FinalAnswerTool.description = """Assemble and send the final answer to the user. When using information gathered from other tools that provided URL addresses, you MUST properly cite them using markdown citation format: [description](URL).
+
+Citation Requirements:
+- Use descriptive text that summarizes the source content
+- Include the exact URL provided by the tool
+- Place citations inline where the information is referenced
+
+Examples:
+- According to [OpenAI's latest announcement](https://example.com/gpt5), GPT-5 will be released next year.
+- Recent studies show [AI adoption has increased by 67%](https://example.com/ai-study) in enterprise environments.
+- Weather data indicates [temperatures will reach 25°C tomorrow](https://weather.example.com/forecast).""" # type: ignore
+
     tools = [
         # Auxiliary tools
         ActTool(),  # Enforces correct thinking sequence by requiring tool selection before execution
@@ -180,6 +224,16 @@ async def chat(message: Message, context: Context):
 
         last_step = event.state.steps[-1] if event.state.steps else None
         if last_step and last_step.tool is not None:
+            trajectory_content = TrajectoryContent(
+                input=last_step.input,
+                output=last_step.output,
+                error=last_step.error,
+            )
+            yield trajectory.trajectory_metadata(
+                title=last_step.tool.name,
+                content=trajectory_content.model_dump_json(),
+            )
+
             if isinstance(last_step.output, FileCreatorToolOutput):
                 result = last_step.output.result
                 for file_info in result.files:
@@ -205,7 +259,15 @@ async def chat(message: Message, context: Context):
 
     if final_answer:
         framework_messages[context.context_id].append(final_answer)
-        message = AgentMessage(text=final_answer.text)
+
+        citations, clean_text = extract_citations(final_answer.text)
+
+        message = AgentMessage(
+            text=clean_text,
+            metadata=(
+                citation.citation_metadata(citations=citations) if citations else None
+            ),
+        )
         messages[context.context_id].append(message)
         yield message
 
diff --git a/agents/official/beeai-framework/chat/src/chat/helpers/citations.py b/agents/official/beeai-framework/chat/src/chat/helpers/citations.py
@@ -0,0 +1,56 @@
+# Copyright 2025 © BeeAI a Series of LF Projects, LLC
+# SPDX-License-Identifier: Apache-2.0
+
+import re
+from beeai_sdk.a2a.extensions import Citation
+
+
+def extract_citations(text: str) -> tuple[list[Citation], str]:
+    """
+    Extract citations from markdown-style links and return cleaned text.
+
+    This function parses text containing markdown-style citations in the format
+    [citation_text](url) and extracts them into Citation objects while cleaning
+    the original text to contain only the citation content.
+
+    Args:
+        text (str): Input text containing markdown-style citations
+
+    Returns:
+        tuple[list[Citation], str]: A tuple containing:
+            - List of Citation objects with metadata
+            - Cleaned text with citation links replaced by content only
+
+    Example:
+        >>> text = "According to [recent studies](https://example.com/study) and [research papers](https://academic.org/paper), AI is advancing rapidly."
+        >>> citations, clean_text = extract_citations(text)
+        >>> print(clean_text)
+        "According to recent studies and research papers, AI is advancing rapidly."
+        >>> print(len(citations))
+        2
+        >>> print(citations[0].url)
+        "https://example.com/study"
+        >>> print(citations[0].title)
+        "Study"
+        >>> print(citations[0].description)
+        "recent studies"
+    """
+    citations, offset = [], 0
+    pattern = r"\[([^\]]+)\]\(([^)]+)\)"
+
+    for match in re.finditer(pattern, text):
+        content, url = match.groups()
+        start = match.start() - offset
+
+        citations.append(
+            Citation(
+                url=url,
+                title=url.split("/")[-1].replace("-", " ").title() or content[:50],
+                description=content[:100] + ("..." if len(content) > 100 else ""),
+                start_index=start,
+                end_index=start + len(content),
+            )
+        )
+        offset += len(match.group(0)) - len(content)
+
+    return citations, re.sub(pattern, r"\1", text)
diff --git a/agents/official/beeai-framework/chat/src/chat/helpers/trajectory.py b/agents/official/beeai-framework/chat/src/chat/helpers/trajectory.py
@@ -0,0 +1,28 @@
+# Copyright 2025 © BeeAI a Series of LF Projects, LLC
+# SPDX-License-Identifier: Apache-2.0
+
+from typing import Any
+from beeai_framework.errors import FrameworkError
+from beeai_framework.tools import ToolOutput
+from pydantic import BaseModel, InstanceOf, field_serializer
+
+class TrajectoryContent(BaseModel):
+    input: Any
+    output: InstanceOf[ToolOutput] | None = None
+    error: InstanceOf[FrameworkError] | None = None
+    
+    @field_serializer('output')
+    def serialize_output(self, output: ToolOutput | None) -> Any:
+        if output is None:
+            return None
+        # Check if it's a JSONToolOutput with to_json_safe method
+        if hasattr(output, 'to_json_safe'):
+            return output.to_json_safe()
+        # Fallback to text content for other ToolOutput types
+        return {"text_content": output.get_text_content()}
+    
+    @field_serializer('error')
+    def serialize_error(self, error: FrameworkError | None) -> dict[str, Any] | None:
+        if error is None:
+            return None
+        return {"message": str(error), "type": error.__class__.__name__}
diff --git a/agents/official/beeai-framework/chat/src/chat/tools/general/act.py b/agents/official/beeai-framework/chat/src/chat/tools/general/act.py
@@ -162,18 +162,15 @@ async def run(self, state: RequirementAgentRunState, ctx: RunContext) -> list[Ru
                     "Last step output must be an instance of ActToolOutput."
                 )
             selected_tool = last_step.output.result.selected_tool
-            if selected_tool == "final_answer":
-                return []
-            else:
-                return [
-                    Rule(
-                        target=selected_tool,
-                        forced=True,
-                        allowed=True,
-                        prevent_stop=False,
-                        hidden=False,
-                    )
-                ]
+            return [
+                Rule(
+                    target=selected_tool,
+                    forced=True,
+                    allowed=True,
+                    prevent_stop=False,
+                    hidden=False,
+                )
+            ]
 
         return [
             Rule(
@@ -197,11 +194,7 @@ def act_tool_middleware(ctx: RunContext) -> None:
         raise ValueError("ActTool is not found in the agent's tools.")
 
     def handle_start(data: RequirementAgentStartEvent, event: EventMeta) -> None:
-        allowed_tools = (
-            [t.name for t in data.request.tools if t.name != "act"]
-            if data.state.iteration == 1
-            else [t.name for t in data.request.allowed_tools if t.name != "act"]
-        )
+        allowed_tools = [t.name for t in data.request.tools if t.name != "act"]
         act_tool.allowed_tools_names = allowed_tools
 
     ctx.emitter.on("start", handle_start)
diff --git a/agents/official/beeai-framework/chat/uv.lock b/agents/official/beeai-framework/chat/uv.lock

Original file line number	Diff line number	Diff line change
`@@ -7,7 +7,7 @@ authors = [`
`7`	`7`	`]`
`8`	`8`	`requires-python = ">=3.13,<4"`
`9`	`9`	`dependencies = [`
`10`		`- "beeai-framework[duckduckgo,wikipedia]~=0.1.31",`
	`10`	`+ "beeai-framework[duckduckgo,wikipedia]~=0.1.34",`
`11`	`11`	`"beeai-sdk",`
`12`	`12`	`"openinference-instrumentation-beeai>=0.1.6",`
`13`	`13`	`"pydantic-settings>=2.9.0",`