Skip to content
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -79,3 +79,6 @@ db.sqlite3

# Docker compose override
compose.override.yml

# LLM configuration
src/backend/conversations/configuration/llm/default_dev.json
50 changes: 41 additions & 9 deletions src/backend/chat/clients/pydantic_ai.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
from chat.ai_sdk_types import (
LanguageModelV1Source,
SourceUIPart,
TextUIPart,
UIMessage,
)
from chat.clients.async_to_sync import convert_async_generator_to_sync
Expand All @@ -75,6 +76,7 @@
from chat.tools.document_generic_search_rag import add_document_rag_search_tool_from_setting
from chat.tools.document_search_rag import add_document_rag_search_tool
from chat.tools.document_summarize import document_summarize
from chat.tools.fetch_url import URL_PATTERN, detect_url_in_conversation, fetch_url
from chat.vercel_ai_sdk.core import events_v4, events_v5
from chat.vercel_ai_sdk.encoder import EventEncoder

Expand Down Expand Up @@ -390,6 +392,27 @@ async def _run_agent( # noqa: PLR0912, PLR0915 # pylint: disable=too-many-branc
input=user_prompt if self._store_analytics else "REDACTED"
)

# If a URL is present in the current message or in the conversation history, add the fetch_url tool dynamically
# Check current message first (most recent)
has_url_in_current_message = any(
URL_PATTERN.search(part.text) if isinstance(part, TextUIPart) else False
for part in messages[-1].parts or []
) or (URL_PATTERN.search(messages[-1].content) if messages[-1].content else False)

# Also check conversation history
has_url_in_conversation = detect_url_in_conversation(self.conversation)

# Check if fetch_url tool already exists (might be in configuration)
fetch_url_exists = "fetch_url" in self.conversation_agent._function_toolset.tools # pylint: disable=protected-access

if (has_url_in_current_message or has_url_in_conversation) and not fetch_url_exists:
# Add fetch_url tool dynamically if URL is detected and tool doesn't exist yet
@self.conversation_agent.tool(name="fetch_url", retries=2)
@functools.wraps(fetch_url)
async def fetch_url_tool(ctx: RunContext, url: str) -> ToolReturn:
"""Wrap the fetch_url tool to provide context and add the tool."""
return await fetch_url(ctx, url)

usage = {"promptTokens": 0, "completionTokens": 0}

conversation_has_documents = self._is_document_upload_enabled and (
Expand Down Expand Up @@ -484,11 +507,13 @@ def force_web_search_prompt() -> str:
.aexists()
)

should_enable_rag = has_not_pdf_docs or has_url_in_current_message or has_url_in_conversation

document_urls = []
if not conversation_has_documents and not has_not_pdf_docs:
if not conversation_has_documents and not should_enable_rag:
# No documents to process
pass
elif has_not_pdf_docs:
elif should_enable_rag:
add_document_rag_search_tool(self.conversation_agent)

@self.conversation_agent.instructions
Expand All @@ -505,13 +530,15 @@ def summarization_system_prompt() -> str:
)

# Inform the model (system-level) that documents are attached and available
@self.conversation_agent.system_prompt
def attached_documents_note() -> str:
return (
"[Internal context] User documents are attached to this conversation. "
"Do not request re-upload of documents; consider them already available "
"via the internal store."
)
# Only add this note when documents are actually attached (not merely a URL), to avoid hallucination
if has_not_pdf_docs:
@self.conversation_agent.system_prompt
def attached_documents_note() -> str:
return (
"[Internal context] User documents are attached to this conversation. "
"Do not request re-upload of documents; consider them already available "
"via the internal store."
)

@self.conversation_agent.tool(name="summarize", retries=2)
@functools.wraps(document_summarize)
Expand Down Expand Up @@ -549,6 +576,7 @@ async def summarize(ctx: RunContext, *args, **kwargs) -> ToolReturn:

_final_output_from_tool = None
_ui_sources = []
_added_source_urls = set()

# Help Mistral to prevent `Unexpected role 'user' after role 'tool'` error.
if history and history[-1].kind == "request":
Expand Down Expand Up @@ -661,6 +689,10 @@ async def summarize(ctx: RunContext, *args, **kwargs) -> ToolReturn:
sources := event.result.metadata.get("sources")
):
for source_url in sources:
# Skip if we've already added this URL to avoid duplicates
if source_url in _added_source_urls:
continue
_added_source_urls.add(source_url)
url_source = LanguageModelV1Source(
sourceType="url",
id=str(uuid.uuid4()),
Expand Down
Loading
Loading