✨(websearch) add Brave llm/context snippets

camilleAND · camilleAND · commit e335f1ce9725 · 2026-03-20T11:59:56.000+01:00
Use the Brave llm/context endpoint with snippets; change the tool name to web_search

Signed-off-by: camilleAND <camille.andre@modernisation.gouv.fr>
diff --git a/src/backend/chat/agents/conversation.py b/src/backend/chat/agents/conversation.py
@@ -128,7 +128,8 @@ def get_web_search_tool_name(self) -> str | None:
         """
         for toolset in self.toolsets:
             for tool in toolset.tools.values():
-                if tool.name.startswith("web_search_"):
+                # Support both legacy names (web_search_*) and the new generic "web_search"
+                if tool.name.startswith("web_search"):
                     return tool.name
         return None
 
diff --git a/src/backend/chat/tests/clients/pydantic_ai/test_smart_web_search.py b/src/backend/chat/tests/clients/pydantic_ai/test_smart_web_search.py
@@ -23,7 +23,7 @@ def _llm_config_with_websearch(settings):
             is_active=True,
             icon=None,
             system_prompt="You are an amazing assistant.",
-            tools=["web_search_brave_with_document_backend"],
+            tools=["web_search"],
             provider=LLMProvider(
                 hrid="unused",
                 base_url="https://example.com",
@@ -68,7 +68,7 @@ def test_smart_search_enabled_tool_is_called(_llm_config_with_websearch):
     with service.conversation_agent.override(model=TestModel(), deps=service._context_deps):
         response = service.conversation_agent.run_sync("Search the web for something.")
 
-    assert "web_search_brave_with_document_backend" in response.output
+    assert "web_search" in response.output
 
 
 def test_force_websearch_overrides_smart_search_disabled(_llm_config_with_websearch):
@@ -92,4 +92,4 @@ def test_force_websearch_overrides_smart_search_disabled(_llm_config_with_websea
     )
     with service.conversation_agent.override(model=TestModel(), deps=service._context_deps):
         response = service.conversation_agent.run_sync("Search the web for something.")
-        assert "web_search_brave_with_document_backend" in response.output
+        assert "web_search" in response.output
diff --git a/src/backend/chat/tests/tools/test_web_search_brave.py b/src/backend/chat/tests/tools/test_web_search_brave.py
@@ -24,10 +24,12 @@
     _query_brave_api_async,
     format_tool_return,
     web_search_brave,
+    web_search_brave_configurable,
     web_search_brave_with_document_backend,
 )
 
-BRAVE_URL = "https://api.search.brave.com/res/v1/web/search"
+# Must match the URL used in _query_brave_api_async
+BRAVE_URL = "https://api.search.brave.com/res/v1/llm/context"
 
 
 @pytest.fixture(autouse=True)
@@ -42,6 +44,8 @@ def brave_settings(settings):
     settings.BRAVE_SEARCH_SPELLCHECK = True
     settings.BRAVE_SEARCH_EXTRA_SNIPPETS = True
     settings.BRAVE_SUMMARIZATION_ENABLED = False
+    settings.BRAVE_USE_LLM_CONTEXT = True
+    settings.BRAVE_USE_RAG = True
     settings.BRAVE_CACHE_TTL = 3600
     settings.BRAVE_RAG_WEB_SEARCH_CHUNK_NUMBER = 5
 
@@ -1028,6 +1032,75 @@ async def test_web_search_brave_with_document_backend_rag_search_params(mocked_c
     )
 
 
+@pytest.mark.asyncio
+async def test_web_search_brave_configurable_uses_non_rag_when_llm_context_enabled(
+    settings, mocked_context
+):
+    """With BRAVE_USE_LLM_CONTEXT on, the non-RAG flow runs even if BRAVE_USE_RAG is on."""
+    settings.BRAVE_USE_RAG = True
+    settings.BRAVE_USE_LLM_CONTEXT = True
+
+    # Stub both flows so that only the routing decision is exercised.
+    with patch(
+        "chat.tools.web_search_brave.web_search_brave", new_callable=AsyncMock
+    ) as plain_flow, patch(
+        "chat.tools.web_search_brave.web_search_brave_with_document_backend",
+        new_callable=AsyncMock,
+    ) as rag_flow:
+        plain_flow.return_value = "non-rag"
+        outcome = await web_search_brave_configurable(mocked_context, "query")
+
+    assert outcome == "non-rag"
+    plain_flow.assert_called_once_with(mocked_context, "query")
+    rag_flow.assert_not_called()
+
+
+@pytest.mark.asyncio
+async def test_web_search_brave_configurable_uses_rag_when_classic_and_rag_enabled(
+    settings, mocked_context
+):
+    """With BRAVE_USE_LLM_CONTEXT off and BRAVE_USE_RAG on, the RAG flow is the one called."""
+    settings.BRAVE_USE_RAG = True
+    settings.BRAVE_USE_LLM_CONTEXT = False
+
+    # Stub both flows so that only the routing decision is exercised.
+    with patch(
+        "chat.tools.web_search_brave.web_search_brave", new_callable=AsyncMock
+    ) as plain_flow, patch(
+        "chat.tools.web_search_brave.web_search_brave_with_document_backend",
+        new_callable=AsyncMock,
+    ) as rag_flow:
+        rag_flow.return_value = "rag"
+        outcome = await web_search_brave_configurable(mocked_context, "query")
+
+    assert outcome == "rag"
+    rag_flow.assert_called_once_with(mocked_context, "query")
+    plain_flow.assert_not_called()
+
+
+@pytest.mark.asyncio
+async def test_web_search_brave_configurable_uses_non_rag_when_classic_and_rag_disabled(
+    settings, mocked_context
+):
+    """With BRAVE_USE_LLM_CONTEXT and BRAVE_USE_RAG both off, the non-RAG flow is called."""
+    settings.BRAVE_USE_RAG = False
+    settings.BRAVE_USE_LLM_CONTEXT = False
+
+    # Stub both flows so that only the routing decision is exercised.
+    with patch(
+        "chat.tools.web_search_brave.web_search_brave", new_callable=AsyncMock
+    ) as plain_flow, patch(
+        "chat.tools.web_search_brave.web_search_brave_with_document_backend",
+        new_callable=AsyncMock,
+    ) as rag_flow:
+        plain_flow.return_value = "non-rag"
+        outcome = await web_search_brave_configurable(mocked_context, "query")
+
+    assert outcome == "non-rag"
+    plain_flow.assert_called_once_with(mocked_context, "query")
+    rag_flow.assert_not_called()
+
+
 @pytest.mark.asyncio
 async def test_fetch_and_store_none_document():
     """Test _fetch_and_store_async when extraction returns None instead of empty string."""
diff --git a/src/backend/chat/tools/__init__.py b/src/backend/chat/tools/__init__.py
@@ -4,7 +4,9 @@
 
 from .fake_current_weather import get_current_weather
 from .web_seach_albert_rag import web_search_albert_rag
-from .web_search_brave import web_search_brave, web_search_brave_with_document_backend
+from .web_search_brave import (
+    web_search_brave_configurable,
+)
 from .web_search_tavily import web_search_tavily
 
 
@@ -18,13 +20,24 @@ def get_pydantic_tools_by_name(name: str) -> Tool:
     tool_dict = {
         "get_current_weather": Tool(get_current_weather, takes_ctx=False),
         "web_search_brave": Tool(
-            web_search_brave,
+            web_search_brave_configurable,
+            # Pin the exposed name: without it the tool would presumably be named after the
+            # wrapped function (web_search_brave_configurable) instead of "web_search_brave".
+            name="web_search_brave",
             takes_ctx=True,
             prepare=only_if_web_search_enabled,
             max_retries=2,
         ),
+        # Backward-compatible alias (older settings may still reference this tool name).
         "web_search_brave_with_document_backend": Tool(
-            web_search_brave_with_document_backend,
+            web_search_brave_configurable,
+            name="web_search_brave_with_document_backend",
+            takes_ctx=True,
+            prepare=only_if_web_search_enabled,
+            max_retries=2,
+        ),
+        "web_search": Tool(
+            web_search_brave_configurable,
+            name="web_search",
             takes_ctx=True,
             prepare=only_if_web_search_enabled,
             max_retries=2,
diff --git a/src/backend/chat/tools/web_search_brave.py b/src/backend/chat/tools/web_search_brave.py
@@ -101,6 +101,18 @@ async def _fetch_and_extract_async(url: str) -> str:
         raise DocumentFetchError(f"Failed to extract content from {url}: {e}") from e
 
 
+def _get_snippets_from_result(result: dict) -> List[str]:
+    """Return the snippets to use for one search result, never None.
+
+    Prefers the `snippets` list and falls back to `extra_snippets`. The two
+    lists are NOT merged: when both are non-empty only `snippets` is returned.
+    A missing key, a None value and an empty list all count as "no snippets",
+    and an empty list is returned when neither key provides any.
+    """
+    snippets = result.get("snippets") or []
+    return snippets or result.get("extra_snippets") or []
+
+
 async def _extract_and_summarize_snippets_async(query: str, url: str) -> List[str]:
     """Fetch, extract and summarize text content from the URL.
 
@@ -143,22 +155,45 @@ async def _fetch_and_store_async(url: str, document_store, **kwargs) -> None:
 
 
 async def _query_brave_api_async(query: str) -> List[dict]:
-    """Query the Brave Search API and return the raw results."""
-    url = "https://api.search.brave.com/res/v1/web/search"
+    """Query the Brave Search API and return the raw results.
+
+    Uses either the LLM context endpoint (res/v1/llm/context) or the classic web search
+    endpoint (res/v1/web/search) depending on the BRAVE_USE_LLM_CONTEXT setting.
+    """
+    if settings.BRAVE_USE_LLM_CONTEXT:
+        logger.debug("Using LLM context endpoint")
+        url = "https://api.search.brave.com/res/v1/llm/context"
+        data = {
+            "q": query,
+            "country": settings.BRAVE_SEARCH_COUNTRY,
+            "search_lang": settings.BRAVE_SEARCH_LANG,
+            "count": settings.BRAVE_MAX_RESULTS,
+            "safesearch": settings.BRAVE_SEARCH_SAFE_SEARCH,
+            "spellcheck": settings.BRAVE_SEARCH_SPELLCHECK,
+            "result_filter": "web,faq,query",
+            "extra_snippets": settings.BRAVE_SEARCH_EXTRA_SNIPPETS,
+            "maximum_number_of_urls": settings.BRAVE_MAX_RESULTS,
+            "maximum_number_of_tokens": settings.BRAVE_MAX_TOKENS,
+            "maximum_number_of_snippets": settings.BRAVE_MAX_SNIPPETS,
+            "maximum_number_of_snippets_per_url": settings.BRAVE_MAX_SNIPPETS_PER_URL,
+        }
+    else:
+        logger.debug("Using classic web search endpoint")
+        url = "https://api.search.brave.com/res/v1/web/search"
+        data = {
+            "q": query,
+            "country": settings.BRAVE_SEARCH_COUNTRY,
+            "search_lang": settings.BRAVE_SEARCH_LANG,
+            "count": settings.BRAVE_MAX_RESULTS,
+            "safesearch": settings.BRAVE_SEARCH_SAFE_SEARCH,
+            "spellcheck": settings.BRAVE_SEARCH_SPELLCHECK,
+            "result_filter": "web,faq,query",
+            "extra_snippets": settings.BRAVE_SEARCH_EXTRA_SNIPPETS,
+        }
     headers = {
         "Accept": "application/json",
         "X-Subscription-Token": settings.BRAVE_API_KEY,
     }
-    data = {
-        "q": query,
-        "country": settings.BRAVE_SEARCH_COUNTRY,
-        "search_lang": settings.BRAVE_SEARCH_LANG,
-        "count": settings.BRAVE_MAX_RESULTS,
-        "safesearch": settings.BRAVE_SEARCH_SAFE_SEARCH,
-        "spellcheck": settings.BRAVE_SEARCH_SPELLCHECK,
-        "result_filter": "web,faq,query",
-        "extra_snippets": settings.BRAVE_SEARCH_EXTRA_SNIPPETS,
-    }
     params = {k: v for k, v in data.items() if v is not None}
 
     try:
@@ -167,6 +202,29 @@ async def _query_brave_api_async(query: str) -> List[dict]:
             response.raise_for_status()
             json_response = response.json()
 
+            # LLM context API: results are under `grounding.generic`
+            # See: https://api-dashboard.search.brave.com/documentation/services/llm-context
+            if "grounding" in json_response:
+                generic_results = json_response.get("grounding", {}).get("generic", []) or []
+                normalized_results: List[dict] = []
+                for item in generic_results:
+                    item_url = item.get("url")
+                    if not item_url:
+                        continue
+
+                    normalized_results.append(
+                        {
+                            "url": item_url,
+                            # Fallback to URL if no title is provided
+                            "title": item.get("title") or item_url,
+                            # `snippets` is already a list
+                            "snippets": item.get("snippets") or [],
+                        }
+                    )
+
+                return normalized_results
+
+            # Classic web search JSON shape, used when BRAVE_USE_LLM_CONTEXT is False
             # https://api-dashboard.search.brave.com/app/documentation/web-search/responses#Result
             return json_response.get("web", {}).get("results", [])
 
@@ -217,14 +275,14 @@ def format_tool_return(raw_search_results: List[dict]) -> ToolReturn:
             str(idx): {
                 "url": result["url"],
                 "title": result["title"],
-                "snippets": result.get("extra_snippets", []),
+                "snippets": _get_snippets_from_result(result),
             }
             for idx, result in enumerate(raw_search_results)
-            if result.get("extra_snippets", [])
+            if _get_snippets_from_result(result)
         },
         metadata={
             "sources": {
-                result["url"] for result in raw_search_results if result.get("extra_snippets", [])
+                result["url"] for result in raw_search_results if _get_snippets_from_result(result)
             }
         },
     )
@@ -239,14 +297,18 @@ async def web_search_brave(_ctx: RunContext, query: str) -> ToolReturn:
         _ctx (RunContext): The run context, used by the wrapper.
         query (str): The query to search for.
     """
+    logger.debug("Starting web search without RAG backend for query: %s", query)
     try:
         raw_search_results = await _query_brave_api_async(query)
 
         await sync_to_async(reset_caches)()  # Clear trafilatura caches to avoid memory bloat/leaks
 
-        # Parallelize fetch/extract for results that don't include extra_snippets
+        # Parallelize fetch/extract only for results that don't already include any snippets
+        # (neither Brave `snippets` nor `extra_snippets`).
         to_process = [
-            (idx, r) for idx, r in enumerate(raw_search_results) if not r.get("extra_snippets")
+            (idx, r)
+            for idx, r in enumerate(raw_search_results)
+            if not r.get("extra_snippets") and not r.get("snippets")
         ]
 
         if to_process:
@@ -292,7 +354,7 @@ async def web_search_brave_with_document_backend(ctx: RunContext, query: str) ->
         ctx (RunContext): The run context containing the conversation.
         query (str): The query to search for.
     """
-    logger.info("Starting web search with RAG backend for query: %s", query)
+    logger.debug("Starting web search with RAG backend for query: %s", query)
     try:
         raw_search_results = await _query_brave_api_async(query)
 
@@ -328,7 +390,7 @@ async def web_search_brave_with_document_backend(ctx: RunContext, query: str) ->
                     session=ctx.deps.session,
                     user_sub=ctx.deps.user.sub,
                 )
-                logger.info("RAG search returned:  %s", rag_results)
+                logger.debug("RAG search returned:  %s", rag_results)
 
                 ctx.usage += RunUsage(
                     input_tokens=rag_results.usage.prompt_tokens,
@@ -366,3 +428,21 @@ async def web_search_brave_with_document_backend(ctx: RunContext, query: str) ->
             f"An unexpected error occurred during web search with RAG: {type(e).__name__}. "
             "You must explain this to the user and not try to answer based on your knowledge."
         ) from e
+
+
+@last_model_retry_soft_fail
+async def web_search_brave_configurable(ctx: RunContext, query: str) -> ToolReturn:
+    """Dispatch a Brave web search to the flow selected by the settings.
+
+    The document-backend (RAG) flow is used only when BRAVE_USE_LLM_CONTEXT is
+    false and BRAVE_USE_RAG is true; BRAVE_USE_LLM_CONTEXT takes precedence.
+    Every other combination uses the non-RAG flow.
+
+    Args:
+        ctx (RunContext): The run context, forwarded to the selected flow.
+        query (str): The query to search for.
+    """
+    rag_selected = not settings.BRAVE_USE_LLM_CONTEXT and settings.BRAVE_USE_RAG
+    if rag_selected:
+        return await web_search_brave_with_document_backend(ctx, query)
+    return await web_search_brave(ctx, query)
diff --git a/src/backend/conversations/brave_settings.py b/src/backend/conversations/brave_settings.py
@@ -23,6 +23,14 @@ class BraveSettings:
         environ_prefix=None,
     )
 
+    # Enable RAG processing for Brave web search.
+    # Ignored when BRAVE_USE_LLM_CONTEXT is true: the non-RAG flow is always used then.
+    BRAVE_USE_RAG = values.BooleanValue(
+        default=True,
+        environ_name="BRAVE_USE_RAG",
+        environ_prefix=None,
+    )
+
     # For web_search_brave_with_document_backend: number of chunks to retrieve RAG search
     BRAVE_RAG_WEB_SEARCH_CHUNK_NUMBER = values.IntegerValue(
         default=10,
@@ -74,3 +82,27 @@ class BraveSettings:
         environ_name="BRAVE_SEARCH_EXTRA_SNIPPETS",
         environ_prefix=None,
     )
+
+    # Whether to use the LLM context endpoint or the classic search
+    BRAVE_USE_LLM_CONTEXT = values.BooleanValue(
+        default=True,
+        environ_name="BRAVE_USE_LLM_CONTEXT",
+        environ_prefix=None,
+    )
+
+    # LLM context endpoint limits
+    BRAVE_MAX_TOKENS = values.IntegerValue(
+        default=8192,
+        environ_name="BRAVE_MAX_TOKENS",
+        environ_prefix=None,
+    )
+    BRAVE_MAX_SNIPPETS = values.IntegerValue(
+        default=50,
+        environ_name="BRAVE_MAX_SNIPPETS",
+        environ_prefix=None,
+    )
+    BRAVE_MAX_SNIPPETS_PER_URL = values.IntegerValue(
+        default=10,
+        environ_name="BRAVE_MAX_SNIPPETS_PER_URL",
+        environ_prefix=None,
+    )