Skip to content

Commit 74b66bd

Browse files
authored
Adjust defaults for multimodal-related parameters to work for default deployments (#2717)
* Fix frontend defaults and e2e test * Add backend guards and tests * Default send_image_sources to self.multimodal_enabled
1 parent d08040f commit 74b66bd

File tree

7 files changed

+93
-38
lines changed

7 files changed

+93
-38
lines changed

app/backend/approaches/approach.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -412,6 +412,7 @@ async def get_sources_content(
412412
self,
413413
results: list[Document],
414414
use_semantic_captions: bool,
415+
include_text_sources: bool,
415416
download_image_sources: bool,
416417
user_oid: Optional[str] = None,
417418
) -> DataPoints:
@@ -442,10 +443,13 @@ def nonewlines(s: str) -> str:
442443
citations.append(citation)
443444

444445
# If semantic captions are used, extract captions; otherwise, use content
445-
if use_semantic_captions and doc.captions:
446-
text_sources.append(f"{citation}: {nonewlines(' . '.join([cast(str, c.text) for c in doc.captions]))}")
447-
else:
448-
text_sources.append(f"{citation}: {nonewlines(doc.content or '')}")
446+
if include_text_sources:
447+
if use_semantic_captions and doc.captions:
448+
text_sources.append(
449+
f"{citation}: {nonewlines(' . '.join([cast(str, c.text) for c in doc.captions]))}"
450+
)
451+
else:
452+
text_sources.append(f"{citation}: {nonewlines(doc.content or '')}")
449453

450454
if download_image_sources and hasattr(doc, "images") and doc.images:
451455
for img in doc.images:
@@ -457,9 +461,7 @@ def nonewlines(s: str) -> str:
457461
if url:
458462
image_sources.append(url)
459463
citations.append(self.get_image_citation(doc.sourcepage or "", img["url"]))
460-
if download_image_sources:
461-
return DataPoints(text=text_sources, images=image_sources, citations=citations)
462-
return DataPoints(text=text_sources, citations=citations)
464+
return DataPoints(text=text_sources, images=image_sources, citations=citations)
463465

464466
def get_citation(self, sourcepage: Optional[str]):
465467
return sourcepage or ""

app/backend/approaches/chatreadretrieveread.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616

1717
from approaches.approach import (
1818
Approach,
19-
DataPoints,
2019
ExtraInfo,
2120
ThoughtStep,
2221
)
@@ -284,9 +283,11 @@ async def run_search_approach(
284283
minimum_reranker_score = overrides.get("minimum_reranker_score", 0.0)
285284
search_index_filter = self.build_filter(overrides, auth_claims)
286285
send_text_sources = overrides.get("send_text_sources", True)
287-
send_image_sources = overrides.get("send_image_sources", True)
286+
send_image_sources = overrides.get("send_image_sources", self.multimodal_enabled) and self.multimodal_enabled
288287
search_text_embeddings = overrides.get("search_text_embeddings", True)
289-
search_image_embeddings = overrides.get("search_image_embeddings", self.multimodal_enabled)
288+
search_image_embeddings = (
289+
overrides.get("search_image_embeddings", self.multimodal_enabled) and self.multimodal_enabled
290+
)
290291

291292
original_user_query = messages[-1]["content"]
292293
if not isinstance(original_user_query, str):
@@ -342,11 +343,12 @@ async def run_search_approach(
342343

343344
# STEP 3: Generate a contextual and content specific answer using the search results and chat history
344345
data_points = await self.get_sources_content(
345-
results, use_semantic_captions, download_image_sources=send_image_sources, user_oid=auth_claims.get("oid")
346+
results,
347+
use_semantic_captions,
348+
include_text_sources=send_text_sources,
349+
download_image_sources=send_image_sources,
350+
user_oid=auth_claims.get("oid"),
346351
)
347-
if not send_text_sources:
348-
data_points = DataPoints(text=[], images=data_points.images, citations=data_points.citations)
349-
350352
extra_info = ExtraInfo(
351353
data_points,
352354
thoughts=[
@@ -396,7 +398,7 @@ async def run_agentic_retrieval_approach(
396398
# 50 is the amount of documents that the reranker can process per query
397399
max_docs_for_reranker = max_subqueries * 50
398400
send_text_sources = overrides.get("send_text_sources", True)
399-
send_image_sources = overrides.get("send_image_sources", True)
401+
send_image_sources = overrides.get("send_image_sources", self.multimodal_enabled) and self.multimodal_enabled
400402

401403
response, results = await self.run_agentic_retrieval(
402404
messages=messages,
@@ -412,12 +414,10 @@ async def run_agentic_retrieval_approach(
412414
data_points = await self.get_sources_content(
413415
results,
414416
use_semantic_captions=False,
417+
include_text_sources=send_text_sources,
415418
download_image_sources=send_image_sources,
416419
user_oid=auth_claims.get("oid"),
417420
)
418-
if not send_text_sources:
419-
data_points = DataPoints(text=[], images=data_points.images, citations=data_points.citations)
420-
421421
extra_info = ExtraInfo(
422422
data_points,
423423
thoughts=[

app/backend/approaches/retrievethenread.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -159,9 +159,12 @@ async def run_search_approach(
159159
minimum_reranker_score = overrides.get("minimum_reranker_score", 0.0)
160160
filter = self.build_filter(overrides, auth_claims)
161161
q = str(messages[-1]["content"])
162-
send_image_sources = overrides.get("send_image_sources", True)
162+
send_text_sources = overrides.get("send_text_sources", True)
163+
send_image_sources = overrides.get("send_image_sources", self.multimodal_enabled) and self.multimodal_enabled
163164
search_text_embeddings = overrides.get("search_text_embeddings", True)
164-
search_image_embeddings = overrides.get("search_image_embeddings", self.multimodal_enabled)
165+
search_image_embeddings = (
166+
overrides.get("search_image_embeddings", self.multimodal_enabled) and self.multimodal_enabled
167+
)
165168

166169
vectors: list[VectorQuery] = []
167170
if use_vector_search:
@@ -185,7 +188,11 @@ async def run_search_approach(
185188
)
186189

187190
data_points = await self.get_sources_content(
188-
results, use_semantic_captions, download_image_sources=send_image_sources, user_oid=auth_claims.get("oid")
191+
results,
192+
use_semantic_captions,
193+
include_text_sources=send_text_sources,
194+
download_image_sources=send_image_sources,
195+
user_oid=auth_claims.get("oid"),
189196
)
190197

191198
return ExtraInfo(
@@ -226,7 +233,8 @@ async def run_agentic_retrieval_approach(
226233
results_merge_strategy = overrides.get("results_merge_strategy", "interleaved")
227234
# 50 is the amount of documents that the reranker can process per query
228235
max_docs_for_reranker = max_subqueries * 50
229-
send_image_sources = overrides.get("send_image_sources", True)
236+
send_text_sources = overrides.get("send_text_sources", True)
237+
send_image_sources = overrides.get("send_image_sources", self.multimodal_enabled) and self.multimodal_enabled
230238

231239
response, results = await self.run_agentic_retrieval(
232240
messages,
@@ -242,6 +250,7 @@ async def run_agentic_retrieval_approach(
242250
data_points = await self.get_sources_content(
243251
results,
244252
use_semantic_captions=False,
253+
include_text_sources=send_text_sources,
245254
download_image_sources=send_image_sources,
246255
user_oid=auth_claims.get("oid"),
247256
)

app/frontend/src/pages/ask/Ask.tsx

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -37,13 +37,13 @@ export function Component(): JSX.Element {
3737
const [useQueryRewriting, setUseQueryRewriting] = useState<boolean>(false);
3838
const [reasoningEffort, setReasoningEffort] = useState<string>("");
3939
const [sendTextSources, setSendTextSources] = useState<boolean>(true);
40-
const [sendImageSources, setSendImageSources] = useState<boolean>(true);
40+
const [sendImageSources, setSendImageSources] = useState<boolean>(false);
4141
const [includeCategory, setIncludeCategory] = useState<string>("");
4242

4343
const [excludeCategory, setExcludeCategory] = useState<string>("");
4444
const [question, setQuestion] = useState<string>("");
4545
const [searchTextEmbeddings, setSearchTextEmbeddings] = useState<boolean>(true);
46-
const [searchImageEmbeddings, setSearchImageEmbeddings] = useState<boolean>(true);
46+
const [searchImageEmbeddings, setSearchImageEmbeddings] = useState<boolean>(false);
4747
const [useOidSecurityFilter, setUseOidSecurityFilter] = useState<boolean>(false);
4848
const [useGroupsSecurityFilter, setUseGroupsSecurityFilter] = useState<boolean>(false);
4949
const [showMultimodalOptions, setShowMultimodalOptions] = useState<boolean>(false);
@@ -87,12 +87,11 @@ export function Component(): JSX.Element {
8787
configApi().then(config => {
8888
setShowMultimodalOptions(config.showMultimodalOptions);
8989
if (config.showMultimodalOptions) {
90-
// Set default LLM inputs based on config override or fallback to Texts
91-
setSendTextSources(true);
92-
setSendImageSources(true);
93-
// Set default vector field settings
94-
setSearchTextEmbeddings(true);
95-
setSearchImageEmbeddings(true);
90+
// Initialize from server config so defaults follow deployment settings
91+
setSendTextSources(config.ragSendTextSources !== undefined ? config.ragSendTextSources : true);
92+
setSendImageSources(config.ragSendImageSources);
93+
setSearchTextEmbeddings(config.ragSearchTextEmbeddings);
94+
setSearchImageEmbeddings(config.ragSearchImageEmbeddings);
9695
}
9796
setUseSemanticRanker(config.showSemanticRankerOption);
9897
setShowSemanticRankerOption(config.showSemanticRankerOption);

app/frontend/src/pages/chat/Chat.tsx

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,11 +48,11 @@ const Chat = () => {
4848
const [excludeCategory, setExcludeCategory] = useState<string>("");
4949
const [useSuggestFollowupQuestions, setUseSuggestFollowupQuestions] = useState<boolean>(false);
5050
const [searchTextEmbeddings, setSearchTextEmbeddings] = useState<boolean>(true);
51-
const [searchImageEmbeddings, setSearchImageEmbeddings] = useState<boolean>(true);
51+
const [searchImageEmbeddings, setSearchImageEmbeddings] = useState<boolean>(false);
5252
const [useOidSecurityFilter, setUseOidSecurityFilter] = useState<boolean>(false);
5353
const [useGroupsSecurityFilter, setUseGroupsSecurityFilter] = useState<boolean>(false);
5454
const [sendTextSources, setSendTextSources] = useState<boolean>(true);
55-
const [sendImageSources, setSendImageSources] = useState<boolean>(true);
55+
const [sendImageSources, setSendImageSources] = useState<boolean>(false);
5656

5757
const lastQuestionRef = useRef<string>("");
5858
const chatMessageStreamEnd = useRef<HTMLDivElement | null>(null);
@@ -99,7 +99,7 @@ const Chat = () => {
9999
configApi().then(config => {
100100
setShowMultimodalOptions(config.showMultimodalOptions);
101101
if (config.showMultimodalOptions) {
102-
// Always have at least one source enabled, default to text if none specified
102+
// Initialize from server config so defaults match deployment settings
103103
setSendTextSources(config.ragSendTextSources !== undefined ? config.ragSendTextSources : true);
104104
setSendImageSources(config.ragSendImageSources);
105105
setSearchTextEmbeddings(config.ragSearchTextEmbeddings);

tests/e2e.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -93,14 +93,21 @@ def test_chat(sized_page: Page, live_server_url: str):
9393

9494
# Set up a mock route to the /chat endpoint with streaming results
9595
def handle(route: Route):
96-
# Assert that session_state is specified in the request (None for now)
9796
try:
9897
post_data = route.request.post_data_json
99-
if post_data and "session_state" in post_data:
100-
session_state = post_data["session_state"]
101-
assert session_state is None
98+
# Assert that session_state is specified (None initially)
99+
if "session_state" in post_data:
100+
assert post_data["session_state"] is None
101+
overrides = post_data["context"]["overrides"]
102+
# Assert that the default overrides are correct
103+
assert overrides.get("send_text_sources") is True
104+
assert overrides.get("send_image_sources") is False
105+
assert overrides.get("search_text_embeddings") is True
106+
assert overrides.get("search_image_embeddings") is False
107+
# retrieval_mode may be explicitly "hybrid" or omitted (interpreted as hybrid)
108+
assert overrides.get("retrieval_mode") in ["hybrid", None]
102109
except Exception as e:
103-
print(f"Error in test_chat handler: {e}")
110+
print(f"Error in test_chat handler (defaults validation): {e}")
104111

105112
# Read the JSONL from our snapshot results and return as the response
106113
f = open("tests/snapshots/test_app/test_chat_stream_text/client0/result.jsonlines")

tests/test_app.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,44 @@ async def test_ask_rtr_text_semanticcaptions(client, snapshot):
296296
snapshot.assert_match(json.dumps(result, indent=4), "result.json")
297297

298298

299+
@pytest.mark.asyncio
300+
@pytest.mark.parametrize("route", ["/ask", "/chat"])
301+
async def test_send_text_sources_false(client, route):
302+
"""When send_text_sources is False, text sources should be omitted while citations remain."""
303+
response = await client.post(
304+
route,
305+
json={
306+
"messages": [{"content": "What is the capital of France?", "role": "user"}],
307+
"context": {"overrides": {"retrieval_mode": "text", "send_text_sources": False}},
308+
},
309+
)
310+
assert response.status_code == 200
311+
result = await response.get_json()
312+
data_points = result["context"]["data_points"]
313+
assert data_points["text"] == []
314+
assert "citations" in data_points and len(data_points["citations"]) > 0
315+
316+
317+
@pytest.mark.asyncio
318+
@pytest.mark.parametrize("route", ["/ask", "/chat"])
319+
async def test_search_image_embeddings_ignored_without_multimodal(client, route):
320+
"""Sending search_image_embeddings=True when USE_MULTIMODAL is false should be ignored and still succeed (200)."""
321+
response = await client.post(
322+
route,
323+
json={
324+
"messages": [{"content": "What is the capital of France?", "role": "user"}],
325+
"context": {"overrides": {"search_image_embeddings": True, "send_image_sources": True}},
326+
},
327+
)
328+
assert response.status_code == 200
329+
result = await response.get_json()
330+
# Ensure the thought step recorded search_image_embeddings as False
331+
search_thought = [
332+
thought for thought in result["context"]["thoughts"] if thought["title"].startswith("Search using")
333+
][0]
334+
assert search_thought["props"]["search_image_embeddings"] is False
335+
336+
299337
@pytest.mark.asyncio
300338
async def test_ask_rtr_hybrid(client, snapshot):
301339
response = await client.post(

0 commit comments

Comments
 (0)