
Commit 53df4df

Merge from main
2 parents: 78187e4 + 165dcac


11 files changed: +189 −113 lines changed


app/backend/approaches/approach.py

Lines changed: 9 additions & 7 deletions
@@ -352,6 +352,7 @@ async def get_sources_content(
         self,
         results: list[Document],
         use_semantic_captions: bool,
+        include_text_sources: bool,
         download_image_sources: bool,
         user_oid: Optional[str] = None,
     ) -> DataPoints:
@@ -382,10 +383,13 @@ def nonewlines(s: str) -> str:
             citations.append(citation)

             # If semantic captions are used, extract captions; otherwise, use content
-            if use_semantic_captions and doc.captions:
-                text_sources.append(f"{citation}: {nonewlines(' . '.join([cast(str, c.text) for c in doc.captions]))}")
-            else:
-                text_sources.append(f"{citation}: {nonewlines(doc.content or '')}")
+            if include_text_sources:
+                if use_semantic_captions and doc.captions:
+                    text_sources.append(
+                        f"{citation}: {nonewlines(' . '.join([cast(str, c.text) for c in doc.captions]))}"
+                    )
+                else:
+                    text_sources.append(f"{citation}: {nonewlines(doc.content or '')}")

             if download_image_sources and hasattr(doc, "images") and doc.images:
                 for img in doc.images:
@@ -397,9 +401,7 @@ def nonewlines(s: str) -> str:
                     if url:
                         image_sources.append(url)
                         citations.append(self.get_image_citation(doc.sourcepage or "", img["url"]))
-        if download_image_sources:
-            return DataPoints(text=text_sources, images=image_sources, citations=citations)
-        return DataPoints(text=text_sources, citations=citations)
+        return DataPoints(text=text_sources, images=image_sources, citations=citations)

     def get_citation(self, sourcepage: Optional[str]):
         return sourcepage or ""
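As a reading aid, here is a minimal sketch (simplified, hypothetical types, not the repository's exact code) of the behavior this hunk introduces: when include_text_sources is False, text sources are skipped while citations and image URLs are still collected, so a single return statement can always populate all three fields.

# Minimal sketch of the new include_text_sources behavior (hypothetical, simplified types).
from dataclasses import dataclass, field

@dataclass
class DataPoints:
    text: list[str] = field(default_factory=list)
    images: list[str] = field(default_factory=list)
    citations: list[str] = field(default_factory=list)

def get_sources_content(docs: list[dict], include_text_sources: bool, download_image_sources: bool) -> DataPoints:
    text_sources, image_sources, citations = [], [], []
    for doc in docs:
        citation = doc["sourcepage"]
        citations.append(citation)
        if include_text_sources:  # text is skipped entirely when the caller disables it
            text_sources.append(f"{citation}: {doc['content']}")
        if download_image_sources:
            image_sources.extend(doc.get("images", []))
    # Single return: images is simply empty when no image sources were requested
    return DataPoints(text=text_sources, images=image_sources, citations=citations)

print(get_sources_content([{"sourcepage": "a.pdf#page=1", "content": "hello"}], include_text_sources=False, download_image_sources=False))
# DataPoints(text=[], images=[], citations=['a.pdf#page=1'])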

app/backend/approaches/chatreadretrieveread.py

Lines changed: 11 additions & 11 deletions
@@ -16,7 +16,6 @@

 from approaches.approach import (
     Approach,
-    DataPoints,
     ExtraInfo,
     ThoughtStep,
 )
@@ -282,9 +281,11 @@ async def run_search_approach(
         minimum_reranker_score = overrides.get("minimum_reranker_score", 0.0)
         search_index_filter = self.build_filter(overrides, auth_claims)
         send_text_sources = overrides.get("send_text_sources", True)
-        send_image_sources = overrides.get("send_image_sources", True)
+        send_image_sources = overrides.get("send_image_sources", self.multimodal_enabled) and self.multimodal_enabled
         search_text_embeddings = overrides.get("search_text_embeddings", True)
-        search_image_embeddings = overrides.get("search_image_embeddings", self.multimodal_enabled)
+        search_image_embeddings = (
+            overrides.get("search_image_embeddings", self.multimodal_enabled) and self.multimodal_enabled
+        )

         original_user_query = messages[-1]["content"]
         if not isinstance(original_user_query, str):
@@ -340,11 +341,12 @@ async def run_search_approach(

         # STEP 3: Generate a contextual and content specific answer using the search results and chat history
         data_points = await self.get_sources_content(
-            results, use_semantic_captions, download_image_sources=send_image_sources, user_oid=auth_claims.get("oid")
+            results,
+            use_semantic_captions,
+            include_text_sources=send_text_sources,
+            download_image_sources=send_image_sources,
+            user_oid=auth_claims.get("oid"),
         )
-        if not send_text_sources:
-            data_points = DataPoints(text=[], images=data_points.images, citations=data_points.citations)
-
         extra_info = ExtraInfo(
             data_points,
             thoughts=[
@@ -390,7 +392,7 @@ async def run_agentic_retrieval_approach(
         top = overrides.get("top", 3)
         results_merge_strategy = overrides.get("results_merge_strategy", "interleaved")
         send_text_sources = overrides.get("send_text_sources", True)
-        send_image_sources = overrides.get("send_image_sources", True)
+        send_image_sources = overrides.get("send_image_sources", self.multimodal_enabled) and self.multimodal_enabled

         response, results = await self.run_agentic_retrieval(
             messages=messages,
@@ -404,12 +406,10 @@ async def run_agentic_retrieval_approach(
         data_points = await self.get_sources_content(
             results,
             use_semantic_captions=False,
+            include_text_sources=send_text_sources,
             download_image_sources=send_image_sources,
             user_oid=auth_claims.get("oid"),
         )
-        if not send_text_sources:
-            data_points = DataPoints(text=[], images=data_points.images, citations=data_points.citations)
-
         extra_info = ExtraInfo(
             data_points,
             thoughts=[
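The `and self.multimodal_enabled` guard means a request override can disable image sources or image embeddings but can never enable them when the deployment has multimodal support turned off. A small sketch of that pattern (standalone function for illustration, not the class method itself):

# Sketch of the override guard: client overrides can narrow, but not widen, what the deployment allows.
def resolve_image_flags(overrides: dict, multimodal_enabled: bool) -> tuple[bool, bool]:
    send_image_sources = overrides.get("send_image_sources", multimodal_enabled) and multimodal_enabled
    search_image_embeddings = (
        overrides.get("search_image_embeddings", multimodal_enabled) and multimodal_enabled
    )
    return send_image_sources, search_image_embeddings

# Even if a client sends True, both flags stay False when multimodal is disabled server-side:
print(resolve_image_flags({"send_image_sources": True, "search_image_embeddings": True}, multimodal_enabled=False))
# (False, False)
print(resolve_image_flags({}, multimodal_enabled=True))
# (True, True)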

app/backend/approaches/retrievethenread.py

Lines changed: 13 additions & 4 deletions
@@ -157,9 +157,12 @@ async def run_search_approach(
         minimum_reranker_score = overrides.get("minimum_reranker_score", 0.0)
         filter = self.build_filter(overrides, auth_claims)
         q = str(messages[-1]["content"])
-        send_image_sources = overrides.get("send_image_sources", True)
+        send_text_sources = overrides.get("send_text_sources", True)
+        send_image_sources = overrides.get("send_image_sources", self.multimodal_enabled) and self.multimodal_enabled
         search_text_embeddings = overrides.get("search_text_embeddings", True)
-        search_image_embeddings = overrides.get("search_image_embeddings", self.multimodal_enabled)
+        search_image_embeddings = (
+            overrides.get("search_image_embeddings", self.multimodal_enabled) and self.multimodal_enabled
+        )

         vectors: list[VectorQuery] = []
         if use_vector_search:
@@ -183,7 +186,11 @@
         )

         data_points = await self.get_sources_content(
-            results, use_semantic_captions, download_image_sources=send_image_sources, user_oid=auth_claims.get("oid")
+            results,
+            use_semantic_captions,
+            include_text_sources=send_text_sources,
+            download_image_sources=send_image_sources,
+            user_oid=auth_claims.get("oid"),
         )

         return ExtraInfo(
@@ -220,7 +227,8 @@ async def run_agentic_retrieval_approach(
         search_index_filter = self.build_filter(overrides, auth_claims)
         top = overrides.get("top", 3)
         results_merge_strategy = overrides.get("results_merge_strategy", "interleaved")
-        send_image_sources = overrides.get("send_image_sources", True)
+        send_text_sources = overrides.get("send_text_sources", True)
+        send_image_sources = overrides.get("send_image_sources", self.multimodal_enabled) and self.multimodal_enabled

         response, results = await self.run_agentic_retrieval(
             messages,
@@ -234,6 +242,7 @@
         data_points = await self.get_sources_content(
             results,
             use_semantic_captions=False,
+            include_text_sources=send_text_sources,
             download_image_sources=send_image_sources,
             user_oid=auth_claims.get("oid"),
         )

app/backend/prepdocslib/searchmanager.py

Lines changed: 18 additions & 20 deletions
@@ -491,35 +491,33 @@ async def update_content(self, sections: list[Section], url: Optional[str] = None

         async with self.search_info.create_search_client() as search_client:
             for batch_index, batch in enumerate(section_batches):
-                image_fields = {}
-                if self.search_images:
-                    image_fields = {
-                        "images": [
-                            {
-                                "url": image.url,
-                                "description": image.description,
-                                "boundingbox": image.bbox,
-                                "embedding": image.embedding,
-                            }
-                            for section in batch
-                            for image in section.chunk.images
-                        ]
-                    }
-                documents = [
-                    {
+                documents = []
+                for section_index, section in enumerate(batch):
+                    image_fields = {}
+                    if self.search_images:
+                        image_fields = {
+                            "images": [
+                                {
+                                    "url": image.url,
+                                    "description": image.description,
+                                    "boundingbox": image.bbox,
+                                    "embedding": image.embedding,
+                                }
+                                for image in section.chunk.images
+                            ]
+                        }
+                    document = {
                         "id": f"{section.content.filename_to_id()}-page-{section_index + batch_index * MAX_BATCH_SIZE}",
                         "content": section.chunk.text,
                         "category": section.category,
                         "sourcepage": BlobManager.sourcepage_from_file_page(
-                            filename=section.content.filename(),
-                            page=section.chunk.page_num,
+                            filename=section.content.filename(), page=section.chunk.page_num
                         ),
                         "sourcefile": section.content.filename(),
                         **image_fields,
                         **section.content.acls,
                     }
-                    for section_index, section in enumerate(batch)
-                ]
+                    documents.append(document)
                 if url:
                     for document in documents:
                         document["storageUrl"] = url
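This refactor matters because the old batch-level comprehension gathered images from every section in the batch and attached the same list to each document; building image_fields inside the per-section loop gives each document only its own chunk's images. A simplified sketch (toy data, not the indexer's real Section objects) of the difference:

# Sketch of why the per-section loop matters (simplified section/image shapes).
batch = [
    {"id": "doc1", "images": ["img-1a", "img-1b"]},
    {"id": "doc2", "images": ["img-2a"]},
]

# Old shape: one comprehension over the whole batch, shared by every document
shared_images = [img for section in batch for img in section["images"]]
old_documents = [{"id": s["id"], "images": shared_images} for s in batch]

# New shape: images gathered per section, so each document carries only its own
new_documents = []
for s in batch:
    new_documents.append({"id": s["id"], "images": list(s["images"])})

print(old_documents[1]["images"])  # ['img-1a', 'img-1b', 'img-2a'] - doc2 picked up doc1's images
print(new_documents[1]["images"])  # ['img-2a']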

app/frontend/src/pages/ask/Ask.tsx

Lines changed: 7 additions & 8 deletions
@@ -36,13 +36,13 @@ export function Component(): JSX.Element {
     const [useQueryRewriting, setUseQueryRewriting] = useState<boolean>(false);
     const [reasoningEffort, setReasoningEffort] = useState<string>("");
     const [sendTextSources, setSendTextSources] = useState<boolean>(true);
-    const [sendImageSources, setSendImageSources] = useState<boolean>(true);
+    const [sendImageSources, setSendImageSources] = useState<boolean>(false);
     const [includeCategory, setIncludeCategory] = useState<string>("");

     const [excludeCategory, setExcludeCategory] = useState<string>("");
     const [question, setQuestion] = useState<string>("");
     const [searchTextEmbeddings, setSearchTextEmbeddings] = useState<boolean>(true);
-    const [searchImageEmbeddings, setSearchImageEmbeddings] = useState<boolean>(true);
+    const [searchImageEmbeddings, setSearchImageEmbeddings] = useState<boolean>(false);
     const [useOidSecurityFilter, setUseOidSecurityFilter] = useState<boolean>(false);
     const [useGroupsSecurityFilter, setUseGroupsSecurityFilter] = useState<boolean>(false);
     const [showMultimodalOptions, setShowMultimodalOptions] = useState<boolean>(false);
@@ -86,12 +86,11 @@ export function Component(): JSX.Element {
         configApi().then(config => {
             setShowMultimodalOptions(config.showMultimodalOptions);
             if (config.showMultimodalOptions) {
-                // Set default LLM inputs based on config override or fallback to Texts
-                setSendTextSources(true);
-                setSendImageSources(true);
-                // Set default vector field settings
-                setSearchTextEmbeddings(true);
-                setSearchImageEmbeddings(true);
+                // Initialize from server config so defaults follow deployment settings
+                setSendTextSources(config.ragSendTextSources !== undefined ? config.ragSendTextSources : true);
+                setSendImageSources(config.ragSendImageSources);
+                setSearchTextEmbeddings(config.ragSearchTextEmbeddings);
+                setSearchImageEmbeddings(config.ragSearchImageEmbeddings);
             }
             setUseSemanticRanker(config.showSemanticRankerOption);
             setShowSemanticRankerOption(config.showSemanticRankerOption);
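The setters above read several keys from the configApi() response. A hypothetical sketch of the payload shape they expect, with key names taken from this diff and values purely illustrative:

# Hypothetical /config payload shape consumed by the setters above
# (key names come from the frontend code in this diff; values are illustrative only).
example_config = {
    "showMultimodalOptions": True,
    "ragSendTextSources": True,       # the UI falls back to True when this key is undefined
    "ragSendImageSources": False,
    "ragSearchTextEmbeddings": True,
    "ragSearchImageEmbeddings": False,
}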

app/frontend/src/pages/chat/Chat.tsx

Lines changed: 3 additions & 3 deletions
@@ -47,11 +47,11 @@ const Chat = () => {
     const [excludeCategory, setExcludeCategory] = useState<string>("");
     const [useSuggestFollowupQuestions, setUseSuggestFollowupQuestions] = useState<boolean>(false);
     const [searchTextEmbeddings, setSearchTextEmbeddings] = useState<boolean>(true);
-    const [searchImageEmbeddings, setSearchImageEmbeddings] = useState<boolean>(true);
+    const [searchImageEmbeddings, setSearchImageEmbeddings] = useState<boolean>(false);
     const [useOidSecurityFilter, setUseOidSecurityFilter] = useState<boolean>(false);
     const [useGroupsSecurityFilter, setUseGroupsSecurityFilter] = useState<boolean>(false);
     const [sendTextSources, setSendTextSources] = useState<boolean>(true);
-    const [sendImageSources, setSendImageSources] = useState<boolean>(true);
+    const [sendImageSources, setSendImageSources] = useState<boolean>(false);

     const lastQuestionRef = useRef<string>("");
     const chatMessageStreamEnd = useRef<HTMLDivElement | null>(null);
@@ -98,7 +98,7 @@ const Chat = () => {
         configApi().then(config => {
             setShowMultimodalOptions(config.showMultimodalOptions);
             if (config.showMultimodalOptions) {
-                // Always have at least one source enabled, default to text if none specified
+                // Initialize from server config so defaults match deployment settings
                 setSendTextSources(config.ragSendTextSources !== undefined ? config.ragSendTextSources : true);
                 setSendImageSources(config.ragSendImageSources);
                 setSearchTextEmbeddings(config.ragSearchTextEmbeddings);

tests/e2e.py

Lines changed: 12 additions & 5 deletions
@@ -93,14 +93,21 @@ def test_chat(sized_page: Page, live_server_url: str):

     # Set up a mock route to the /chat endpoint with streaming results
     def handle(route: Route):
-        # Assert that session_state is specified in the request (None for now)
         try:
             post_data = route.request.post_data_json
-            if post_data and "session_state" in post_data:
-                session_state = post_data["session_state"]
-                assert session_state is None
+            # Assert that session_state is specified (None initially)
+            if "session_state" in post_data:
+                assert post_data["session_state"] is None
+            overrides = post_data["context"]["overrides"]
+            # Assert that the default overrides are correct
+            assert overrides.get("send_text_sources") is True
+            assert overrides.get("send_image_sources") is False
+            assert overrides.get("search_text_embeddings") is True
+            assert overrides.get("search_image_embeddings") is False
+            # retrieval_mode may be explicitly "hybrid" or omitted (interpreted as hybrid)
+            assert overrides.get("retrieval_mode") in ["hybrid", None]
         except Exception as e:
-            print(f"Error in test_chat handler: {e}")
+            print(f"Error in test_chat handler (defaults validation): {e}")

     # Read the JSONL from our snapshot results and return as the response
     f = open("tests/snapshots/test_app/test_chat_stream_text/client0/result.jsonlines")
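After these assertions, the handler returns the prerecorded snapshot as the streaming response. A sketch of how the mock route might be registered and fulfilled with Playwright's Python API (the route pattern and response headers here are assumptions, not shown in this diff):

# Sketch of fulfilling the mocked chat route with the snapshot JSONL
# (route pattern and headers are assumptions for illustration, not taken from this diff).
from playwright.sync_api import Page, Route

def register_chat_mock(page: Page):
    def handle(route: Route):
        with open("tests/snapshots/test_app/test_chat_stream_text/client0/result.jsonlines") as f:
            jsonl = f.read()
        # Return the prerecorded streaming response as newline-delimited JSON
        route.fulfill(status=200, body=jsonl, headers={"Content-Type": "application/x-ndjson"})

    page.route("*/**/chat/stream", handle)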
