-
Notifications
You must be signed in to change notification settings - Fork 5k
Adjust defaults for multimodal-related parameters to work for default deployments #2717
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -412,6 +412,7 @@ async def get_sources_content( | |||||||||||||
self, | ||||||||||||||
results: list[Document], | ||||||||||||||
use_semantic_captions: bool, | ||||||||||||||
include_text_sources: bool, | ||||||||||||||
download_image_sources: bool, | ||||||||||||||
user_oid: Optional[str] = None, | ||||||||||||||
) -> DataPoints: | ||||||||||||||
|
@@ -442,10 +443,13 @@ def nonewlines(s: str) -> str: | |||||||||||||
citations.append(citation) | ||||||||||||||
|
||||||||||||||
# If semantic captions are used, extract captions; otherwise, use content | ||||||||||||||
if use_semantic_captions and doc.captions: | ||||||||||||||
text_sources.append(f"{citation}: {nonewlines(' . '.join([cast(str, c.text) for c in doc.captions]))}") | ||||||||||||||
else: | ||||||||||||||
text_sources.append(f"{citation}: {nonewlines(doc.content or '')}") | ||||||||||||||
if include_text_sources: | ||||||||||||||
if use_semantic_captions and doc.captions: | ||||||||||||||
text_sources.append( | ||||||||||||||
f"{citation}: {nonewlines(' . '.join([cast(str, c.text) for c in doc.captions]))}" | ||||||||||||||
) | ||||||||||||||
else: | ||||||||||||||
text_sources.append(f"{citation}: {nonewlines(doc.content or '')}") | ||||||||||||||
|
||||||||||||||
if download_image_sources and hasattr(doc, "images") and doc.images: | ||||||||||||||
for img in doc.images: | ||||||||||||||
|
@@ -457,9 +461,7 @@ def nonewlines(s: str) -> str: | |||||||||||||
if url: | ||||||||||||||
image_sources.append(url) | ||||||||||||||
citations.append(self.get_image_citation(doc.sourcepage or "", img["url"])) | ||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The return statement always includes image_sources regardless of the download_image_sources parameter. When download_image_sources is False, image_sources should be empty or None to avoid returning potentially unintended image data.
Suggested change
Copilot uses AI. Check for mistakes. Positive FeedbackNegative Feedback There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. But it will be an empty list in that case? What's wrong with that? |
||||||||||||||
if download_image_sources: | ||||||||||||||
return DataPoints(text=text_sources, images=image_sources, citations=citations) | ||||||||||||||
return DataPoints(text=text_sources, citations=citations) | ||||||||||||||
return DataPoints(text=text_sources, images=image_sources, citations=citations) | ||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This makes image_sources [] when not including image sources, which is consistent with text_sources being [] when not including text sources. Snapshots already aligned with this, and it simplified code. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The return statement always includes
Suggested change
Copilot uses AI. Check for mistakes. Positive FeedbackNegative Feedback There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @copilot But it will be an empty list in that case, since image_sources is only populated when download_image_sources is True? |
||||||||||||||
|
||||||||||||||
def get_citation(self, sourcepage: Optional[str]): | ||||||||||||||
return sourcepage or "" | ||||||||||||||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -16,7 +16,6 @@ | |||||
|
||||||
from approaches.approach import ( | ||||||
Approach, | ||||||
DataPoints, | ||||||
ExtraInfo, | ||||||
ThoughtStep, | ||||||
) | ||||||
|
@@ -284,9 +283,11 @@ async def run_search_approach( | |||||
minimum_reranker_score = overrides.get("minimum_reranker_score", 0.0) | ||||||
search_index_filter = self.build_filter(overrides, auth_claims) | ||||||
send_text_sources = overrides.get("send_text_sources", True) | ||||||
send_image_sources = overrides.get("send_image_sources", True) | ||||||
send_image_sources = overrides.get("send_image_sources", self.multimodal_enabled) and self.multimodal_enabled | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. great :) |
||||||
search_text_embeddings = overrides.get("search_text_embeddings", True) | ||||||
search_image_embeddings = overrides.get("search_image_embeddings", self.multimodal_enabled) | ||||||
search_image_embeddings = ( | ||||||
overrides.get("search_image_embeddings", self.multimodal_enabled) and self.multimodal_enabled | ||||||
) | ||||||
|
||||||
original_user_query = messages[-1]["content"] | ||||||
if not isinstance(original_user_query, str): | ||||||
|
@@ -342,11 +343,12 @@ async def run_search_approach( | |||||
|
||||||
# STEP 3: Generate a contextual and content specific answer using the search results and chat history | ||||||
data_points = await self.get_sources_content( | ||||||
results, use_semantic_captions, download_image_sources=send_image_sources, user_oid=auth_claims.get("oid") | ||||||
results, | ||||||
use_semantic_captions, | ||||||
include_text_sources=send_text_sources, | ||||||
download_image_sources=send_image_sources, | ||||||
user_oid=auth_claims.get("oid"), | ||||||
) | ||||||
if not send_text_sources: | ||||||
data_points = DataPoints(text=[], images=data_points.images, citations=data_points.citations) | ||||||
|
||||||
extra_info = ExtraInfo( | ||||||
data_points, | ||||||
thoughts=[ | ||||||
|
@@ -396,7 +398,7 @@ async def run_agentic_retrieval_approach( | |||||
# 50 is the amount of documents that the reranker can process per query | ||||||
max_docs_for_reranker = max_subqueries * 50 | ||||||
send_text_sources = overrides.get("send_text_sources", True) | ||||||
send_image_sources = overrides.get("send_image_sources", True) | ||||||
send_image_sources = overrides.get("send_image_sources", False) and self.multimodal_enabled | ||||||
|
send_image_sources = overrides.get("send_image_sources", False) and self.multimodal_enabled | |
send_image_sources = overrides.get("send_image_sources", self.multimodal_enabled) and self.multimodal_enabled |
Copilot uses AI. Check for mistakes.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed in follow-up commit
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -37,13 +37,13 @@ export function Component(): JSX.Element { | |
const [useQueryRewriting, setUseQueryRewriting] = useState<boolean>(false); | ||
const [reasoningEffort, setReasoningEffort] = useState<string>(""); | ||
const [sendTextSources, setSendTextSources] = useState<boolean>(true); | ||
const [sendImageSources, setSendImageSources] = useState<boolean>(true); | ||
const [sendImageSources, setSendImageSources] = useState<boolean>(false); | ||
const [includeCategory, setIncludeCategory] = useState<string>(""); | ||
|
||
const [excludeCategory, setExcludeCategory] = useState<string>(""); | ||
const [question, setQuestion] = useState<string>(""); | ||
const [searchTextEmbeddings, setSearchTextEmbeddings] = useState<boolean>(true); | ||
const [searchImageEmbeddings, setSearchImageEmbeddings] = useState<boolean>(true); | ||
const [searchImageEmbeddings, setSearchImageEmbeddings] = useState<boolean>(false); | ||
const [useOidSecurityFilter, setUseOidSecurityFilter] = useState<boolean>(false); | ||
const [useGroupsSecurityFilter, setUseGroupsSecurityFilter] = useState<boolean>(false); | ||
const [showMultimodalOptions, setShowMultimodalOptions] = useState<boolean>(false); | ||
|
@@ -87,12 +87,11 @@ export function Component(): JSX.Element { | |
configApi().then(config => { | ||
setShowMultimodalOptions(config.showMultimodalOptions); | ||
if (config.showMultimodalOptions) { | ||
// Set default LLM inputs based on config override or fallback to Texts | ||
setSendTextSources(true); | ||
setSendImageSources(true); | ||
// Set default vector field settings | ||
setSearchTextEmbeddings(true); | ||
setSearchImageEmbeddings(true); | ||
// Initialize from server config so defaults follow deployment settings | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is how /chat was already doing it, fixed for consistency |
||
setSendTextSources(config.ragSendTextSources !== undefined ? config.ragSendTextSources : true); | ||
setSendImageSources(config.ragSendImageSources); | ||
setSearchTextEmbeddings(config.ragSearchTextEmbeddings); | ||
setSearchImageEmbeddings(config.ragSearchImageEmbeddings); | ||
} | ||
setUseSemanticRanker(config.showSemanticRankerOption); | ||
setShowSemanticRankerOption(config.showSemanticRankerOption); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The only change here is the very top line-