diff --git a/app/backend/approaches/approach.py b/app/backend/approaches/approach.py index 04a74a8818..2b0119b342 100644 --- a/app/backend/approaches/approach.py +++ b/app/backend/approaches/approach.py @@ -214,6 +214,45 @@ async def search( ) -> list[Document]: search_text = query_text if use_text_search else "" search_vectors = vectors if use_vector_search else [] + # Specialized filename: pattern support. If the query explicitly requests a filename, + # bypass normal text/vector search and filter directly on the sourcefile field. + if query_text and query_text.startswith("filename:"): + raw_filename = query_text[len("filename:") :].strip() + safe_filename = raw_filename.replace("'", "''") + filename_filter = f"sourcefile eq '{safe_filename}'" + effective_filter = f"{filter} and {filename_filter}" if filter else filename_filter + results = await self.search_client.search( + search_text="", # empty since we rely solely on filter + filter=effective_filter, + top=top, + ) + documents: list[Document] = [] + async for page in results.by_page(): + async for document in page: + documents.append( + Document( + id=document.get("id"), + content=document.get("content"), + category=document.get("category"), + sourcepage=document.get("sourcepage"), + sourcefile=document.get("sourcefile"), + oids=document.get("oids"), + groups=document.get("groups"), + captions=cast(list[QueryCaptionResult], document.get("@search.captions")), + score=document.get("@search.score"), + reranker_score=document.get("@search.reranker_score"), + images=document.get("images"), + ) + ) + qualified_documents = [ + doc + for doc in documents + if ( + (doc.score or 0) >= (minimum_search_score or 0) + and (doc.reranker_score or 0) >= (minimum_reranker_score or 0) + ) + ] + return qualified_documents if use_semantic_ranker: results = await self.search_client.search( search_text=search_text, diff --git a/app/backend/approaches/chatreadretrieveread.py b/app/backend/approaches/chatreadretrieveread.py index 
bc51dc107a..62f5437778 100644 --- a/app/backend/approaches/chatreadretrieveread.py +++ b/app/backend/approaches/chatreadretrieveread.py @@ -102,6 +102,11 @@ def get_search_query(self, chat_completion: ChatCompletion, user_query: str): search_query = arg.get("search_query", self.NO_RESPONSE) if search_query != self.NO_RESPONSE: return search_query + if function.name == "search_by_filename": + arg = json.loads(function.arguments) + filename = arg.get("filename", "") + if filename: + return f"filename:{filename}" elif query_text := response_message.content: if query_text.strip() != self.NO_RESPONSE: return query_text diff --git a/app/backend/approaches/prompts/chat_query_rewrite.prompty b/app/backend/approaches/prompts/chat_query_rewrite.prompty index 545b3f5b8c..c41490a2e6 100644 --- a/app/backend/approaches/prompts/chat_query_rewrite.prompty +++ b/app/backend/approaches/prompts/chat_query_rewrite.prompty @@ -23,17 +23,10 @@ Do not include any special characters like '+'. If the question is not in English, translate the question to English before generating the search query. If you cannot generate a search query, return just the number 0. -user: -How did crypto do last year? - -assistant: -Summarize Cryptocurrency Market Dynamics from last year - -user: -What are my health plans? +You have two callable tools available (they may be invoked via function calling): +1. search_sources: Use this to generate a general keyword style search query. +2. search_by_filename: Use this ONLY when the user clearly references a specific document by its exact filename (e.g., "PerksPlus.pdf"). Provide just the filename (without extra words). If the user asks to summarize or open a specific known file by name, prefer calling search_by_filename. If they mention concepts or partial names, fall back to generating a normal keyword query with search_sources. 
-assistant: -Show available health plans {% for message in past_messages %} {{ message["role"] }}: @@ -41,4 +34,4 @@ Show available health plans {% endfor %} user: -Generate search query for: {{ user_query }} +{{ user_query }} diff --git a/app/backend/approaches/prompts/chat_query_rewrite_tools.json b/app/backend/approaches/prompts/chat_query_rewrite_tools.json index cf1743483c..019f9cbfc4 100644 --- a/app/backend/approaches/prompts/chat_query_rewrite_tools.json +++ b/app/backend/approaches/prompts/chat_query_rewrite_tools.json @@ -1,17 +1,36 @@ -[{ - "type": "function", - "function": { - "name": "search_sources", - "description": "Retrieve sources from the Azure AI Search index", - "parameters": { - "type": "object", - "properties": { - "search_query": { - "type": "string", - "description": "Query string to retrieve documents from azure search eg: 'Health care plan'" - } - }, - "required": ["search_query"] +[ + { + "type": "function", + "function": { + "name": "search_sources", + "description": "Retrieve sources from the Azure AI Search index", + "parameters": { + "type": "object", + "properties": { + "search_query": { + "type": "string", + "description": "Query string to retrieve documents from azure search eg: 'Health care plan'" + } + }, + "required": ["search_query"] + } + } + }, + { + "type": "function", + "function": { + "name": "search_by_filename", + "description": "Retrieve a specific filename from the Azure AI Search index", + "parameters": { + "type": "object", + "properties": { + "filename": { + "type": "string", + "description": "The filename, like 'PerksPlus.pdf'" + } + }, + "required": ["filename"] + } } } -}] +]