Add an optional `fields` argument to the Elasticsearch vector store search
methods (async and sync) and forward it to the store search call and to
`_hits_to_docs_scores`. User-supplied `doc_builder` callables are still
invoked with the hit only.

diff --git a/libs/elasticsearch/langchain_elasticsearch/_async/vectorstores.py b/libs/elasticsearch/langchain_elasticsearch/_async/vectorstores.py
index 4dae8cd..cf3c4a1 100644
--- a/libs/elasticsearch/langchain_elasticsearch/_async/vectorstores.py
+++ b/libs/elasticsearch/langchain_elasticsearch/_async/vectorstores.py
@@ -389,6 +389,7 @@ async def asimilarity_search(
         k: int = 4,
         fetch_k: int = 50,
         filter: Optional[List[dict]] = None,
+        fields: Optional[List[str]] = None,
         *,
         custom_query: Optional[
             Callable[[Dict[str, Any], Optional[str]], Dict[str, Any]]
@@ -413,11 +414,13 @@ async def asimilarity_search(
             k=k,
             num_candidates=fetch_k,
             filter=filter,
+            fields=fields,
             custom_query=custom_query,
         )
         docs = _hits_to_docs_scores(
             hits=hits,
             content_field=self.query_field,
+            fields=fields,
             doc_builder=doc_builder,
         )
         return [doc for doc, _score in docs]
@@ -504,6 +507,7 @@ async def asimilarity_search_with_score(
         query: str,
         k: int = 4,
         filter: Optional[List[dict]] = None,
+        fields: Optional[List[str]] = None,
         *,
         custom_query: Optional[
             Callable[[Dict[str, Any], Optional[str]], Dict[str, Any]]
@@ -528,11 +532,16 @@ async def asimilarity_search_with_score(
             raise ValueError("scores are currently not supported in hybrid mode")
 
         hits = await self._store.search(
-            query=query, k=k, filter=filter, custom_query=custom_query
+            query=query,
+            k=k,
+            filter=filter,
+            fields=fields,
+            custom_query=custom_query,
         )
         return _hits_to_docs_scores(
             hits=hits,
             content_field=self.query_field,
+            fields=fields,
             doc_builder=doc_builder,
         )
 
@@ -541,6 +550,7 @@ async def asimilarity_search_by_vector_with_relevance_scores(
         embedding: List[float],
         k: int = 4,
         filter: Optional[List[Dict]] = None,
+        fields: Optional[List[str]] = None,
         *,
         custom_query: Optional[
             Callable[[Dict[str, Any], Optional[str]], Dict[str, Any]]
@@ -569,11 +579,13 @@ async def asimilarity_search_by_vector_with_relevance_scores(
             query_vector=embedding,
             k=k,
             filter=filter,
+            fields=fields,
             custom_query=custom_query,
         )
         return _hits_to_docs_scores(
             hits=hits,
             content_field=self.query_field,
+            fields=fields,
             doc_builder=doc_builder,
         )
 
diff --git a/libs/elasticsearch/langchain_elasticsearch/_sync/vectorstores.py b/libs/elasticsearch/langchain_elasticsearch/_sync/vectorstores.py
index 168b131..a7b5e9c 100644
--- a/libs/elasticsearch/langchain_elasticsearch/_sync/vectorstores.py
+++ b/libs/elasticsearch/langchain_elasticsearch/_sync/vectorstores.py
@@ -389,6 +389,7 @@ def similarity_search(
         k: int = 4,
         fetch_k: int = 50,
         filter: Optional[List[dict]] = None,
+        fields: Optional[List[str]] = None,
         *,
         custom_query: Optional[
             Callable[[Dict[str, Any], Optional[str]], Dict[str, Any]]
@@ -413,11 +414,13 @@ def similarity_search(
             k=k,
             num_candidates=fetch_k,
             filter=filter,
+            fields=fields,
             custom_query=custom_query,
         )
         docs = _hits_to_docs_scores(
             hits=hits,
             content_field=self.query_field,
+            fields=fields,
             doc_builder=doc_builder,
         )
         return [doc for doc, _score in docs]
@@ -504,6 +507,7 @@ def similarity_search_with_score(
         query: str,
         k: int = 4,
         filter: Optional[List[dict]] = None,
+        fields: Optional[List[str]] = None,
         *,
         custom_query: Optional[
             Callable[[Dict[str, Any], Optional[str]], Dict[str, Any]]
@@ -528,11 +532,16 @@ def similarity_search_with_score(
             raise ValueError("scores are currently not supported in hybrid mode")
 
         hits = self._store.search(
-            query=query, k=k, filter=filter, custom_query=custom_query
+            query=query,
+            k=k,
+            filter=filter,
+            fields=fields,
+            custom_query=custom_query,
         )
         return _hits_to_docs_scores(
             hits=hits,
             content_field=self.query_field,
+            fields=fields,
             doc_builder=doc_builder,
         )
 
@@ -541,6 +550,7 @@ def similarity_search_by_vector_with_relevance_scores(
         embedding: List[float],
         k: int = 4,
         filter: Optional[List[Dict]] = None,
+        fields: Optional[List[str]] = None,
         *,
         custom_query: Optional[
             Callable[[Dict[str, Any], Optional[str]], Dict[str, Any]]
@@ -569,11 +579,13 @@ def similarity_search_by_vector_with_relevance_scores(
             query_vector=embedding,
             k=k,
             filter=filter,
+            fields=fields,
             custom_query=custom_query,
         )
         return _hits_to_docs_scores(
             hits=hits,
             content_field=self.query_field,
+            fields=fields,
             doc_builder=doc_builder,
         )
 
diff --git a/libs/elasticsearch/langchain_elasticsearch/_utilities.py b/libs/elasticsearch/langchain_elasticsearch/_utilities.py
index 7fb37a6..b4bae91 100644
--- a/libs/elasticsearch/langchain_elasticsearch/_utilities.py
+++ b/libs/elasticsearch/langchain_elasticsearch/_utilities.py
@@ -70,10 +70,15 @@ def _hits_to_docs_scores(
     documents = []
 
     def default_doc_builder(hit: Dict) -> Document:
-        return Document(
+        doc = Document(
             page_content=hit["_source"].get(content_field, ""),
             metadata=hit["_source"].get("metadata", {}),
         )
+        # `fields` is read from the enclosing scope so that doc_builder keeps
+        # its one-argument call contract; `or []` tolerates fields=None.
+        for field_key in fields or []:
+            doc.metadata[field_key] = hit["_source"].get(field_key)
+        return doc
 
     doc_builder = doc_builder or default_doc_builder
 