Skip to content

Commit c53bfc6

Browse files
committed
INPYTHON-764 Make usage of text_key more obvious
1 parent caace80 commit c53bfc6

File tree

3 files changed

+73 -3 lines changed

3 files changed

+73 -3 lines changed

libs/langchain-mongodb/langchain_mongodb/vectorstores.py

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
from __future__ import annotations
22

33
import logging
4+
import warnings
45
from typing import (
56
Any,
67
Callable,
@@ -108,6 +109,7 @@ class MongoDBAtlasVectorSearch(VectorStore):
108109
namespace="db_name.collection_name",
109110
embedding=OpenAIEmbeddings(),
110111
index_name="vector_index",
112+
text_key="text_field"
111113
)
112114
113115
Add Documents:
@@ -807,15 +809,27 @@ def _similarity_search_with_score(
807809
docs = []
808810

809811
# Format
812+
missing_text_key = False
810813
for res in cursor:
811814
if self._text_key not in res:
815+
missing_text_key = True
812816
continue
813817
text = res.pop(self._text_key)
814818
score = res.pop("score")
815819
make_serializable(res)
816820
docs.append(
817821
(Document(page_content=text, metadata=res, id=res["_id"]), score)
818822
)
823+
824+
if (
825+
missing_text_key
826+
and not len(docs)
827+
and self._collection.count_documents({}) > 0
828+
):
829+
warnings.warn(
830+
f"Could not find any documents with the text_key: '{self._text_key}'",
831+
stacklevel=2,
832+
)
819833
return docs
820834

821835
def create_vector_search_index(

libs/langchain-mongodb/pyproject.toml

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -45,13 +45,20 @@ dev = [
4545
]
4646

4747
[tool.pytest.ini_options]
48+
minversion = "7"
4849
addopts = "--snapshot-warn-unused --strict-markers --strict-config --durations=5"
50+
log_cli_level = "INFO"
51+
faulthandler_timeout = 1500
52+
xfail_strict = true
4953
markers = [
5054
"requires: mark tests as requiring a specific library",
5155
"compile: mark placeholder test used to compile integration tests without running them",
5256
]
5357
asyncio_mode = "auto"
5458
asyncio_default_fixture_loop_scope = "function"
59+
filterwarnings = [
60+
"error"
61+
]
5562

5663
[tool.mypy]
5764
disallow_untyped_defs = true

libs/langchain-mongodb/tests/integration_tests/test_vectorstore_add_delete.py

Lines changed: 52 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,9 @@
1212
from pymongo.collection import Collection
1313

1414
from langchain_mongodb import MongoDBAtlasVectorSearch
15+
from langchain_mongodb.index import (
16+
create_vector_search_index,
17+
)
1518
from langchain_mongodb.utils import oid_to_str
1619

1720
from ..utils import DB_NAME, ConsistentFakeEmbeddings, PatchedMongoDBAtlasVectorSearch
@@ -23,9 +26,24 @@
2326

2427
@pytest.fixture
2528
def collection(client: MongoClient) -> Collection:
26-
clx = client[DB_NAME][COLLECTION_NAME]
27-
clx.delete_many({})
28-
return clx
29+
if COLLECTION_NAME not in client[DB_NAME].list_collection_names():
30+
clxn = client[DB_NAME].create_collection(COLLECTION_NAME)
31+
else:
32+
clxn = client[DB_NAME][COLLECTION_NAME]
33+
34+
clxn.delete_many({})
35+
36+
if not any([INDEX_NAME == ix["name"] for ix in clxn.list_search_indexes()]):
37+
create_vector_search_index(
38+
collection=clxn,
39+
index_name=INDEX_NAME,
40+
dimensions=DIMENSIONS,
41+
path="embedding",
42+
similarity="cosine",
43+
wait_until_complete=60,
44+
)
45+
46+
return clxn
2947

3048

3149
@pytest.fixture(scope="module")
@@ -206,3 +224,34 @@ def test_add_documents(
206224
result_ids = vectorstore.add_documents(docs, ids, batch_size=batch_size)
207225
assert len(result_ids) == n_docs
208226
assert set(ids) == set(collection.distinct("_id"))
227+
228+
229+
def test_warning_for_misaligned_text_key(
230+
collection: Collection, trivial_embeddings: Embeddings
231+
):
232+
collection.delete_many({})
233+
vectorstore = PatchedMongoDBAtlasVectorSearch(
234+
collection=collection,
235+
embedding=trivial_embeddings,
236+
index_name=INDEX_NAME,
237+
)
238+
239+
# If the collection is empty, no warning is raised.
240+
assert len(vectorstore.similarity_search("foo")) == 0
241+
242+
# Insert a document that doesn't match the text key, and look for the warning.
243+
collection.delete_many({})
244+
vectorstore.add_texts(["foo"], ids=["1"])
245+
# Update the doc to change the text field to a different one.
246+
collection.update_one(
247+
{"_id": "1"},
248+
{"$unset": {vectorstore._text_key: ""}, "$set": {"fake_text_key": "foo"}},
249+
)
250+
with pytest.warns(UserWarning) as record:
251+
vectorstore.similarity_search("foo")
252+
253+
assert len(record) == 1
254+
assert (
255+
record[0].message.args[0]
256+
== "Could not find any documents with the text_key: 'text'"
257+
)

0 commit comments

Comments
 (0)