Skip to content

Commit 20f30c2

Browse files
committed
Fix missing id in Document
1 parent 0eef6fe commit 20f30c2

File tree

3 files changed

+55
-20
lines changed

3 files changed

+55
-20
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,5 @@
1+
* Fix missing id in Document
2+
13
## 0.0.10 ##
24
* Make batch embeddings flag true by default
35
* Fix custom column name error

langchain_ydb/vectorstores.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -675,6 +675,7 @@ def similarity_search_by_vector(
675675
Document(
676676
page_content=row["document"],
677677
metadata=json.loads(row["metadata"]),
678+
id=row["id"],
678679
)
679680
for row in res
680681
]
@@ -708,6 +709,7 @@ def similarity_search_with_score(
708709
Document(
709710
page_content=row["document"],
710711
metadata=json.loads(row["metadata"]),
712+
id=row["id"],
711713
),
712714
row["score"],
713715
)
@@ -747,6 +749,7 @@ def similarity_search_by_vector_with_score(
747749
Document(
748750
page_content=row["document"],
749751
metadata=json.loads(row["metadata"]),
752+
id=row["id"],
750753
),
751754
row["score"],
752755
)

tests/test_vectorestore.py

Lines changed: 50 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,15 @@
88
from .fake_embeddings import ConsistentFakeEmbeddings
99

1010

11+
def document_eq(doc1: Document, doc2: Document, check_id: bool = False) -> bool:
12+
"""Compare two documents, optionally checking the id."""
13+
return (
14+
doc1.page_content == doc2.page_content
15+
and doc1.metadata == doc2.metadata
16+
and (not check_id or doc1.id == doc2.id)
17+
)
18+
19+
1120
@pytest.mark.parametrize("vector_pass_as_bytes", [True, False])
1221
def test_ydb(vector_pass_as_bytes: bool) -> None:
1322
"""Test end to end construction and search."""
@@ -19,7 +28,7 @@ def test_ydb(vector_pass_as_bytes: bool) -> None:
1928
config.table = "test_ydb"
2029
docsearch = YDB.from_texts(texts, ConsistentFakeEmbeddings(), config=config)
2130
output = docsearch.similarity_search("foo", k=1)
22-
assert output == [Document(page_content="foo")]
31+
assert document_eq(output[0], Document(page_content="foo"))
2332
docsearch.drop()
2433

2534

@@ -37,7 +46,7 @@ async def test_ydb_async(vector_pass_as_bytes: bool) -> None:
3746
texts=texts, embedding=ConsistentFakeEmbeddings(), config=config
3847
)
3948
output = await docsearch.asimilarity_search("foo", k=1)
40-
assert output == [Document(page_content="foo")]
49+
assert document_eq(output[0], Document(page_content="foo"))
4150
docsearch.drop()
4251

4352

@@ -56,7 +65,7 @@ def test_ydb_with_custom_column_names() -> None:
5665
config.table = "test_ydb_custom_col_names"
5766
docsearch = YDB.from_texts(texts, ConsistentFakeEmbeddings(), config=config)
5867
output = docsearch.similarity_search("bar", k=1)
59-
assert output == [Document(page_content="bar")]
68+
assert document_eq(output[0], Document(page_content="bar"))
6069
docsearch.drop()
6170

6271

@@ -87,7 +96,7 @@ def test_create_ydb_with_metadatas() -> None:
8796
metadatas=metadatas,
8897
)
8998
output = docsearch.similarity_search("foo", k=1)
90-
assert output == [Document(page_content="foo", metadata={"page": "0"})]
99+
assert document_eq(output[0], Document(page_content="foo", metadata={"page": "0"}))
91100
docsearch.drop()
92101

93102

@@ -120,7 +129,7 @@ def test_create_ydb_with_empty_metadatas() -> None:
120129
metadatas=metadatas,
121130
)
122131
output = docsearch.similarity_search("foo", k=1)
123-
assert output == [Document(page_content="foo")]
132+
assert document_eq(output[0], Document(page_content="foo"))
124133
docsearch.drop()
125134

126135

@@ -166,6 +175,27 @@ def test_delete() -> None:
166175
docsearch.drop()
167176

168177

178+
def test_id_persistence() -> None:
179+
"""Test id persistence."""
180+
texts = ["foo", "bar", "baz"]
181+
ids = ["1", "2", "3"]
182+
config = YDBSettings(drop_existing_table=True)
183+
config.table = "test_id_persistence"
184+
docsearch = YDB.from_texts(
185+
texts=texts,
186+
embedding=ConsistentFakeEmbeddings(),
187+
config=config,
188+
ids=ids,
189+
)
190+
output = docsearch.similarity_search("foo", k=1)
191+
assert document_eq(output[0], Document(page_content="foo", id="1"), check_id=True)
192+
output = docsearch.similarity_search("bar", k=1)
193+
assert document_eq(output[0], Document(page_content="bar", id="2"), check_id=True)
194+
output = docsearch.similarity_search("baz", k=1)
195+
assert document_eq(output[0], Document(page_content="baz", id="3"), check_id=True)
196+
docsearch.drop()
197+
198+
169199
def test_delete_with_ids() -> None:
170200
"""Test delete with specified ids."""
171201
texts = ["foo", "bar", "baz"]
@@ -181,7 +211,7 @@ def test_delete_with_ids() -> None:
181211
docsearch.delete(ids[:2])
182212

183213
output = docsearch.similarity_search("sometext", k=1)
184-
assert output == [Document(page_content="baz")]
214+
assert document_eq(output[0], Document(page_content="baz"))
185215

186216
docsearch.drop()
187217

@@ -200,13 +230,13 @@ def test_search_with_filter() -> None:
200230
)
201231

202232
output = docsearch.similarity_search("sometext", filter={"page": "0"}, k=1)
203-
assert output == [Document(page_content="foo", metadata={"page": "0"})]
233+
assert document_eq(output[0], Document(page_content="foo", metadata={"page": "0"}))
204234

205235
output = docsearch.similarity_search("sometext", filter={"page": "1"}, k=1)
206-
assert output == [Document(page_content="bar", metadata={"page": "1"})]
236+
assert document_eq(output[0], Document(page_content="bar", metadata={"page": "1"}))
207237

208238
output = docsearch.similarity_search("sometext", filter={"page": "2"}, k=1)
209-
assert output == [Document(page_content="baz", metadata={"page": "2"})]
239+
assert document_eq(output[0], Document(page_content="baz", metadata={"page": "2"}))
210240

211241
docsearch.drop()
212242

@@ -237,7 +267,7 @@ def test_search_with_complex_filter() -> None:
237267
output = docsearch.similarity_search(
238268
"sometext", filter={"page": "1", "index": "2"}, k=1
239269
)
240-
assert output == []
270+
assert len(output) == 0
241271

242272
docsearch.drop()
243273

@@ -267,7 +297,7 @@ def test_different_search_strategies(strategy: YDBSearchStrategy) -> None:
267297
)
268298

269299
output = docsearch.similarity_search("foo", k=1)
270-
assert output == [Document(page_content="foo")]
300+
assert document_eq(output[0], Document(page_content="foo"))
271301

272302
docsearch.drop()
273303

@@ -279,7 +309,7 @@ def test_search_with_score() -> None:
279309
config.table = "test_ydb"
280310
docsearch = YDB.from_texts(texts, ConsistentFakeEmbeddings(), config=config)
281311
output = docsearch.similarity_search_with_score("foo", k=1)
282-
assert output[0][0] == Document(page_content="foo")
312+
assert document_eq(output[0][0], Document(page_content="foo"))
283313
docsearch.drop()
284314

285315

@@ -292,13 +322,13 @@ def test_ydb_with_persistence() -> None:
292322
embeddings = ConsistentFakeEmbeddings()
293323
docsearch = YDB.from_texts(texts, embeddings, config=config)
294324
output = docsearch.similarity_search("foo", k=1)
295-
assert output == [Document(page_content="foo")]
325+
assert document_eq(output[0], Document(page_content="foo"))
296326

297327
config = YDBSettings()
298328
config.table = "test_ydb_with_persistence"
299329
docsearch = YDB(embedding=embeddings, config=config)
300330
output = docsearch.similarity_search("foo", k=1)
301-
assert output == [Document(page_content="foo")]
331+
assert document_eq(output[0], Document(page_content="foo"))
302332

303333
docsearch.drop()
304334

@@ -313,7 +343,7 @@ def test_search_from_retriever_interface() -> None:
313343
retriever = docsearch.as_retriever(search_kwargs={"k": 1})
314344

315345
output = retriever.invoke("foo")
316-
assert output == [Document(page_content="foo")]
346+
assert document_eq(output[0], Document(page_content="foo"))
317347
docsearch.drop()
318348

319349

@@ -333,7 +363,7 @@ def test_search_from_retriever_interface_with_filter() -> None:
333363
retriever = docsearch.as_retriever(search_kwargs={"k": 1})
334364

335365
output = retriever.invoke("sometext", filter={"page": "1"})
336-
assert output == [Document(page_content="bar", metadata={"page": "1"})]
366+
assert document_eq(output[0], Document(page_content="bar", metadata={"page": "1"}))
337367

338368
docsearch.drop()
339369

@@ -469,7 +499,7 @@ def test_basic_vector_index(strategy: YDBSearchStrategy) -> None:
469499
)
470500

471501
output = docsearch.similarity_search("foo", k=1)
472-
assert output == [Document(page_content="foo")]
502+
assert document_eq(output[0], Document(page_content="foo"))
473503

474504
docsearch.drop()
475505

@@ -489,14 +519,14 @@ def test_reindex() -> None:
489519
)
490520

491521
output = docsearch.similarity_search("foo", k=1)
492-
assert output == [Document(page_content="foo")]
522+
assert document_eq(output[0], Document(page_content="foo"))
493523

494524
docsearch.add_texts(["qwe", "asd", "zxc"])
495525

496526
output = docsearch.similarity_search("foo", k=1)
497-
assert output == [Document(page_content="foo")]
527+
assert document_eq(output[0], Document(page_content="foo"))
498528

499529
output = docsearch.similarity_search("zxc", k=1)
500-
assert output == [Document(page_content="zxc")]
530+
assert document_eq(output[0], Document(page_content="zxc"))
501531

502532
docsearch.drop()

0 commit comments

Comments (0)