88from .fake_embeddings import ConsistentFakeEmbeddings
99
1010
def document_eq(doc1: Document, doc2: Document, check_id: bool = False) -> bool:
    """Tell whether two documents match on content and metadata.

    The ``id`` attributes take part in the comparison only when
    ``check_id`` is true; by default they are ignored.
    """
    if not (doc1.page_content == doc2.page_content):
        return False
    if not (doc1.metadata == doc2.metadata):
        return False
    if check_id:
        return doc1.id == doc2.id
    return True
18+
19+
1120@pytest .mark .parametrize ("vector_pass_as_bytes" , [True , False ])
1221def test_ydb (vector_pass_as_bytes : bool ) -> None :
1322 """Test end to end construction and search."""
@@ -19,7 +28,7 @@ def test_ydb(vector_pass_as_bytes: bool) -> None:
1928 config .table = "test_ydb"
2029 docsearch = YDB .from_texts (texts , ConsistentFakeEmbeddings (), config = config )
2130 output = docsearch .similarity_search ("foo" , k = 1 )
22- assert output == [ Document (page_content = "foo" )]
31+ assert document_eq ( output [ 0 ], Document (page_content = "foo" ))
2332 docsearch .drop ()
2433
2534
@@ -37,7 +46,7 @@ async def test_ydb_async(vector_pass_as_bytes: bool) -> None:
3746 texts = texts , embedding = ConsistentFakeEmbeddings (), config = config
3847 )
3948 output = await docsearch .asimilarity_search ("foo" , k = 1 )
40- assert output == [ Document (page_content = "foo" )]
49+ assert document_eq ( output [ 0 ], Document (page_content = "foo" ))
4150 docsearch .drop ()
4251
4352
@@ -56,7 +65,7 @@ def test_ydb_with_custom_column_names() -> None:
5665 config .table = "test_ydb_custom_col_names"
5766 docsearch = YDB .from_texts (texts , ConsistentFakeEmbeddings (), config = config )
5867 output = docsearch .similarity_search ("bar" , k = 1 )
59- assert output == [ Document (page_content = "bar" )]
68+ assert document_eq ( output [ 0 ], Document (page_content = "bar" ))
6069 docsearch .drop ()
6170
6271
@@ -87,7 +96,7 @@ def test_create_ydb_with_metadatas() -> None:
8796 metadatas = metadatas ,
8897 )
8998 output = docsearch .similarity_search ("foo" , k = 1 )
90- assert output == [ Document (page_content = "foo" , metadata = {"page" : "0" })]
99+ assert document_eq ( output [ 0 ], Document (page_content = "foo" , metadata = {"page" : "0" }))
91100 docsearch .drop ()
92101
93102
@@ -120,7 +129,7 @@ def test_create_ydb_with_empty_metadatas() -> None:
120129 metadatas = metadatas ,
121130 )
122131 output = docsearch .similarity_search ("foo" , k = 1 )
123- assert output == [ Document (page_content = "foo" )]
132+ assert document_eq ( output [ 0 ], Document (page_content = "foo" ))
124133 docsearch .drop ()
125134
126135
@@ -166,6 +175,27 @@ def test_delete() -> None:
166175 docsearch .drop ()
167176
168177
def test_id_persistence() -> None:
    """Test that ids supplied at insertion come back on retrieved documents.

    Three texts are stored with explicit ids. Each text is then looked up
    with ``similarity_search`` and the single hit is compared, id included,
    against the expected document.
    """
    texts = ["foo", "bar", "baz"]
    ids = ["1", "2", "3"]
    config = YDBSettings(drop_existing_table=True)
    config.table = "test_id_persistence"
    docsearch = YDB.from_texts(
        texts=texts,
        embedding=ConsistentFakeEmbeddings(),
        config=config,
        ids=ids,
    )
    try:
        for text, doc_id in zip(texts, ids):
            output = docsearch.similarity_search(text, k=1)
            # Fail with a readable message instead of an IndexError when the
            # search returns nothing (or more than the single hit requested).
            assert len(output) == 1, (
                f"expected one hit for {text!r}, got {len(output)}"
            )
            assert document_eq(
                output[0],
                Document(page_content=text, id=doc_id),
                check_id=True,
            )
    finally:
        # Drop the table even when an assertion above fails, so a failing
        # run does not leave it behind.
        docsearch.drop()
197+
198+
169199def test_delete_with_ids () -> None :
170200 """Test delete with specified ids."""
171201 texts = ["foo" , "bar" , "baz" ]
@@ -181,7 +211,7 @@ def test_delete_with_ids() -> None:
181211 docsearch .delete (ids [:2 ])
182212
183213 output = docsearch .similarity_search ("sometext" , k = 1 )
184- assert output == [ Document (page_content = "baz" )]
214+ assert document_eq ( output [ 0 ], Document (page_content = "baz" ))
185215
186216 docsearch .drop ()
187217
@@ -200,13 +230,13 @@ def test_search_with_filter() -> None:
200230 )
201231
202232 output = docsearch .similarity_search ("sometext" , filter = {"page" : "0" }, k = 1 )
203- assert output == [ Document (page_content = "foo" , metadata = {"page" : "0" })]
233+ assert document_eq ( output [ 0 ], Document (page_content = "foo" , metadata = {"page" : "0" }))
204234
205235 output = docsearch .similarity_search ("sometext" , filter = {"page" : "1" }, k = 1 )
206- assert output == [ Document (page_content = "bar" , metadata = {"page" : "1" })]
236+ assert document_eq ( output [ 0 ], Document (page_content = "bar" , metadata = {"page" : "1" }))
207237
208238 output = docsearch .similarity_search ("sometext" , filter = {"page" : "2" }, k = 1 )
209- assert output == [ Document (page_content = "baz" , metadata = {"page" : "2" })]
239+ assert document_eq ( output [ 0 ], Document (page_content = "baz" , metadata = {"page" : "2" }))
210240
211241 docsearch .drop ()
212242
@@ -237,7 +267,7 @@ def test_search_with_complex_filter() -> None:
237267 output = docsearch .similarity_search (
238268 "sometext" , filter = {"page" : "1" , "index" : "2" }, k = 1
239269 )
240- assert output == []
270+ assert len ( output ) == 0
241271
242272 docsearch .drop ()
243273
@@ -267,7 +297,7 @@ def test_different_search_strategies(strategy: YDBSearchStrategy) -> None:
267297 )
268298
269299 output = docsearch .similarity_search ("foo" , k = 1 )
270- assert output == [ Document (page_content = "foo" )]
300+ assert document_eq ( output [ 0 ], Document (page_content = "foo" ))
271301
272302 docsearch .drop ()
273303
@@ -279,7 +309,7 @@ def test_search_with_score() -> None:
279309 config .table = "test_ydb"
280310 docsearch = YDB .from_texts (texts , ConsistentFakeEmbeddings (), config = config )
281311 output = docsearch .similarity_search_with_score ("foo" , k = 1 )
282- assert output [0 ][0 ] == Document (page_content = "foo" )
312+ assert document_eq ( output [0 ][0 ], Document (page_content = "foo" ) )
283313 docsearch .drop ()
284314
285315
@@ -292,13 +322,13 @@ def test_ydb_with_persistence() -> None:
292322 embeddings = ConsistentFakeEmbeddings ()
293323 docsearch = YDB .from_texts (texts , embeddings , config = config )
294324 output = docsearch .similarity_search ("foo" , k = 1 )
295- assert output == [ Document (page_content = "foo" )]
325+ assert document_eq ( output [ 0 ], Document (page_content = "foo" ))
296326
297327 config = YDBSettings ()
298328 config .table = "test_ydb_with_persistence"
299329 docsearch = YDB (embedding = embeddings , config = config )
300330 output = docsearch .similarity_search ("foo" , k = 1 )
301- assert output == [ Document (page_content = "foo" )]
331+ assert document_eq ( output [ 0 ], Document (page_content = "foo" ))
302332
303333 docsearch .drop ()
304334
@@ -313,7 +343,7 @@ def test_search_from_retriever_interface() -> None:
313343 retriever = docsearch .as_retriever (search_kwargs = {"k" : 1 })
314344
315345 output = retriever .invoke ("foo" )
316- assert output == [ Document (page_content = "foo" )]
346+ assert document_eq ( output [ 0 ], Document (page_content = "foo" ))
317347 docsearch .drop ()
318348
319349
@@ -333,7 +363,7 @@ def test_search_from_retriever_interface_with_filter() -> None:
333363 retriever = docsearch .as_retriever (search_kwargs = {"k" : 1 })
334364
335365 output = retriever .invoke ("sometext" , filter = {"page" : "1" })
336- assert output == [ Document (page_content = "bar" , metadata = {"page" : "1" })]
366+ assert document_eq ( output [ 0 ], Document (page_content = "bar" , metadata = {"page" : "1" }))
337367
338368 docsearch .drop ()
339369
@@ -469,7 +499,7 @@ def test_basic_vector_index(strategy: YDBSearchStrategy) -> None:
469499 )
470500
471501 output = docsearch .similarity_search ("foo" , k = 1 )
472- assert output == [ Document (page_content = "foo" )]
502+ assert document_eq ( output [ 0 ], Document (page_content = "foo" ))
473503
474504 docsearch .drop ()
475505
@@ -489,14 +519,14 @@ def test_reindex() -> None:
489519 )
490520
491521 output = docsearch .similarity_search ("foo" , k = 1 )
492- assert output == [ Document (page_content = "foo" )]
522+ assert document_eq ( output [ 0 ], Document (page_content = "foo" ))
493523
494524 docsearch .add_texts (["qwe" , "asd" , "zxc" ])
495525
496526 output = docsearch .similarity_search ("foo" , k = 1 )
497- assert output == [ Document (page_content = "foo" )]
527+ assert document_eq ( output [ 0 ], Document (page_content = "foo" ))
498528
499529 output = docsearch .similarity_search ("zxc" , k = 1 )
500- assert output == [ Document (page_content = "zxc" )]
530+ assert document_eq ( output [ 0 ], Document (page_content = "zxc" ))
501531
502532 docsearch .drop ()
0 commit comments