# SPDX-FileCopyrightText: 2023-present Oori Data <info@oori.dev>
# SPDX-License-Identifier: Apache-2.0
# test/store/conftest.py
'''
Fixtures/setup/teardown for vector store tests

General note: After setup as described in the README.md for this directory, run the tests with:

pytest test

or, for just store tests:

pytest test/store/

By default, tests use in-memory implementations (no external dependencies).
To run integration tests against real PostgreSQL:

pytest test/store/ -m integration --run-integration
'''
1520
1621import sys
2126from urllib .parse import quote_plus
2227
2328import numpy as np
29+
30+ # In-memory implementations (default for unit tests)
31+ from ogbujipt .store .memory import InMemoryDataDB , InMemoryMessageDB
32+
33+ # PostgreSQL implementations (for integration tests)
2434from ogbujipt .store .postgres .pgvector_message import MessageDB
2535from ogbujipt .store .postgres .pgvector_data import DataDB
2636
@@ -112,23 +122,68 @@ def __init__(self, model_name_or_path):
112122 self .encode = MagicMock ()
113123
114124
# Mapping of test files to their in-memory implementations.
# Bug fix: the data-test entry previously pointed at the PostgreSQL DataDB
# class (the inline comment even said "Will map to InMemoryDataDB") — it must
# map to InMemoryDataDB so unit tests stay free of external dependencies.
INMEMORY_CLASS = {
    'test/store/test_pgvector_message.py': InMemoryMessageDB,
    'test/store/test_pgvector_data.py': InMemoryDataDB,
}

# Mapping for PostgreSQL integration tests
PG_CLASS = {
    'test/store/test_pgvector_message.py': MessageDB,
    'test/store/test_pgvector_data.py': DataDB,
}
119136
120137# print(HOST, DB_NAME, USER, PASSWORD, PORT)
121138
@pytest_asyncio.fixture  # Default fixture using in-memory store
async def DB(request):
    '''
    Default DB fixture using in-memory implementation (no external dependencies).
    Fast and suitable for unit testing.

    Selects InMemoryMessageDB or InMemoryDataDB based on the requesting test's
    file name, creates the collection, yields the store to the test, then
    drops the collection on teardown.
    '''
    testname = request.node.name
    testfile = request.node.location[0]
    collection_name = testname.lower()
    print(f'DB setup (in-memory) for test: {testname}. Collection: {collection_name}', file=sys.stderr)

    # Mock embedding model; encode() always returns a fixed vector
    dummy_model = SentenceTransformer('mock_transformer')
    dummy_model.encode.return_value = np.array([1, 2, 3])

    # Determine which class to use based on test file
    if 'message' in testfile:
        vDB = InMemoryMessageDB(embedding_model=dummy_model, collection_name=collection_name)
    else:
        vDB = InMemoryDataDB(embedding_model=dummy_model, collection_name=collection_name)

    # Setup
    await vDB.create_table()
    # Fix: assert's second operand is the failure *message*; passing an
    # Exception instance never raises it — it is merely repr()'d. Use a string.
    assert await vDB.table_exists(), "Collection not initialized after setup"

    # The test will take control upon the yield
    yield vDB

    # Teardown
    await vDB.drop_table()
170+
@pytest_asyncio.fixture  # PostgreSQL integration test fixture
async def PG_DB(request):
    '''
    PostgreSQL integration test fixture. Requires a running PostgreSQL instance.
    Only used for integration tests marked with @pytest.mark.integration

    Connects twice on purpose: first via from_conn_params (exercising that
    code path), then re-acquires via DSN string; the DSN-built instance is
    the one actually yielded to the test.
    '''
    testname = request.node.name
    testfile = request.node.location[0]
    table_name = testname.lower()
    print(f'DB setup (PostgreSQL) for test: {testname}. Table name {table_name}', file=sys.stderr)

    # Mock embedding model; encode() always returns a fixed vector
    dummy_model = SentenceTransformer('mock_transformer')
    dummy_model.encode.return_value = np.array([1, 2, 3])

    try:
        # NOTE(review): host/port/user kwargs reconstructed from the sibling
        # PG fixtures — confirm against the original connection parameters
        vDB = await PG_CLASS[testfile].from_conn_params(
            embedding_model=dummy_model,
            table_name=table_name,
            db_name=DB_NAME,
            host=HOST,
            port=PORT,
            user=USER,
            password=PASSWORD)
    except ConnectionRefusedError:
        pytest.skip("No Postgres instance made available for test. Skipping.", allow_module_level=True)
    except Exception as e:
        pytest.skip(f"Unable to connect to PostgreSQL: {e}", allow_module_level=True)

    await vDB.pool.expire_connections()
    del vDB  # Accelerate clean-up (I think)

    # Re-acquire using DSN
    vDB = await PG_CLASS[testfile].from_conn_string(DSN, dummy_model, table_name, pool_min=5, pool_max=10)

    # Create table (drop first in case a previous run left residue)
    await vDB.drop_table()
    # Fix: assert message must be a string, not an Exception instance
    assert not await vDB.table_exists(), "Table exists before creation"
    await vDB.create_table()
    assert await vDB.table_exists(), "Table does not exist after creation"

    # The test will take control upon the yield
    yield vDB

    # Teardown: Drop table
    await vDB.drop_table()
160214
161- # FIXME: Lots of DRY violations!!!
162215
@pytest_asyncio.fixture  # Windowed message fixture (in-memory)
async def DB_WINDOWED2(request):
    '''
    In-memory MessageDB with window=2.

    Creates the collection, yields the store, drops the collection on teardown.
    '''
    testname = request.node.name
    collection_name = testname.lower()
    print(f'DB setup (in-memory, windowed) for test: {testname}. Collection: {collection_name}', file=sys.stderr)

    # Mock embedding model; encode() always returns a fixed vector
    dummy_model = SentenceTransformer('mock_transformer')
    dummy_model.encode.return_value = np.array([1, 2, 3])

    vDB = InMemoryMessageDB(embedding_model=dummy_model, collection_name=collection_name, window=2)

    await vDB.create_table()
    # Fix: use a string assert message (an Exception instance is never raised here)
    assert await vDB.table_exists(), "Collection not initialized after setup"
    yield vDB
    await vDB.drop_table()
232+
233+
@pytest_asyncio.fixture  # PostgreSQL windowed fixture for integration tests
async def PG_DB_WINDOWED2(request):
    '''
    PostgreSQL MessageDB with window=2 (integration tests only).

    Connects via from_conn_params, then re-acquires via DSN; yields the
    DSN-built store and drops its table on teardown.
    '''
    testname = request.node.name
    table_name = testname.lower()
    print(f'DB setup (PostgreSQL, windowed) for test: {testname}. Table name {table_name}', file=sys.stderr)

    # Mock embedding model; encode() always returns a fixed vector
    dummy_model = SentenceTransformer('mock_transformer')
    dummy_model.encode.return_value = np.array([1, 2, 3])

    try:
        # NOTE(review): table_name/db_name/host/port kwargs reconstructed from
        # the sibling PG fixtures — confirm against the original
        vDB = await MessageDB.from_conn_params(
            embedding_model=dummy_model,
            table_name=table_name,
            db_name=DB_NAME,
            host=HOST,
            port=PORT,
            user=USER,
            password=PASSWORD,
            window=2)
    # Fix: `except (ConnectionRefusedError, Exception)` was a redundant tuple —
    # ConnectionRefusedError is an Exception subclass, so this collapses to a
    # bare Exception catch with identical behavior (flake8 B014).
    except Exception as e:
        pytest.skip(f"No Postgres instance available: {e}", allow_module_level=True)

    await vDB.pool.expire_connections()
    del vDB  # Accelerate clean-up

    # Re-acquire using DSN
    vDB = await MessageDB.from_conn_string(DSN, dummy_model, table_name, window=2, pool_min=5, pool_max=10)

    await vDB.drop_table()
    # Fix: assert message must be a string, not an Exception instance
    assert not await vDB.table_exists(), "Table exists before creation"
    await vDB.create_table()
    assert await vDB.table_exists(), "Table does not exist after creation"
    yield vDB
    await vDB.drop_table()
201268
202269
@pytest_asyncio.fixture  # In-memory DataDB (half_precision param ignored in-memory)
async def DB_HALF(request):
    '''
    In-memory DataDB fixture (precision settings not applicable to in-memory).

    Creates the collection, yields the store, drops the collection on teardown.
    '''
    testname = request.node.name
    collection_name = testname.lower()
    print(f'DB setup (in-memory) for test: {testname}. Collection: {collection_name}', file=sys.stderr)

    # Mock embedding model; encode() always returns a fixed vector
    dummy_model = SentenceTransformer('mock_transformer')
    dummy_model.encode.return_value = np.array([1, 2, 3])

    vDB = InMemoryDataDB(embedding_model=dummy_model, collection_name=collection_name)

    await vDB.create_table()
    # Fix: use a string assert message (an Exception instance is never raised here)
    assert await vDB.table_exists(), "Collection not initialized after setup"
    yield vDB
    await vDB.drop_table()
286+
287+
@pytest_asyncio.fixture  # PostgreSQL half-precision fixture for integration tests
async def PG_DB_HALF(request):
    '''
    PostgreSQL DataDB with half_precision=True (integration tests only).

    Connects via from_conn_params, then re-acquires via DSN; yields the
    DSN-built store and drops its table on teardown.
    '''
    testname = request.node.name
    table_name = testname.lower()
    print(f'DB setup (PostgreSQL, half-precision) for test: {testname}. Table name {table_name}', file=sys.stderr)

    # Mock embedding model; encode() always returns a fixed vector
    dummy_model = SentenceTransformer('mock_transformer')
    dummy_model.encode.return_value = np.array([1, 2, 3])

    try:
        # NOTE(review): table_name/db_name/host/port kwargs reconstructed from
        # the sibling PG fixtures — confirm against the original
        vDB = await DataDB.from_conn_params(
            embedding_model=dummy_model,
            table_name=table_name,
            db_name=DB_NAME,
            host=HOST,
            port=PORT,
            user=USER,
            password=PASSWORD,
            half_precision=True)
    # Fix: `except (ConnectionRefusedError, Exception)` was a redundant tuple —
    # ConnectionRefusedError is an Exception subclass (flake8 B014).
    except Exception as e:
        pytest.skip(f"No Postgres instance available: {e}", allow_module_level=True)

    await vDB.pool.expire_connections()
    del vDB  # Accelerate clean-up

    # Re-acquire using DSN
    vDB = await DataDB.from_conn_string(DSN, dummy_model, table_name, pool_min=5, pool_max=10)

    await vDB.drop_table()
    # Fix: assert message must be a string, not an Exception instance
    assert not await vDB.table_exists(), "Table exists before creation"
    await vDB.create_table()
    assert await vDB.table_exists(), "Table does not exist after creation"
    yield vDB
    await vDB.drop_table()
241322
242323
@pytest_asyncio.fixture  # In-memory DataDB (index settings ignored in-memory)
async def DB_HALF_INDEX_HALF(request):
    '''
    In-memory DataDB fixture (index settings not applicable to in-memory).

    Creates the collection, yields the store, drops the collection on teardown.
    '''
    testname = request.node.name
    collection_name = testname.lower()
    print(f'DB setup (in-memory) for test: {testname}. Collection: {collection_name}', file=sys.stderr)

    # Mock embedding model; encode() always returns a fixed vector
    dummy_model = SentenceTransformer('mock_transformer')
    dummy_model.encode.return_value = np.array([1, 2, 3])

    vDB = InMemoryDataDB(embedding_model=dummy_model, collection_name=collection_name)

    await vDB.create_table()
    # Fix: use a string assert message (an Exception instance is never raised here)
    assert await vDB.table_exists(), "Collection not initialized after setup"
    yield vDB
    await vDB.drop_table()
340+
341+
@pytest_asyncio.fixture  # PostgreSQL half-precision with custom index for integration tests
async def PG_DB_HALF_INDEX_HALF(request):
    '''
    PostgreSQL DataDB with half_precision and custom index (integration tests only).

    Connects via from_conn_params with halfvec/cosine index settings, then
    re-acquires via DSN; yields the DSN-built store and drops its table on
    teardown.
    '''
    testname = request.node.name
    table_name = testname.lower()
    print(f'DB setup (PostgreSQL, half-precision+index) for test: {testname}. Table name {table_name}', file=sys.stderr)

    # Mock embedding model; encode() always returns a fixed vector
    dummy_model = SentenceTransformer('mock_transformer')
    dummy_model.encode.return_value = np.array([1, 2, 3])

    try:
        # NOTE(review): table_name/db_name/host/port/user/password kwargs
        # reconstructed from the sibling PG fixtures — confirm against the original
        vDB = await DataDB.from_conn_params(
            embedding_model=dummy_model,
            table_name=table_name,
            db_name=DB_NAME,
            host=HOST,
            port=PORT,
            user=USER,
            password=PASSWORD,
            half_precision=True,
            itypes=['halfvec'],
            ifuncs=['cosine'])
    # Fix: `except (ConnectionRefusedError, Exception)` was a redundant tuple —
    # ConnectionRefusedError is an Exception subclass (flake8 B014).
    except Exception as e:
        pytest.skip(f"No Postgres instance available: {e}", allow_module_level=True)

    await vDB.pool.expire_connections()
    del vDB  # Accelerate clean-up

    # Re-acquire using DSN
    vDB = await DataDB.from_conn_string(DSN, dummy_model, table_name, pool_min=5, pool_max=10)

    await vDB.drop_table()
    # Fix: assert message must be a string, not an Exception instance
    assert not await vDB.table_exists(), "Table exists before creation"
    await vDB.create_table()
    assert await vDB.table_exists(), "Table does not exist after creation"
    yield vDB
    await vDB.drop_table()