Skip to content

Commit bd97864

Browse files
committed
Missing changes
1 parent 6302747 commit bd97864

File tree

2 files changed

+153
-49
lines changed

2 files changed

+153
-49
lines changed

pyproject.toml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,3 +239,12 @@ exclude_lines = [
239239
"if __name__ == .__main__.:",
240240
"if TYPE_CHECKING:",
241241
]
242+
243+
[tool.pytest.ini_options]
244+
markers = [
245+
"integration: marks tests as integration tests (deselected by default, require external services like PostgreSQL)",
246+
]
247+
# By default, skip integration tests
248+
addopts = "-m 'not integration'"
249+
asyncio_mode = "auto"
250+
asyncio_default_fixture_loop_scope = "function"

test/store/conftest.py

Lines changed: 144 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,21 @@
11
# SPDX-FileCopyrightText: 2023-present Oori Data <info@oori.dev>
22
# SPDX-License-Identifier: Apache-2.0
3-
# test/embedding/conftest.py
3+
# test/store/conftest.py
44
'''
5-
Fixtures/setup/teardown for embedding tests
5+
Fixtures/setup/teardown for vector store tests
66
77
General note: After setup as described in the README.md for this directory, run the tests with:
88
99
pytest test
1010
11-
or, for just embeddings tests:
11+
or, for just store tests:
1212
13-
pytest test/embedding/
13+
pytest test/store/
14+
15+
By default, tests use in-memory implementations (no external dependencies).
16+
To run integration tests against real PostgreSQL:
17+
18+
    pytest test/store/ -m integration
1419
'''
1520

1621
import sys
@@ -21,6 +26,11 @@
2126
from urllib.parse import quote_plus
2227

2328
import numpy as np
29+
30+
# In-memory implementations (default for unit tests)
31+
from ogbujipt.store.memory import InMemoryDataDB, InMemoryMessageDB
32+
33+
# PostgreSQL implementations (for integration tests)
2434
from ogbujipt.store.postgres.pgvector_message import MessageDB
2535
from ogbujipt.store.postgres.pgvector_data import DataDB
2636

@@ -112,23 +122,68 @@ def __init__(self, model_name_or_path):
112122
self.encode = MagicMock()
113123

114124

115-
DB_CLASS = {
125+
# Mapping of test files to their in-memory implementations
126+
INMEMORY_CLASS = {
127+
'test/store/test_pgvector_message.py': InMemoryMessageDB,
128+
    'test/store/test_pgvector_data.py': InMemoryDataDB,
129+
}
130+
131+
# Mapping for PostgreSQL integration tests
132+
PG_CLASS = {
116133
'test/store/test_pgvector_message.py': MessageDB,
117134
'test/store/test_pgvector_data.py': DataDB,
118135
}
119136

120137
# print(HOST, DB_NAME, USER, PASSWORD, PORT)
121138

122-
@pytest_asyncio.fixture # Notice the async aware fixture declaration
139+
140+
@pytest_asyncio.fixture # Default fixture using in-memory store
123141
async def DB(request):
142+
'''
143+
Default DB fixture using in-memory implementation (no external dependencies).
144+
Fast and suitable for unit testing.
145+
'''
146+
testname = request.node.name
147+
testfile = request.node.location[0]
148+
collection_name = testname.lower()
149+
print(f'DB setup (in-memory) for test: {testname}. Collection: {collection_name}', file=sys.stderr)
150+
151+
dummy_model = SentenceTransformer('mock_transformer')
152+
dummy_model.encode.return_value = np.array([1, 2, 3])
153+
154+
# Determine which class to use based on test file
155+
if 'message' in testfile:
156+
vDB = InMemoryMessageDB(embedding_model=dummy_model, collection_name=collection_name)
157+
else:
158+
vDB = InMemoryDataDB(embedding_model=dummy_model, collection_name=collection_name)
159+
160+
# Setup
161+
await vDB.create_table()
162+
assert await vDB.table_exists(), Exception("Collection not initialized after setup")
163+
164+
# The test will take control upon the yield
165+
yield vDB
166+
167+
# Teardown
168+
await vDB.drop_table()
169+
170+
171+
@pytest_asyncio.fixture # PostgreSQL integration test fixture
172+
async def PG_DB(request):
173+
'''
174+
PostgreSQL integration test fixture. Requires a running PostgreSQL instance.
175+
Only used for integration tests marked with @pytest.mark.integration
176+
'''
124177
testname = request.node.name
125178
testfile = request.node.location[0]
126179
table_name = testname.lower()
127-
print(f'DB setup for test: {testname}. Table name {table_name}', file=sys.stderr)
180+
print(f'DB setup (PostgreSQL) for test: {testname}. Table name {table_name}', file=sys.stderr)
181+
128182
dummy_model = SentenceTransformer('mock_transformer')
129183
dummy_model.encode.return_value = np.array([1, 2, 3])
184+
130185
try:
131-
vDB = await DB_CLASS[testfile].from_conn_params(
186+
vDB = await PG_CLASS[testfile].from_conn_params(
132187
embedding_model=dummy_model,
133188
table_name=table_name,
134189
db_name=DB_NAME,
@@ -138,15 +193,14 @@ async def DB(request):
138193
password=PASSWORD)
139194
except ConnectionRefusedError:
140195
pytest.skip("No Postgres instance made available for test. Skipping.", allow_module_level=True)
141-
# Actually we want to propagate the error condition, in this case
142-
# if vDB is None:
143-
# pytest.skip("Unable to create a valid DB instance. Skipping.", allow_module_level=True)
196+
except Exception as e:
197+
pytest.skip(f"Unable to connect to PostgreSQL: {e}", allow_module_level=True)
144198

145199
await vDB.pool.expire_connections()
146200
del vDB # Accelerate clean-up (I think)
147201

148202
# Re-acquire using DSN
149-
vDB = await DB_CLASS[testfile].from_conn_string(DSN, dummy_model, table_name, pool_min=5, pool_max=10)
203+
vDB = await PG_CLASS[testfile].from_conn_string(DSN, dummy_model, table_name, pool_min=5, pool_max=10)
150204

151205
# Create table
152206
await vDB.drop_table()
@@ -158,15 +212,35 @@ async def DB(request):
158212
# Teardown: Drop table
159213
await vDB.drop_table()
160214

161-
# FIXME: Lots of DRY violations!!!
162215

163-
@pytest_asyncio.fixture # Notice the async aware fixture declaration
216+
@pytest_asyncio.fixture # Windowed message fixture (in-memory)
164217
async def DB_WINDOWED2(request):
218+
'''In-memory MessageDB with window=2'''
219+
testname = request.node.name
220+
collection_name = testname.lower()
221+
print(f'DB setup (in-memory, windowed) for test: {testname}. Collection: {collection_name}', file=sys.stderr)
222+
223+
dummy_model = SentenceTransformer('mock_transformer')
224+
dummy_model.encode.return_value = np.array([1, 2, 3])
225+
226+
vDB = InMemoryMessageDB(embedding_model=dummy_model, collection_name=collection_name, window=2)
227+
228+
await vDB.create_table()
229+
assert await vDB.table_exists(), Exception("Collection not initialized after setup")
230+
yield vDB
231+
await vDB.drop_table()
232+
233+
234+
@pytest_asyncio.fixture # PostgreSQL windowed fixture for integration tests
235+
async def PG_DB_WINDOWED2(request):
236+
'''PostgreSQL MessageDB with window=2 (integration tests only)'''
165237
testname = request.node.name
166238
table_name = testname.lower()
167-
print(f'DB setup for test: {testname}. Table name {table_name}', file=sys.stderr)
239+
print(f'DB setup (PostgreSQL, windowed) for test: {testname}. Table name {table_name}', file=sys.stderr)
240+
168241
dummy_model = SentenceTransformer('mock_transformer')
169242
dummy_model.encode.return_value = np.array([1, 2, 3])
243+
170244
try:
171245
vDB = await MessageDB.from_conn_params(
172246
embedding_model=dummy_model,
@@ -177,36 +251,50 @@ async def DB_WINDOWED2(request):
177251
user=USER,
178252
password=PASSWORD,
179253
window=2)
180-
except ConnectionRefusedError:
181-
pytest.skip("No Postgres instance made available for test. Skipping.", allow_module_level=True)
182-
# Actually we want to propagate the error condition, in this case
183-
# if vDB is None:
184-
# pytest.skip("Unable to create a valid DB instance. Skipping.", allow_module_level=True)
254+
    except Exception as e:  # Exception already covers ConnectionRefusedError
255+
pytest.skip(f"No Postgres instance available: {e}", allow_module_level=True)
185256

186257
await vDB.pool.expire_connections()
187-
del vDB # Accelerate clean-up (I think)
258+
del vDB
188259

189-
# Re-acquire using DSN
190260
vDB = await MessageDB.from_conn_string(DSN, dummy_model, table_name, window=2, pool_min=5, pool_max=10)
191261

192-
# Create table
193262
await vDB.drop_table()
194263
assert not await vDB.table_exists(), Exception("Table exists before creation")
195264
await vDB.create_table()
196265
assert await vDB.table_exists(), Exception("Table does not exist after creation")
197-
# The test will take control upon the yield
198266
yield vDB
199-
# Teardown: Drop table
200267
await vDB.drop_table()
201268

202269

203-
@pytest_asyncio.fixture # Notice the async aware fixture declaration
270+
@pytest_asyncio.fixture # In-memory DataDB (half_precision param ignored in-memory)
204271
async def DB_HALF(request):
272+
'''In-memory DataDB fixture (precision settings not applicable to in-memory)'''
273+
testname = request.node.name
274+
collection_name = testname.lower()
275+
print(f'DB setup (in-memory) for test: {testname}. Collection: {collection_name}', file=sys.stderr)
276+
277+
dummy_model = SentenceTransformer('mock_transformer')
278+
dummy_model.encode.return_value = np.array([1, 2, 3])
279+
280+
vDB = InMemoryDataDB(embedding_model=dummy_model, collection_name=collection_name)
281+
282+
await vDB.create_table()
283+
assert await vDB.table_exists(), Exception("Collection not initialized after setup")
284+
yield vDB
285+
await vDB.drop_table()
286+
287+
288+
@pytest_asyncio.fixture # PostgreSQL half-precision fixture for integration tests
289+
async def PG_DB_HALF(request):
290+
'''PostgreSQL DataDB with half_precision=True (integration tests only)'''
205291
testname = request.node.name
206292
table_name = testname.lower()
207-
print(f'DB setup for half precision test: {testname}. Table name {table_name}', file=sys.stderr)
293+
print(f'DB setup (PostgreSQL, half-precision) for test: {testname}. Table name {table_name}', file=sys.stderr)
294+
208295
dummy_model = SentenceTransformer('mock_transformer')
209296
dummy_model.encode.return_value = np.array([1, 2, 3])
297+
210298
try:
211299
vDB = await DataDB.from_conn_params(
212300
embedding_model=dummy_model,
@@ -217,36 +305,50 @@ async def DB_HALF(request):
217305
user=USER,
218306
password=PASSWORD,
219307
half_precision=True)
220-
except ConnectionRefusedError:
221-
pytest.skip("No Postgres instance made available for test. Skipping.", allow_module_level=True)
222-
# Actually we want to propagate the error condition, in this case
223-
# if vDB is None:
224-
# pytest.skip("Unable to create a valid DB instance. Skipping.", allow_module_level=True)
308+
    except Exception as e:  # Exception already covers ConnectionRefusedError
309+
pytest.skip(f"No Postgres instance available: {e}", allow_module_level=True)
225310

226311
await vDB.pool.expire_connections()
227-
del vDB # Accelerate clean-up (I think)
312+
del vDB
228313

229-
# Re-acquire using DSN
230314
vDB = await DataDB.from_conn_string(DSN, dummy_model, table_name, pool_min=5, pool_max=10)
231315

232-
# Create table
233316
await vDB.drop_table()
234317
assert not await vDB.table_exists(), Exception("Table exists before creation")
235318
await vDB.create_table()
236319
assert await vDB.table_exists(), Exception("Table does not exist after creation")
237-
# The test will take control upon the yield
238320
yield vDB
239-
# Teardown: Drop table
240321
await vDB.drop_table()
241322

242323

243-
@pytest_asyncio.fixture # Notice the async aware fixture declaration
324+
@pytest_asyncio.fixture # In-memory DataDB (index settings ignored in-memory)
244325
async def DB_HALF_INDEX_HALF(request):
326+
'''In-memory DataDB fixture (index settings not applicable to in-memory)'''
327+
testname = request.node.name
328+
collection_name = testname.lower()
329+
print(f'DB setup (in-memory) for test: {testname}. Collection: {collection_name}', file=sys.stderr)
330+
331+
dummy_model = SentenceTransformer('mock_transformer')
332+
dummy_model.encode.return_value = np.array([1, 2, 3])
333+
334+
vDB = InMemoryDataDB(embedding_model=dummy_model, collection_name=collection_name)
335+
336+
await vDB.create_table()
337+
assert await vDB.table_exists(), Exception("Collection not initialized after setup")
338+
yield vDB
339+
await vDB.drop_table()
340+
341+
342+
@pytest_asyncio.fixture # PostgreSQL half-precision with custom index for integration tests
343+
async def PG_DB_HALF_INDEX_HALF(request):
344+
'''PostgreSQL DataDB with half_precision and custom index (integration tests only)'''
245345
testname = request.node.name
246346
table_name = testname.lower()
247-
print(f'DB setup for half precision test: {testname}. Table name {table_name}', file=sys.stderr)
347+
print(f'DB setup (PostgreSQL, half-precision+index) for test: {testname}. Table name {table_name}', file=sys.stderr)
348+
248349
dummy_model = SentenceTransformer('mock_transformer')
249350
dummy_model.encode.return_value = np.array([1, 2, 3])
351+
250352
try:
251353
vDB = await DataDB.from_conn_params(
252354
embedding_model=dummy_model,
@@ -259,24 +361,17 @@ async def DB_HALF_INDEX_HALF(request):
259361
half_precision=True,
260362
itypes=['halfvec'],
261363
ifuncs=['cosine'])
262-
except ConnectionRefusedError:
263-
pytest.skip("No Postgres instance made available for test. Skipping.", allow_module_level=True)
264-
# Actually we want to propagate the error condition, in this case
265-
# if vDB is None:
266-
# pytest.skip("Unable to create a valid DB instance. Skipping.", allow_module_level=True)
364+
    except Exception as e:  # Exception already covers ConnectionRefusedError
365+
pytest.skip(f"No Postgres instance available: {e}", allow_module_level=True)
267366

268367
await vDB.pool.expire_connections()
269-
del vDB # Accelerate clean-up (I think)
368+
del vDB
270369

271-
# Re-acquire using DSN
272370
vDB = await DataDB.from_conn_string(DSN, dummy_model, table_name, pool_min=5, pool_max=10)
273371

274-
# Create table
275372
await vDB.drop_table()
276373
assert not await vDB.table_exists(), Exception("Table exists before creation")
277374
await vDB.create_table()
278375
assert await vDB.table_exists(), Exception("Table does not exist after creation")
279-
# The test will take control upon the yield
280376
yield vDB
281-
# Teardown: Drop table
282377
await vDB.drop_table()

0 commit comments

Comments
 (0)