Skip to content

Commit 760a38a

Browse files
committed
add sort of integration test
1 parent 895ba19 commit 760a38a

File tree

1 file changed

+92
-0
lines changed

1 file changed

+92
-0
lines changed

tests/test_library_io.py

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
# tests/test_library_io.py
2+
from pathlib import Path
3+
import numpy as np
4+
import pytest
5+
from matchms.importing import load_spectra
6+
from ms2query import MS2QueryDatabase
7+
from ms2query.library_io import create_new_library, load_created_library
8+
9+
10+
# Compound id used for the metadata lookup in the test below.
TEST_COMP_ID = "ZBSGKPYXQINNGF"  # expected InChIKey14 present in the test data

# Expected ``.shape`` of the frame returned by ``metadata_by_comp_id(TEST_COMP_ID)``.
EXPECTED_METADATA_SHAPE = (5, 11)

# Metadata fields that must be listed by the database wrapper and must also
# appear as columns of the returned metadata frame.
EXPECTED_METADATA_FIELDS = [
    "precursor_mz", "ionmode", "smiles", "inchikey", "inchi", "name",
    "charge", "instrument_type", "adduct", "collision_energy",
]
16+
17+
18+
def _data_dir() -> Path:
    """Return the ``test_data`` directory that sits next to this module."""
    return Path(__file__).parent / "test_data"
20+
21+
22+
def _paths():
    """Locate the spectra file and the model file used by the test.

    Returns:
        A ``(spectra_file, model_path)`` pair of ``Path`` objects, both
        inside the directory returned by ``_data_dir()``.

    Raises:
        AssertionError: If either file does not exist on disk.
    """
    data_dir = _data_dir()
    spectra_file = data_dir / "10_spectra.mgf"
    model_path = data_dir / "ms2deepscore_testmodel_v1.pt"
    # Check the fixtures up front so a missing file is reported by name.
    assert spectra_file.exists(), f"Missing test spectra file: {spectra_file}"
    assert model_path.exists(), f"Missing test model file: {model_path}"
    return spectra_file, model_path
29+
30+
31+
@pytest.mark.filterwarnings("ignore::UserWarning")
def test_create_and_load_library(tmp_path: Path):
    """Create a library from the test spectra, reload it, and query it.

    The test runs these stages in order:

    1. ``create_new_library`` is pointed at a ``results`` folder under
       ``tmp_path``.
    2. The SQLite file is expected in that folder, and the metadata returned
       for ``TEST_COMP_ID`` is checked for shape and field names.
    3. Both embedding-index files are expected in the same folder.
    4. ``load_created_library`` reopens the folder, and the first spectrum of
       the input file is used for a ``k=3`` embedding-index query.

    Args:
        tmp_path: pytest's per-test temporary directory fixture.
    """
    spectra_file, model_path = _paths()

    # ---------- Create ----------
    outdir = tmp_path / "results"
    outdir.mkdir(parents=True, exist_ok=True)

    lib = create_new_library(
        spectra_files=[str(spectra_file)],
        annotation_files=[],  # currently unused in the workflow
        output_folder=str(outdir),
        model_path=str(model_path),
        # Keep the index small/fast for CI:
        embedding_index_params={
            "M": 8,
            "ef_construction": 50,
            "post_init_ef": 50,
            "batch_rows": 100_000,
        },
    )
    # basic sanity
    assert lib is not None
    assert isinstance(lib.db, MS2QueryDatabase)

    # SQLite must exist
    db_path = outdir / "ms2query_library.sqlite"
    assert db_path.exists(), "MS2Query database file was not created."

    # ---------- DB content checks ----------
    ms2query_db = lib.db

    # Metadata query by compound id; the shape is pinned by EXPECTED_METADATA_SHAPE.
    df_meta = ms2query_db.metadata_by_comp_id(TEST_COMP_ID)
    assert tuple(df_meta.shape) == EXPECTED_METADATA_SHAPE

    # Metadata fields presence both in db wrapper and in returned dataframe
    md_fields = ms2query_db.metadata_fields
    for field in EXPECTED_METADATA_FIELDS:
        assert field in md_fields, f"Metadata field '{field}' is missing in the database."
        assert field in df_meta.columns, f"Metadata field '{field}' missing in metadata_by_comp_id result."

    # ---------- Embedding index artifacts ----------
    # The workflow saves with a base prefix (no extension). NMSLIB writes two files: <prefix> and <prefix>.dat
    emb_prefix = outdir / "embedding_index"
    pair_exists = emb_prefix.exists() and emb_prefix.with_suffix(".dat").exists()
    assert pair_exists, "EmbeddingIndex files not found."

    # ---------- Load ----------
    lib2 = load_created_library(str(outdir))
    assert lib2 is not None
    assert isinstance(lib2.db, MS2QueryDatabase)

    # ---------- Tiny embedding-index query ----------
    # Take one spectrum from the same file and try a small top-k query.
    # The path is passed as str, matching the other library calls in this test.
    spectra = list(load_spectra(str(spectra_file)))
    assert len(spectra) > 0, "No spectra parsed from test file."

    # queries should return non-empty hits with spec_ids
    results = lib2.query_embedding_index(spectra[0], k=3, return_dataframe=False)
    assert isinstance(results, list)
    assert len(results) == 1  # one entry for the single query spectrum

    hits = results[0]
    assert len(hits) == 3, "k was set to 3, but returned different number of hits."
    assert set(hits[0].keys()) >= {"rank", "spec_id", "score"}
    assert isinstance(hits[0]["spec_id"], (str, np.str_))

0 commit comments

Comments
 (0)