Skip to content

Commit c3d7b62

Browse files
committed
add test and clean
1 parent ff6e2b6 commit c3d7b62

File tree

2 files changed

+62
-3
lines changed

2 files changed

+62
-3
lines changed

ms2query/ms2query_library.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,7 @@ def query_by_spec_ids(
352352
normalized=True,
353353
)
354354

355-
# If DB returns nothing, keep the old API behaviour
355+
# If the DB returns nothing, return an empty result (empty list, or an empty DataFrame when return_dataframe is set)
356356
if X.size == 0:
357357
return [] if not return_dataframe else self._empty_result_df()
358358

tests/test_spectral_database.py

Lines changed: 61 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
# test_spectral_database.py
21
import numpy as np
32
import pandas as pd
43
import pytest
@@ -149,10 +148,70 @@ def test_missing_ids_handling(tmp_db, spectra_small):
149148

150149
# The returned spectra skip missing IDs but preserve the order of the ones that exist
151150
assert [s.metadata["spec_id"] for s in out_spectra] == [ids[1]]
152-
assert list(out_meta["spec_id"]) == [ids[1]]
151+
152+
# get_metadata_by_ids now returns one row per requested ID
153+
assert out_meta.shape[0] == 2
154+
assert list(out_meta["spec_id"]) == req
155+
156+
# First row corresponds to missing ID: all metadata fields should be None/NaN
157+
missing_row = out_meta.iloc[0]
158+
assert missing_row["spec_id"] == req[0]
159+
assert missing_row[tmp_db.metadata_fields].isna().all()
160+
161+
# Exactly one row has *any* metadata filled (the real spec_id)
162+
non_empty_rows = out_meta.dropna(how="all", subset=tmp_db.metadata_fields)
163+
assert non_empty_rows.shape[0] == 1
164+
assert non_empty_rows.iloc[0]["spec_id"] == ids[1]
165+
166+
# get_fragments_by_ids still skips missing IDs
153167
assert len(out_frags) == 1
154168

155169

170+
def test_get_metadata_by_ids_all_ids_included_even_if_same_compound(tmp_db):
    """Check that get_metadata_by_ids yields one row per requested spec_id.

    Two spectra that share compound-level metadata (inchikey, smiles, name)
    but differ in peaks and precursor m/z are added to the database. Both
    returned IDs are then requested, and the resulting frame must contain
    exactly one row per ID, in request order, with per-spectrum values
    (precursor m/z) preserved.
    """
    # Two spectra with different peaks but same compound-level metadata
    s1 = make_spectrum(
        [100, 200, 300],
        [10, 20, 30],
        precursor_mz=250.0,
        ionmode="positive",
        inchikey="SAME-IK",
        smiles="C",
        name="compound-1",
    )
    s2 = make_spectrum(
        [110, 210, 310],
        [5, 15, 25],
        precursor_mz=260.0,
        ionmode="positive",
        inchikey="SAME-IK",  # same compound
        smiles="C",
        name="compound-1",
    )

    ids = tmp_db.add_spectra([s1, s2])

    # Request both spec_ids; we expect two rows, one per ID, same order
    df = tmp_db.get_metadata_by_ids(ids)

    expected_cols = ["spec_id"] + tmp_db.metadata_fields
    assert list(df.columns) == expected_cols
    assert df.shape[0] == 2
    assert list(df["spec_id"]) == ids

    # Rows are read by position (iloc) so the checks below do not depend on
    # which index labels the returned frame happens to carry.

    # Both rows should carry the same compound-level metadata (same inchikey/smiles)
    assert df["inchikey"].iloc[0] == "SAME-IK"
    assert df["inchikey"].iloc[1] == "SAME-IK"
    assert df["smiles"].iloc[0] == "C"
    assert df["smiles"].iloc[1] == "C"

    # If name is stored, it should be consistent across rows (but may be missing)
    if "name" in df.columns:
        name_first = df["name"].iloc[0]
        name_second = df["name"].iloc[1]
        # NaN never compares equal to itself, so two missing names are
        # treated as consistent explicitly instead of relying on ==.
        both_missing = pd.isna(name_first) and pd.isna(name_second)
        assert both_missing or name_first == name_second

    # The precursor m/z values differ per spectrum and should be preserved per ID
    assert df["precursor_mz"].iloc[0] == pytest.approx(250.0)
    assert df["precursor_mz"].iloc[1] == pytest.approx(260.0)
214+
156215
def test_add_duplicates_are_ignored_and_ids_repeat(tmp_db, spectra_small):
157216
# First insert
158217
ids_first = tmp_db.add_spectra(spectra_small)

0 commit comments

Comments
 (0)