|
1 | | -# test_spectral_database.py |
2 | 1 | import numpy as np |
3 | 2 | import pandas as pd |
4 | 3 | import pytest |
@@ -149,10 +148,70 @@ def test_missing_ids_handling(tmp_db, spectra_small): |
149 | 148 |
|
150 | 149 | # Implementation skips missing IDs but preserves order of the ones that exist |
151 | 150 | assert [s.metadata["spec_id"] for s in out_spectra] == [ids[1]] |
152 | | - assert list(out_meta["spec_id"]) == [ids[1]] |
| 151 | + |
| 152 | + # get_metadata_by_ids now returns one row per requested ID |
| 153 | + assert out_meta.shape[0] == 2 |
| 154 | + assert list(out_meta["spec_id"]) == req |
| 155 | + |
| 156 | + # First row corresponds to missing ID: all metadata fields should be None/NaN |
| 157 | + missing_row = out_meta.iloc[0] |
| 158 | + assert missing_row["spec_id"] == req[0] |
| 159 | + assert missing_row[tmp_db.metadata_fields].isna().all() |
| 160 | + |
| 161 | + # Exactly one row has *any* metadata filled (the real spec_id) |
| 162 | + non_empty_rows = out_meta.dropna(how="all", subset=tmp_db.metadata_fields) |
| 163 | + assert non_empty_rows.shape[0] == 1 |
| 164 | + assert non_empty_rows.iloc[0]["spec_id"] == ids[1] |
| 165 | + |
| 166 | + # get_fragments_by_ids still skips missing IDs |
153 | 167 | assert len(out_frags) == 1 |
154 | 168 |
|
155 | 169 |
|
def test_get_metadata_by_ids_all_ids_included_even_if_same_compound(tmp_db):
    """Check that get_metadata_by_ids yields one row per requested spec_id.

    Two spectra with different peaks and precursor m/z, but identical
    compound-level metadata (inchikey, smiles, name), are added. Requesting
    both returned IDs must give two rows, in request order, each carrying
    its own precursor m/z.
    """
    # Compound-level fields shared by both spectra.
    shared = {
        "ionmode": "positive",
        "inchikey": "SAME-IK",  # same compound for both spectra
        "smiles": "C",
        "name": "compound-1",
    }
    s1 = make_spectrum(
        [100, 200, 300],
        [10, 20, 30],
        precursor_mz=250.0,
        **shared,
    )
    s2 = make_spectrum(
        [110, 210, 310],
        [5, 15, 25],
        precursor_mz=260.0,
        **shared,
    )

    ids = tmp_db.add_spectra([s1, s2])

    # Request both spec_ids; we expect two rows, one per ID, same order
    df = tmp_db.get_metadata_by_ids(ids)

    expected_cols = ["spec_id"] + tmp_db.metadata_fields
    assert list(df.columns) == expected_cols
    assert df.shape[0] == 2
    assert list(df["spec_id"]) == ids

    # Both rows should carry the same compound-level metadata (same inchikey/smiles)
    for row in (0, 1):
        assert df.loc[row, "inchikey"] == "SAME-IK"
        assert df.loc[row, "smiles"] == "C"

    # If name is stored, it should be consistent across rows. It may also be
    # missing (None/NaN); NaN != NaN, so plain equality would fail spuriously
    # when both rows are missing. Compare missingness first.
    name_first = df.loc[0, "name"]
    name_second = df.loc[1, "name"]
    if pd.isna(name_first) or pd.isna(name_second):
        assert pd.isna(name_first) and pd.isna(name_second)
    else:
        assert name_first == name_second

    # The precursor m/z values differ per spectrum and should be preserved per ID
    assert df.loc[0, "precursor_mz"] == pytest.approx(250.0)
    assert df.loc[1, "precursor_mz"] == pytest.approx(260.0)
| 213 | + |
| 214 | + |
156 | 215 | def test_add_duplicates_are_ignored_and_ids_repeat(tmp_db, spectra_small): |
157 | 216 | # First insert |
158 | 217 | ids_first = tmp_db.add_spectra(spectra_small) |
|
0 commit comments