Skip to content

Commit b4561c0

Browse files
committed
Do correct subsetting of inchikey sets
1 parent 8024263 commit b4561c0

File tree

2 files changed

+11
-2
lines changed

2 files changed

+11
-2
lines changed

ms2query/benchmarking/SpectrumDataSet.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,15 @@ def copy(self):
108108
new_instance.inchikey_fingerprint_pairs = copy.copy(self.inchikey_fingerprint_pairs)
109109
return new_instance
110110

111+
def subset_spectra(self, spectrum_indexes) -> "SpectraWithFingerprints":
112+
"""Returns a new instance of a subset of the spectra"""
113+
new_instance = super().subset_spectra(spectrum_indexes)
114+
# Only keep the fingerprints for which we have inchikeys.
115+
# Important note: This is not a deep copy!
116+
# The fingerprints are also not reset, so a fingerprint does not always match the most common inchi.
117+
new_instance.inchikey_fingerprint_pairs = {inchikey: self.inchikey_fingerprint_pairs[inchikey] for inchikey in new_instance.spectrum_indexes_per_inchikey.keys()}
118+
return new_instance
119+
111120

112121
class SpectraWithMS2DeepScoreEmbeddings(SpectraWithFingerprints):
113122
def __init__(self, spectra: List[Spectrum], ms2deepscore_model: SiameseSpectralModel, **kwargs):

tests/test_SpectrumDataSet.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,9 +91,9 @@ def test_spectra_with_fingerprints(library):
9191
)
9292

9393
# test correct subsetting
94-
subset_indexes = [1, 4, 6, 7]
94+
subset_indexes = [1, 6, 7]
9595
subset = library.subset_spectra(subset_indexes)
96-
assert len(subset.inchikey_fingerprint_pairs) == 3
96+
assert len(subset.inchikey_fingerprint_pairs) == 2
9797
assert all(
9898
np.array_equal(library.inchikey_fingerprint_pairs[key], value)
9999
for key, value in subset.inchikey_fingerprint_pairs.items()

0 commit comments

Comments
 (0)