Skip to content

Commit 1855638

Browse files
committed
add new (draft) query methods
1 parent 52ce65b commit 1855638

File tree

1 file changed

+38
-4
lines changed

1 file changed

+38
-4
lines changed

ms2query/ms2query_library.py

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,31 @@ def query_embedding_index(
142142
df = pd.DataFrame(rows, columns=["query_ix", "rank", "spec_id", "score"])
143143
return df
144144

145+
def query_spectra_by_spectra(
    self,
    spectra: list[Spectrum],
    *,
    k_spectra: int = 100,
    ef: Optional[int] = None,
):
    """
    Look up the nearest library spectra for every query spectrum.

    The index is ensured first, the input is normalized through
    ``_ensure_spectra_list``, and the actual search is delegated to
    ``query_embedding_index``.

    Parameters
    ----------
    spectra : list[Spectrum]
        Query spectra.
    k_spectra : int
        Number of top spectra to retrieve from the embedding index
        for each query spectrum.
    ef : Optional[int]
        nmslib ef parameter (higher = better recall / slower).

    Returns
    -------
    The result of ``query_embedding_index`` for the given spectra
    (a DataFrame with the columns ``query_ix``, ``rank``, ``spec_id``
    and ``score``).
    """
    # The index has to be available before any search is attempted.
    self._ensure_index()
    query_spectra = _ensure_spectra_list(spectra)

    # Delegate the nearest-neighbour search to the embedding index query.
    hits = self.query_embedding_index(query_spectra, k=k_spectra, ef=ef)
    return hits
169+
145170
def query_compounds_by_spectra(
146171
self,
147172
spectra: list[Spectrum],
@@ -164,14 +189,11 @@ def query_compounds_by_spectra(
164189
ef : Optional[int]
165190
nmslib ef parameter (higher = better recall / slower).
166191
"""
167-
self._ensure_index()
168-
spectra = _ensure_spectra_list(spectra)
169-
170192
if k_compounds > k_spectra:
171193
raise ValueError("k_compounds cannot be larger than k_spectra")
172194

173195
# Step1: Query spectral embeddings
174-
results = self.query_embedding_index(spectra, k=k_spectra, ef=ef)
196+
# query_spectra_by_spectra takes the keyword-only argument `k_spectra` (not `k`).
results = self.query_spectra_by_spectra(spectra, k_spectra=k_spectra, ef=ef)
175197

176198
# Pick k_compounds top compounds from the k_spectra hits (if possible)
177199
spec_ids = results.spec_id.values
@@ -194,6 +216,18 @@ def query_compounds_by_spectra(
194216

195217
return df_selected
196218

219+
def analogue_search(
    self,
    spectra: list[Spectrum],
):
    """
    Perform an analogue search for the given spectra (draft).

    Currently this only runs ``query_compounds_by_spectra`` with its
    default settings and keeps the first returned row for every
    ``query_ix``.

    Parameters
    ----------
    spectra : list[Spectrum]
        Query spectra.

    Returns
    -------
    The rows returned by ``query_compounds_by_spectra``, reduced to one
    row (the first occurrence) per ``query_ix``.

    TODO: implement analogue search logic here.
    """
    top_compounds = self.query_compounds_by_spectra(spectra)
    # TODO: implement analogue search logic here
    # pandas has no `drop_duplicated`; `drop_duplicates` keeps the first
    # row of every query_ix by default.
    return top_compounds.drop_duplicates(subset="query_ix")
230+
197231

198232
# ----------------------------- helpers / optional glue -----------------------------
199233

0 commit comments

Comments
 (0)