Skip to content

Commit ec610b1

Browse files
committed
add ms2query database class
1 parent cdcd678 commit ec610b1

File tree

2 files changed

+170
-0
lines changed

2 files changed

+170
-0
lines changed

ms2query/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# Re-export the database facade so callers can write
# `from ms2query import MS2QueryDatabase`.
from .ms2query_database import MS2QueryDatabase

# Names exported by `from ms2query import *`.
__all__ = ["MS2QueryDatabase"]

ms2query/ms2query_database.py

Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
import sqlite3
2+
from dataclasses import dataclass, field
3+
from typing import Any, Dict, List, Optional, Tuple
4+
import numpy as np
5+
import pandas as pd
6+
from ms2query.data_processing import inchikey14_from_full
7+
from ms2query.database import (
8+
CompoundDatabase,
9+
SpecToCompoundMap,
10+
SpectralDatabase,
11+
blob_to_array,
12+
ensure_merged_tables,
13+
map_from_spectraldb_metadata,
14+
)
15+
16+
17+
# ================================ public wrapper ==============================
18+
19+
@dataclass
class MS2QueryDatabase:
    """Thin facade around the 3 SQLite-backed components + merged tables.

    Responsibilities
    ----------------
    * Own a single SQLite path and initialize component tables if needed.
    * Provide one-stop creation from raw `matchms.Spectrum` objects.
    * Offer ergonomic retrievals by `spec_id`, `comp_id` (inchikey14), and `merged_id`.
    * Keep *types and table access paths* in one place.

    Notes
    -----
    - This wrapper uses separate connections created by the component classes.
      If strict single-transaction semantics across components is required,
      consider extending the components to accept an injected `sqlite3.Connection`.
    - Queries issued by the wrapper itself (merged tables, `sql`) open a
      short-lived connection per call and close it before returning.
    - The instance can be used as a context manager; leaving the ``with``
      block calls :meth:`close`.
    """

    # Path of the SQLite file shared by every component.
    sqlite_path: str
    # Metadata field names handed to `SpectralDatabase` on construction.
    metadata_fields: List[str] = field(default_factory=lambda: [
        "precursor_mz", "ionmode", "smiles", "inchikey", "inchi", "name",
        "instrument_type", "adduct", "collision_energy"
    ])

    # Component singletons: created in `__post_init__`, not constructor arguments.
    sdb: "SpectralDatabase" = field(init=False)
    cdb: "CompoundDatabase" = field(init=False)
    mapper: "SpecToCompoundMap" = field(init=False)

    def __post_init__(self):
        """Create the three components and make sure the merged tables exist."""
        # Initialize components (each manages its own connection)
        self.sdb = SpectralDatabase(self.sqlite_path, metadata_fields=self.metadata_fields)
        self.cdb = CompoundDatabase(self.sqlite_path)
        self.mapper = SpecToCompoundMap(self.sqlite_path)
        # Ensure merged tables exist on the *same* file.
        # `with conn:` only commits/rolls back; the explicit close() is what
        # actually releases the connection.
        conn = sqlite3.connect(self.sqlite_path)
        try:
            with conn:
                ensure_merged_tables(conn)
        finally:
            conn.close()

    def __enter__(self) -> "MS2QueryDatabase":
        """Return the instance itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        """Close the component connections; exceptions are not suppressed."""
        self.close()

    # ------------------------------ internal helpers ------------------------------

    def _read_dataframe(self, query: str, params: Optional[Any] = None) -> pd.DataFrame:
        """Run `query` on a short-lived connection and return the rows as a DataFrame."""
        conn = sqlite3.connect(self.sqlite_path)
        try:
            return pd.read_sql_query(query, conn, params=params)
        finally:
            conn.close()

    # ----------------------------- creation pipeline -----------------------------

    def create_from_spectra(
        self,
        spectra: List[Any],  # matchms.Spectrum
        *,
        map_compounds: bool = True,
        create_missing_compounds: bool = True,
    ) -> Dict[str, int]:
        """Ingest spectra -> (optionally) create spec↔comp links & upsert compounds.

        Parameters
        ----------
        spectra:
            Spectra to add through the spectral database component.
        map_compounds:
            When true (and at least one spectrum was inserted), run
            `map_from_spectraldb_metadata` on the shared SQLite file.
        create_missing_compounds:
            Forwarded unchanged to `map_from_spectraldb_metadata`.

        Returns
        -------
        Counts: {"n_inserted_spectra": int, "n_mapped": int, "n_new_compounds": int}

        Notes
        -----
        The mapping step is given only the database path, not the freshly
        inserted ids, so its counts are whatever that helper reports.
        """
        spec_ids = self.sdb.add_spectra(spectra)
        n_mapped = 0
        n_new = 0
        if map_compounds and spec_ids:
            n_mapped, n_new = map_from_spectraldb_metadata(
                spectral_db_sqlite_path=self.sqlite_path,
                mapping_sqlite_path=self.sqlite_path,
                compounds_sqlite_path=self.sqlite_path,
                create_missing_compounds=create_missing_compounds,
            )
        return {
            "n_inserted_spectra": len(spec_ids),
            "n_mapped": int(n_mapped),
            "n_new_compounds": int(n_new),
        }

    # --------------------------------- retrievals --------------------------------
    # ---- by spec_id ----

    def spectra_by_spec_ids(self, spec_ids: List[int]):
        """Return what `SpectralDatabase.get_spectra_by_ids` yields for `spec_ids`."""
        return self.sdb.get_spectra_by_ids(spec_ids)

    def fragments_by_spec_ids(self, spec_ids: List[int]):
        """Return what `SpectralDatabase.get_fragments_by_ids` yields for `spec_ids`."""
        return self.sdb.get_fragments_by_ids(spec_ids)

    def metadata_by_spec_ids(self, spec_ids: List[int]) -> pd.DataFrame:
        """Return the metadata table for the given spectrum ids."""
        return self.sdb.get_metadata_by_ids(spec_ids)

    # ---- by comp_id (inchikey14) ----

    def spec_ids_by_comp_id(self, comp_id: str) -> List[int]:
        """Return the spectrum ids the mapper links to `comp_id`."""
        return self.mapper.get_specs_for_comp(comp_id)

    def spectra_by_comp_id(self, comp_id: str):
        """Return the spectra of every spectrum id linked to `comp_id`."""
        return self.sdb.get_spectra_by_ids(self.spec_ids_by_comp_id(comp_id))

    def metadata_by_comp_id(self, comp_id: str) -> pd.DataFrame:
        """Return the metadata of every spectrum id linked to `comp_id`."""
        spec_ids = self.spec_ids_by_comp_id(comp_id)
        return self.sdb.get_metadata_by_ids(spec_ids)

    def compound(self, comp_id: str) -> Optional[Dict[str, Any]]:
        """Return the compound record for `comp_id` from the compound database."""
        return self.cdb.get_compound(comp_id)

    # ---- merged spectra ----

    def merged_rows_by_comp_id(self, comp_id: str) -> pd.DataFrame:
        """Return all `merged_spectra` rows for `comp_id`, ordered by `merged_id`."""
        return self._read_dataframe(
            "SELECT * FROM merged_spectra WHERE comp_id = ? ORDER BY merged_id",
            params=(comp_id,),
        )

    def merged_row(self, merged_id: int) -> Optional[Dict[str, Any]]:
        """Return one `merged_spectra` row as a column-name -> value dict.

        Returns ``None`` when no row has the given `merged_id`.
        """
        conn = sqlite3.connect(self.sqlite_path)
        try:
            # sqlite3.Row exposes column names; with the default tuple rows,
            # dict(row) would raise TypeError.
            conn.row_factory = sqlite3.Row
            row = conn.execute(
                "SELECT * FROM merged_spectra WHERE merged_id = ?", (merged_id,)
            ).fetchone()
        finally:
            conn.close()
        return dict(row) if row is not None else None

    def merged_spectrum_arrays(self, merged_id: int) -> Optional[Tuple[np.ndarray, np.ndarray]]:
        """Return ``(mz, intensities)`` arrays of a merged spectrum.

        Returns ``None`` when no row has the given `merged_id`.
        """
        r = self.merged_row(merged_id)
        if r is None:
            return None
        # By convention in `spectra_merging.py`: mz=float64, intensities=float32
        mz = blob_to_array(r["mz"], np.float64)
        it = blob_to_array(r["intensities"], np.float32)
        return mz, it

    # -------------------------------- convenience SQL ------------------------------

    def sql(self, query: str, params: Optional[Any] = None) -> pd.DataFrame:
        """Run a SQL query on the shared SQLite file and return a DataFrame.

        Intended for read-only ``SELECT`` statements; this is a convention and
        is *not* enforced by the connection.

        Parameters
        ----------
        query:
            SQL text, optionally containing placeholders.
        params:
            Optional bind values for the placeholders, passed through to
            `pandas.read_sql_query`. Prefer this over formatting values into
            `query`.
        """
        return self._read_dataframe(query, params=params)

    # ----------------------------------- utilities ---------------------------------

    def inchikey_to_comp_id(self, inchikey_full: str) -> Optional[str]:
        """Convert a full InChIKey to a `comp_id` via `inchikey14_from_full`."""
        return inchikey14_from_full(inchikey_full)

    def close(self):
        """Close the component connections (best effort).

        Every component is attempted even if an earlier one fails. Failures
        are logged instead of raised so that cleanup never masks the error
        that triggered it. Components that were never created (e.g. when
        construction failed part-way) are skipped.
        """
        import logging  # local import: only needed on the failure path

        for name in ("sdb", "cdb", "mapper"):
            component = getattr(self, name, None)
            if component is None:
                continue
            try:
                component.close()
            except Exception:
                logging.getLogger(__name__).warning(
                    "Failed to close %s component", name, exc_info=True
                )

0 commit comments

Comments
 (0)