Skip to content

Commit e5ea2d9

Browse files
authored
Merge pull request #249 from CompOmics/fix/support-sqlalchemy2
Fix support for sqlalchemy v2, keeping backwards compatibility with v1.4
2 parents 3fd23c2 + b6fa6dc commit e5ea2d9

File tree

4 files changed: 286 additions and 50 deletions.

ms2pip/_utils/dlib.py

Lines changed: 40 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -18,16 +18,18 @@
1818
TypeDecorator,
1919
)
2020
from sqlalchemy.dialects.sqlite import BLOB
21+
from sqlalchemy.engine import Connection
2122

2223
DLIB_VERSION = "0.1.14"
2324

2425

2526
class CompressedArray(TypeDecorator):
26-
""" Sqlite-like does not support arrays.
27-
Let's use a custom type decorator.
27+
"""Sqlite-like does not support arrays.
28+
Let's use a custom type decorator.
2829
29-
See http://docs.sqlalchemy.org/en/latest/core/types.html#sqlalchemy.types.TypeDecorator
30+
See http://docs.sqlalchemy.org/en/latest/core/types.html#sqlalchemy.types.TypeDecorator
3031
"""
32+
3133
impl = BLOB
3234

3335
def __init__(self, dtype, *args, **kwargs):
@@ -49,51 +51,55 @@ def copy(self):
4951

5052
metadata = MetaData()
5153

52-
big_float = numpy.dtype('>f4')
53-
big_double = numpy.dtype('>f8')
54+
big_float = numpy.dtype(">f4")
55+
big_double = numpy.dtype(">f8")
5456

5557
Entry = Table(
56-
'entries',
58+
"entries",
5759
metadata,
58-
Column('PrecursorMz', Float, nullable=False, index=True),
59-
Column('PrecursorCharge', Integer, nullable=False),
60-
Column('PeptideModSeq', String, nullable=False),
61-
Column('PeptideSeq', String, nullable=False, index=True),
62-
Column('Copies', Integer, nullable=False),
63-
Column('RTInSeconds', Float, nullable=False),
64-
Column('Score', Float, nullable=False),
65-
Column('MassEncodedLength', Integer, nullable=False),
66-
Column('MassArray', CompressedArray(big_double), nullable=False),
67-
Column('IntensityEncodedLength', Integer, nullable=False),
68-
Column('IntensityArray', CompressedArray(big_float), nullable=False),
69-
Column('CorrelationEncodedLength', Integer, nullable=True),
70-
Column('CorrelationArray', CompressedArray(big_float), nullable=True),
71-
Column('RTInSecondsStart', Float, nullable=True),
72-
Column('RTInSecondsStop', Float, nullable=True),
73-
Column('MedianChromatogramEncodedLength', Integer, nullable=True),
74-
Column('MedianChromatogramArray', CompressedArray(big_float), nullable=True),
75-
Column('SourceFile', String, nullable=False),
60+
Column("PrecursorMz", Float, nullable=False, index=True),
61+
Column("PrecursorCharge", Integer, nullable=False),
62+
Column("PeptideModSeq", String, nullable=False),
63+
Column("PeptideSeq", String, nullable=False, index=True),
64+
Column("Copies", Integer, nullable=False),
65+
Column("RTInSeconds", Float, nullable=False),
66+
Column("Score", Float, nullable=False),
67+
Column("MassEncodedLength", Integer, nullable=False),
68+
Column("MassArray", CompressedArray(big_double), nullable=False),
69+
Column("IntensityEncodedLength", Integer, nullable=False),
70+
Column("IntensityArray", CompressedArray(big_float), nullable=False),
71+
Column("CorrelationEncodedLength", Integer, nullable=True),
72+
Column("CorrelationArray", CompressedArray(big_float), nullable=True),
73+
Column("RTInSecondsStart", Float, nullable=True),
74+
Column("RTInSecondsStop", Float, nullable=True),
75+
Column("MedianChromatogramEncodedLength", Integer, nullable=True),
76+
Column("MedianChromatogramArray", CompressedArray(big_float), nullable=True),
77+
Column("SourceFile", String, nullable=False),
7678
)
7779

78-
Index('ix_entries_PeptideModSeq_PrecursorCharge_SourceFile', Entry.c.PeptideModSeq, Entry.c.PrecursorCharge, Entry.c.SourceFile)
80+
Index(
81+
"ix_entries_PeptideModSeq_PrecursorCharge_SourceFile",
82+
Entry.c.PeptideModSeq,
83+
Entry.c.PrecursorCharge,
84+
Entry.c.SourceFile,
85+
)
7986

8087
PeptideToProtein = Table(
81-
'peptidetoprotein',
88+
"peptidetoprotein",
8289
metadata,
83-
Column('PeptideSeq', String, nullable=False, index=True),
84-
Column('isDecoy', Boolean, nullable=True),
85-
Column('ProteinAccession', String, nullable=False, index=True),
90+
Column("PeptideSeq", String, nullable=False, index=True),
91+
Column("isDecoy", Boolean, nullable=True),
92+
Column("ProteinAccession", String, nullable=False, index=True),
8693
)
8794

8895
Metadata = Table(
89-
'metadata',
96+
"metadata",
9097
metadata,
91-
Column('Key', String, nullable=False, index=True),
92-
Column('Value', String, nullable=False),
98+
Column("Key", String, nullable=False, index=True),
99+
Column("Value", String, nullable=False),
93100
)
94101

95102

96-
def open_sqlite(filename: Union[str, Path]) -> sqlalchemy.engine.Connection:
103+
def open_sqlite(filename: Union[str, Path]) -> Connection:
97104
engine = sqlalchemy.create_engine(f"sqlite:///{filename}")
98-
metadata.bind = engine
99105
return engine.connect()

ms2pip/spectrum_output.py

Lines changed: 14 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -47,13 +47,15 @@
4747
from collections import defaultdict
4848
from io import StringIO
4949
from pathlib import Path
50+
from os import PathLike
5051
from time import localtime, strftime
5152
from typing import Any, Dict, Generator, List, Optional, Union
5253

5354
import numpy as np
5455
from psm_utils import PSM, Peptidoform
5556
from pyteomics import proforma
56-
from sqlalchemy import engine, select
57+
from sqlalchemy import select
58+
from sqlalchemy.engine import Connection
5759

5860
from ms2pip._utils import dlib
5961
from ms2pip.result import ProcessingResult
@@ -62,7 +64,7 @@
6264

6365

6466
def write_spectra(
65-
filename: Union[str, Path],
67+
filename: Union[str, PathLike],
6668
processing_results: List[ProcessingResult],
6769
file_format: str = "tsv",
6870
write_mode: str = "w",
@@ -92,7 +94,7 @@ class _Writer(ABC):
9294

9395
suffix = ""
9496

95-
def __init__(self, filename: Union[str, Path], write_mode: str = "w"):
97+
def __init__(self, filename: Union[str, PathLike], write_mode: str = "w"):
9698
self.filename = Path(filename).with_suffix(self.suffix)
9799
self.write_mode = write_mode
98100

@@ -466,7 +468,7 @@ class Bibliospec(_Writer):
466468
"ion-mobility",
467469
]
468470

469-
def __init__(self, filename: Union[str, Path], write_mode: str = "w"):
471+
def __init__(self, filename: Union[str, PathLike], write_mode: str = "w"):
470472
super().__init__(filename, write_mode)
471473
self.ssl_file = self.filename.with_suffix(self.ssl_suffix)
472474
self.ms2_file = self.filename.with_suffix(self.ms2_suffix)
@@ -618,7 +620,7 @@ def _format_modified_sequence(peptidoform: Peptidoform) -> str:
618620
)
619621

620622
@staticmethod
621-
def _get_last_ssl_scan_number(ssl_file: Union[str, Path, StringIO]):
623+
def _get_last_ssl_scan_number(ssl_file: Union[str, PathLike, StringIO]):
622624
"""Read scan number of last line in a Bibliospec SSL file."""
623625
if isinstance(ssl_file, StringIO):
624626
ssl_file.seek(0)
@@ -653,7 +655,7 @@ def open(self):
653655
def write(self, processing_results: List[ProcessingResult]):
654656
"""Write MS2PIP predictions to a DLIB SQLite file."""
655657
connection = self._file_object
656-
dlib.metadata.create_all()
658+
dlib.metadata.create_all(connection.engine)
657659
self._write_metadata(connection)
658660
self._write_entries(processing_results, connection, self.filename)
659661
self._write_peptide_to_protein(processing_results, connection)
@@ -682,11 +684,11 @@ def _format_modified_sequence(peptidoform: Peptidoform) -> str:
682684
)
683685

684686
@staticmethod
685-
def _write_metadata(connection: engine.Connection):
687+
def _write_metadata(connection: Connection):
686688
"""Write metadata to DLIB SQLite file."""
687689
with connection.begin():
688690
version = connection.execute(
689-
select([dlib.Metadata.c.Value]).where(dlib.Metadata.c.Key == "version")
691+
select(dlib.Metadata.c.Value).where(dlib.Metadata.c.Key == "version")
690692
).scalar()
691693
if version is None:
692694
connection.execute(
@@ -699,8 +701,8 @@ def _write_metadata(connection: engine.Connection):
699701
@staticmethod
700702
def _write_entries(
701703
processing_results: List[ProcessingResult],
702-
connection: engine.Connection,
703-
output_filename: str,
704+
connection: Connection,
705+
output_filename: Union[str, PathLike],
704706
):
705707
"""Write spectra to DLIB SQLite file."""
706708
with connection.begin():
@@ -730,7 +732,7 @@ def _write_entries(
730732
)
731733

732734
@staticmethod
733-
def _write_peptide_to_protein(results: List[ProcessingResult], connection: engine.Connection):
735+
def _write_peptide_to_protein(results: List[ProcessingResult], connection: Connection):
734736
"""Write peptide-to-protein mappings to DLIB SQLite file."""
735737
peptide_to_proteins = {
736738
(result.psm.peptidoform.sequence, protein)
@@ -743,7 +745,7 @@ def _write_peptide_to_protein(results: List[ProcessingResult], connection: engin
743745
sql_peptide_to_proteins = set()
744746
proteins = {protein for _, protein in peptide_to_proteins}
745747
for peptide_to_protein in connection.execute(
746-
dlib.PeptideToProtein.select().where(
748+
select(dlib.PeptideToProtein).where(
747749
dlib.PeptideToProtein.c.ProteinAccession.in_(proteins)
748750
)
749751
):

pyproject.toml

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -37,8 +37,7 @@ dependencies = [
3737
"pandas>=1,<3",
3838
"pyarrow",
3939
"pyteomics>=3.5,<5",
40-
"tomlkit>=0.5,<1",
41-
"sqlalchemy>=1.3,<2",
40+
"sqlalchemy>=1.4,<3",
4241
"click>=7,<9",
4342
"xgboost>=1.3,<3",
4443
"lxml>=4",

0 commit comments

Comments
 (0)