Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ jobs:
run: ruff check --output-format=github .

- name: Install package and its dependencies
run: pip install --editable .[dev]
run: pip install --editable .[dev,idxml]

- name: Test with pytest and codecov
run: |
Expand Down Expand Up @@ -58,7 +58,7 @@ jobs:
- name: Install package and its dependencies
run: |
python -m pip install --upgrade pip
pip install .[dev]
pip install .[dev,idxml]

- name: Test imports
run: python -c "import psm_utils"
Expand Down
10 changes: 5 additions & 5 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -86,15 +86,15 @@ Goals and non-goals
Supported file formats
**********************

===================================================================================================================== ======================== =============== ===============
File format psm_utils tag Read support Write support
===================================================================================================================== ======================== =============== ===============
===================================================================================================================== ======================== =============== =============== ==========
File format psm_utils tag Read support Write support Comments
===================================================================================================================== ======================== =============== =============== ==========
`AlphaDIA precursors TSV <https://alphadia.readthedocs.io/en/latest/quickstart.html#output-files>`_ ``alphadia`` ✅ ❌
`DIA-NN TSV <https://github.com/vdemichev/DiaNN#output>`_ ``diann`` ✅ ❌
`FlashLFQ generic TSV <https://github.com/smith-chem-wisc/FlashLFQ/wiki/Identification-Input-Formats>`_ ``flashlfq`` ✅ ✅
`FragPipe PSM TSV <https://fragpipe.nesvilab.org/docs/tutorial_fragpipe_outputs.html#psmtsv/>`_ ``fragpipe`` ✅ ❌
`ionbot CSV <https://ionbot.cloud/>`_ ``ionbot`` ✅ ❌
`OpenMS idXML <https://www.openms.de/>`_ ``idxml`` ✅ ✅
`OpenMS idXML <https://www.openms.de/>`_ ``idxml`` ✅ ✅ Requires the optional ``pyopenms`` dependency (``pip install psm-utils[idxml]``)
`MaxQuant msms.txt <https://www.maxquant.org/>`_ ``msms`` ✅ ❌
`MS Amanda CSV <https://ms.imp.ac.at/?goto=msamanda>`_ ``msamanda`` ✅ ❌
`mzIdentML <https://psidev.info/mzidentml>`_ ``mzid`` ✅ ✅
Expand All @@ -108,7 +108,7 @@ Supported file formats
`ProteoScape Parquet <#>`_ ``proteoscape`` ✅ ❌
`TSV <https://psm-utils.readthedocs.io/en/stable/api/psm_utils.io/#module-psm_utils.io.tsv>`_ ``tsv`` ✅ ✅
`X!Tandem XML <https://www.thegpm.org/tandem/>`_ ``xtandem`` ✅ ❌
===================================================================================================================== ======================== =============== ===============
===================================================================================================================== ======================== =============== =============== ==========

Legend: ✅ Supported, ❌ Unsupported

Expand Down
11 changes: 10 additions & 1 deletion psm_utils/io/idxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,12 @@
module="pyopenms",
)

import pyopenms as oms #noqa: E402
try:
import pyopenms as oms #noqa: E402
except ImportError:
_has_openms = False
else:
_has_openms = True

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -99,6 +104,8 @@ def __init__(self, filename: Union[Path, str], *args, **kwargs) -> None:
>>> psm_list = [psm for psm in reader]
"""
super().__init__(filename, *args, **kwargs)
if not _has_openms:
raise ImportError("pyOpenMS is required to read idXML files")
self.protein_ids, self.peptide_ids = self._parse_idxml()
self.user_params_metadata = self._get_userparams_metadata(self.peptide_ids[0].getHits()[0])
self.rescoring_features = self._get_rescoring_features(self.peptide_ids[0].getHits()[0])
Expand Down Expand Up @@ -326,6 +333,8 @@ def __init__(

"""
super().__init__(filename, *args, **kwargs)
if not _has_openms:
raise ImportError("pyOpenMS is required to write idXML files")
self.protein_ids = protein_ids
self.peptide_ids = peptide_ids
self._writer = None
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ dependencies = [
"psims",
"pyarrow",
"pydantic >= 2",
"pyopenms",
"pyteomics >= 4",
"rich",
"sqlalchemy",
Expand All @@ -46,6 +45,7 @@ docs = [
"toml",
]
online = ["streamlit", "plotly"]
idxml = ["pyopenms"]

[project.urls]
GitHub = "https://github.com/compomics/psm_utils"
Expand Down
Binary file added tests/test_data/test.pq
Binary file not shown.
37 changes: 14 additions & 23 deletions tests/test_io/test_parquet.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Tests for psm_utils.io.parquet."""

import hashlib
import os

from psm_utils.io.parquet import ParquetReader, ParquetWriter
Expand Down Expand Up @@ -32,40 +31,32 @@
]


def compute_checksum(filename):
    """Return the SHA-256 hex digest of the file at *filename*.

    The file is opened in binary mode and read in 4096-byte chunks, so the
    whole file is never held in memory at once.

    Parameters
    ----------
    filename
        Path of the file to hash; passed directly to :func:`open`.

    Returns
    -------
    str
        Hexadecimal SHA-256 digest of the file contents.

    """
    hash_func = hashlib.sha256()
    with open(filename, "rb") as f:
        # ``iter`` with a sentinel keeps calling ``f.read`` until it returns
        # the empty bytes object, i.e. until end of file.
        for chunk in iter(lambda: f.read(4096), b""):
            hash_func.update(chunk)
    return hash_func.hexdigest()


class TestParquetWriter:
expected_checksum = "1e5ee7afc5d4131bce8f1d0908136b8c559303abb7bbd7d052afa111d5e64f0c"

def test_write_psm(self):
with ParquetWriter("test.pq") as writer:
for test_case in test_cases:
writer.write_psm(PSM(**test_case))
actual_checksum = compute_checksum("test.pq")
assert actual_checksum == self.expected_checksum, "Checksums do not match"

with ParquetReader("test.pq") as reader:
for i, psm in enumerate(reader):
assert psm == PSM(**test_cases[i])

os.remove("test.pq")

def test_write_file(self):
with ParquetWriter("test.pq") as writer:
writer.write_file(PSMList(psm_list=[PSM(**t) for t in test_cases]))
actual_checksum = compute_checksum("test.pq")
assert actual_checksum == self.expected_checksum, "Checksums do not match"
# os.remove("test.pq")

with ParquetReader("test.pq") as reader:
for i, psm in enumerate(reader):
assert psm == PSM(**test_cases[i])

os.remove("test.pq")


class TestParquetReader:
def test_iter(self):
# Write test cases to file
ParquetWriter("test.pq").write_file(PSMList(psm_list=[PSM(**t) for t in test_cases]))

# Read test cases from file
for i, psm in enumerate(ParquetReader("test.pq")):
assert psm == PSM(**test_cases[i])

os.remove("test.pq")
with ParquetReader("tests/test_data/test.pq") as reader:
for i, psm in enumerate(reader):
assert psm == PSM(**test_cases[i])
Loading