diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index c46501d..edc817c 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -27,7 +27,7 @@ jobs: run: ruff check --output-format=github . - name: Install package and its dependencies - run: pip install --editable .[dev] + run: pip install --editable .[dev,idxml] - name: Test with pytest and codecov run: | @@ -58,7 +58,7 @@ jobs: - name: Install package and its dependencies run: | python -m pip install --upgrade pip - pip install .[dev] + pip install .[dev,idxml] - name: Test imports run: python -c "import psm_utils" diff --git a/README.rst b/README.rst index c2c3227..4d5f9d1 100644 --- a/README.rst +++ b/README.rst @@ -86,15 +86,15 @@ Goals and non-goals Supported file formats ********************** -===================================================================================================================== ======================== =============== =============== - File format psm_utils tag Read support Write support -===================================================================================================================== ======================== =============== =============== +===================================================================================================================== ======================== =============== =============== ========== + File format psm_utils tag Read support Write support Comments +===================================================================================================================== ======================== =============== =============== ========== `AlphaDIA precursors TSV `_ ``alphadia`` ✅ ❌ `DIA-NN TSV `_ ``diann`` ✅ ❌ `FlashLFQ generic TSV `_ ``flashlfq`` ✅ ✅ `FragPipe PSM TSV `_ ``fragpipe`` ✅ ❌ `ionbot CSV `_ ``ionbot`` ✅ ❌ - `OpenMS idXML `_ ``idxml`` ✅ ✅ + `OpenMS idXML `_ ``idxml`` ✅ ✅ Requires the optional ``openms`` dependency (``pip install psm-utils[idxml]``) `MaxQuant msms.txt `_ ``msms`` ✅ ❌ `MS Amanda CSV `_ ``msamanda`` ✅ ❌ `mzIdentML `_ ``mzid`` ✅ ✅ @@ -108,7 +108,7 @@ Supported file formats `ProteoScape Parquet <#>`_ ``proteoscape`` ✅ ❌ `TSV `_ ``tsv`` ✅ ✅ `X!Tandem XML `_ ``xtandem`` ✅ ❌ -===================================================================================================================== ======================== =============== =============== +===================================================================================================================== ======================== =============== =============== ========== Legend: ✅ Supported, ❌ Unsupported diff --git a/psm_utils/io/idxml.py b/psm_utils/io/idxml.py index bff4412..a54726b 100644 --- a/psm_utils/io/idxml.py +++ b/psm_utils/io/idxml.py @@ -30,7 +30,12 @@ module="pyopenms", ) -import pyopenms as oms #noqa: E402 +try: + import pyopenms as oms #noqa: E402 +except ImportError: + _has_openms = False +else: + _has_openms = True logger = logging.getLogger(__name__) @@ -99,6 +104,8 @@ def __init__(self, filename: Union[Path, str], *args, **kwargs) -> None: >>> psm_list = [psm for psm in reader] """ super().__init__(filename, *args, **kwargs) + if not _has_openms: + raise ImportError("pyOpenMS is required to read idXML files") self.protein_ids, self.peptide_ids = self._parse_idxml() self.user_params_metadata = self._get_userparams_metadata(self.peptide_ids[0].getHits()[0]) self.rescoring_features = self._get_rescoring_features(self.peptide_ids[0].getHits()[0]) @@ -326,6 +333,8 @@ def __init__( """ super().__init__(filename, *args, **kwargs) + if not _has_openms: + raise ImportError("pyOpenMS is required to write idXML files") self.protein_ids = protein_ids self.peptide_ids = peptide_ids self._writer = None diff --git a/pyproject.toml b/pyproject.toml index 5994f20..54c2479 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,6 @@ dependencies = [ "psims", "pyarrow", "pydantic >= 2", - "pyopenms", "pyteomics >= 4", "rich", "sqlalchemy", @@ -46,6 +45,7 @@ docs = [ "toml", ] online = ["streamlit", "plotly"] +idxml = ["pyopenms"] [project.urls] GitHub = "https://github.com/compomics/psm_utils" diff --git a/tests/test_data/test.pq b/tests/test_data/test.pq new file mode 100644 index 0000000..aaf4e6c Binary files /dev/null and b/tests/test_data/test.pq differ diff --git a/tests/test_io/test_parquet.py b/tests/test_io/test_parquet.py index 247a04e..929c81d 100644 --- a/tests/test_io/test_parquet.py +++ b/tests/test_io/test_parquet.py @@ -1,6 +1,5 @@ """Tests for psm_utils.io.tsv.""" -import hashlib import os from psm_utils.io.parquet import ParquetReader, ParquetWriter @@ -32,40 +31,32 @@ ] -def compute_checksum(filename): - hash_func = hashlib.sha256() - with open(filename, "rb") as f: - for chunk in iter(lambda: f.read(4096), b""): - hash_func.update(chunk) - return hash_func.hexdigest() - - class TestParquetWriter: - expected_checksum = "1e5ee7afc5d4131bce8f1d0908136b8c559303abb7bbd7d052afa111d5e64f0c" - def test_write_psm(self): with ParquetWriter("test.pq") as writer: for test_case in test_cases: writer.write_psm(PSM(**test_case)) - actual_checksum = compute_checksum("test.pq") - assert actual_checksum == self.expected_checksum, "Checksums do not match" + + with ParquetReader("test.pq") as reader: + for i, psm in enumerate(reader): + assert psm == PSM(**test_cases[i]) + os.remove("test.pq") def test_write_file(self): with ParquetWriter("test.pq") as writer: writer.write_file(PSMList(psm_list=[PSM(**t) for t in test_cases])) - actual_checksum = compute_checksum("test.pq") - assert actual_checksum == self.expected_checksum, "Checksums do not match" - # os.remove("test.pq") + + with ParquetReader("test.pq") as reader: + for i, psm in enumerate(reader): + assert psm == PSM(**test_cases[i]) + + os.remove("test.pq") class TestParquetReader: def test_iter(self): - # Write test cases to file - ParquetWriter("test.pq").write_file(PSMList(psm_list=[PSM(**t) for t in test_cases])) - # Read test cases from file - for i, psm in enumerate(ParquetReader("test.pq")): - assert psm == PSM(**test_cases[i]) - - os.remove("test.pq") + with ParquetReader("tests/test_data/test.pq") as reader: + for i, psm in enumerate(reader): + assert psm == PSM(**test_cases[i])