CompOmics · RalfG · Nov 18, 2024 · Nov 15, 2024 · Nov 18, 2024 · Nov 18, 2024
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -27,7 +27,7 @@ jobs:
         run: ruff check --output-format=github .
 
       - name: Install package and its dependencies
-        run: pip install --editable .[dev]
+        run: pip install --editable .[dev,idxml]
 
       - name: Test with pytest and codecov
         run: |
@@ -58,7 +58,7 @@ jobs:
       - name: Install package and its dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install .[dev]
+          pip install .[dev,idxml]
 
       - name: Test imports
         run: python -c "import psm_utils"

diff --git a/README.rst b/README.rst
@@ -86,15 +86,15 @@ Goals and non-goals
 Supported file formats
 **********************
 
-===================================================================================================================== ======================== =============== ===============
- File format                                                                                                           psm_utils tag            Read support    Write support
-===================================================================================================================== ======================== =============== ===============
+===================================================================================================================== ======================== =============== =============== ==========
+ File format                                                                                                           psm_utils tag            Read support    Write support   Comments
+===================================================================================================================== ======================== =============== =============== ==========
  `AlphaDIA precursors TSV <https://alphadia.readthedocs.io/en/latest/quickstart.html#output-files>`_                   ``alphadia``             ✅              ❌
  `DIA-NN TSV <https://github.com/vdemichev/DiaNN#output>`_                                                             ``diann``                ✅              ❌
  `FlashLFQ generic TSV <https://github.com/smith-chem-wisc/FlashLFQ/wiki/Identification-Input-Formats>`_               ``flashlfq``             ✅              ✅
  `FragPipe PSM TSV <https://fragpipe.nesvilab.org/docs/tutorial_fragpipe_outputs.html#psmtsv/>`_                       ``fragpipe``             ✅              ❌
  `ionbot CSV <https://ionbot.cloud/>`_                                                                                 ``ionbot``               ✅              ❌
- `OpenMS idXML <https://www.openms.de/>`_                                                                              ``idxml``                ✅              ✅
+ `OpenMS idXML <https://www.openms.de/>`_                                                                              ``idxml``                ✅              ✅                Requires the optional ``pyopenms`` dependency (``pip install psm-utils[idxml]``)
  `MaxQuant msms.txt <https://www.maxquant.org/>`_                                                                      ``msms``                 ✅              ❌
  `MS Amanda CSV <https://ms.imp.ac.at/?goto=msamanda>`_                                                                ``msamanda``             ✅              ❌
  `mzIdentML <https://psidev.info/mzidentml>`_                                                                          ``mzid``                 ✅              ✅
@@ -108,7 +108,7 @@ Supported file formats
  `ProteoScape Parquet <#>`_                                                                                            ``proteoscape``          ✅              ❌
  `TSV <https://psm-utils.readthedocs.io/en/stable/api/psm_utils.io/#module-psm_utils.io.tsv>`_                         ``tsv``                  ✅              ✅
  `X!Tandem XML <https://www.thegpm.org/tandem/>`_                                                                      ``xtandem``              ✅              ❌
-===================================================================================================================== ======================== =============== ===============
+===================================================================================================================== ======================== =============== =============== ==========
 
 Legend: ✅ Supported, ❌ Unsupported
 

diff --git a/psm_utils/io/idxml.py b/psm_utils/io/idxml.py
@@ -30,7 +30,12 @@
     module="pyopenms",
 )
 
-import pyopenms as oms  #noqa: E402
+try:
+    import pyopenms as oms  #noqa: E402
+except ImportError:
+    _has_openms = False
+else:
+    _has_openms = True
 
 logger = logging.getLogger(__name__)
 
@@ -99,6 +104,8 @@ def __init__(self, filename: Union[Path, str], *args, **kwargs) -> None:
         >>> psm_list = [psm for psm in reader]
         """
         super().__init__(filename, *args, **kwargs)
+        if not _has_openms:
+            raise ImportError("pyOpenMS is required to read idXML files")
         self.protein_ids, self.peptide_ids = self._parse_idxml()
         self.user_params_metadata = self._get_userparams_metadata(self.peptide_ids[0].getHits()[0])
         self.rescoring_features = self._get_rescoring_features(self.peptide_ids[0].getHits()[0])
@@ -326,6 +333,8 @@ def __init__(
 
         """
         super().__init__(filename, *args, **kwargs)
+        if not _has_openms:
+            raise ImportError("pyOpenMS is required to write idXML files")
         self.protein_ids = protein_ids
         self.peptide_ids = peptide_ids
         self._writer = None

diff --git a/pyproject.toml b/pyproject.toml
@@ -27,7 +27,6 @@ dependencies = [
     "psims",
     "pyarrow",
     "pydantic >= 2",
-    "pyopenms",
     "pyteomics >= 4",
     "rich",
     "sqlalchemy",
@@ -46,6 +45,7 @@ docs = [
     "toml",
 ]
 online = ["streamlit", "plotly"]
+idxml = ["pyopenms"]
 
 [project.urls]
 GitHub = "https://github.com/compomics/psm_utils"

diff --git a/tests/test_data/test.pq b/tests/test_data/test.pq
diff --git a/tests/test_io/test_parquet.py b/tests/test_io/test_parquet.py
@@ -1,6 +1,5 @@
 """Tests for psm_utils.io.tsv."""
 
-import hashlib
 import os
 
 from psm_utils.io.parquet import ParquetReader, ParquetWriter
@@ -32,40 +31,32 @@
 ]
 
 
-def compute_checksum(filename):
-    hash_func = hashlib.sha256()
-    with open(filename, "rb") as f:
-        for chunk in iter(lambda: f.read(4096), b""):
-            hash_func.update(chunk)
-    return hash_func.hexdigest()
-
-
 class TestParquetWriter:
-    expected_checksum = "1e5ee7afc5d4131bce8f1d0908136b8c559303abb7bbd7d052afa111d5e64f0c"
-
     def test_write_psm(self):
         with ParquetWriter("test.pq") as writer:
             for test_case in test_cases:
                 writer.write_psm(PSM(**test_case))
-        actual_checksum = compute_checksum("test.pq")
-        assert actual_checksum == self.expected_checksum, "Checksums do not match"
+
+        with ParquetReader("test.pq") as reader:
+            for i, psm in enumerate(reader):
+                assert psm == PSM(**test_cases[i])
+
         os.remove("test.pq")
 
     def test_write_file(self):
         with ParquetWriter("test.pq") as writer:
             writer.write_file(PSMList(psm_list=[PSM(**t) for t in test_cases]))
-        actual_checksum = compute_checksum("test.pq")
-        assert actual_checksum == self.expected_checksum, "Checksums do not match"
-        # os.remove("test.pq")
+
+        with ParquetReader("test.pq") as reader:
+            for i, psm in enumerate(reader):
+                assert psm == PSM(**test_cases[i])
+
+        os.remove("test.pq")
 
 
 class TestParquetReader:
     def test_iter(self):
-        # Write test cases to file
-        ParquetWriter("test.pq").write_file(PSMList(psm_list=[PSM(**t) for t in test_cases]))
-
         # Read test cases from file
-        for i, psm in enumerate(ParquetReader("test.pq")):
-            assert psm == PSM(**test_cases[i])
-
-        os.remove("test.pq")
+        with ParquetReader("tests/test_data/test.pq") as reader:
+            for i, psm in enumerate(reader):
+                assert psm == PSM(**test_cases[i])