Skip to content

Commit 53f7d94

Browse files
committed
Percolator: support gzipped files
1 parent 95e8218 commit 53f7d94

File tree

1 file changed

+19
-5
lines changed

1 file changed

+19
-5
lines changed

psm_utils/io/percolator.py

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from __future__ import annotations
1717

1818
import csv
19+
import gzip
1920
import logging
2021
import re
2122
from pathlib import Path
@@ -118,8 +119,12 @@ def __iter__(self) -> Iterable[PSM]:
118119

119120
@staticmethod
120121
def _read_header(filename):
121-
with open(filename, "rt") as f:
122-
fieldnames = f.readline().strip().lower().split("\t")
122+
if str(filename).endswith(".gz"):
123+
with gzip.open(filename, "rt") as f:
124+
fieldnames = f.readline().strip().lower().split("\t")
125+
else:
126+
with open(filename, "rt") as f:
127+
fieldnames = f.readline().strip().lower().split("\t")
123128
return fieldnames
124129

125130
@staticmethod
@@ -367,7 +372,12 @@ def _parse_existing_file(
367372
) -> Tuple[List[str], Optional[int]]:
368373
"""Parse existing Percolator Tab file to determine fieldnames and last ScanNr."""
369374
# Get fieldnames
370-
with open(filename, "rt") as open_file:
375+
if str(filename).endswith(".gz"):
376+
open_func = gzip.open
377+
else:
378+
open_func = open
379+
380+
with open_func(filename, "rt") as open_file:
371381
for line in open_file:
372382
fieldnames = line.strip().split("\t")
373383
break
@@ -382,7 +392,7 @@ def _parse_existing_file(
382392

383393
# Get last ScanNr
384394
last_scannr = None
385-
with open(filename, "rt") as open_file:
395+
with open_func(filename, "rt") as open_file:
386396
# Read last line
387397
open_file.seek(0)
388398
last_line = None
@@ -409,7 +419,11 @@ def _parse_existing_file(
409419
class _PercolatorTabIO:
410420
def __init__(self, *args, protein_separator="|||", **kwargs) -> None:
411421
"""File reader and writer for Percolator Tab files with fixed Proteins tab."""
412-
self._open_file = open(*args, **kwargs)
422+
filename = args[0]
423+
if str(filename).endswith(".gz"):
424+
self._open_file = gzip.open(*args, **kwargs)
425+
else:
426+
self._open_file = open(*args, **kwargs)
413427
self.protein_separator = protein_separator
414428

415429
def __enter__(self, *args, **kwargs) -> _PercolatorTabIO:

0 commit comments

Comments
 (0)