Skip to content

Commit 20f1fc8

Browse files
committed
Add support for pathlib Path objects in dataset reads and writes.
1 parent 7ad8083 commit 20f1fc8

File tree

2 files changed

+23
-19
lines changed

2 files changed

+23
-19
lines changed

riid/data/sampleset.py

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,12 @@
99
import json
1010
import logging
1111
import operator
12-
import os
1312
import random
1413
import re
1514
import warnings
1615
from datetime import datetime, timezone
1716
from enum import Enum
17+
from pathlib import Path
1818
from typing import Callable, Iterable, Tuple, Union
1919

2020
import numpy as np
@@ -1406,7 +1406,7 @@ def squash(self) -> SampleSet:
14061406

14071407
return flat_ss
14081408

1409-
def to_hdf(self, path: str, verbose=False, **kwargs):
1409+
def to_hdf(self, path: str | Path, verbose=False, **kwargs):
14101410
"""Write the `SampleSet` to disk as a HDF file.
14111411
14121412
Args:
@@ -1417,14 +1417,15 @@ def to_hdf(self, path: str, verbose=False, **kwargs):
14171417
Raises:
14181418
`ValueError` when provided path extension is invalid
14191419
"""
1420-
if not path.lower().endswith(riid.SAMPLESET_HDF_FILE_EXTENSION):
1420+
path = Path(path)
1421+
if path.suffix != riid.SAMPLESET_HDF_FILE_EXTENSION:
14211422
logging.warning(f"Path does not end in {riid.SAMPLESET_HDF_FILE_EXTENSION}")
14221423

14231424
_write_hdf(self, path, **kwargs)
14241425
if verbose:
14251426
logging.info(f"Saved SampleSet to '{path}'")
14261427

1427-
def to_json(self, path: str, verbose=False):
1428+
def to_json(self, path: str | Path, verbose=False):
14281429
"""Write the `SampleSet` to disk as a JSON file.
14291430
14301431
Warning: it is not recommended that you use this on large `SampleSet` objects.
@@ -1437,14 +1438,15 @@ def to_json(self, path: str, verbose=False):
14371438
Raises:
14381439
`ValueError` when provided path extension is invalid
14391440
"""
1440-
if not path.lower().endswith(riid.SAMPLESET_JSON_FILE_EXTENSION):
1441+
path = Path(path)
1442+
if path.suffix != riid.SAMPLESET_JSON_FILE_EXTENSION:
14411443
logging.warning(f"Path does not end in {riid.SAMPLESET_JSON_FILE_EXTENSION}")
14421444

14431445
_write_json(self, path)
14441446
if verbose:
14451447
logging.info(f"Saved SampleSet to '{path}'")
14461448

1447-
def to_pcf(self, path: str, verbose=False):
1449+
def to_pcf(self, path: str | Path, verbose=False):
14481450
"""Write the `SampleSet` to disk as a PCF.
14491451
14501452
Args:
@@ -1454,7 +1456,8 @@ def to_pcf(self, path: str, verbose=False):
14541456
Raises:
14551457
`ValueError` when provided path extension is invalid
14561458
"""
1457-
if not path.lower().endswith(riid.PCF_FILE_EXTENSION):
1459+
path = Path(path)
1460+
if path.suffix != riid.PCF_FILE_EXTENSION:
14581461
logging.warning(f"Path does not end in {riid.PCF_FILE_EXTENSION}")
14591462

14601463
_dict_to_pcf(_ss_to_pcf_dict(self, verbose), path, verbose)
@@ -1490,7 +1493,7 @@ def upsample_spectra(self, target_bins: int = 4096):
14901493
transformation.T))
14911494

14921495

1493-
def read_hdf(path: str) -> SampleSet:
1496+
def read_hdf(path: str | Path) -> SampleSet:
14941497
"""Read an HDF file in as a `SampleSet` object.
14951498
14961499
Args:
@@ -1499,8 +1502,8 @@ def read_hdf(path: str) -> SampleSet:
14991502
Returns:
15001503
`SampleSet` object
15011504
"""
1502-
expanded_path = os.path.expanduser(path)
1503-
if not os.path.isfile(expanded_path):
1505+
expanded_path = Path(path).expanduser()
1506+
if not expanded_path.is_file():
15041507
raise FileNotFoundError(f"No file found at location '{expanded_path}'.")
15051508

15061509
ss = _read_hdf(expanded_path)
@@ -1511,7 +1514,7 @@ def read_hdf(path: str) -> SampleSet:
15111514
return ss
15121515

15131516

1514-
def read_pcf(path: str, verbose: bool = False) -> SampleSet:
1517+
def read_pcf(path: str | Path, verbose: bool = False) -> SampleSet:
15151518
"""Read a PCF file in as a `SampleSet` object.
15161519
15171520
Args:
@@ -1521,7 +1524,7 @@ def read_pcf(path: str, verbose: bool = False) -> SampleSet:
15211524
Returns:
15221525
`SampleSet` object
15231526
"""
1524-
expanded_path = os.path.expanduser(path)
1527+
expanded_path = Path(path).expanduser()
15251528
return _pcf_dict_to_ss(_pcf_to_dict(expanded_path, verbose), verbose)
15261529

15271530

@@ -1640,7 +1643,7 @@ def _validate_hdf_store_keys(keys: list):
16401643
raise InvalidSampleSetFileError()
16411644

16421645

1643-
def _read_hdf(path: str) -> SampleSet:
1646+
def _read_hdf(path: Path) -> SampleSet:
16441647
"""Read `SampleSet` from an HDF file.
16451648
16461649
Args:
@@ -1683,7 +1686,7 @@ def _read_hdf(path: str) -> SampleSet:
16831686
return ss
16841687

16851688

1686-
def _write_hdf(ss: SampleSet, output_path: str, **kwargs):
1689+
def _write_hdf(ss: SampleSet, output_path: Path, **kwargs):
16871690
"""Write a `SampleSet` to an HDF file.
16881691
16891692
Args:
@@ -1910,15 +1913,15 @@ def _pcf_dict_to_ss(pcf_dict: dict, verbose=True):
19101913
return ss
19111914

19121915

1913-
def _write_json(ss: SampleSet, output_path: str):
1916+
def _write_json(ss: SampleSet, output_path: Path):
19141917
ss_dict = _ss_to_pcf_dict(ss)
19151918
ss_dict["detector_info"] = ss.detector_info
19161919
with open(output_path, "w") as fout:
19171920
json.dump(ss_dict, fout, indent=4)
19181921

19191922

1920-
def read_json(path: str) -> SampleSet:
1921-
expanded_path = os.path.expanduser(path)
1923+
def read_json(path: str | Path) -> SampleSet:
1924+
expanded_path = Path(path).expanduser()
19221925
with open(expanded_path, "r") as fin:
19231926
ss_dict = json.load(fin)
19241927
ss = _pcf_dict_to_ss(ss_dict)

riid/gadras/pcf.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
"""This module contains utilities for working with GADRAS PCF files."""
55
import struct
66
from collections import defaultdict
7+
from pathlib import Path
78
from typing import List, Tuple
89

910
import numpy as np
@@ -184,7 +185,7 @@ def _read_spectra(data: list, n_rec_per_spec: int, spec_rec_start_indx: int) ->
184185
return spectra
185186

186187

187-
def _pcf_to_dict(pcf_file_path: str, verbose: bool = False) -> dict:
188+
def _pcf_to_dict(pcf_file_path: Path, verbose: bool = False) -> dict:
188189
"""Convert a PCF into a dictionary representation.
189190
190191
Args:
@@ -255,7 +256,7 @@ def _spectrum_byte_offset(spectrum_index: int, n_records_per_spectrum: int,
255256
return 256 * (spec_rec_start_index + n_records_per_spectrum * (spectrum_index - 1) - 1)
256257

257258

258-
def _dict_to_pcf(pcf_dict: dict, save_path: str, verbose=False):
259+
def _dict_to_pcf(pcf_dict: dict, save_path: Path, verbose=False):
259260
"""Convert PCF dictionary representation to PCF binary.
260261
261262
Args:

0 commit comments

Comments
 (0)