diff --git a/.gitignore b/.gitignore index 10009fad..9eab5017 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,8 @@ /src/lgdo/_version.py +#uv +uv.lock + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f841a4d5..bef6e909 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -50,6 +50,8 @@ repos: rev: "v2.4.1" hooks: - id: codespell + additional_dependencies: + - tomli - repo: https://github.com/shellcheck-py/shellcheck-py rev: "v0.10.0.1" diff --git a/src/lgdo/__init__.py b/src/lgdo/__init__.py index 4fd98acd..2fd947bd 100644 --- a/src/lgdo/__init__.py +++ b/src/lgdo/__init__.py @@ -45,7 +45,7 @@ from __future__ import annotations from ._version import version as __version__ -from .lh5_store import LH5Iterator, LH5Store, load_dfs, load_nda, ls, show +from .lh5 import LH5Iterator, ls, read, read_as, read_n_rows, show, write from .types import ( LGDO, Array, @@ -69,7 +69,6 @@ "FixedSizeArray", "Histogram", "LH5Iterator", - "LH5Store", "Scalar", "Struct", "Table", @@ -77,8 +76,10 @@ "VectorOfVectors", "WaveformTable", "__version__", - "load_dfs", - "load_nda", "ls", + "read", + "read_as", + "read_n_rows", "show", + "write", ] diff --git a/src/lgdo/lh5/__init__.py b/src/lgdo/lh5/__init__.py index 9918a501..99c1c790 100644 --- a/src/lgdo/lh5/__init__.py +++ b/src/lgdo/lh5/__init__.py @@ -11,7 +11,7 @@ from .core import read, read_as, write from .iterator import LH5Iterator from .store import LH5Store -from .tools import load_dfs, load_nda, ls, show +from .tools import ls, show from .utils import read_n_rows __all__ = [ @@ -19,8 +19,6 @@ "LH5Iterator", "LH5Store", "concat", - "load_dfs", - "load_nda", "ls", "read", "read_as", diff --git a/src/lgdo/lh5/tools.py b/src/lgdo/lh5/tools.py index 2e7a689d..4005cf32 100644 --- a/src/lgdo/lh5/tools.py +++ b/src/lgdo/lh5/tools.py @@ -1,16 +1,10 @@ from __future__ import annotations import fnmatch -import glob import logging -import os from copy import copy -from warnings import warn import h5py -import numpy as np -import pandas as pd -from numpy.typing import NDArray from . import utils from .store import LH5Store @@ -223,108 +217,3 @@ def show( break key = k_new - - -def load_nda( - f_list: str | list[str], - par_list: list[str], - lh5_group: str = "", - idx_list: list[NDArray | list | tuple] | None = None, -) -> dict[str, NDArray]: - r"""Build a dictionary of :class:`numpy.ndarray`\ s from LH5 data. - - Given a list of files, a list of LH5 table parameters, and an optional - group path, return a NumPy array with all values for each parameter. - - Parameters - ---------- - f_list - A list of files. Can contain wildcards. - par_list - A list of parameters to read from each file. - lh5_group - group path within which to find the specified parameters. - idx_list - for fancy-indexed reads. Must be one index array for each file in - `f_list`. - - Returns - ------- - par_data - A dictionary of the parameter data keyed by the elements of `par_list`. - Each entry contains the data for the specified parameter concatenated - over all files in `f_list`. - """ - warn( - "load_nda() is deprecated. " - "Please replace it with LH5Store.read(...).view_as('np'), " - "or just read_as(..., 'np'). " - "load_nda() will be removed in a future release.", - DeprecationWarning, - stacklevel=2, - ) - - if isinstance(f_list, str): - f_list = [f_list] - if idx_list is not None: - idx_list = [idx_list] - if idx_list is not None and len(f_list) != len(idx_list): - msg = f"f_list length ({len(f_list)}) != idx_list length ({len(idx_list)})!" - raise ValueError(msg) - - # Expand wildcards - f_list = [f for f_wc in f_list for f in sorted(glob.glob(os.path.expandvars(f_wc)))] - - sto = LH5Store() - par_data = {par: [] for par in par_list} - for ii, ff in enumerate(f_list): - f = sto.gimme_file(ff, "r") - for par in par_list: - if f"{lh5_group}/{par}" not in f: - msg = f"'{lh5_group}/{par}' not in file {ff}" - raise RuntimeError(msg) - - if idx_list is None: - data, _ = sto.read(f"{lh5_group}/{par}", f) - else: - data, _ = sto.read(f"{lh5_group}/{par}", f, idx=idx_list[ii]) - if not data: - continue - par_data[par].append(data.nda) - return {par: np.concatenate(par_data[par]) for par in par_list} - - -def load_dfs( - f_list: str | list[str], - par_list: list[str], - lh5_group: str = "", - idx_list: list[NDArray | list | tuple] | None = None, -) -> pd.DataFrame: - """Build a :class:`pandas.DataFrame` from LH5 data. - - Given a list of files (can use wildcards), a list of LH5 columns, and - optionally the group path, return a :class:`pandas.DataFrame` with all - values for each parameter. - - See Also - -------- - :func:`load_nda` - - Returns - ------- - dataframe - contains columns for each parameter in `par_list`, and rows containing - all data for the associated parameters concatenated over all files in - `f_list`. - """ - warn( - "load_dfs() is deprecated. " - "Please replace it with LH5Store.read(...).view_as('pd'), " - "or just read_as(..., 'pd'). " - "load_dfs() will be removed in a future release.", - DeprecationWarning, - stacklevel=2, - ) - return pd.DataFrame( - load_nda(f_list, par_list, lh5_group=lh5_group, idx_list=idx_list) - ) diff --git a/src/lgdo/lh5_store.py b/src/lgdo/lh5_store.py deleted file mode 100644 index dc8d1d95..00000000 --- a/src/lgdo/lh5_store.py +++ /dev/null @@ -1,284 +0,0 @@ -""" -.. warning:: - This subpackage is deprecated, use :mod:`lgdo.lh5`. -""" - -from __future__ import annotations - -import sys -from collections.abc import Iterator -from typing import Union -from warnings import warn - -import h5py -import numpy as np -import pandas as pd - -from . import lh5 -from .types import ( - Array, - ArrayOfEncodedEqualSizedArrays, # noqa: F401 - ArrayOfEqualSizedArrays, # noqa: F401 - FixedSizeArray, # noqa: F401 - Histogram, # noqa: F401 - Scalar, - Struct, - Table, # noqa: F401 - VectorOfEncodedVectors, # noqa: F401 - VectorOfVectors, - WaveformTable, # noqa: F401 -) - -LGDO = Union[Array, Scalar, Struct, VectorOfVectors] - - -class LH5Iterator(lh5.LH5Iterator): - """ - .. warning:: - This class is deprecated, use :class:`lgdo.lh5.iterator.LH5Iterator`. - - """ - - def __init__( - self, - lh5_files: str | list[str], - groups: str | list[str], - base_path: str = "", - entry_list: list[int] | list[list[int]] | None = None, - entry_mask: list[bool] | list[list[bool]] | None = None, - field_mask: dict[str, bool] | list[str] | tuple[str] | None = None, - buffer_len: int = 3200, - friend: Iterator | None = None, - ) -> None: - warn( - "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing LH5Iterator." - "Please replace 'from lgdo.lh5_store import LH5Iterator' with 'from lgdo.lh5 import LH5Iterator'." - "lgdo.lh5_store will be removed in a future release.", - DeprecationWarning, - stacklevel=2, - ) - super().__init__( - lh5_files, - groups, - base_path, - entry_list, - entry_mask, - field_mask, - buffer_len, - friend, - ) - - def write_object( - self, - obj: LGDO, - name: str, - lh5_file: str | h5py.File, - group: str | h5py.Group = "/", - start_row: int = 0, - n_rows: int | None = None, - wo_mode: str = "append", - write_start: int = 0, - **h5py_kwargs, - ) -> None: - """ - .. warning:: - This method is deprecated, use :meth:`lgdo.lh5.iterator.LH5Iterator.write`. - - """ - warn( - "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing LH5Iterator. " - "The object you are calling this function from uses the old LH5Iterator class." - "Please replace 'from lgdo.lh5_store import LH5Iterator' with 'from lgdo.lh5 import LH5Iterator'." - "lgdo.lh5_store will be removed in a future release.", - DeprecationWarning, - stacklevel=2, - ) - self.write( - obj, - name, - lh5_file, - group, - start_row, - n_rows, - wo_mode, - write_start, - h5py_kwargs, - ) - - def read_object( - self, - name: str, - lh5_file: str | h5py.File | list[str | h5py.File], - start_row: int = 0, - n_rows: int = sys.maxsize, - idx: np.ndarray | list | tuple | list[np.ndarray | list | tuple] = None, - field_mask: dict[str, bool] | list[str] | tuple[str] | None = None, - obj_buf: LGDO = None, - obj_buf_start: int = 0, - decompress: bool = True, - ) -> tuple[LGDO, int]: - """ - .. warning:: - This method is deprecated, use :meth:`lgdo.lh5.iterator.LH5Iterator.read`. - - """ - warn( - "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing LH5Iterator. " - "The object you are calling this function from uses the old LH5Iterator class." - "Please replace 'from lgdo.lh5_store import LH5Iterator' with 'from lgdo.lh5 import LH5Iterator'." - "lgdo.lh5_store will be removed in a future release.", - DeprecationWarning, - stacklevel=2, - ) - return self.read( - name, - lh5_file, - start_row, - n_rows, - idx, - field_mask, - obj_buf, - obj_buf_start, - decompress, - ) - - -class LH5Store(lh5.LH5Store): - """ - .. warning:: - This class is deprecated, use :class:`lgdo.lh5.iterator.LH5Store`. - - """ - - def __init__(self, base_path: str = "", keep_open: bool = False): - warn( - "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing LH5Store. " - "Please replace 'from lgdo.lh5_store import LH5Store' with 'from lgdo.lh5 import LH5Store'." - "lgdo.lh5_store will be removed in a future release.", - DeprecationWarning, - stacklevel=2, - ) - super().__init__(base_path, keep_open) - - def read_object( - self, - name: str, - lh5_file: str | h5py.File | list[str | h5py.File], - **kwargs, - ) -> tuple[LGDO, int]: - """ - .. warning:: - This method is deprecated, use :meth:`lgdo.lh5.store.LH5Store.read`. - - """ - warn( - "LH5Store.read_object() has been renamed to LH5Store.read(), " - "Please update your code." - "LH5Store.read_object() will be removed in a future release.", - DeprecationWarning, - stacklevel=2, - ) - return super().read(self, name, lh5_file, **kwargs) - - def write_object( - self, - obj: LGDO, - name: str, - lh5_file: str | h5py.File, - **kwargs, - ) -> tuple[LGDO, int]: - """ - .. warning:: - This method is deprecated, use :meth:`lgdo.lh5.store.LH5Store.write`. - - """ - warn( - "LH5Store.write_object() has been renamed to LH5Store.write(), " - "Please update your code." - "LH5Store.write_object() will be removed in a future release.", - DeprecationWarning, - stacklevel=2, - ) - return super().read(self, obj, name, lh5_file, **kwargs) - - -def load_dfs( - f_list: str | list[str], - par_list: list[str], - lh5_group: str = "", - idx_list: list[np.ndarray | list | tuple] | None = None, -) -> pd.DataFrame: - """ - .. warning:: - This function is deprecated, use :meth:`lgdo.types.lgdo.LGDO.view_as` to - view LGDO data as a Pandas data structure. - - """ - warn( - "lgdo.lh5_store has moved to a subfolder lgdo.lh5. " - "Please replace 'from lgdo.lh5_store import load_dfs' with 'from lgdo.lh5 import load_dfs'. " - "lgdo.lh5_store will be removed in a future release.", - DeprecationWarning, - stacklevel=2, - ) - return lh5.load_dfs(f_list, par_list, lh5_group, idx_list) - - -def load_nda( - f_list: str | list[str], - par_list: list[str], - lh5_group: str = "", - idx_list: list[np.ndarray | list | tuple] | None = None, -) -> dict[str, np.ndarray]: - """ - .. warning:: - This function is deprecated, use :meth:`lgdo.types.lgdo.LGDO.view_as` to - view LGDO data as a NumPy data structure. - - """ - warn( - "lgdo.lh5_store has moved to a subfolder lgdo.lh5. " - "Please replace 'from lgdo.lh5_store import load_nda' with 'from lgdo.lh5 import load_nda'. " - "lgdo.lh5_store will be removed in a future release.", - DeprecationWarning, - stacklevel=2, - ) - return lh5.load_nda(f_list, par_list, lh5_group, idx_list) - - -def ls(lh5_file: str | h5py.Group, lh5_group: str = "") -> list[str]: - """ - .. warning:: - This function is deprecated, import :func:`lgdo.lh5.tools.ls`. - - """ - warn( - "lgdo.lh5_store has moved to a subfolder lgdo.lh5. " - "Please replace 'from lgdo.lh5_store import ls' with 'from lgdo.lh5 import ls'. " - "lgdo.lh5_store will be removed in a future release.", - DeprecationWarning, - stacklevel=2, - ) - return lh5.ls(lh5_file, lh5_group) - - -def show( - lh5_file: str | h5py.Group, - lh5_group: str = "/", - attrs: bool = False, - indent: str = "", - header: bool = True, -) -> None: - """ - .. warning:: - This function is deprecated, import :func:`lgdo.lh5.tools.show`. - - """ - warn( - "lgdo.lh5_store has moved to a subfolder lgdo.lh5. " - "Please replace 'from lgdo.lh5_store import show' with 'from lgdo.lh5 import show'. " - "lgdo.lh5_store will be removed in a future release.", - DeprecationWarning, - stacklevel=2, - ) - lh5.show(lh5_file, lh5_group, attrs, indent, header) diff --git a/src/lgdo/types/vectorofvectors.py b/src/lgdo/types/vectorofvectors.py index d1bddaf8..9c806da2 100644 --- a/src/lgdo/types/vectorofvectors.py +++ b/src/lgdo/types/vectorofvectors.py @@ -476,7 +476,10 @@ def _set_vector_unsafe( else: nan_val = np.nan vovutils._nb_fill( - vec, lens, nan_val, self.flattened_data.nda[start : cum_lens[-1]] + vec, + lens, + np.array([nan_val]).astype(self.flattened_data.nda.dtype), + self.flattened_data.nda[start : cum_lens[-1]], ) # add new vector(s) length to cumulative_length diff --git a/src/lgdo/types/vovutils.py b/src/lgdo/types/vovutils.py index abae760b..3a212413 100644 --- a/src/lgdo/types/vovutils.py +++ b/src/lgdo/types/vovutils.py @@ -131,7 +131,7 @@ def _nb_fill( for i, ll in enumerate(len_in): stop = start + ll if ll > max_len: - flattened_array_out[start : start + max_len] = aoa_in[i, :] + flattened_array_out[start : start + max_len] = aoa_in[i, :max_len] flattened_array_out[start + max_len : stop] = nan_val else: flattened_array_out[start:stop] = aoa_in[i, :ll] diff --git a/tests/types/test_vectorofvectors.py b/tests/types/test_vectorofvectors.py index 1a61b4f3..7816128b 100644 --- a/tests/types/test_vectorofvectors.py +++ b/tests/types/test_vectorofvectors.py @@ -14,7 +14,7 @@ import lgdo from lgdo import Array, VectorOfVectors, lh5 -VovColl = namedtuple("VovColl", ["v2d", "v3d", "v4d"]) +VovColl = namedtuple("VovColl", ["v2d", "v3d", "v4d", "v2d_uint", "v2d_float"]) @pytest.fixture @@ -28,8 +28,14 @@ def testvov(): [[[5, 3], [1]]], ] ) + v2d_uint = VectorOfVectors( + [[1, 2], [3, 4, 5], [2], [4, 8, 9, 7], [5, 3, 1]], dtype="uint16" + ) + v2d_float = VectorOfVectors( + [[1, 2], [3, 4, 5], [2], [4, 8, 9, 7], [5, 3, 1]], dtype="float32" + ) - return VovColl(v2d, v3d, v4d) + return VovColl(v2d, v3d, v4d, v2d_uint, v2d_float) def test_init(testvov): @@ -381,38 +387,52 @@ def test_replace(testvov): def test_set_vector_unsafe(testvov): - testvov = testvov.v2d - - desired = [ - np.array([1, 2], dtype=testvov.dtype), - np.array([3, 4, 5], dtype=testvov.dtype), - np.array([2], dtype=testvov.dtype), - np.array([4, 8, 9, 7], dtype=testvov.dtype), - np.array([5, 3, 1], dtype=testvov.dtype), - ] - desired_aoa = np.zeros(shape=(5, 4), dtype=testvov.dtype) - desired_lens = np.array([len(arr) for arr in desired]) - - # test sequential filling - second_vov = lgdo.VectorOfVectors(shape_guess=(5, 5), dtype=testvov.dtype) - for i, arr in enumerate(desired): - second_vov._set_vector_unsafe(i, arr) - desired_aoa[i, : len(arr)] = arr - assert testvov == second_vov - - # test vectorized filling - third_vov = lgdo.VectorOfVectors(shape_guess=(5, 5), dtype=testvov.dtype) - third_vov._set_vector_unsafe(0, desired_aoa, desired_lens) - assert testvov == third_vov - - # test vectorized filling when len is longer than array - fourth_vov = lgdo.VectorOfVectors(shape_guess=(5, 5), dtype=testvov.dtype) - desired_lens[3] = 10 - fourth_vov._set_vector_unsafe(0, desired_aoa, desired_lens) - exp_entry_w_overflow = np.concatenate( - [desired[3], np.array([np.iinfo(testvov.dtype).min] * 6)] - ) - assert np.all(fourth_vov[3] == exp_entry_w_overflow) + for entry in ["v2d", "v2d_uint", "v2d_float"]: + current_testvov = testvov._asdict()[entry] + + desired = [ + np.array([1, 2], dtype=current_testvov.dtype), + np.array([3, 4, 5], dtype=current_testvov.dtype), + np.array([2], dtype=current_testvov.dtype), + np.array([4, 8, 9, 7], dtype=current_testvov.dtype), + np.array([5, 3, 1], dtype=current_testvov.dtype), + ] + desired_aoa = np.zeros(shape=(5, 4), dtype=current_testvov.dtype) + desired_lens = np.array([len(arr) for arr in desired]) + + # test sequential filling + second_vov = lgdo.VectorOfVectors( + shape_guess=(5, 5), dtype=current_testvov.dtype + ) + for i, arr in enumerate(desired): + second_vov._set_vector_unsafe(i, arr) + desired_aoa[i, : len(arr)] = arr + assert current_testvov == second_vov + + # test vectorized filling + third_vov = lgdo.VectorOfVectors( + shape_guess=(5, 5), dtype=current_testvov.dtype + ) + third_vov._set_vector_unsafe(0, desired_aoa, desired_lens) + assert current_testvov == third_vov + + # test vectorized filling when len is longer than array + fourth_vov = lgdo.VectorOfVectors( + shape_guess=(5, 5), dtype=current_testvov.dtype + ) + desired_lens[3] = 10 + fourth_vov._set_vector_unsafe(0, desired_aoa, desired_lens) + if current_testvov.dtype in ["int32", "int64", "uint16", "uint32"]: + exp_entry_w_overflow = np.concatenate( + [desired[3], np.array([np.iinfo(current_testvov.dtype).min] * 6)] + ) + else: + exp_entry_w_overflow = np.concatenate([desired[3], np.array([np.nan] * 6)]) + + assert np.all( + np.nan_to_num(fourth_vov[3], nan=0) + == np.nan_to_num(exp_entry_w_overflow, nan=0) + ) def test_iter(testvov): diff --git a/tests/types/test_vovutils.py b/tests/types/test_vovutils.py index 883f9d9d..3da10fc7 100644 --- a/tests/types/test_vovutils.py +++ b/tests/types/test_vovutils.py @@ -27,6 +27,58 @@ def testvov(): return VovColl(v2d, v3d, v4d) +def test_nb_fill(): + # test 1d array of int + aoa_in = np.arange(5, dtype="int32").reshape(1, 5) + len_in = np.array([5]) + nan_val = np.array([0], dtype=aoa_in.dtype) + flattened_array_out = np.empty(5, dtype=aoa_in.dtype) + + vovutils._nb_fill(aoa_in, len_in, nan_val, flattened_array_out) + assert np.array_equal( + flattened_array_out, np.array([0, 1, 2, 3, 4], dtype=aoa_in.dtype) + ) + # test 1d array of uint + aoa_in = np.arange(5, dtype="uint16").reshape(1, 5) + len_in = np.array([5]) + nan_val = np.array([0], dtype=aoa_in.dtype) + flattened_array_out = np.empty(5, dtype=aoa_in.dtype) + + vovutils._nb_fill(aoa_in, len_in, nan_val, flattened_array_out) + assert np.array_equal( + flattened_array_out, np.array([0, 1, 2, 3, 4], dtype=aoa_in.dtype) + ) + # test 1d array of float + aoa_in = np.arange(5, dtype="float32").reshape(1, 5) + len_in = np.array([5]) + nan_val = np.array([0], dtype=aoa_in.dtype) + flattened_array_out = np.empty(5, dtype=aoa_in.dtype) + + vovutils._nb_fill(aoa_in, len_in, nan_val, flattened_array_out) + assert np.array_equal( + flattened_array_out, np.array([0, 1, 2, 3, 4], dtype=aoa_in.dtype) + ) + # test 2d array of int + aoa_in = np.array([[1, 2, 3], [4, 5, 6]], dtype="int32") + len_in = np.array([3, 3]) + nan_val = np.array([0], dtype=aoa_in.dtype) + flattened_array_out = np.empty(6, dtype=aoa_in.dtype) + vovutils._nb_fill(aoa_in, len_in, nan_val, flattened_array_out) + assert np.array_equal( + flattened_array_out, + np.array([1, 2, 3, 4, 5, 6], dtype=aoa_in.dtype), + ) + # test nan value addition + aoa_in = np.array([[1, 2, 3], [4, 5, 6]], dtype="int32") + len_in = np.array([4, 3]) + flattened_array_out = np.empty(7, dtype=aoa_in[0].dtype) + vovutils._nb_fill(aoa_in, len_in, nan_val, flattened_array_out) + assert np.array_equal( + flattened_array_out, + np.array([1, 2, 3, 0, 4, 5, 6], dtype=aoa_in[0].dtype), + ) + + def test_ak_input_validity(testvov): for v in testvov: assert vovutils._ak_is_jagged(v) is True