Skip to content

Commit f6fb8ef

Browse files
authored
Merge pull request #207 from legend-exp/units
Support forwarding `units` ak.Array parameters to LGDO attributes
2 parents 21108bd + 61ba067 commit f6fb8ef

File tree

6 files changed

+120
-21
lines changed

6 files changed

+120
-21
lines changed

src/lgdo/types/array.py

Lines changed: 40 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -40,20 +40,19 @@ class Array(LGDOCollection):
4040

4141
def __init__(
4242
self,
43-
nda: np.ndarray = None,
43+
nda: np.ndarray | ak.Array | None = None,
4444
shape: tuple[int, ...] = (),
45-
dtype: np.dtype = None,
45+
dtype: np.dtype | None = None,
4646
fill_val: float | int | None = None,
4747
attrs: dict[str, Any] | None = None,
4848
) -> None:
4949
"""
5050
Parameters
5151
----------
5252
nda
53-
An :class:`numpy.ndarray` to be used for this object's internal
54-
array. Note: the array is used directly, not copied. If not
55-
supplied, internal memory is newly allocated based on the shape and
56-
dtype arguments.
53+
An :class:`numpy.ndarray` or :class:`ak.Array` to be used for this
54+
object's internal array. If the Awkward array carries a ``units``
55+
parameter, it will be forwarded as an LGDO attribute.
5756
shape
5857
A numpy-format shape specification for shape of the internal
5958
ndarray. Required if `nda` is ``None``, otherwise unused.
@@ -65,7 +64,21 @@ def __init__(
6564
the array is allocated with all elements set to the corresponding
6665
fill value. If `nda` is not ``None``, this parameter is ignored.
6766
attrs
68-
A set of user attributes to be carried along with this LGDO.
67+
A set of user attributes to be carried along with this LGDO. These
68+
attributes always take precedence over all the others (e.g. those
69+
carried by `nda`).
70+
71+
Warning
72+
-------
73+
This constructor has partial units support. It supports fishing `units`
74+
from Awkward Array parameters but not (yet) from e.g. NumPy+Pint
75+
arrays. In any case, the user can always attach units later by
76+
modifying the dictionary held by :attr:`attrs`.
77+
78+
Note
79+
----
80+
The array is used directly, not copied. If not supplied, internal
81+
memory is newly allocated based on the shape and dtype arguments.
6982
"""
7083
if nda is None:
7184
if fill_val is None:
@@ -75,6 +88,26 @@ def __init__(
7588
else:
7689
nda = np.full(shape, fill_val, dtype=dtype)
7790

91+
elif isinstance(nda, ak.Array):
92+
# units: we don't just forward all ak parameters, there might be
93+
# some weird thing in there
94+
units = ak.parameters(nda).get("units", None)
95+
if units is not None:
96+
if attrs is None:
97+
attrs = {}
98+
99+
# give precedence to the user units
100+
attrs = {"units": units} | attrs
101+
102+
if nda.type.content.parameters.get("__array__") == "string":
103+
# Variable length strings aren't quite up to snuff yet, so pad the
104+
# fixed-width string length in case we want to update the array
105+
# TODO: numpy 2.2.5 fixes this, but it requires python v3.10 or higher
106+
s_len = np.max(ak.num(nda))
107+
nda = np.array(nda, dtype=f"<U{s_len * 2}")
108+
else:
109+
nda = ak.to_numpy(nda) # this is zero-copy
110+
78111
elif isinstance(nda, Array):
79112
nda = nda.nda
80113

src/lgdo/types/table.py

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -64,17 +64,24 @@ def __init__(
6464
instantiate this table using the supplied mapping of column names
6565
and array-like objects. Supported input types are: mapping of
6666
strings to LGDOCollections, :class:`pd.DataFrame` and :class:`ak.Array`.
67-
Note 1: no copy is performed, the objects are used directly (unless
68-
:class:`ak.Array` is provided). Note 2: if `size` is not ``None``,
69-
all arrays will be resized to match it. Note 3: if the arrays have
70-
different lengths, all will be resized to match the length of the
71-
first array.
7267
attrs
7368
A set of user attributes to be carried along with this LGDO.
7469
7570
Notes
7671
-----
77-
the :attr:`loc` attribute is initialized to 0.
72+
- The :attr:`loc` attribute is initialized to 0.
73+
- No copy is performed, the objects are used directly (unless
74+
:class:`ak.Array` is provided).
75+
- If `size` is not ``None``, all arrays will be resized to match it.
76+
- If the arrays have different lengths, all will be resized to match the
77+
length of the first array.
78+
79+
Warning
80+
-------
81+
This constructor has partial units support. It supports fishing `units`
82+
from Awkward Array parameters but not (yet) from e.g. NumPy+Pint
83+
arrays. In any case, the user can always attach units later by
84+
modifying the dictionary held by :attr:`attrs`.
7885
"""
7986
if isinstance(col_dict, pd.DataFrame):
8087
col_dict = {k: Array(v) for k, v in col_dict.items()}
@@ -624,12 +631,13 @@ def view_as(
624631
def _ak_to_lgdo_or_col_dict(array: ak.Array):
625632
if isinstance(array.type.content, ak.types.RecordType):
626633
return {field: _ak_to_lgdo_or_col_dict(array[field]) for field in array.fields}
627-
if array.type.content.parameters.get("__array__") == "string":
628-
# Variable length strings aren't quite up to snuff yet, so pad the
629-
# fixed-width string length in case we want to update the array
630-
# TODO: numpy 2.2.5 fixes this; but it required python v3.10 or higher
631-
s_len = np.max(ak.num(array))
632-
return Array(np.array(array, dtype=f"<U{s_len * 2}"))
633-
if isinstance(array.type.content, ak.types.NumpyType):
634-
return Array(ak.to_numpy(array))
634+
635+
# be smart and just use Array when it makes sense
636+
if (
637+
isinstance(array.type.content, ak.types.NumpyType)
638+
or array.type.content.parameters.get("__array__") == "string"
639+
):
640+
return Array(array)
641+
642+
# otherwise fall back to VoV
635643
return VectorOfVectors(array)

src/lgdo/types/vectorofvectors.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,16 @@ def __init__(
111111
if not isinstance(data, ak.Array):
112112
data = ak.Array(data)
113113

114+
# units: we don't just forward all ak parameters, there might be
115+
# some weird thing in there
116+
units = ak.parameters(data).get("units", None)
117+
if units is not None:
118+
if attrs is None:
119+
attrs = {}
120+
121+
# give precedence to the user units
122+
attrs = {"units": units} | attrs
123+
114124
if data.ndim < 2:
115125
# treat as a single-row VoV
116126
data = ak.Array([data])

tests/types/test_array.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,20 @@ def test_init():
2828
assert array.attrs == attrs | {"datatype": "array<1>{real}"}
2929

3030

31+
def test_ak_init():
32+
orig = ak.with_parameter([1, 2, 3, 4], "units", "mm")
33+
assert ak.parameters(orig)["units"] == "mm"
34+
35+
array = Array(orig)
36+
assert isinstance(array.nda, np.ndarray)
37+
assert array.attrs["units"] == "mm"
38+
assert (array.nda == orig.to_numpy()).all()
39+
40+
# check that this is a view
41+
array.nda[1] = -1
42+
assert orig[1] == -1
43+
44+
3145
def test_resize_and_capacity():
3246
array = Array(nda=np.array([1, 2, 3, 4]))
3347
assert array.get_capacity() == 4
@@ -103,3 +117,8 @@ def test_pickle():
103117
assert ex.attrs["attr1"] == 1
104118
assert ex.attrs["datatype"] == obj.attrs["datatype"]
105119
assert np.all(ex.nda == np.array([1, 2, 3, 4]))
120+
121+
122+
def test_string_array():
123+
array = Array(ak.Array(["e", "sticazzi", "non", "ce", "li", "metti?"]))
124+
assert array.dtype == "<U16"

tests/types/test_table.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ def test_ak_array_init():
6363
"a": [1, 2, 3, 4],
6464
"b": [[1, 2], [3], [4], [5, 6, 7]],
6565
"c": {"f1": [[], [5], [3, 7, 6], []], "f2": [5, 6, 7, 8]},
66+
"d": ["boh", "hello", "there", "!"],
6667
}
6768
)
6869
tbl = Table(array)
@@ -73,6 +74,28 @@ def test_ak_array_init():
7374
assert isinstance(tbl.c.f2, lgdo.Array)
7475

7576

77+
def test_ak_array_init_attrs():
78+
array = ak.Array(
79+
{
80+
"a": ak.with_parameter([1, 2, 3, 4], "units", "mm"),
81+
"b": ak.with_parameter([[1, 2], [3], [4], [5, 6, 7]], "units", "keV"),
82+
"c": {
83+
"f1": [[], [5], [3, 7, 6], []],
84+
"f2": ak.with_parameter([5, 6, 7, 8], "units", "C"),
85+
},
86+
}
87+
)
88+
tbl = Table(array)
89+
assert isinstance(tbl.a, lgdo.Array)
90+
assert tbl.a.attrs["units"] == "mm"
91+
assert isinstance(tbl.b, lgdo.VectorOfVectors)
92+
assert tbl.b.attrs["units"] == "keV"
93+
assert isinstance(tbl.c, Table)
94+
assert isinstance(tbl.c.f1, lgdo.VectorOfVectors)
95+
assert isinstance(tbl.c.f2, lgdo.Array)
96+
assert tbl.c.f2.attrs["units"] == "C"
97+
98+
7699
def test_datatype_name():
77100
tbl = Table()
78101
assert tbl.datatype_name() == "table"

tests/types/test_vectorofvectors.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,12 @@ def test_init(testvov):
100100
)
101101

102102

103+
def test_init_with_units():
104+
v = ak.with_parameter([[1], [2, 3]], "units", "mm")
105+
vov = VectorOfVectors(v)
106+
assert vov.attrs["units"] == "mm"
107+
108+
103109
def test_eq(testvov):
104110
assert testvov.v2d == VectorOfVectors(
105111
[[1, 2], [3, 4, 5], [2], [4, 8, 9, 7], [5, 3, 1]]

0 commit comments

Comments
 (0)