Skip to content

Commit 0732ea9

Browse files
authored
support for "no unit" values (#125)
* add new "no unit" values
* remove the integer "no unit" value
* add tests to check whether overriding a unit works
* add None back
* separate the roles for None
* allow "no unit" values in quantify
* remove obsolete TODO comments

  Creating a pint.Unit instance directly will create it from the default registry, so the first and second TODO comments actually referred to the same issue. We also decided to have specific packages (i.e. cf-xarray) provide the unit registry.
* update the comment about the type check
* also check the non-dimension coordinate
* docs fix
* document the "no unit" values
* update whats-new.rst
* remove the fill_value parameter again
* override with a different unit
* add tests to make sure "no unit" values on attrs won't override
* only return the attrs' no unit value if units got the default value
* fix the test
* don't use "1" as a no unit value
* use pytest.raises instead of getattr with a default
* add examples demonstrating the use of the "no unit" values
* fix the docs [skip-ci]
1 parent eb2377a commit 0732ea9

File tree

4 files changed

+98
-28
lines changed

4 files changed

+98
-28
lines changed

docs/whats-new.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@ What's new
55

66
0.3 (*unreleased*)
77
------------------
8-
8+
- allow special "no unit" values in :py:meth:`Dataset.pint.quantify` and
9+
:py:meth:`DataArray.pint.quantify` (:pull:`125`)
10+
By `Justus Magin <https://github.com/keewis>`_.
911

1012
0.2 (May 10 2021)
1113
-----------------

pint_xarray/accessors.py

Lines changed: 59 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,11 @@
88
from xarray.core.dtypes import NA
99

1010
from . import conversion
11+
from .conversion import no_unit_values
1112
from .errors import format_error_message
1213

14+
_default = object()
15+
1316

1417
def setup_registry(registry):
1518
"""set up the given registry for use with pint_xarray
@@ -91,7 +94,7 @@ def units_to_str_or_none(mapping, unit_format):
9194
# based on xarray.core.utils.either_dict_or_kwargs
9295
# https://github.com/pydata/xarray/blob/v0.15.1/xarray/core/utils.py#L249-L268
9396
def either_dict_or_kwargs(positional, keywords, method_name):
94-
if positional is not None:
97+
if positional not in (_default, None):
9598
if not is_dict_like(positional):
9699
raise ValueError(
97100
f"the first argument to .{method_name} must be a dictionary"
@@ -131,17 +134,16 @@ def get_registry(unit_registry, new_units, existing_units):
131134

132135

133136
def _decide_units(units, registry, unit_attribute):
134-
if units is None and unit_attribute is None:
137+
if units is _default and unit_attribute is _default:
135138
# or warn and return None?
136139
raise ValueError("no units given")
137-
elif units is None:
138-
# TODO option to read and decode units according to CF conventions (see MetPy)?
140+
elif units in no_unit_values or isinstance(units, Unit):
141+
# TODO what happens if they pass in a Unit from a different registry
142+
return units
143+
elif units is _default:
144+
if unit_attribute in no_unit_values:
145+
return unit_attribute
139146
units = registry.parse_units(unit_attribute)
140-
elif isinstance(units, Unit):
141-
# TODO do we have to check what happens if someone passes a Unit instance
142-
# without creating a unit registry?
143-
# TODO and what happens if they pass in a Unit from a different registry
144-
pass
145147
else:
146148
units = registry.parse_units(units)
147149
return units
@@ -243,7 +245,7 @@ class PintDataArrayAccessor:
243245
def __init__(self, da):
244246
self.da = da
245247

246-
def quantify(self, units=None, unit_registry=None, **unit_kwargs):
248+
def quantify(self, units=_default, unit_registry=None, **unit_kwargs):
247249
"""
248250
Attach units to the DataArray.
249251
@@ -269,7 +271,7 @@ def quantify(self, units=None, unit_registry=None, **unit_kwargs):
269271
pint.Unit, will be used as the DataArray's units. If a
270272
dict-like, it should map a variable name to the desired
271273
unit (use the DataArray's name to refer to its data). If
272-
not provided, will try to read them from
274+
not provided, ``quantify`` will try to read them from
273275
``DataArray.attrs['units']`` using pint's parser. The
274276
``"units"`` attribute will be removed from all variables
275277
except from dimension coordinates.
@@ -285,6 +287,11 @@ def quantify(self, units=None, unit_registry=None, **unit_kwargs):
285287
DataArray whose wrapped array data will now be a Quantity
286288
array with the specified units.
287289
290+
Notes
291+
-----
292+
``"none"`` and ``None`` can be used to mark variables that should not
293+
be quantified.
294+
288295
Examples
289296
--------
290297
>>> da = xr.DataArray(
@@ -297,6 +304,18 @@ def quantify(self, units=None, unit_registry=None, **unit_kwargs):
297304
<Quantity([0.4 0.9 1.7 4.8 3.2 9.1], 'hertz')>
298305
Coordinates:
299306
* wavelength (wavelength) float64 0.0001 0.0002 0.0004 0.0006 0.001 0.002
307+
308+
Don't quantify the data:
309+
310+
>>> da = xr.DataArray(
311+
... data=[0.4, 0.9],
312+
... dims=["wavelength"],
313+
... attrs={"units": "Hz"},
314+
... )
315+
>>> da.pint.quantify(units=None)
316+
<xarray.DataArray (wavelength: 2)>
317+
array([0.4, 0.9])
318+
Dimensions without coordinates: wavelength
300319
"""
301320

302321
if isinstance(self.da.data, Quantity):
@@ -305,7 +324,7 @@ def quantify(self, units=None, unit_registry=None, **unit_kwargs):
305324
f"already has units {self.da.data.units}"
306325
)
307326

308-
if isinstance(units, (str, pint.Unit)):
327+
if units is None or isinstance(units, (str, pint.Unit)):
309328
if self.da.name in unit_kwargs:
310329
raise ValueError(
311330
f"ambiguous values given for {repr(self.da.name)}:"
@@ -320,15 +339,15 @@ def quantify(self, units=None, unit_registry=None, **unit_kwargs):
320339

321340
unit_attrs = conversion.extract_unit_attributes(self.da)
322341

323-
possible_new_units = zip_mappings(units, unit_attrs)
342+
possible_new_units = zip_mappings(units, unit_attrs, fill_value=_default)
324343
new_units = {}
325344
invalid_units = {}
326345
for name, (unit, attr) in possible_new_units.items():
327-
if unit is not None or attr is not None:
346+
if unit is not _default or attr is not _default:
328347
try:
329348
new_units[name] = _decide_units(unit, registry, attr)
330349
except (ValueError, pint.UndefinedUnitError) as e:
331-
if unit is not None:
350+
if unit is not _default:
332351
type = "parameter"
333352
reported_unit = unit
334353
else:
@@ -880,7 +899,7 @@ class PintDatasetAccessor:
880899
def __init__(self, ds):
881900
self.ds = ds
882901

883-
def quantify(self, units=None, unit_registry=None, **unit_kwargs):
902+
def quantify(self, units=_default, unit_registry=None, **unit_kwargs):
884903
"""
885904
Attach units to the variables of the Dataset.
886905
@@ -905,10 +924,10 @@ def quantify(self, units=None, unit_registry=None, **unit_kwargs):
905924
units : mapping of hashable to unit-like, optional
906925
Physical units to use for particular DataArrays in this
907926
Dataset. It should map variable names to units (unit names
908-
or ``pint.Unit`` objects). If not provided, will try to
909-
read them from ``Dataset[var].attrs['units']`` using
910-
pint's parser. The ``"units"`` attribute will be removed
911-
from all variables except from dimension coordinates.
927+
or ``pint.Unit`` objects). If not provided, ``quantify``
928+
will try to read them from ``Dataset[var].attrs['units']``
929+
using pint's parser. The ``"units"`` attribute will be
930+
removed from all variables except from dimension coordinates.
912931
unit_registry : pint.UnitRegistry, optional
913932
Unit registry to be used for the units attached to each
914933
DataArray in this Dataset. If not given then a default
@@ -922,6 +941,11 @@ def quantify(self, units=None, unit_registry=None, **unit_kwargs):
922941
Dataset whose variables will now contain Quantity arrays
923942
with units.
924943
944+
Notes
945+
-----
946+
``"none"`` and ``None`` can be used to mark variables
947+
that should not be quantified.
948+
925949
Examples
926950
--------
927951
>>> ds = xr.Dataset(
@@ -947,21 +971,33 @@ def quantify(self, units=None, unit_registry=None, **unit_kwargs):
947971
Data variables:
948972
a (x) int64 [m] 0 3 2
949973
b (x) int64 [dm] 5 -2 1
974+
975+
Don't quantify specific variables:
976+
977+
>>> ds.pint.quantify({"a": None})
978+
<xarray.Dataset>
979+
Dimensions: (x: 3)
980+
Coordinates:
981+
* x (x) int64 0 1 2
982+
u (x) int64 [s] -1 0 1
983+
Data variables:
984+
a (x) int64 0 3 2
985+
b (x) int64 5 -2 1
950986
"""
951987
units = either_dict_or_kwargs(units, unit_kwargs, "quantify")
952988
registry = get_registry(unit_registry, units, conversion.extract_units(self.ds))
953989

954990
unit_attrs = conversion.extract_unit_attributes(self.ds)
955991

956-
possible_new_units = zip_mappings(units, unit_attrs)
992+
possible_new_units = zip_mappings(units, unit_attrs, fill_value=_default)
957993
new_units = {}
958994
invalid_units = {}
959995
for name, (unit, attr) in possible_new_units.items():
960-
if unit is not None or attr is not None:
996+
if unit is not _default or attr is not _default:
961997
try:
962998
new_units[name] = _decide_units(unit, registry, attr)
963999
except (ValueError, pint.UndefinedUnitError) as e:
964-
if unit is not None:
1000+
if unit is not _default:
9651001
type = "parameter"
9661002
reported_unit = unit
9671003
else:

pint_xarray/conversion.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
from .errors import format_error_message
77

8+
no_unit_values = ("none", None)
89
unit_attribute_name = "units"
910
slice_attributes = ("start", "stop", "step")
1011

@@ -23,8 +24,7 @@ def array_attach_units(data, unit):
2324
-------
2425
quantity : pint.Quantity
2526
"""
26-
27-
if unit is None:
27+
if unit in no_unit_values:
2828
return data
2929

3030
if not isinstance(unit, pint.Unit):

pint_xarray/tests/test_accessors.py

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,10 +65,10 @@ def example_quantity_da():
6565
class TestQuantifyDataArray:
6666
def test_attach_units_from_str(self, example_unitless_da):
6767
orig = example_unitless_da
68-
result = orig.pint.quantify("m")
68+
result = orig.pint.quantify("s")
6969
assert_array_equal(result.data.magnitude, orig.data)
7070
# TODO better comparisons for when you can't access the unit_registry?
71-
assert str(result.data.units) == "meter"
71+
assert str(result.data.units) == "second"
7272

7373
def test_attach_units_given_registry(self, example_unitless_da):
7474
orig = example_unitless_da
@@ -86,13 +86,30 @@ def test_attach_units_from_attrs(self, example_unitless_da):
8686
remaining_attrs = conversion.extract_unit_attributes(result)
8787
assert {k: v for k, v in remaining_attrs.items() if v is not None} == {}
8888

89+
def test_attach_units_from_str_attr_no_unit(self, example_unitless_da):
90+
orig = example_unitless_da
91+
orig.attrs["units"] = "none"
92+
result = orig.pint.quantify("m")
93+
assert_array_equal(result.data.magnitude, orig.data)
94+
assert str(result.data.units) == "meter"
95+
8996
def test_attach_units_given_unit_objs(self, example_unitless_da):
9097
orig = example_unitless_da
9198
ureg = UnitRegistry(force_ndarray=True)
9299
result = orig.pint.quantify(ureg.Unit("m"), unit_registry=ureg)
93100
assert_array_equal(result.data.magnitude, orig.data)
94101
assert result.data.units == ureg.Unit("m")
95102

103+
@pytest.mark.parametrize("no_unit_value", conversion.no_unit_values)
104+
def test_override_units(self, example_unitless_da, no_unit_value):
105+
orig = example_unitless_da
106+
result = orig.pint.quantify(no_unit_value, u=no_unit_value)
107+
108+
with pytest.raises(AttributeError):
109+
result.data.units
110+
with pytest.raises(AttributeError):
111+
result["u"].data.units
112+
96113
def test_error_when_already_units(self, example_quantity_da):
97114
da = example_quantity_da
98115
with raises_regex(ValueError, "already has units"):
@@ -247,6 +264,21 @@ def test_attach_units_given_unit_objs(self, example_unitless_ds):
247264
assert_array_equal(result["users"].data.magnitude, orig["users"].data)
248265
assert str(result["users"].data.units) == "dimensionless"
249266

267+
def test_attach_units_from_str_attr_no_unit(self, example_unitless_ds):
268+
orig = example_unitless_ds
269+
orig["users"].attrs["units"] = "none"
270+
result = orig.pint.quantify({"users": "m"})
271+
assert_array_equal(result["users"].data.magnitude, orig["users"].data)
272+
assert str(result["users"].data.units) == "meter"
273+
274+
@pytest.mark.parametrize("no_unit_value", conversion.no_unit_values)
275+
def test_override_units(self, example_unitless_ds, no_unit_value):
276+
orig = example_unitless_ds
277+
result = orig.pint.quantify({"users": no_unit_value})
278+
assert (
279+
getattr(result["users"].data, "units", "not a quantity") == "not a quantity"
280+
)
281+
250282
def test_error_when_already_units(self, example_quantity_ds):
251283
with raises_regex(ValueError, "already has units"):
252284
example_quantity_ds.pint.quantify({"funds": "pounds"})

0 commit comments

Comments
 (0)