diff --git a/lib/iris/__init__.py b/lib/iris/__init__.py index fc83615a40..a7c9778e4b 100644 --- a/lib/iris/__init__.py +++ b/lib/iris/__init__.py @@ -183,6 +183,7 @@ def __init__( date_microseconds=False, derived_bounds=False, lam_pole_offset=False, + use_cfpint_units=False, ): """Container for run-time options controls. @@ -228,6 +229,11 @@ def __init__( to a PP file will set the pole longitude (PP field ``bplon``) to 180.0 degrees if the grid is defined on a standard pole. Does not affect global or rotated-pole domains. + use_cfpint_units : bool, default=False + When True, units attributes will be created as :class:`cfpint.Unit` by + default. At present you can still assign class:`cf_units.Unit` objects + explicitly, and either may be used. However, support for cf_units will + eventually be retired. """ # The flag 'example_future_flag' is provided as a reference for the @@ -243,6 +249,7 @@ def __init__( self.__dict__["date_microseconds"] = date_microseconds self.__dict__["derived_bounds"] = derived_bounds self.__dict__["lam_pole_offset"] = lam_pole_offset + self.__dict__["use_cfpint_units"] = use_cfpint_units # TODO: next major release: set IrisDeprecation to subclass # DeprecationWarning instead of UserWarning. diff --git a/lib/iris/common/mixin.py b/lib/iris/common/mixin.py index 4cb176dc2f..bcdc7f5f01 100644 --- a/lib/iris/common/mixin.py +++ b/lib/iris/common/mixin.py @@ -9,10 +9,25 @@ from collections.abc import Mapping from datetime import timedelta from functools import wraps -from typing import Any +from typing import Any, TypeAlias import warnings -import cf_units +import cftime + +from iris.util import is_masked + +# TODO: use a FUTURE flag to control this +try: + import cf_units +except ImportError: + cf_units = None + +try: + import cfpint + import pint +except ImportError: + cfpint = None + import numpy as np import iris.std_names @@ -22,6 +37,10 @@ __all__ = ["CFVariableMixin", "LimitedAttributeDict"] +if cfpint is None and cf_units is None: + raise ImportError("Either 'cfpint' and or 'cf_units' must be installed.") + + def _get_valid_standard_name(name): # Standard names are optionally followed by a standard name # modifier, separated by one or more blank spaces @@ -152,66 +171,394 @@ def update(self, other, **kwargs): dict.update(self, other, **kwargs) -class Unit(cf_units.Unit): - # TODO: remove this subclass once FUTURE.date_microseconds is removed. - - @classmethod - def from_unit(cls, unit: cf_units.Unit): - """Cast a :class:`cf_units.Unit` to an :class:`Unit`.""" - if isinstance(unit, Unit): - result = unit - elif isinstance(unit, cf_units.Unit): - result = cls.__new__(cls) - result.__dict__.update(unit.__dict__) - else: - message = f"Expected a cf_units.Unit, got {type(unit)}" - raise TypeError(message) - return result +if cf_units is not None: - def num2date( - self, - time_value, - only_use_cftime_datetimes=True, - only_use_python_datetimes=False, - ): - # Used to patch the cf_units.Unit.num2date method to round to the - # nearest second, which was the legacy behaviour. This is under a FUTURE - # flag - users will need to adapt to microsecond precision eventually, - # which may involve floating point issues. - from iris import FUTURE + class CfUnit(cf_units.Unit): + # TODO: remove this subclass once FUTURE.date_microseconds is removed. - def _round(date): - if date.microsecond == 0: - return date - elif date.microsecond < 500000: - return date - timedelta(microseconds=date.microsecond) + @classmethod + def from_unit(cls, unit: cf_units.Unit): + """Cast a :class:`cf_units.Unit` to an :class:`Unit`.""" + if isinstance(unit, CfUnit): + result = unit + elif isinstance(unit, cf_units.Unit): + result = cls.__new__(cls) + result.__dict__.update(unit.__dict__) else: - return ( - date - + timedelta(seconds=1) - - timedelta(microseconds=date.microsecond) + message = f"Expected a cf_units.Unit, got {type(unit)}" + raise TypeError(message) + return result + + def num2date( + self, + time_value, + only_use_cftime_datetimes=True, + only_use_python_datetimes=False, + ): + # Used to patch the cf_units.Unit.num2date method to round to the + # nearest second, which was the legacy behaviour. This is under a FUTURE + # flag - users will need to adapt to microsecond precision eventually, + # which may involve floating point issues. + from iris import FUTURE + + def _round(date): + if date.microsecond == 0: + return date + elif date.microsecond < 500000: + return date - timedelta(microseconds=date.microsecond) + else: + return ( + date + + timedelta(seconds=1) + - timedelta(microseconds=date.microsecond) + ) + + result = super().num2date( + time_value, only_use_cftime_datetimes, only_use_python_datetimes + ) + if FUTURE.date_microseconds is False: + message = ( + "You are using legacy date precision for Iris units - max " + "precision is seconds. In future, Iris will use microsecond " + "precision - available since cf-units version 3.3 - which may " + "affect core behaviour. To opt-in to the " + "new behaviour, set `iris.FUTURE.date_microseconds = True`." ) + warnings.warn(message, category=FutureWarning) - result = super().num2date( - time_value, only_use_cftime_datetimes, only_use_python_datetimes - ) - if FUTURE.date_microseconds is False: - message = ( - "You are using legacy date precision for Iris units - max " - "precision is seconds. In future, Iris will use microsecond " - "precision - available since cf-units version 3.3 - which may " - "affect core behaviour. To opt-in to the " - "new behaviour, set `iris.FUTURE.date_microseconds = True`." - ) - warnings.warn(message, category=FutureWarning) + if hasattr(result, "shape"): + vfunc = np.frompyfunc(_round, 1, 1) + result = vfunc(result) + else: + result = _round(result) - if hasattr(result, "shape"): - vfunc = np.frompyfunc(_round, 1, 1) - result = vfunc(result) + return result + + +if cfpint is not None: + + class CfpintUnit(cfpint.Unit): + """Specialisation of cfpint.Unit adding methods for Iris convenience. + + We subclass the basic cfpint.Unit and add convenience operations. + Most of this is, effectively, for backwards compatibility with cf_units. + But we probably want to avoid publicising that ! + + In the future, it would be great to drop most of this, but we probably will + continue to need to support the Iris-internal unit categories "unknown" and + "no_unit". + Ideally, the other methods can progressively be dropped by making Iris code + pint-aware. But this is awkward whilst we are still supporting both. + """ + + # TODO: ideally we would get rid of this class altogether. + @classmethod + def from_unit(cls, unit): + """Cast anything into the standard Unit class for use within Iris. + + Unit may be a string, + """ + if isinstance(unit, CfpintUnit): + result = unit + elif isinstance(unit, cf_units.Unit): + # We need a special case for cf_unit conversion. + # Although we fallback to str() for native Pint units ('else' below), + # we can't do that for cf-units **because the str() omits calendars**. + result = CfpintUnit(str(unit), calendar=unit.calendar) + elif unit is None: + # A special case, so we can support "None" -> "unknown" for object + # creation with no given units. + result = CfpintUnit("unknown") else: - result = _round(result) + # E.G. probably a string, or a native Pint unit: take the str() + result = CfpintUnit(str(unit)) + return result + + _IRIS_EXTRA_CATEGORIES = { + "unknown": ["unknown", "?", ""], + "no_unit": ["no-unit", "no_unit", "-"], + } + + def __init__(self, *args, **kwargs): + self.category = "regular" + if args and (arg := args[0]) is None or isinstance(arg, str): + # Catch + transform "extra" special-category cases. + if arg is None: + arg = "" + arg = arg.lower() + for name, matches in self._IRIS_EXTRA_CATEGORIES.items(): + if arg in matches: + self.category = name + arg = "1" # this is how we do it... + if self.category != "regular": + # Replace args[0] + args = tuple([arg] + list(args[1:])) + super().__init__(*args, **kwargs) + self.calendar = self.calendar_string + + def __str__(self): + # N.B. cfpint.Unit.__repr__ is based on __str__, so we only overload this. + if self.category != "regular": + result = self.category + else: + result = super().__str__() + if self.is_datelike() or self.is_time(): + # Recognise short time units + replace with long forms + # -- weird, as most Pint units work "the other way", + # e.g. "m" -> "metre" ! + # -- but probably due to the cfxarray-like "short_formatter" + # - see "cfpint._cfarray_units_like.short_formatter" + # TODO: this should probably be fixed **in cfpint** ? + result = self._make_unitstr_cftimelike(result) + return result + + # remove <> from reprs, since CDL seems to use this + # (? so calendars are recorded, due to not appearing in str(date-unit) ?) + # TODO: remove this + _REPR_NO_LTGT = True + + def __repr__(self): + """Correct the repr. + + For fuller backwards-compatibility with cf_units, + mostly because assert_CML (i.e. the xml methods) need it. + TODO: remove this + """ + if self.category != "regular": + result = f"" + elif self.dimensionless: + # Cfpint fixes this for "str" but not "repr" + result = f"" + else: + result = super().__repr__() + + if self._REPR_NO_LTGT: + # Just strip off the "<>" wrapping. Result should then be equivalent + if len(result) and result[0] == "<": + result = result[1:] + if len(result) and result[-1] == ">": + result = result[:-1] + + if self.is_datelike() or self.is_time(): + # TODO: this should probably be fixed **in cfpint** ? + result = self._make_unitstr_cftimelike(result) + return result + + def convert(self, arraylike, other): + is_masked = np.ma.isMaskedArray(arraylike) + if is_masked: + arraylike = arraylike.data + quantity = arraylike * self + quantity = quantity.to(str(other)) + # TODO: I *think* this is the appropriate way to strip the units. + result = quantity.m + if is_masked: + result = np.ma.masked_array(result, arraylike.mask) + return result + + def _make_unitstr_cftimelike(self, units: str) -> str: + """Make a unit string cftime-compatible.""" + # Some kludges needed for now! + # TODO: to aid use of cftime, this fix **should be in cfpint*** + # - ideally, fix how cfpint units represent h/m/s/d + # - if *not* fixed in basic units registry, at least fix str(), as here + reps = {"s": "seconds", "m": "minutes", "h": "hours", "d": "days"} + for char, name in reps.items(): + if units == char: + units = name + elif units.startswith(char + " "): + units = units.replace(char + " ", name + " ", 1) + return units + + def num2date( + self, + time_value, + only_use_cftime_datetimes=True, + only_use_python_datetimes=False, + ) -> np.ndarray: + """Convert numeric time value(s) to datetimes (1 second resolution). + + The units of the numeric time value are described by the unit and its + ``.calendar`` property. The returned datetime object(s) represent UTC with + no time-zone offset, even if the specified unit contain a time-zone + offset. + + The current unit must be of the form: + ' since ' + e.g. 'hours since 1970-01-01 00:00:00' + + By default, the datetime instances returned are ``cftime.datetime`` objects, + regardless of calendar. If both ``only_use_cftime_datetimes`` and + ``only_use_python_datetimes`` keywords are set ``False``, then the return + type depends on date : they are datetime.datetime objects if the date falls + in the Gregorian calendar (i.e. calendar is + 'proleptic_gregorian', 'standard' or 'gregorian' **and** the date is after + 1582-10-15); **otherwise** they are ``cftime.datetime`` objects. + The datetime instances do not contain a time-zone offset, even if the + specified unit contains one. + + Works for scalars, sequences and numpy arrays. Returns a scalar + if input is a scalar, else returns a numpy array. + + Args: + + * time_value (float or arraylike): + Numeric time value/s. Maximum resolution is 1 second. + + Kwargs: + + * only_use_cftime_datetimes (bool): + If True, will always return cftime datetime objects, regardless of + calendar. If False, returns datetime.datetime instances where + possible. Defaults to True. + + * only_use_python_datetimes (bool): + If True, will always return datetime.datetime instances where + possible, and raise an exception if not. Ignored if + only_use_cftime_datetimes is True. Defaults to False. + + Returns + ------- + datetime, or numpy.ndarray of datetime object. + Either Python ``datetime.datetime`` or ``cftime.datetime`` are possible. + + Notes + ----- + This mimic what we already have in Iris, derived from cf_units behaviour. + But here, it is explicitly re-implemented using only cftime. + Ultimately, we will lose this, and users should use cftime explicitly. + """ + if not self.is_datelike(): + raise ValueError(f"Called 'num2date' on a non-datelike unit: {self!r}.") + units_str = str(self) + # TODO: this should probably be fixed **in cfpint** ? + units_str = self._make_unitstr_cftimelike(units_str) + calendar = self.calendar + if calendar is None: + calendar = "standard" + result = cftime.num2date( + time_value, + units=units_str, + calendar=calendar, + only_use_cftime_datetimes=only_use_cftime_datetimes, + only_use_python_datetimes=only_use_python_datetimes, + ) + return result + + def date2num(self, date): + """Convert datetime(s) to the numeric time (offset) values. + + Calculated from the datetime objects using the current unit, including its + ``calendar_string`` property. + + The current unit must be of the form: + ' since ' + e.g. 'hours since 1970-01-01 00:00:00' + + Works for scalars, sequences and numpy arrays. Returns a scalar + if input is a scalar, else returns a numpy array. - return result + Return type will be of type `integer` if (all) the times can be + encoded exactly as an integer with the specified units, + otherwise a float type will be returned. + + Args: + + * date (datetime): + A datetime object or a sequence of datetime objects. + The datetime objects should not include a time-zone offset. + Both Python ``datetime.datetime`` and ``cftime.datetime`` are supported. + + Returns + ------- + float/integer or numpy.ndarray of floats/integers + + Notes + ----- + This mimics what we already have in Iris, derived from cf_units behaviour. + But here, it is explicitly re-implemented using only cftime. + Ultimately, we will lose this, and users should use cftime explicitly. + """ + if not self.is_datelike(): + raise ValueError(f"Called 'date2num' on a non-datelike unit: {self!r}.") + units_str = str(self) + # TODO: this should probably be fixed **in cfpint** ? + units_str = self._make_unitstr_cftimelike(units_str) + result = cftime.date2num(date, units_str, self.calendar) + return result + + def is_udunits(self): + # TODO: For now! + return self.category == "regular" + + def is_unknown(self): + return self.category == "unknown" + + def is_no_unit(self): + return self.category == "no_unit" + + def is_time_reference(self): + return self.is_datelike() + + def is_long_time_interval(self): + return False + + def is_convertible(self, other): + if not isinstance(other, cfpint.Unit): + other = CfpintUnit(other) + return self.dimensionality == other.dimensionality + + def is_dimensionless(self): + return self.dimensionless + + def is_time(self): + return self.dimensionality == {"[time]": 1} + + def is_vertical(self): + pressure_dims = {"[length]": -1, "[mass]": 1, "[time]": -2} + height_dims = {"[length]": 1} + return self.dimensionality in (pressure_dims, height_dims) + + +# FOR NOW: insist on pint units +_DEFAULT_UNITCLASS: type = CfpintUnit + +# And force pint too. +# TODO: since we may have seen problems with doing this dynamically, this could affect +# the whole attempt to divide functions between Iris and Cfpint functionality + + +# See: https://pint.readthedocs.io/en/stable/advanced/custom-registry-class.html#custom-quantity-and-unit-class +class IrispintRegistry(pint.registry.UnitRegistry): + Quantity: TypeAlias = pint.Quantity + Unit: TypeAlias = CfpintUnit + + +# Create our own registry, based on our own UnitRegistry subclass +from cfpint._cfarray_units_like import make_registry + +IRIS_PINT_REGISTRY: IrispintRegistry = make_registry( + IrispintRegistry +) # include all 'normal' features +pint.set_application_registry(IRIS_PINT_REGISTRY) +pint.application_registry.default_system = "SI" +pint.application_registry.default_format = "cfu" + + +def default_units_class(): + if _DEFAULT_UNITCLASS is not None: + result = _DEFAULT_UNITCLASS + else: + from iris import FUTURE + + result = CfpintUnit if FUTURE.use_cfpint_units else CfUnit + return result + + +# +# if cfpint is None or cf_units is None: +# # Only one choice to make, so make it now + fix it for all future use. +# _DEFAULT_UNITCLASS = default_units_class() class CFVariableMixin: @@ -276,14 +623,14 @@ def var_name(self, name: str | None) -> None: self._metadata_manager.var_name = name @property - def units(self) -> cf_units.Unit: + def units(self) -> cf_units.Unit | cfpint.Unit: """The S.I. unit of the object.""" return self._metadata_manager.units @units.setter - def units(self, unit: cf_units.Unit | str | None) -> None: - unit = cf_units.as_unit(unit) - self._metadata_manager.units = Unit.from_unit(unit) + def units(self, unit: cf_units.Unit | cfpint.Unit | str | None) -> None: + # unit = cf_units.as_unit(unit) + self._metadata_manager.units = default_units_class().from_unit(unit) @property def attributes(self) -> LimitedAttributeDict: diff --git a/lib/iris/tests/unit/common/mixin/test_pintunits.py b/lib/iris/tests/unit/common/mixin/test_pintunits.py new file mode 100644 index 0000000000..359c4c175e --- /dev/null +++ b/lib/iris/tests/unit/common/mixin/test_pintunits.py @@ -0,0 +1,121 @@ +from datetime import datetime + +import cf_units +import numpy as np +import pytest + +import iris.common.mixin +from iris.common.mixin import CfpintUnit + + +def test_num2date(): + unit = CfpintUnit("days since 1970-01-01") + vals = np.array([1.0, 2]) + result = unit.num2date(vals) + assert np.all(result == [datetime(1970, 1, 2), datetime(1970, 1, 3)]) + + +def test_date2num(): + unit = CfpintUnit("days since 1970-01-01") + vals = np.array([datetime(1970, 1, 2), datetime(1970, 1, 3)]) + result = unit.date2num(vals) + assert np.all(result == [1.0, 2]) + + +def test_nounit_eq(): + unit = CfpintUnit("m") + assert unit != "no_unit" + + +def test_calendar(): + unit = CfpintUnit("days since 1970-01-01", calendar="360_day") + assert repr(unit) == "" + # TODO: should really add the calendar to the string format + # I think this is a bit horrible, + # .. but it is cf_units behaviour + currently required for correct netcdf saving + # it also means that calendar is not checked in unit/string eq (!!!) + assert str(unit) == "days since 1970-01-01" + + +_UNKNOWN_NAMES = iris.common.mixin.CfpintUnit._IRIS_EXTRA_CATEGORIES["unknown"] +_NOUNIT_NAMES = iris.common.mixin.CfpintUnit._IRIS_EXTRA_CATEGORIES["no_unit"] + + +class TestFromUnit: + """Test CfpintUnit creation from various sources.""" + + def test_none_unknown(self): + unit = CfpintUnit.from_unit(None) + assert unit.category == "unknown" + assert unit.calendar is None + assert unit == "unknown" + + @pytest.mark.parametrize("name", _UNKNOWN_NAMES) + def test_str_unknown(self, name): + unit = CfpintUnit.from_unit(None) + assert unit.category == "unknown" + assert unit.calendar is None + assert all(unit == form for form in _UNKNOWN_NAMES) # string equivalence + + def test_cfunits_unknown(self): + cfunit = cf_units.Unit(None) + unit = CfpintUnit.from_unit(None) + assert unit.is_unknown() + + @pytest.mark.parametrize("name", _NOUNIT_NAMES) + def test_str_nounit(self, name): + unit = CfpintUnit.from_unit(name) + assert unit.category == "no_unit" + assert unit.calendar is None + assert all(unit == form for form in _NOUNIT_NAMES) # string equivalence + + def test_cfunits_nounit(self): + cfunit = cf_units.Unit("no_unit") + unit = CfpintUnit.from_unit(cfunit) + assert unit.is_no_unit() + + def test_str(self): + unit = CfpintUnit.from_unit("m") + assert unit == "metres" # string equivalence + assert unit.calendar is None + assert unit.category == "regular" + + def test_cfunits(self): + cfunit = cf_units.Unit("m") + unit = CfpintUnit.from_unit(cfunit) + assert unit == "metre" + + def test_str_date(self): + unit = CfpintUnit.from_unit("days since 1970-01-01") + assert unit == "days since 1970-01-01" + assert unit.category == "regular" + assert unit.is_datelike() + assert unit.calendar == "standard" + + @pytest.mark.skip("from_unit does not support calendar (yet?)") + def test_str_date_calendar(self): + unit = CfpintUnit.from_unit("days since 1970-01-01", calendar="360_day") + # YUCK!! cf_units compatibility + # TODO: this needs to change + assert unit == "days since 1970-01-01" + assert unit.category == "regular" + assert unit.is_datelike() + assert unit.calendar == "360_day" + + def test_cfunits_date(self): + cfunit = cf_units.Unit("hours since 1800-03-09 11:11") + unit = CfpintUnit.from_unit(cfunit) + # NB time ref is reproduced as-is + # TODO: should get normalised + assert unit == "hours since 1800-03-09 11:11" + assert unit.is_datelike() + assert unit.calendar == "standard" + + def test_cfunits_date_calendar(self): + cfunit = cf_units.Unit("hours since 1800-03-09 11:11", calendar="365_day") + unit = CfpintUnit.from_unit(cfunit) + # NB time ref is reproduced as-is + # TODO: should get normalised + assert unit == "hours since 1800-03-09 11:11" + assert unit.is_datelike() + assert unit.calendar == "365_day" diff --git a/requirements/py313.yml b/requirements/py313.yml index eaf556b564..7598ffef42 100644 --- a/requirements/py313.yml +++ b/requirements/py313.yml @@ -19,6 +19,7 @@ dependencies: - matplotlib-base >=3.5, !=3.9.1 - netcdf4 - numpy >=1.24, !=1.24.3 + - pint - python-xxhash - pyproj - scipy