diff --git a/.conda/openfisca-country-template/recipe.yaml b/.conda/openfisca-country-template/recipe.yaml index 7b75cf22c..3c9198dee 100644 --- a/.conda/openfisca-country-template/recipe.yaml +++ b/.conda/openfisca-country-template/recipe.yaml @@ -13,8 +13,9 @@ source: sha256: b2f2ac9945d9ccad467aed0925bd82f7f4d5ce4e96b212324cd071b8bee46914 build: + number: 1 noarch: python - script: pip install . -v + script: pip install . -v --no-deps requirements: host: @@ -25,12 +26,7 @@ requirements: run: - numpy - python - - openfisca-core >=42,<43 - -tests: -- python: - imports: - - openfisca_country_template + - openfisca-core >=42,<44 about: summary: OpenFisca Rules as Code model for Country-Template. diff --git a/.conda/openfisca-extension-template/recipe.yaml b/.conda/openfisca-extension-template/recipe.yaml index 03e53d5dd..94075b227 100644 --- a/.conda/openfisca-extension-template/recipe.yaml +++ b/.conda/openfisca-extension-template/recipe.yaml @@ -13,8 +13,9 @@ source: sha256: e16ee9cbefdd5e9ddc1c2c0e12bcd74307c8cb1be55353b3b2788d64a90a5df9 build: + number: 1 noarch: python - script: pip install . -v + script: pip install . 
-v --no-deps requirements: host: @@ -25,12 +26,7 @@ requirements: run: - numpy - python - - openfisca-country-template >=7,<8 - -tests: -- python: - imports: - - openfisca_extension_template + - openfisca-country-template >=7.1.5,<8 about: summary: An OpenFisca extension that adds some variables to an already-existing diff --git a/CHANGELOG.md b/CHANGELOG.md index 7f900c330..549588caf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,55 @@ # Changelog +# 43.0.0 [#1224](https://github.com/openfisca/openfisca-core/pull/1224) + +#### Technical changes + +- Add documentation to the `indexed_enums` module +- Fix type definitions in the enums module +- Fix doctests +- Fix bug in `Enum.encode` when passing a scalar +- Fix bug in `Enum.encode` when encoding values not present in the enum + +#### New features + +- Introduce `indexed_enums.EnumType` + - Allows for actually fancy indexing `indexed_enums.Enum` + +#### Note + +This changeset has no breaking changes to the `indexed_enums` public API. +However, as a conservative measure concerning data preparation for large +population simulations, it has been marked as a major release. + +##### Before + +```python +from openfisca_core import indexed_enums as enum + +class TestEnum(enum.Enum): + ONE = "one" + TWO = "two" + +TestEnum.encode([2]) +# EnumArray([0]) +``` + +##### After + +```python +from openfisca_core import indexed_enums as enum + +class TestEnum(enum.Enum): + ONE = "one" + TWO = "two" + +TestEnum.encode([2]) +# EnumArray([]) + +TestEnum.encode([0,1,2,5]) +# EnumArray([TestEnum.ONE TestEnum.TWO]) +``` + ### 42.0.7 [#1264](https://github.com/openfisca/openfisca-core/pull/1264) #### Technical changes diff --git a/openfisca_core/commons/__init__.py b/openfisca_core/commons/__init__.py index 1a3d065ee..550088141 100644 --- a/openfisca_core/commons/__init__.py +++ b/openfisca_core/commons/__init__.py @@ -1,55 +1,4 @@ -"""Common tools for contributors and users. 
- -The tools in this sub-package are intended, to help both contributors -to OpenFisca Core and to country packages. - -Official Public API: - * :func:`.apply_thresholds` - * :func:`.average_rate` - * :func:`.concat` - * :func:`.empty_clone` - * :func:`.eval_expression` - * :func:`.marginal_rate` - * :func:`.stringify_array` - * :func:`.switch` - -Deprecated: - * :class:`.Dummy` - -Note: - The ``deprecated`` imports are transitional, in order to ensure non-breaking - changes, and could be removed from the codebase in the next - major release. - -Note: - How imports are being used today:: - - from openfisca_core.commons import * # Bad - from openfisca_core.commons.formulas import switch # Bad - from openfisca_core.commons.decorators import deprecated # Bad - - - The previous examples provoke cyclic dependency problems, that prevent us - from modularizing the different components of the library, which would make - them easier to test and to maintain. - - How they could be used in a future release:: - - from openfisca_core import commons - from openfisca_core.commons import deprecated - - deprecated() # Good: import classes as publicly exposed - commons.switch() # Good: use functions as publicly exposed - - .. seealso:: `PEP8#Imports`_ and `OpenFisca's Styleguide`_. - - .. _PEP8#Imports: - https://www.python.org/dev/peps/pep-0008/#imports - - .. _OpenFisca's Styleguide: - https://github.com/openfisca/openfisca-core/blob/master/STYLEGUIDE.md - -""" +"""Common tools for contributors and users.""" from . import types from .dummy import Dummy diff --git a/openfisca_core/commons/formulas.py b/openfisca_core/commons/formulas.py index a184ad2dc..d83f187e2 100644 --- a/openfisca_core/commons/formulas.py +++ b/openfisca_core/commons/formulas.py @@ -24,10 +24,7 @@ def apply_thresholds( choices: A list of the possible values to choose from. Returns: - Array[numpy.float32]: A list of the values chosen. - - Raises: - AssertionError: When thresholds and choices are incompatible. 
+ ndarray[float32]: A list of the values chosen. Examples: >>> input = numpy.array([4, 5, 6, 7, 8]) @@ -37,7 +34,6 @@ def apply_thresholds( array([10, 10, 15, 15, 20]) """ - condlist: list[t.Array[numpy.bool_] | bool] condlist = [input <= threshold for threshold in thresholds] @@ -66,7 +62,7 @@ def concat( that: Another array to concatenate. Returns: - Array[numpy.str_]: An array with the concatenated values. + ndarray[str_]: An array with the concatenated values. Examples: >>> this = ["this", "that"] @@ -75,7 +71,6 @@ def concat( array(['this1.0', 'that2.5']...) """ - if not isinstance(this, numpy.ndarray): this = numpy.array(this) @@ -105,10 +100,7 @@ def switch( value_by_condition: Values to replace for each condition. Returns: - Array: An array with the replaced values. - - Raises: - AssertionError: When ``value_by_condition`` is empty. + ndarray[float32]: An array with the replaced values. Examples: >>> conditions = numpy.array([1, 1, 1, 2]) diff --git a/openfisca_core/commons/misc.py b/openfisca_core/commons/misc.py index ba9687619..e3e55948d 100644 --- a/openfisca_core/commons/misc.py +++ b/openfisca_core/commons/misc.py @@ -13,7 +13,7 @@ def empty_clone(original: object) -> object: original: An object to clone. Returns: - The cloned, empty, object. + object: The cloned, empty, object. Examples: >>> Foo = type("Foo", (list,), {}) @@ -50,7 +50,7 @@ def stringify_array(array: None | t.Array[numpy.generic]) -> str: array: An array. Returns: - str: "None" if the ``array`` is None. + str: ``"None"`` if the ``array`` is ``None``. str: The stringified ``array`` otherwise. Examples: @@ -71,7 +71,6 @@ def stringify_array(array: None | t.Array[numpy.generic]) -> str: "[, {}, >> target = numpy.array([1, 2, 3]) @@ -37,7 +37,6 @@ def average_rate( array([ nan, 0. , -0.5]) """ - if not isinstance(varying, numpy.ndarray): varying = numpy.array(varying, dtype=numpy.float32) @@ -79,9 +78,9 @@ def marginal_rate( trim: The lower and upper bounds of the marginal rate. 
Returns: - Array[numpy.float32]: The marginal rate for each target. When ``trim`` - is provided, values that are out of the provided bounds are replaced by - :any:`numpy.nan`. + ndarray[float32]: The marginal rate for each target. When ``trim`` + is provided, values that are out of the provided bounds are + replaced by :class:`numpy.nan`. Examples: >>> target = numpy.array([1, 2, 3]) @@ -91,7 +90,6 @@ def marginal_rate( array([nan, 0.5]) """ - if not isinstance(varying, numpy.ndarray): varying = numpy.array(varying, dtype=numpy.float32) diff --git a/openfisca_core/commons/tests/test_dummy.py b/openfisca_core/commons/tests/test_dummy.py index dfe04b3e4..4dd13eaba 100644 --- a/openfisca_core/commons/tests/test_dummy.py +++ b/openfisca_core/commons/tests/test_dummy.py @@ -5,6 +5,5 @@ def test_dummy_deprecation() -> None: """Dummy throws a deprecation warning when instantiated.""" - with pytest.warns(DeprecationWarning): assert Dummy() diff --git a/openfisca_core/commons/tests/test_formulas.py b/openfisca_core/commons/tests/test_formulas.py index 130df9505..6fa98a7c2 100644 --- a/openfisca_core/commons/tests/test_formulas.py +++ b/openfisca_core/commons/tests/test_formulas.py @@ -7,7 +7,6 @@ def test_apply_thresholds_when_several_inputs() -> None: """Make a choice for any given input.""" - input_ = numpy.array([4, 5, 6, 7, 8, 9, 10]) thresholds = [5, 7, 9] choices = [10, 15, 20, 25] @@ -19,7 +18,6 @@ def test_apply_thresholds_when_several_inputs() -> None: def test_apply_thresholds_when_too_many_thresholds() -> None: """Raise an AssertionError when thresholds > choices.""" - input_ = numpy.array([6]) thresholds = [5, 7, 9, 11] choices = [10, 15, 20] @@ -30,7 +28,6 @@ def test_apply_thresholds_when_too_many_thresholds() -> None: def test_apply_thresholds_when_too_many_choices() -> None: """Raise an AssertionError when thresholds < choices - 1.""" - input_ = numpy.array([6]) thresholds = [5, 7] choices = [10, 15, 20, 25] @@ -41,7 +38,6 @@ def 
test_apply_thresholds_when_too_many_choices() -> None: def test_concat_when_this_is_array_not_str() -> None: """Cast ``this`` to ``str`` when it is a NumPy array other than string.""" - this = numpy.array([1, 2]) that = numpy.array(["la", "o"]) @@ -52,7 +48,6 @@ def test_concat_when_this_is_array_not_str() -> None: def test_concat_when_that_is_array_not_str() -> None: """Cast ``that`` to ``str`` when it is a NumPy array other than string.""" - this = numpy.array(["ho", "cha"]) that = numpy.array([1, 2]) @@ -63,7 +58,6 @@ def test_concat_when_that_is_array_not_str() -> None: def test_concat_when_args_not_str_array_like() -> None: """Cast ``this`` and ``that`` to a NumPy array or strings.""" - this = (1, 2) that = (3, 4) @@ -74,7 +68,6 @@ def test_concat_when_args_not_str_array_like() -> None: def test_switch_when_values_are_empty() -> None: """Raise an AssertionError when the values are empty.""" - conditions = [1, 1, 1, 2] value_by_condition = {} diff --git a/openfisca_core/commons/tests/test_rates.py b/openfisca_core/commons/tests/test_rates.py index c266582fc..fbee4cc83 100644 --- a/openfisca_core/commons/tests/test_rates.py +++ b/openfisca_core/commons/tests/test_rates.py @@ -8,7 +8,6 @@ def test_average_rate_when_varying_is_zero() -> None: """Yield infinity when the varying gross income crosses zero.""" - target = numpy.array([1, 2, 3]) varying = [0, 0, 0] @@ -19,7 +18,6 @@ def test_average_rate_when_varying_is_zero() -> None: def test_marginal_rate_when_varying_is_zero() -> None: """Yield infinity when the varying gross income crosses zero.""" - target = numpy.array([1, 2, 3]) varying = numpy.array([0, 0, 0]) diff --git a/openfisca_core/data_storage/__init__.py b/openfisca_core/data_storage/__init__.py index 9f63047fb..4dbbb8954 100644 --- a/openfisca_core/data_storage/__init__.py +++ b/openfisca_core/data_storage/__init__.py @@ -1,25 +1,4 @@ -# Transitional imports to ensure non-breaking changes. -# Could be deprecated in the next major release. 
-# -# How imports are being used today: -# -# >>> from openfisca_core.module import symbol -# -# The previous example provokes cyclic dependency problems -# that prevent us from modularizing the different components -# of the library so to make them easier to test and to maintain. -# -# How could them be used after the next major release: -# -# >>> from openfisca_core import module -# >>> module.symbol() -# -# And for classes: -# -# >>> from openfisca_core.module import Symbol -# >>> Symbol() -# -# See: https://www.python.org/dev/peps/pep-0008/#imports +"""Different storage backends for the data of a simulation.""" from . import types from .in_memory_storage import InMemoryStorage diff --git a/openfisca_core/data_storage/in_memory_storage.py b/openfisca_core/data_storage/in_memory_storage.py index 18387ff64..cd8f9ef08 100644 --- a/openfisca_core/data_storage/in_memory_storage.py +++ b/openfisca_core/data_storage/in_memory_storage.py @@ -29,13 +29,15 @@ def __init__(self, is_eternal: bool = False) -> None: self.is_eternal = is_eternal def get(self, period: None | t.Period = None) -> None | t.Array[t.DTypeGeneric]: - """Retrieve the data for the specified period from memory. + """Retrieve the data for the specified :obj:`.Period` from memory. Args: - period: The period for which data should be retrieved. + period: The :obj:`.Period` for which data should be retrieved. Returns: - The data for the specified period, or None if no data is available. + None: If no data is available. + EnumArray: The data for the specified :obj:`.Period`. + ndarray[generic]: The data for the specified :obj:`.Period`. 
Examples: >>> import numpy @@ -53,7 +55,6 @@ def get(self, period: None | t.Period = None) -> None | t.Array[t.DTypeGeneric]: array([1, 2, 3]) """ - if self.is_eternal: period = periods.period(DateUnit.ETERNITY) period = periods.period(period) @@ -64,11 +65,11 @@ def get(self, period: None | t.Period = None) -> None | t.Array[t.DTypeGeneric]: return values def put(self, value: t.Array[t.DTypeGeneric], period: None | t.Period) -> None: - """Store the specified data in memory for the specified period. + """Store the specified data in memory for the specified :obj:`.Period`. Args: value: The data to store - period: The period for which the data should be stored. + period: The :obj:`.Period` for which the data should be stored. Examples: >>> import numpy @@ -86,7 +87,6 @@ def put(self, value: t.Array[t.DTypeGeneric], period: None | t.Period) -> None: array(['1', '2', 'salary'], dtype=' None: self._arrays[period] = value def delete(self, period: None | t.Period = None) -> None: - """Delete the data for the specified period from memory. + """Delete the data for the specified :obj:`.Period` from memory. Args: - period: The period for which data should be deleted. + period: The :obj:`.Period` for which data should be deleted. Note: If ``period`` is specified, all data will be deleted. @@ -128,7 +128,6 @@ def delete(self, period: None | t.Period = None) -> None: >>> storage.get(period) """ - if period is None: self._arrays = {} return @@ -147,7 +146,7 @@ def get_known_periods(self) -> KeysView[t.Period]: """List of storage's known periods. Returns: - A sequence containing the storage's known periods. + KeysView[Period]: A sequence containing the storage's known periods. Examples: >>> from openfisca_core import data_storage, periods @@ -164,14 +163,13 @@ def get_known_periods(self) -> KeysView[t.Period]: dict_keys([Period(('year', Instant((2017, 1, 1)), 1))]) """ - return self._arrays.keys() def get_memory_usage(self) -> t.MemoryUsage: """Memory usage of the storage. 
Returns: - A dictionary representing the storage's memory usage. + MemoryUsage: A dictionary representing the storage's memory usage. Examples: >>> from openfisca_core import data_storage @@ -181,7 +179,6 @@ def get_memory_usage(self) -> t.MemoryUsage: {'nb_arrays': 0, 'total_nb_bytes': 0, 'cell_size': nan} """ - if not self._arrays: return { "nb_arrays": 0, diff --git a/openfisca_core/data_storage/on_disk_storage.py b/openfisca_core/data_storage/on_disk_storage.py index d1b8e2c4e..818cec22e 100644 --- a/openfisca_core/data_storage/on_disk_storage.py +++ b/openfisca_core/data_storage/on_disk_storage.py @@ -33,7 +33,7 @@ class OnDiskStorage: #: Whether to preserve the storage directory. preserve_storage_dir: bool - #: Mapping of file paths to possible Enum values. + #: Mapping of file paths to possible :class:`.Enum` values. _enums: MutableMapping[str, type[t.Enum]] #: Mapping of periods to file paths. @@ -52,17 +52,18 @@ def __init__( self.storage_dir = storage_dir def _decode_file(self, file: str) -> t.Array[t.DTypeGeneric]: - """Decode a file by loading its contents as a ``numpy`` array. + """Decode a file by loading its contents as a :mod:`numpy` array. Args: file: Path to the file to be decoded. Returns: - ``numpy`` array or ``EnumArray`` representing the data in the file. + EnumArray: Representing the data in the file. + ndarray[generic]: Representing the data in the file. Note: - If the file is associated with ``Enum`` values, the array is - converted back to an ``EnumArray`` object. + If the file is associated with :class:`~indexed_enums.Enum` values, the + array is converted back to an :obj:`~indexed_enums.EnumArray` object. Examples: >>> import tempfile @@ -86,10 +87,9 @@ def _decode_file(self, file: str) -> t.Array[t.DTypeGeneric]: ... storage = data_storage.OnDiskStorage(directory) ... storage.put(value, period) ... 
storage._decode_file(storage._files[period]) - EnumArray([]) + EnumArray([Housing.TENANT]) """ - enum = self._enums.get(file) if enum is not None: @@ -106,8 +106,9 @@ def get(self, period: None | t.Period = None) -> None | t.Array[t.DTypeGeneric]: period: The period for which data should be retrieved. Returns: - A ``numpy`` array or ``EnumArray`` representing the vector for the - specified period, or ``None`` if no vector is stored. + None: If no data is available. + EnumArray: Representing the data for the specified period. + ndarray[generic]: Representing the data for the specified period. Examples: >>> import tempfile @@ -127,7 +128,6 @@ def get(self, period: None | t.Period = None) -> None | t.Array[t.DTypeGeneric]: array([1, 2, 3]) """ - if self.is_eternal: period = periods.period(DateUnit.ETERNITY) period = periods.period(period) @@ -162,7 +162,6 @@ def put(self, value: t.Array[t.DTypeGeneric], period: None | t.Period) -> None: array(['1', '2', 'salary'], dtype=' None: ... storage.get(period) """ - if period is None: self._files = {} return @@ -231,7 +229,7 @@ def get_known_periods(self) -> KeysView[t.Period]: """List of storage's known periods. Returns: - A sequence containing the storage's known periods. + KeysView[Period]: A sequence containing the storage's known periods. Examples: >>> import tempfile @@ -255,7 +253,6 @@ def get_known_periods(self) -> KeysView[t.Period]: dict_keys([Period(('year', Instant((2017, 1, 1)), 1))]) """ - return self._files.keys() def restore(self) -> None: @@ -289,7 +286,6 @@ def restore(self) -> None: >>> directory.cleanup() """ - self._files = files = {} # Restore self._files from content of storage_dir. 
for filename in os.listdir(self.storage_dir): diff --git a/openfisca_core/indexed_enums/__init__.py b/openfisca_core/indexed_enums/__init__.py index 9c4ff7dd6..494601fc8 100644 --- a/openfisca_core/indexed_enums/__init__.py +++ b/openfisca_core/indexed_enums/__init__.py @@ -1,27 +1,8 @@ -# Transitional imports to ensure non-breaking changes. -# Could be deprecated in the next major release. -# -# How imports are being used today: -# -# >>> from openfisca_core.module import symbol -# -# The previous example provokes cyclic dependency problems -# that prevent us from modularizing the different components -# of the library so to make them easier to test and to maintain. -# -# How could them be used after the next major release: -# -# >>> from openfisca_core import module -# >>> module.symbol() -# -# And for classes: -# -# >>> from openfisca_core.module import Symbol -# >>> Symbol() -# -# See: https://www.python.org/dev/peps/pep-0008/#imports +"""Enumerations for variables with a limited set of possible values.""" from . import types +from ._enum_type import EnumType +from ._errors import EnumEncodingError, EnumMemberNotFoundError from .config import ENUM_ARRAY_DTYPE from .enum import Enum from .enum_array import EnumArray @@ -30,5 +11,8 @@ "ENUM_ARRAY_DTYPE", "Enum", "EnumArray", + "EnumEncodingError", + "EnumMemberNotFoundError", + "EnumType", "types", ] diff --git a/openfisca_core/indexed_enums/_enum_type.py b/openfisca_core/indexed_enums/_enum_type.py new file mode 100644 index 000000000..8083a6d49 --- /dev/null +++ b/openfisca_core/indexed_enums/_enum_type.py @@ -0,0 +1,70 @@ +from __future__ import annotations + +from typing import final + +import numpy + +from . import types as t + + +@final +class EnumType(t.EnumType): + """Meta class for creating an indexed :class:`.Enum`. + + Examples: + >>> from openfisca_core import indexed_enums as enum + + >>> class Enum(enum.Enum, metaclass=enum.EnumType): + ... 
pass + + >>> Enum.items + Traceback (most recent call last): + AttributeError: ... + + >>> class Housing(Enum): + ... OWNER = "Owner" + ... TENANT = "Tenant" + + >>> Housing.indices + array([0, 1], dtype=uint8) + + >>> Housing.names + array(['OWNER', 'TENANT'], dtype='>> Housing.enums + array([Housing.OWNER, Housing.TENANT], dtype=object) + + """ + + def __new__( + metacls, + name: str, + bases: tuple[type, ...], + classdict: t.EnumDict, + **kwds: object, + ) -> t.EnumType: + """Create a new indexed enum class.""" + # Create the enum class. + cls = super().__new__(metacls, name, bases, classdict, **kwds) + + # If the enum class has no members, return it as is. + if not cls.__members__: + return cls + + # Add the indices attribute to the enum class. + cls.indices = numpy.arange(len(cls), dtype=t.EnumDType) + + # Add the names attribute to the enum class. + cls.names = numpy.array(cls._member_names_, dtype=t.StrDType) + + # Add the enums attribute to the enum class. + cls.enums = numpy.array(cls, dtype=t.ObjDType) + + # Return the modified enum class. + return cls + + def __dir__(cls) -> list[str]: + return sorted({"indices", "names", "enums", *super().__dir__()}) + + +__all__ = ["EnumType"] diff --git a/openfisca_core/indexed_enums/_errors.py b/openfisca_core/indexed_enums/_errors.py new file mode 100644 index 000000000..e9b543fc7 --- /dev/null +++ b/openfisca_core/indexed_enums/_errors.py @@ -0,0 +1,35 @@ +from __future__ import annotations + +from . import types as t + + +class EnumEncodingError(TypeError): + """Raised when an enum is encoded with an unsupported type.""" + + def __init__( + self, enum_class: type[t.Enum], value: t.VarArray | t.ArrayLike[object] + ) -> None: + msg = ( + f"Failed to encode \"{value}\" of type '{value[0].__class__.__name__}', " + "as it is not supported. Please, try again with an array of " + f"'{int.__name__}', '{str.__name__}', or '{enum_class.__name__}'." 
+ ) + super().__init__(msg) + + +class EnumMemberNotFoundError(IndexError): + """Raised when a member is not found in an enum.""" + + def __init__(self, enum_class: type[t.Enum]) -> None: + index = [str(enum.index) for enum in enum_class] + names = [enum.name for enum in enum_class] + msg = ( + f"Some members were not found in enum '{enum_class.__name__}'. " + f"Possible values are: {', '.join(names[:-1])}, and {names[-1]!s}; " + f"or their corresponding indices: {', '.join(index[:-1])}, and " + f"{index[-1]}." + ) + super().__init__(msg) + + +__all__ = ["EnumEncodingError", "EnumMemberNotFoundError"] diff --git a/openfisca_core/indexed_enums/_guards.py b/openfisca_core/indexed_enums/_guards.py new file mode 100644 index 000000000..6c47471b3 --- /dev/null +++ b/openfisca_core/indexed_enums/_guards.py @@ -0,0 +1,209 @@ +from __future__ import annotations + +from typing import Final +from typing_extensions import TypeIs + +import numpy + +from . import types as t + +#: Types for int arrays. +ints: Final = { + numpy.uint8, + numpy.uint16, + numpy.uint32, + numpy.uint64, + numpy.int8, + numpy.int16, + numpy.int32, + numpy.int64, +} + +#: Types for object arrays. +objs: Final = {numpy.object_} + +#: Types for str arrays. +strs: Final = {numpy.str_} + + +def _is_enum_array(array: t.VarArray) -> TypeIs[t.ObjArray]: + """Narrow the type of a given array to an array of :obj:`numpy.object_`. + + Args: + array: Array to check. + + Returns: + bool: True if ``array`` is an array of :obj:`numpy.object_`, False otherwise. 
+ + Examples: + >>> import numpy + + >>> from openfisca_core import indexed_enums as enum + + >>> Enum = enum.Enum("Enum", ["A", "B"]) + >>> array = numpy.array([Enum.A], dtype=numpy.object_) + >>> _is_enum_array(array) + True + + >>> array = numpy.array([1.0]) + >>> _is_enum_array(array) + False + + """ + return array.dtype.type in objs + + +def _is_enum_array_like( + array: t.VarArray | t.ArrayLike[object], +) -> TypeIs[t.ArrayLike[t.Enum]]: + """Narrow the type of a given array-like to an sequence of :class:`.Enum`. + + Args: + array: Array to check. + + Returns: + bool: True if ``array`` is an array-like of :class:`.Enum`, False otherwise. + + Examples: + >>> from openfisca_core import indexed_enums as enum + + >>> class Housing(enum.Enum): + ... OWNER = "owner" + ... TENANT = "tenant" + + >>> array = [Housing.OWNER] + >>> _is_enum_array_like(array) + True + + >>> array = ["owner"] + >>> _is_enum_array_like(array) + False + + """ + return all(isinstance(item, t.Enum) for item in array) + + +def _is_int_array(array: t.VarArray) -> TypeIs[t.IndexArray]: + """Narrow the type of a given array to an array of :obj:`numpy.integer`. + + Args: + array: Array to check. + + Returns: + bool: True if ``array`` is an array of :obj:`numpy.integer`, False otherwise. + + Examples: + >>> import numpy + + >>> array = numpy.array([1], dtype=numpy.int16) + >>> _is_int_array(array) + True + + >>> array = numpy.array([1], dtype=numpy.int32) + >>> _is_int_array(array) + True + + >>> array = numpy.array([1.0]) + >>> _is_int_array(array) + False + + """ + return array.dtype.type in ints + + +def _is_int_array_like( + array: t.VarArray | t.ArrayLike[object], +) -> TypeIs[t.ArrayLike[int]]: + """Narrow the type of a given array-like to a sequence of :obj:`int`. + + Args: + array: Array to check. + + Returns: + bool: True if ``array`` is an array-like of :obj:`int`, False otherwise. 
+ + Examples: + >>> array = [1] + >>> _is_int_array_like(array) + True + + >>> array = (1, 2) + >>> _is_int_array_like(array) + True + + >>> array = [1.0] + >>> _is_int_array_like(array) + False + + """ + return all(isinstance(item, int) for item in array) + + +def _is_str_array(array: t.VarArray) -> TypeIs[t.StrArray]: + """Narrow the type of a given array to an array of :obj:`numpy.str_`. + + Args: + array: Array to check. + + Returns: + bool: True if ``array`` is an array of :obj:`numpy.str_`, False otherwise. + + Examples: + >>> import numpy + + >>> from openfisca_core import indexed_enums as enum + + >>> class Housing(enum.Enum): + ... OWNER = "owner" + ... TENANT = "tenant" + + >>> array = numpy.array([Housing.OWNER]) + >>> _is_str_array(array) + False + + >>> array = numpy.array(["owner"]) + >>> _is_str_array(array) + True + + """ + return array.dtype.type in strs + + +def _is_str_array_like( + array: t.VarArray | t.ArrayLike[object], +) -> TypeIs[t.ArrayLike[str]]: + """Narrow the type of a given array-like to an sequence of :obj:`str`. + + Args: + array: Array to check. + + Returns: + bool: True if ``array`` is an array-like of :obj:`str`, False otherwise. + + Examples: + >>> from openfisca_core import indexed_enums as enum + + >>> class Housing(enum.Enum): + ... OWNER = "owner" + ... TENANT = "tenant" + + >>> array = [Housing.OWNER] + >>> _is_str_array_like(array) + False + + >>> array = ["owner"] + >>> _is_str_array_like(array) + True + + """ + return all(isinstance(item, str) for item in array) + + +__all__ = [ + "_is_enum_array", + "_is_enum_array_like", + "_is_int_array", + "_is_int_array_like", + "_is_str_array", + "_is_str_array_like", +] diff --git a/openfisca_core/indexed_enums/_utils.py b/openfisca_core/indexed_enums/_utils.py new file mode 100644 index 000000000..aa676b92f --- /dev/null +++ b/openfisca_core/indexed_enums/_utils.py @@ -0,0 +1,187 @@ +from __future__ import annotations + +import numpy + +from . 
import types as t + + +def _enum_to_index(value: t.ObjArray | t.ArrayLike[t.Enum]) -> t.IndexArray: + """Transform an array of enum members into an index array. + + Args: + value: The enum members array to encode. + + Returns: + The index array. + + Examples: + >>> import numpy + + >>> from openfisca_core import indexed_enums as enum + + >>> class Road(enum.Enum): + ... STREET = ( + ... "A public road that connects two points, but also has " + ... "buildings on both sides of it; these typically run " + ... "perpendicular to avenues." + ... ) + ... AVENUE = ( + ... "A public way that also has buildings and/or trees on both " + ... "sides; these run perpendicular to streets and are " + ... "traditionally wider." + ... ) + + >>> class Rogue(enum.Enum): + ... BOULEVARD = "More like a shady impasse, to be honest." + + >>> _enum_to_index(Road.AVENUE) + Traceback (most recent call last): + TypeError: 'Road' object is not iterable + + >>> _enum_to_index([Road.AVENUE]) + array([1], dtype=uint8) + + >>> _enum_to_index(numpy.array(Road.AVENUE)) + Traceback (most recent call last): + TypeError: iteration over a 0-d array + + >>> _enum_to_index(numpy.array([Road.AVENUE])) + array([1], dtype=uint8) + + >>> value = numpy.array([Road.STREET, Road.AVENUE, Road.STREET]) + >>> _enum_to_index(value) + array([0, 1, 0], dtype=uint8) + + >>> value = numpy.array([Road.AVENUE, Road.AVENUE, Rogue.BOULEVARD]) + >>> _enum_to_index(value) + array([1, 1, 0], dtype=uint8) + + """ + return numpy.array([enum.index for enum in value], t.EnumDType) + + +def _int_to_index( + enum_class: type[t.Enum], value: t.IndexArray | t.ArrayLike[int] +) -> t.IndexArray: + """Transform an integer array into an index array. + + Args: + enum_class: The enum class to encode the integer array. + value: The integer array to encode. + + Returns: + The index array. 
+ + Examples: + >>> from array import array + + >>> import numpy + + >>> from openfisca_core import indexed_enums as enum + + >>> class Road(enum.Enum): + ... STREET = ( + ... "A public road that connects two points, but also has " + ... "buildings on both sides of it; these typically run " + ... "perpendicular to avenues." + ... ) + ... AVENUE = ( + ... "A public way that also has buildings and/or trees on both " + ... "sides; these run perpendicular to streets and are " + ... "traditionally wider." + ... ) + + >>> _int_to_index(Road, 1) + Traceback (most recent call last): + TypeError: 'int' object is not iterable + + >>> _int_to_index(Road, [1]) + array([1], dtype=uint8) + + >>> _int_to_index(Road, array("B", [1])) + array([1], dtype=uint8) + + >>> _int_to_index(Road, memoryview(array("B", [1]))) + array([1], dtype=uint8) + + >>> _int_to_index(Road, numpy.array(1)) + Traceback (most recent call last): + TypeError: iteration over a 0-d array + + >>> _int_to_index(Road, numpy.array([1])) + array([1], dtype=uint8) + + >>> _int_to_index(Road, numpy.array([0, 1, 0])) + array([0, 1, 0], dtype=uint8) + + >>> _int_to_index(Road, numpy.array([1, 1, 2])) + array([1, 1], dtype=uint8) + + """ + return numpy.array( + [i for i in value if 0 <= i < len(enum_class.__members__)], t.EnumDType + ) + + +def _str_to_index( + enum_class: type[t.Enum], value: t.StrArray | t.ArrayLike[str] +) -> t.IndexArray: + """Transform a string array into an index array. + + Args: + enum_class: The enum class to encode the string array. + value: The string array to encode. + + Returns: + The index array. + + Examples: + >>> from array import array + + >>> import numpy + + >>> from openfisca_core import indexed_enums as enum + + >>> class Road(enum.Enum): + ... STREET = ( + ... "A public road that connects two points, but also has " + ... "buildings on both sides of it; these typically run " + ... "perpendicular to avenues." + ... ) + ... AVENUE = ( + ... 
"A public way that also has buildings and/or trees on both " + ... "sides; these run perpendicular to streets and are " + ... "traditionally wider." + ... ) + + >>> _str_to_index(Road, "AVENUE") + array([], dtype=uint8) + + >>> _str_to_index(Road, ["AVENUE"]) + array([1], dtype=uint8) + + >>> _str_to_index(Road, numpy.array("AVENUE")) + Traceback (most recent call last): + TypeError: iteration over a 0-d array + + >>> _str_to_index(Road, numpy.array(["AVENUE"])) + array([1], dtype=uint8) + + >>> _str_to_index(Road, numpy.array(["STREET", "AVENUE", "STREET"])) + array([0, 1, 0], dtype=uint8) + + >>> _str_to_index(Road, numpy.array(["AVENUE", "AVENUE", "BOULEVARD"])) + array([1, 1], dtype=uint8) + + """ + return numpy.array( + [ + enum_class.__members__[name].index + for name in value + if name in enum_class._member_names_ + ], + t.EnumDType, + ) + + +__all__ = ["_enum_to_index", "_int_to_index", "_str_to_index"] diff --git a/openfisca_core/indexed_enums/config.py b/openfisca_core/indexed_enums/config.py index f7da69b84..abb8817de 100644 --- a/openfisca_core/indexed_enums/config.py +++ b/openfisca_core/indexed_enums/config.py @@ -1,3 +1,6 @@ import numpy ENUM_ARRAY_DTYPE = numpy.int16 + + +__all__ = ["ENUM_ARRAY_DTYPE"] diff --git a/openfisca_core/indexed_enums/enum.py b/openfisca_core/indexed_enums/enum.py index a6fd5d7f9..d116a56ba 100644 --- a/openfisca_core/indexed_enums/enum.py +++ b/openfisca_core/indexed_enums/enum.py @@ -1,89 +1,237 @@ from __future__ import annotations +from collections.abc import Sequence + import numpy from . 
import types as t -from .config import ENUM_ARRAY_DTYPE +from ._enum_type import EnumType +from ._errors import EnumEncodingError, EnumMemberNotFoundError +from ._guards import ( + _is_enum_array, + _is_enum_array_like, + _is_int_array, + _is_int_array_like, + _is_str_array, + _is_str_array_like, +) +from ._utils import _enum_to_index, _int_to_index, _str_to_index from .enum_array import EnumArray -class Enum(t.Enum): - """Enum based on `enum34 `_, whose items - have an index. +class Enum(t.Enum, metaclass=EnumType): + """Enum based on `enum34 `_. + + Its items have an :class:`int` index, useful and performant when running + :mod:`~openfisca_core.simulations` on large :mod:`~openfisca_core.populations`. + + Examples: + >>> from openfisca_core import indexed_enums as enum + + >>> class Housing(enum.Enum): + ... OWNER = "Owner" + ... TENANT = "Tenant" + ... FREE_LODGER = "Free lodger" + ... HOMELESS = "Homeless" + + >>> repr(Housing) + "" + + >>> repr(Housing.TENANT) + 'Housing.TENANT' + + >>> str(Housing.TENANT) + 'Housing.TENANT' + + >>> dict([(Housing.TENANT, Housing.TENANT.value)]) + {Housing.TENANT: 'Tenant'} + + >>> list(Housing) + [Housing.OWNER, Housing.TENANT, Housing.FREE_LODGER, Housing.HOMELESS] + + >>> Housing["TENANT"] + Housing.TENANT + + >>> Housing("Tenant") + Housing.TENANT + + >>> Housing.TENANT in Housing + True + + >>> len(Housing) + 4 + + >>> Housing.TENANT == Housing.TENANT + True + + >>> Housing.TENANT != Housing.TENANT + False + + >>> Housing.TENANT.index + 1 + + >>> Housing.TENANT.name + 'TENANT' + + >>> Housing.TENANT.value + 'Tenant' + """ - # Tweak enums to add an index attribute to each enum item - def __init__(self, name: str) -> None: - # When the enum item is initialized, self._member_names_ contains the - # names of the previously initialized items, so its length is the index - # of this item. + #: The :attr:`index` of the :class:`.Enum` member. 
+ index: int + + def __init__(self, *__args: object, **__kwargs: object) -> None: + """Tweak :class:`enum.Enum` to add an :attr:`.index` to each enum item. + + When the enum is initialised, ``_member_names_`` contains the names of + the already initialized items, so its length is the index of this item. + + Args: + *__args: Positional arguments. + **__kwargs: Keyword arguments. + + Examples: + >>> import numpy + + >>> from openfisca_core import indexed_enums as enum + + >>> Housing = enum.Enum("Housing", "owner tenant") + >>> Housing.tenant.index + 1 + + >>> class Housing(enum.Enum): + ... OWNER = "Owner" + ... TENANT = "Tenant" + + >>> Housing.TENANT.index + 1 + + >>> array = numpy.array([[1, 2], [3, 4]]) + >>> array[Housing.TENANT.index] + array([3, 4]) + + Note: + ``_member_names_`` is undocumented in upstream :class:`enum.Enum`. + + """ self.index = len(self._member_names_) - # Bypass the slow Enum.__eq__ - __eq__ = object.__eq__ + def __repr__(self) -> str: + return f"{self.__class__.__name__}.{self.name}" - # In Python 3, __hash__ must be defined if __eq__ is defined to stay - # hashable. - __hash__ = object.__hash__ + def __hash__(self) -> int: + return object.__hash__(self.__class__.__name__ + self.name) + + def __eq__(self, other: object) -> bool: + if ( + isinstance(other, Enum) + and self.__class__.__name__ == other.__class__.__name__ + ): + return self.index == other.index + return NotImplemented + + def __ne__(self, other: object) -> bool: + if ( + isinstance(other, Enum) + and self.__class__.__name__ == other.__class__.__name__ + ): + return self.index != other.index + return NotImplemented @classmethod - def encode( - cls, - array: EnumArray | numpy.int32 | numpy.float32 | numpy.object_, - ) -> EnumArray: - """Encode a string numpy array, an enum item numpy array, or an int numpy - array into an :any:`EnumArray`. See :any:`EnumArray.decode` for - decoding. - - :param numpy.ndarray array: Array of string identifiers, or of enum - items, to encode. 
- - :returns: An :any:`EnumArray` encoding the input array values. - :rtype: :any:`EnumArray` - - For instance: - - >>> string_identifier_array = asarray(["free_lodger", "owner"]) - >>> encoded_array = HousingOccupancyStatus.encode(string_identifier_array) - >>> encoded_array[0] - 2 # Encoded value - - >>> free_lodger = HousingOccupancyStatus.free_lodger - >>> owner = HousingOccupancyStatus.owner - >>> enum_item_array = asarray([free_lodger, owner]) - >>> encoded_array = HousingOccupancyStatus.encode(enum_item_array) - >>> encoded_array[0] - 2 # Encoded value + def encode(cls, array: t.VarArray | t.ArrayLike[object]) -> t.EnumArray: + """Encode an encodable array into an :class:`.EnumArray`. + + Args: + array: :class:`~numpy.ndarray` to encode. + + Returns: + EnumArray: An :class:`.EnumArray` with the encoded input values. + + Examples: + >>> import numpy + + >>> from openfisca_core import indexed_enums as enum + + >>> class Housing(enum.Enum): + ... OWNER = "Owner" + ... TENANT = "Tenant" + + # EnumArray + + >>> array = numpy.array([1]) + >>> enum_array = enum.EnumArray(array, Housing) + >>> Housing.encode(enum_array) + EnumArray([Housing.TENANT]) + + # Array of Enum + + >>> array = numpy.array([Housing.TENANT]) + >>> enum_array = Housing.encode(array) + >>> enum_array == Housing.TENANT + array([ True]) + + # Array of integers + + >>> array = numpy.array([1]) + >>> enum_array = Housing.encode(array) + >>> enum_array == Housing.TENANT + array([ True]) + + # Array of strings + + >>> array = numpy.array(["TENANT"]) + >>> enum_array = Housing.encode(array) + >>> enum_array == Housing.TENANT + array([ True]) + + # Array of bytes + + >>> array = numpy.array([b"TENANT"]) + >>> enum_array = Housing.encode(array) + Traceback (most recent call last): + EnumEncodingError: Failed to encode "[b'TENANT']" of type 'bytes... + + .. seealso:: + :meth:`.EnumArray.decode` for decoding. 
+ """ if isinstance(array, EnumArray): return array + if len(array) == 0: + return EnumArray(numpy.asarray(array, t.EnumDType), cls) + if isinstance(array, Sequence): + return cls._encode_array_like(array) + return cls._encode_array(array) - # String array - if isinstance(array, numpy.ndarray) and array.dtype.kind in {"U", "S"}: - array = numpy.select( - [array == item.name for item in cls], - [item.index for item in cls], - ).astype(ENUM_ARRAY_DTYPE) - - # Enum items arrays - elif isinstance(array, numpy.ndarray) and array.dtype.kind == "O": - # Ensure we are comparing the comparable. The problem this fixes: - # On entering this method "cls" will generally come from - # variable.possible_values, while the array values may come from - # directly importing a module containing an Enum class. However, - # variables (and hence their possible_values) are loaded by a call - # to load_module, which gives them a different identity from the - # ones imported in the usual way. - # - # So, instead of relying on the "cls" passed in, we use only its - # name to check that the values in the array, if non-empty, are of - # the right type. 
- if len(array) > 0 and cls.__name__ is array[0].__class__.__name__: - cls = array[0].__class__ - - array = numpy.select( - [array == item for item in cls], - [item.index for item in cls], - ).astype(ENUM_ARRAY_DTYPE) - - return EnumArray(array, cls) + @classmethod + def _encode_array(cls, value: t.VarArray) -> t.EnumArray: + if _is_int_array(value): + indices = _int_to_index(cls, value) + elif _is_str_array(value): # type: ignore[unreachable] + indices = _str_to_index(cls, value) + elif _is_enum_array(value) and cls.__name__ is value[0].__class__.__name__: + indices = _enum_to_index(value) + else: + raise EnumEncodingError(cls, value) + if indices.size != len(value): + raise EnumMemberNotFoundError(cls) + return EnumArray(indices, cls) + + @classmethod + def _encode_array_like(cls, value: t.ArrayLike[object]) -> t.EnumArray: + if _is_int_array_like(value): + indices = _int_to_index(cls, value) + elif _is_str_array_like(value): # type: ignore[unreachable] + indices = _str_to_index(cls, value) + elif _is_enum_array_like(value): + indices = _enum_to_index(value) + else: + raise EnumEncodingError(cls, value) + if indices.size != len(value): + raise EnumMemberNotFoundError(cls) + return EnumArray(indices, cls) + + +__all__ = ["Enum"] diff --git a/openfisca_core/indexed_enums/enum_array.py b/openfisca_core/indexed_enums/enum_array.py index a1479d5b8..98f9b4c6a 100644 --- a/openfisca_core/indexed_enums/enum_array.py +++ b/openfisca_core/indexed_enums/enum_array.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Any, NoReturn +from typing import NoReturn from typing_extensions import Self import numpy @@ -9,51 +9,232 @@ class EnumArray(t.EnumArray): - """NumPy array subclass representing an array of enum items. + """A subclass of :class:`~numpy.ndarray` of :class:`.Enum`. + + :class:`.Enum` arrays are encoded as :class:`int` to improve performance. + + Note: + Subclassing :class:`~numpy.ndarray` is a little tricky™. 
To read more + about the :meth:`.__new__` and :meth:`.__array_finalize__` methods + below, see `Subclassing ndarray`_. + + Examples: + >>> import numpy + + >>> from openfisca_core import indexed_enums as enum, variables + + >>> class Housing(enum.Enum): + ... OWNER = "Owner" + ... TENANT = "Tenant" + ... FREE_LODGER = "Free lodger" + ... HOMELESS = "Homeless" + + >>> array = numpy.array([1], dtype=numpy.int16) + >>> enum_array = enum.EnumArray(array, Housing) + + >>> repr(enum.EnumArray) + "" + + >>> repr(enum_array) + 'EnumArray([Housing.TENANT])' + + >>> str(enum_array) + "['TENANT']" + + >>> list(map(int, enum_array)) + [1] + + >>> int(enum_array[0]) + 1 + + >>> enum_array[0] in enum_array + True + + >>> len(enum_array) + 1 + + >>> enum_array = enum.EnumArray(list(Housing), Housing) + Traceback (most recent call last): + AttributeError: 'list' object has no attribute 'view' + + >>> class OccupancyStatus(variables.Variable): + ... value_type = enum.Enum + ... possible_values = Housing + + >>> enum.EnumArray(array, OccupancyStatus.possible_values) + EnumArray([Housing.TENANT]) + + .. _Subclassing ndarray: + https://numpy.org/doc/stable/user/basics.subclassing.html - EnumArrays are encoded as ``int`` arrays to improve performance """ - # Subclassing ndarray is a little tricky. - # To read more about the two following methods, see: - # https://docs.scipy.org/doc/numpy-1.13.0/user/basics.subclassing.html#slightly-more-realistic-example-attribute-added-to-existing-array. + #: Enum type of the array items. 
+ possible_values: None | type[t.Enum] + def __new__( cls, - input_array: t.Array[t.DTypeEnum], - possible_values: None | type[t.Enum] = None, + input_array: t.IndexArray, + possible_values: type[t.Enum], ) -> Self: - obj = numpy.asarray(input_array).view(cls) + """See comment above.""" + obj = input_array.view(cls) obj.possible_values = possible_values return obj - # See previous comment - def __array_finalize__(self, obj: numpy.int32 | None) -> None: + def __array_finalize__(self, obj: None | t.EnumArray | t.VarArray) -> None: + """See comment above.""" if obj is None: return - self.possible_values = getattr(obj, "possible_values", None) - def __eq__(self, other: object) -> bool: - # When comparing to an item of self.possible_values, use the item index - # to speed up the comparison. - if other.__class__.__name__ is self.possible_values.__name__: - # Use view(ndarray) so that the result is a classic ndarray, not an - # EnumArray. - return self.view(numpy.ndarray) == other.index + def __eq__(self, other: object) -> t.BoolArray: # type: ignore[override] + """Compare equality with the item's :attr:`~.Enum.index`. + + When comparing to an item of :attr:`.possible_values`, use the + item's :attr:`~.Enum.index` to speed up the comparison. + + Whenever possible, use :any:`numpy.ndarray.view` so that the result is + a classic :class:`~numpy.ndarray`, not an :obj:`.EnumArray`. + + Args: + other: Another :class:`object` to compare to. + + Returns: + ndarray[bool_]: The element-wise comparison with ``other``, or + ``NotImplemented`` when ``other`` or :attr:`.possible_values` is None. + + Examples: + >>> import numpy + + >>> from openfisca_core import indexed_enums as enum + + >>> class Housing(enum.Enum): + ... OWNER = "Owner" + ...
TENANT = "Tenant" + + >>> array = numpy.array([1]) + >>> enum_array = enum.EnumArray(array, Housing) + + >>> enum_array == Housing + array([False, True]) + + >>> enum_array == Housing.TENANT + array([ True]) + + >>> enum_array == 1 + array([ True]) + + >>> enum_array == [1] + array([ True]) + + >>> enum_array == [2] + array([False]) + + >>> enum_array == "1" + array([False]) + + >>> enum_array is None + False + + >>> enum_array == enum.EnumArray(numpy.array([1]), Housing) + array([ True]) - return self.view(numpy.ndarray) == other + Note: + This breaks the `Liskov substitution principle`_. - def __ne__(self, other: object) -> bool: + .. _Liskov substitution principle: + https://en.wikipedia.org/wiki/Liskov_substitution_principle + + """ + result: t.BoolArray + + if self.possible_values is None: + return NotImplemented + if other is None: + return NotImplemented + if ( + isinstance(other, type(t.Enum)) + and other.__name__ is self.possible_values.__name__ + ): + result = ( + self.view(numpy.ndarray) + == self.possible_values.indices[ + self.possible_values.indices <= max(self) + ] + ) + return result + if ( + isinstance(other, t.Enum) + and other.__class__.__name__ is self.possible_values.__name__ + ): + result = self.view(numpy.ndarray) == other.index + return result + # For NumPy >=1.26.x. + if isinstance(is_equal := self.view(numpy.ndarray) == other, numpy.ndarray): + return is_equal + # For NumPy <1.26.x. + return numpy.array([is_equal], dtype=t.BoolDType) + + def __ne__(self, other: object) -> t.BoolArray: # type: ignore[override] + """Inequality. + + Args: + other: Another :class:`object` to compare to. + + Returns: + ndarray[bool_]: The element-wise negation of the equality + comparison with ``other``. + + Examples: + >>> import numpy + + >>> from openfisca_core import indexed_enums as enum + + >>> class Housing(enum.Enum): + ... OWNER = "Owner" + ...
TENANT = "Tenant" + + >>> array = numpy.array([1]) + >>> enum_array = enum.EnumArray(array, Housing) + + >>> enum_array != Housing + array([ True, False]) + + >>> enum_array != Housing.TENANT + array([False]) + + >>> enum_array != 1 + array([False]) + + >>> enum_array != [1] + array([False]) + + >>> enum_array != [2] + array([ True]) + + >>> enum_array != "1" + array([ True]) + + >>> enum_array is not None + True + + Note: + This breaks the `Liskov substitution principle`_. + + .. _Liskov substitution principle: + https://en.wikipedia.org/wiki/Liskov_substitution_principle + + """ return numpy.logical_not(self == other) - def _forbidden_operation(self, other: Any) -> NoReturn: + @staticmethod + def _forbidden_operation(*__args: object, **__kwds: object) -> NoReturn: msg = ( "Forbidden operation. The only operations allowed on EnumArrays " "are '==' and '!='." ) - raise TypeError( - msg, - ) + raise TypeError(msg) __add__ = _forbidden_operation __mul__ = _forbidden_operation @@ -64,42 +245,81 @@ def _forbidden_operation(self, other: Any) -> NoReturn: __and__ = _forbidden_operation __or__ = _forbidden_operation - def decode(self) -> numpy.object_: - """Return the array of enum items corresponding to self. + def decode(self) -> t.ObjArray: + """Decode itself to a normal array. + + Returns: + ndarray[Enum]: The items of the :obj:`.EnumArray`. - For instance: + Raises: + TypeError: When the :attr:`.possible_values` is not defined. - >>> enum_array = household("housing_occupancy_status", period) - >>> enum_array[0] - >>> 2 # Encoded value - >>> enum_array.decode()[0] - + Examples: + >>> import numpy + + >>> from openfisca_core import indexed_enums as enum + + >>> class Housing(enum.Enum): + ... OWNER = "Owner" + ... 
TENANT = "Tenant" + + >>> array = numpy.array([1]) + >>> enum_array = enum.EnumArray(array, Housing) + >>> enum_array.decode() + array([Housing.TENANT], dtype=object) - Decoded value: enum item """ - return numpy.select( - [self == item.index for item in self.possible_values], - list(self.possible_values), - ) + result: t.ObjArray + if self.possible_values is None: + msg = ( + f"The possible values of the {self.__class__.__name__} are " + f"not defined." + ) + raise TypeError(msg) + array = self.reshape(1).astype(t.EnumDType) if self.ndim == 0 else self + result = self.possible_values.enums[array] + return result - def decode_to_str(self) -> numpy.str_: - """Return the array of string identifiers corresponding to self. + def decode_to_str(self) -> t.StrArray: + """Decode itself to an array of strings. - For instance: + Returns: + ndarray[str_]: The string values of the :obj:`.EnumArray`. + + Raises: + TypeError: When the :attr:`.possible_values` is not defined. + + Examples: + >>> import numpy + + >>> from openfisca_core import indexed_enums as enum + + >>> class Housing(enum.Enum): + ... OWNER = "Owner" + ... TENANT = "Tenant" + + >>> array = numpy.array([1]) + >>> enum_array = enum.EnumArray(array, Housing) + >>> enum_array.decode_to_str() + array(['TENANT'], dtype='>> enum_array = household("housing_occupancy_status", period) - >>> enum_array[0] - >>> 2 # Encoded value - >>> enum_array.decode_to_str()[0] - 'free_lodger' # String identifier """ - return numpy.select( - [self == item.index for item in self.possible_values], - [item.name for item in self.possible_values], - ) + result: t.StrArray + if self.possible_values is None: + msg = ( + f"The possible values of the {self.__class__.__name__} are " + f"not defined." 
+ ) + raise TypeError(msg) + array = self.reshape(1).astype(t.EnumDType) if self.ndim == 0 else self + result = self.possible_values.names[array] + return result def __repr__(self) -> str: return f"{self.__class__.__name__}({self.decode()!s})" def __str__(self) -> str: return str(self.decode_to_str()) + + +__all__ = ["EnumArray"] diff --git a/openfisca_core/indexed_enums/py.typed b/openfisca_core/indexed_enums/py.typed new file mode 100644 index 000000000..e69de29bb diff --git a/openfisca_core/indexed_enums/tests/__init__.py b/openfisca_core/indexed_enums/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/openfisca_core/indexed_enums/tests/test_enum.py b/openfisca_core/indexed_enums/tests/test_enum.py new file mode 100644 index 000000000..2e49c1742 --- /dev/null +++ b/openfisca_core/indexed_enums/tests/test_enum.py @@ -0,0 +1,135 @@ +import numpy +import pytest +from numpy.testing import assert_array_equal + +from openfisca_core import indexed_enums as enum + + +class Animal(enum.Enum): + CAT = b"Cat" + DOG = b"Dog" + + +class Colour(enum.Enum): + INCARNADINE = "incarnadine" + TURQUOISE = "turquoise" + AMARANTH = "amaranth" + + +# Arrays of Enum + + +def test_enum_encode_with_array_of_enum(): + """Does encode when called with an array of enums.""" + array = numpy.array([Animal.DOG, Animal.DOG, Animal.CAT]) + enum_array = Animal.encode(array) + assert_array_equal(enum_array, numpy.array([1, 1, 0])) + + +def test_enum_encode_with_enum_sequence(): + """Does encode when called with an enum sequence.""" + sequence = list(Animal) + enum_array = Animal.encode(sequence) + assert Animal.DOG in enum_array + + +def test_enum_encode_with_enum_scalar_array(): + """Does not encode when called with an enum scalar array.""" + array = numpy.array(Animal.DOG) + with pytest.raises(TypeError): + Animal.encode(array) + + +def test_enum_encode_with_enum_with_bad_value(): + """Does not encode when called with a value not in an Enum.""" + array = 
numpy.array([Colour.AMARANTH]) + with pytest.raises(TypeError): + Animal.encode(array) + + +# Arrays of int + + +def test_enum_encode_with_array_of_int(): + """Does encode when called with an array of int.""" + array = numpy.array([1, 1, 0]) + enum_array = Animal.encode(array) + assert_array_equal(enum_array, numpy.array([1, 1, 0])) + + +def test_enum_encode_with_int_sequence(): + """Does encode when called with an int sequence.""" + sequence = (0, 1) + enum_array = Animal.encode(sequence) + assert Animal.DOG in enum_array + + +def test_enum_encode_with_int_scalar_array(): + """Does not encode when called with an int scalar array.""" + array = numpy.array(1) + with pytest.raises(TypeError): + Animal.encode(array) + + +def test_enum_encode_with_int_with_bad_value(): + """Does not encode when called with a value not in an Enum.""" + array = numpy.array([2]) + with pytest.raises(IndexError): + Animal.encode(array) + + +# Arrays of strings + + +def test_enum_encode_with_array_of_string(): + """Does encode when called with an array of string.""" + array = numpy.array(["DOG", "DOG", "CAT"]) + enum_array = Animal.encode(array) + assert_array_equal(enum_array, numpy.array([1, 1, 0])) + + +def test_enum_encode_with_str_sequence(): + """Does encode when called with a str sequence.""" + sequence = ("DOG", "CAT") + enum_array = Animal.encode(sequence) + assert Animal.DOG in enum_array + + +def test_enum_encode_with_str_scalar_array(): + """Does not encode when called with a str scalar array.""" + array = numpy.array("DOG") + with pytest.raises(TypeError): + Animal.encode(array) + + +def test_enum_encode_with_str_with_bad_value(): + """Does not encode when called with a value not in an Enum.""" + array = numpy.array(["JAIBA"]) + with pytest.raises(IndexError): + Animal.encode(array) + + +# Unsupported encodings + + +def test_enum_encode_with_any_array(): + """Does not encode when called with unsupported types.""" + value = {"animal": "dog"} + array = numpy.array([value]) + with
pytest.raises(TypeError): + Animal.encode(array) + + +def test_enum_encode_with_any_scalar_array(): + """Does not encode when called with unsupported types.""" + value = 1.5 + array = numpy.array(value) + with pytest.raises(TypeError): + Animal.encode(array) + + +def test_enum_encode_with_any_sequence(): + """Does not encode when called with unsupported types.""" + sequence = memoryview(b"DOG") + with pytest.raises(IndexError): + Animal.encode(sequence) diff --git a/openfisca_core/indexed_enums/tests/test_enum_array.py b/openfisca_core/indexed_enums/tests/test_enum_array.py new file mode 100644 index 000000000..1ab247468 --- /dev/null +++ b/openfisca_core/indexed_enums/tests/test_enum_array.py @@ -0,0 +1,30 @@ +import numpy +import pytest + +from openfisca_core import indexed_enums as enum + + +class Fruit(enum.Enum): + APPLE = b"apple" + BERRY = b"berry" + + +@pytest.fixture +def enum_array(): + return enum.EnumArray(numpy.array([1]), Fruit) + + +def test_enum_array_eq_operation(enum_array): + """The equality operation is permitted.""" + assert enum_array == enum.EnumArray(numpy.array([1]), Fruit) + + +def test_enum_array_ne_operation(enum_array): + """The non-equality operation is permitted.""" + assert enum_array != enum.EnumArray(numpy.array([0]), Fruit) + + +def test_enum_array_any_other_operation(enum_array): + """Only equality and non-equality operations are permitted.""" + with pytest.raises(TypeError, match="Forbidden operation."): + enum_array * 1 diff --git a/openfisca_core/indexed_enums/types.py b/openfisca_core/indexed_enums/types.py index d69eb098a..e0a71b322 100644 --- a/openfisca_core/indexed_enums/types.py +++ b/openfisca_core/indexed_enums/types.py @@ -1,3 +1,41 @@ -from openfisca_core.types import Array, DTypeEnum, Enum, EnumArray +from typing_extensions import TypeAlias -__all__ = ["Array", "DTypeEnum", "Enum", "EnumArray"] +from openfisca_core.types import Array, ArrayLike, DTypeLike, Enum, EnumArray, EnumType + +from enum import _EnumDict as 
EnumDict # noqa: PLC2701 + +from numpy import ( + bool_ as BoolDType, + generic as VarDType, + int32 as IntDType, + object_ as ObjDType, + str_ as StrDType, + uint8 as EnumDType, +) + +#: Type for enum indices arrays. +IndexArray: TypeAlias = Array[EnumDType] + +#: Type for boolean arrays. +BoolArray: TypeAlias = Array[BoolDType] + +#: Type for int arrays. +IntArray: TypeAlias = Array[IntDType] + +#: Type for str arrays. +StrArray: TypeAlias = Array[StrDType] + +#: Type for object arrays. +ObjArray: TypeAlias = Array[ObjDType] + +#: Type for generic arrays. +VarArray: TypeAlias = Array[VarDType] + +__all__ = [ + "ArrayLike", + "DTypeLike", + "Enum", + "EnumArray", + "EnumDict", + "EnumType", +] diff --git a/openfisca_core/simulations/simulation.py b/openfisca_core/simulations/simulation.py index c32fea22a..b7d20fa97 100644 --- a/openfisca_core/simulations/simulation.py +++ b/openfisca_core/simulations/simulation.py @@ -3,7 +3,11 @@ from collections.abc import Mapping from typing import NamedTuple -from openfisca_core.types import Population, TaxBenefitSystem, Variable +from openfisca_core.types import ( + CorePopulation as Population, + TaxBenefitSystem, + Variable, +) import tempfile import warnings diff --git a/openfisca_core/types.py b/openfisca_core/types.py index b922cde09..b79504c72 100644 --- a/openfisca_core/types.py +++ b/openfisca_core/types.py @@ -1,8 +1,8 @@ from __future__ import annotations from collections.abc import Iterable, Sequence, Sized -from numpy.typing import NDArray -from typing import Any, NewType, TypeVar, Union +from numpy.typing import DTypeLike, NDArray +from typing import NewType, TypeVar, Union from typing_extensions import Protocol, Self, TypeAlias import abc @@ -44,7 +44,7 @@ DTypeBytes: TypeAlias = numpy.bytes_ #: Type for Enum arrays. -DTypeEnum: TypeAlias = numpy.int16 +DTypeEnum: TypeAlias = numpy.uint8 #: Type for date arrays. 
DTypeDate: TypeAlias = numpy.datetime64 @@ -108,8 +108,15 @@ def plural(self, /) -> None | RolePlural: ... # Indexed enums -class Enum(enum.Enum, metaclass=enum.EnumMeta): +class EnumType(enum.EnumMeta): + indices: Array[DTypeEnum] + names: Array[DTypeStr] + enums: Array[DTypeObject] + + +class Enum(enum.Enum, metaclass=EnumType): index: int + _member_names_: list[str] class EnumArray(Array[DTypeEnum], metaclass=abc.ABCMeta): @@ -117,7 +124,7 @@ class EnumArray(Array[DTypeEnum], metaclass=abc.ABCMeta): @abc.abstractmethod def __new__( - cls, input_array: Array[DTypeEnum], possible_values: None | type[Enum] = ... + cls, input_array: Array[DTypeEnum], possible_values: type[Enum] ) -> Self: ... @@ -125,8 +132,8 @@ def __new__( class Holder(Protocol): - def clone(self, population: Any, /) -> Holder: ... - def get_memory_usage(self, /) -> Any: ... + def clone(self, population: CorePopulation, /) -> Holder: ... + def get_memory_usage(self, /) -> dict[str, object]: ... # Parameters @@ -186,27 +193,39 @@ def offset(self, offset: str | int, unit: None | DateUnit = None, /) -> Period: # Populations -class Population(Protocol): - entity: Any +class CorePopulation(Protocol): ... + + +class SinglePopulation(CorePopulation, Protocol): + entity: SingleEntity - def get_holder(self, variable_name: VariableName, /) -> Any: ... + def get_holder(self, variable_name: VariableName, /) -> Holder: ... + + +class GroupPopulation(CorePopulation, Protocol): ... # Simulations class Simulation(Protocol): - def calculate(self, variable_name: VariableName, period: Any, /) -> Any: ... - def calculate_add(self, variable_name: VariableName, period: Any, /) -> Any: ... - def calculate_divide(self, variable_name: VariableName, period: Any, /) -> Any: ... - def get_population(self, plural: None | str, /) -> Any: ... + def calculate( + self, variable_name: VariableName, period: Period, / + ) -> Array[DTypeGeneric]: ... 
+ def calculate_add( + self, variable_name: VariableName, period: Period, / + ) -> Array[DTypeGeneric]: ... + def calculate_divide( + self, variable_name: VariableName, period: Period, / + ) -> Array[DTypeGeneric]: ... + def get_population(self, plural: None | str, /) -> CorePopulation: ... # Tax-Benefit systems class TaxBenefitSystem(Protocol): - person_entity: Any + person_entity: SingleEntity def get_variable( self, @@ -223,19 +242,22 @@ def get_variable( class Variable(Protocol): - entity: Any + entity: CoreEntity name: VariableName class Formula(Protocol): def __call__( self, - population: Population, + population: CorePopulation, instant: Instant, params: Params, /, - ) -> Array[Any]: ... + ) -> Array[DTypeGeneric]: ... class Params(Protocol): def __call__(self, instant: Instant, /) -> ParameterNodeAtInstant: ... + + +__all__ = ["DTypeLike"] diff --git a/openfisca_tasks/lint.mk b/openfisca_tasks/lint.mk index a3f5a8e45..f5fdbc7ce 100644 --- a/openfisca_tasks/lint.mk +++ b/openfisca_tasks/lint.mk @@ -21,6 +21,7 @@ lint-doc: \ lint-doc-commons \ lint-doc-data_storage \ lint-doc-entities \ + lint-doc-indexed_enums \ ; ## Run linters to check for syntax and style errors in the doc. diff --git a/openfisca_tasks/test_code.mk b/openfisca_tasks/test_code.mk index ed2d435ed..6a27f1b9c 100644 --- a/openfisca_tasks/test_code.mk +++ b/openfisca_tasks/test_code.mk @@ -35,11 +35,12 @@ test-code: test-core test-country test-extension ## Run openfisca-core tests. 
test-core: $(shell git ls-files "*test_*.py") @$(call print_help,$@:) - @python -m pytest --capture=no --xdoctest --xdoctest-verbose=0 \ + @python -m pytest --capture=no \ openfisca_core/commons \ openfisca_core/data_storage \ openfisca_core/entities \ openfisca_core/holders \ + openfisca_core/indexed_enums \ openfisca_core/periods \ openfisca_core/projectors @PYTEST_ADDOPTS="$${PYTEST_ADDOPTS} ${pytest_args}" \ diff --git a/setup.cfg b/setup.cfg index 9b8ce699b..60ac8faf0 100644 --- a/setup.cfg +++ b/setup.cfg @@ -11,12 +11,25 @@ convention = google docstring_style = google extend-ignore = D -ignore = B019, E203, E501, F405, E701, E704, RST212, RST213, RST301, RST306, W503 +ignore = + B019, + E203, + E501, + F405, + E701, + E704, + RST210, + RST212, + RST213, + RST301, + RST306, + W503 in-place = true include-in-doctest = openfisca_core/commons openfisca_core/entities openfisca_core/holders + openfisca_core/indexed_enums openfisca_core/periods openfisca_core/projectors max-line-length = 88 @@ -37,7 +50,7 @@ disable = all enable = C0115, C0116, R0401 per-file-ignores = types.py:C0115,C0116 - /tests/:C0116 + /tests/:C0115,C0116 score = no [isort] diff --git a/setup.py b/setup.py index 202e5e449..d20cd6bb8 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ "PyYAML >=6.0, <7.0", "StrEnum >=0.4.8, <0.5.0", # 3.11.x backport "dpath >=2.1.4, <3.0", - "numexpr >=2.8.4, <3.0", + "numexpr >=2.10.1, <3.0", "numpy >=1.24.2, <2.0", "pendulum >=3.0.0, <4.0.0", "psutil >=5.9.4, <6.0", @@ -62,15 +62,14 @@ "pylint >=3.3.1, <4.0", "pylint-per-file-ignores >=1.3.2, <2.0", "pyright >=1.1.382, <2.0", - "ruff >=0.6.7, <1.0", + "ruff >=0.6.9, <1.0", "ruff-lsp >=0.0.57, <1.0", - "xdoctest >=1.2.0, <2.0", *api_requirements, ] setup( name="OpenFisca-Core", - version="42.0.7", + version="43.0.0", author="OpenFisca Team", author_email="contact@openfisca.org", classifiers=[ diff --git a/stubs/numexpr/__init__.pyi b/stubs/numexpr/__init__.pyi index f9ada73c3..931d47ddb 100644 --- 
a/stubs/numexpr/__init__.pyi +++ b/stubs/numexpr/__init__.pyi @@ -4,6 +4,7 @@ import numpy def evaluate( __ex: str, + /, *__args: object, **__kwargs: object, ) -> NDArray[numpy.bool_] | NDArray[numpy.int32] | NDArray[numpy.float32]: ... diff --git a/tests/core/test_simulation_builder.py b/tests/core/test_simulation_builder.py index b905b29b8..507d10e70 100644 --- a/tests/core/test_simulation_builder.py +++ b/tests/core/test_simulation_builder.py @@ -44,14 +44,17 @@ def __init__(self) -> None: @pytest.fixture def enum_variable(): + class _TestEnum(Enum): + foo = "bar" + class TestEnum(Variable): definition_period = DateUnit.ETERNITY value_type = Enum dtype = "O" - default_value = "0" + default_value = _TestEnum.foo is_neutralized = False set_input = None - possible_values = Enum("foo", "bar") + possible_values = _TestEnum name = "enum" def __init__(self) -> None: diff --git a/tests/core/test_tracers.py b/tests/core/test_tracers.py index 178b957ec..c9af9ecee 100644 --- a/tests/core/test_tracers.py +++ b/tests/core/test_tracers.py @@ -452,7 +452,7 @@ def test_log_aggregate_with_enum(tracer) -> None: assert ( lines[0] - == " A<2017> >> {'avg': EnumArray(HousingOccupancyStatus.tenant), 'max': EnumArray(HousingOccupancyStatus.tenant), 'min': EnumArray(HousingOccupancyStatus.tenant)}" + == " A<2017> >> {'avg': EnumArray([HousingOccupancyStatus.tenant]), 'max': EnumArray([HousingOccupancyStatus.tenant]), 'min': EnumArray([HousingOccupancyStatus.tenant])}" )