From dd03bed9c7f308e1e918fc0873d3869bbc3bfef4 Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Wed, 20 Nov 2024 02:06:38 +0100 Subject: [PATCH 01/13] perf: add pytest-benchmark --- setup.py | 1 + tests/core/test_big_dataset.py | 15 ++++----------- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/setup.py b/setup.py index b466b2407..491c63a2a 100644 --- a/setup.py +++ b/setup.py @@ -63,6 +63,7 @@ "pylint >=3.3.1, <4.0", "pylint-per-file-ignores >=1.3.2, <2.0", "pyright >=1.1.382, <2.0", + "pytest-benchmark >=5.1.0, <6.0", "ruff >=0.6.9, <1.0", "ruff-lsp >=0.0.57, <1.0", *api_requirements, diff --git a/tests/core/test_big_dataset.py b/tests/core/test_big_dataset.py index 677b483a6..2b6ac4e4c 100644 --- a/tests/core/test_big_dataset.py +++ b/tests/core/test_big_dataset.py @@ -1,6 +1,5 @@ from collections.abc import Iterable -from time import time from unittest import TestCase from openfisca_core.simulations import SimulationBuilder @@ -55,14 +54,8 @@ def run_simulation(tax_benefit_system) -> None: tc.assertAlmostEqual(total_taxes, sum(persons_salaries) * 0.17833333, delta=1) -def test_speed(tax_benefit_system): - elapsed = 0 - for _ in range(10): - start = time() +def test_speed(tax_benefit_system, benchmark) -> None: + def run() -> None: run_simulation(tax_benefit_system) - end = time() - elapsed += end - start - elapsed_mean = elapsed / 10 - # print(f"Mean elapsed time: {elapsed_mean:.2f} seconds") - # Expected time is less than 0.3 seconds on a AMD Threadripper 1950X - tc.assertLess(elapsed_mean, 0.3) + result = benchmark.pedantic(run, iterations=1, rounds=10) + assert not result From e7e4e77271fd260d2f1bd00a7c50ce1ef3129a17 Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Wed, 20 Nov 2024 02:25:25 +0100 Subject: [PATCH 02/13] revert: Enum to v42.0.0 --- ...ac0111829c4cd62c11ce5_20241120_015637.json | 78 +++++ openfisca_core/indexed_enums/enum.py | 267 +++++------------- .../indexed_enums/tests/test_enum.py | 14 + setup.cfg | 6 +- tests/core/test_big_dataset.py | 1 + 5 files changed, 164 insertions(+), 202 deletions(-) create mode 100644 .benchmarks/Darwin-CPython-3.11-64bit/0001_25aaa479b0743330431ac0111829c4cd62c11ce5_20241120_015637.json diff --git a/.benchmarks/Darwin-CPython-3.11-64bit/0001_25aaa479b0743330431ac0111829c4cd62c11ce5_20241120_015637.json b/.benchmarks/Darwin-CPython-3.11-64bit/0001_25aaa479b0743330431ac0111829c4cd62c11ce5_20241120_015637.json new file mode 100644 index 000000000..1bf1cf009 --- /dev/null +++ b/.benchmarks/Darwin-CPython-3.11-64bit/0001_25aaa479b0743330431ac0111829c4cd62c11ce5_20241120_015637.json @@ -0,0 +1,78 @@ +{ + "machine_info": { + "node": "diotimac2.local", + "processor": "arm", + "machine": "arm64", + "python_compiler": "Clang 18.1.8 ", + "python_implementation": "CPython", + "python_implementation_version": "3.11.10", + "python_version": "3.11.10", + "python_build": [ + "main", + "Oct 1 2024 00:26:49" + ], + "release": "23.4.0", + "system": "Darwin", + "cpu": { + "python_version": "3.11.10.final.0 (64 bit)", + "cpuinfo_version": [ + 9, + 0, + 0 + ], + "cpuinfo_version_string": "9.0.0", + "arch": "ARM_8", + "bits": 64, + "count": 8, + "arch_string_raw": "arm64", + "brand_raw": "Apple M1" + } + }, + "commit_info": { + "id": "25aaa479b0743330431ac0111829c4cd62c11ce5", + "time": "2024-11-20T02:56:24+01:00", + "author_time": "2024-11-20T02:25:25+01:00", + "dirty": false, + "project": "openfisca-core", + "branch": "perf/add-benchmark-to-perf-test" + }, + "benchmarks": [ + { + "group": null, + "name": "test_benchmark_enum_eq", + "fullname": "openfisca_core/indexed_enums/tests/test_enum.py::test_benchmark_enum_eq", + "params": null, + "param": null, + "extra_info": {}, + "options": { + "disable_gc": false, + "timer": "perf_counter", + "min_rounds": 5, + "max_time": 1.0, + "min_time": 5e-06, + "warmup": false + }, + "stats": { + "min": 1.5187502140179277e-06, + "max": 1.8091697711497545e-06, + "mean": 1.5324318880448118e-06, + "stddev": 2.427385849594817e-08, + "rounds": 1000, + "median": 1.5245797112584115e-06, + "iqr": 5.215406417846595e-09, + "q1": 1.522909733466804e-06, + "q3": 1.5281251398846506e-06, + "iqr_outliers": 189, + "stddev_outliers": 79, + "outliers": "79;189", + "ld15iqr": 1.5187502140179277e-06, + "hd15iqr": 1.5362497651949525e-06, + "ops": 652557.5510412231, + "total": 0.0015324318880448114, + "iterations": 100 + } + } + ], + "datetime": "2024-11-20T01:56:38.051253+00:00", + "version": "5.1.0" +} \ No newline at end of file diff --git a/openfisca_core/indexed_enums/enum.py b/openfisca_core/indexed_enums/enum.py index a733fd5da..ec1afa45a 100644 --- a/openfisca_core/indexed_enums/enum.py +++ b/openfisca_core/indexed_enums/enum.py @@ -1,120 +1,23 @@ from __future__ import annotations -from collections.abc import Sequence +import enum import numpy -from . import types as t -from ._enum_type import EnumType -from ._errors import EnumEncodingError, EnumMemberNotFoundError -from ._guards import ( - _is_enum_array, - _is_enum_array_like, - _is_int_array, - _is_int_array_like, - _is_str_array, - _is_str_array_like, -) -from ._utils import _enum_to_index, _int_to_index, _str_to_index +from .config import ENUM_ARRAY_DTYPE from .enum_array import EnumArray -class Enum(t.Enum, metaclass=EnumType): - """Enum based on `enum34 `_. - - Its items have an :class:`int` index, useful and performant when running - :mod:`~openfisca_core.simulations` on large :mod:`~openfisca_core.populations`. - - Examples: - >>> from openfisca_core import indexed_enums as enum - - >>> class Housing(enum.Enum): - ... OWNER = "Owner" - ... TENANT = "Tenant" - ... FREE_LODGER = "Free lodger" - ... HOMELESS = "Homeless" - - >>> repr(Housing) - "" - - >>> repr(Housing.TENANT) - 'Housing.TENANT' - - >>> str(Housing.TENANT) - 'Housing.TENANT' - - >>> dict([(Housing.TENANT, Housing.TENANT.value)]) - {Housing.TENANT: 'Tenant'} - - >>> list(Housing) - [Housing.OWNER, Housing.TENANT, Housing.FREE_LODGER, Housing.HOMELESS] - - >>> Housing["TENANT"] - Housing.TENANT - - >>> Housing("Tenant") - Housing.TENANT - - >>> Housing.TENANT in Housing - True - - >>> len(Housing) - 4 - - >>> Housing.TENANT == Housing.TENANT - True - - >>> Housing.TENANT != Housing.TENANT - False - - >>> Housing.TENANT.index - 1 - - >>> Housing.TENANT.name - 'TENANT' - - >>> Housing.TENANT.value - 'Tenant' - +class Enum(enum.Enum): + """Enum based on `enum34 `_, whose items + have an index. """ - #: The :attr:`index` of the :class:`.Enum` member. - index: int - - def __init__(self, *__args: object, **__kwargs: object) -> None: - """Tweak :class:`enum.Enum` to add an :attr:`.index` to each enum item. - - When the enum is initialised, ``_member_names_`` contains the names of - the already initialized items, so its length is the index of this item. - - Args: - *__args: Positional arguments. - **__kwargs: Keyword arguments. - - Examples: - >>> import numpy - - >>> from openfisca_core import indexed_enums as enum - - >>> Housing = enum.Enum("Housing", "owner tenant") - >>> Housing.tenant.index - 1 - - >>> class Housing(enum.Enum): - ... OWNER = "Owner" - ... TENANT = "Tenant" - - >>> Housing.TENANT.index - 1 - - >>> array = numpy.array([[1, 2], [3, 4]]) - >>> array[Housing.TENANT.index] - array([3, 4]) - - Note: - ``_member_names_`` is undocumented in upstream :class:`enum.Enum`. - - """ + # Tweak enums to add an index attribute to each enum item + def __init__(self, name: str) -> None: + # When the enum item is initialized, self._member_names_ contains the + # names of the previously initialized items, so its length is the index + # of this item. self.index = len(self._member_names_) # Bypass the slow Enum.__eq__ @@ -124,102 +27,64 @@ def __init__(self, *__args: object, **__kwargs: object) -> None: # hashable. __hash__ = object.__hash__ - def __repr__(self) -> str: - return f"{self.__class__.__name__}.{self.name}" - @classmethod - def encode(cls, array: t.VarArray | t.ArrayLike[object]) -> t.EnumArray: - """Encode an encodable array into an :class:`.EnumArray`. - - Args: - array: :class:`~numpy.ndarray` to encode. - - Returns: - EnumArray: An :class:`.EnumArray` with the encoded input values. - - Examples: - >>> import numpy - - >>> from openfisca_core import indexed_enums as enum - - >>> class Housing(enum.Enum): - ... OWNER = "Owner" - ... TENANT = "Tenant" - - # EnumArray - - >>> array = numpy.array([1]) - >>> enum_array = enum.EnumArray(array, Housing) - >>> Housing.encode(enum_array) - EnumArray([Housing.TENANT]) - - # Array of Enum - - >>> array = numpy.array([Housing.TENANT]) - >>> enum_array = Housing.encode(array) - >>> enum_array == Housing.TENANT - array([ True]) - - # Array of integers - - >>> array = numpy.array([1]) - >>> enum_array = Housing.encode(array) - >>> enum_array == Housing.TENANT - array([ True]) - - # Array of strings - - >>> array = numpy.array(["TENANT"]) - >>> enum_array = Housing.encode(array) - >>> enum_array == Housing.TENANT - array([ True]) - - # Array of bytes - - >>> array = numpy.array([b"TENANT"]) - >>> enum_array = Housing.encode(array) - Traceback (most recent call last): - EnumEncodingError: Failed to encode "[b'TENANT']" of type 'bytes... - - .. seealso:: - :meth:`.EnumArray.decode` for decoding. - + def encode( + cls, + array: EnumArray | numpy.int32 | numpy.float32 | numpy.object_, + ) -> EnumArray: + """Encode a string numpy array, an enum item numpy array, or an int numpy + array into an :any:`EnumArray`. See :any:`EnumArray.decode` for + decoding. + + :param numpy.ndarray array: Array of string identifiers, or of enum + items, to encode. + + :returns: An :any:`EnumArray` encoding the input array values. + :rtype: :any:`EnumArray` + + For instance: + + >>> string_identifier_array = asarray(["free_lodger", "owner"]) + >>> encoded_array = HousingOccupancyStatus.encode(string_identifier_array) + >>> encoded_array[0] + 2 # Encoded value + + >>> free_lodger = HousingOccupancyStatus.free_lodger + >>> owner = HousingOccupancyStatus.owner + >>> enum_item_array = asarray([free_lodger, owner]) + >>> encoded_array = HousingOccupancyStatus.encode(enum_item_array) + >>> encoded_array[0] + 2 # Encoded value """ if isinstance(array, EnumArray): return array - if len(array) == 0: - return EnumArray(numpy.asarray(array, t.EnumDType), cls) - if isinstance(array, Sequence): - return cls._encode_array_like(array) - return cls._encode_array(array) - @classmethod - def _encode_array(cls, value: t.VarArray) -> t.EnumArray: - if _is_int_array(value): - indices = _int_to_index(cls, value) - elif _is_str_array(value): # type: ignore[unreachable] - indices = _str_to_index(cls, value) - elif _is_enum_array(value) and cls.__name__ is value[0].__class__.__name__: - indices = _enum_to_index(value) - else: - raise EnumEncodingError(cls, value) - if indices.size != len(value): - raise EnumMemberNotFoundError(cls) - return EnumArray(indices, cls) - - @classmethod - def _encode_array_like(cls, value: t.ArrayLike[object]) -> t.EnumArray: - if _is_int_array_like(value): - indices = _int_to_index(cls, value) - elif _is_str_array_like(value): # type: ignore[unreachable] - indices = _str_to_index(cls, value) - elif _is_enum_array_like(value): - indices = _enum_to_index(value) - else: - raise EnumEncodingError(cls, value) - if indices.size != len(value): - raise EnumMemberNotFoundError(cls) - return EnumArray(indices, cls) - - -__all__ = ["Enum"] + # String array + if isinstance(array, numpy.ndarray) and array.dtype.kind in {"U", "S"}: + array = numpy.select( + [array == item.name for item in cls], + [item.index for item in cls], + ).astype(ENUM_ARRAY_DTYPE) + + # Enum items arrays + elif isinstance(array, numpy.ndarray) and array.dtype.kind == "O": + # Ensure we are comparing the comparable. The problem this fixes: + # On entering this method "cls" will generally come from + # variable.possible_values, while the array values may come from + # directly importing a module containing an Enum class. However, + # variables (and hence their possible_values) are loaded by a call + # to load_module, which gives them a different identity from the + # ones imported in the usual way. + # + # So, instead of relying on the "cls" passed in, we use only its + # name to check that the values in the array, if non-empty, are of + # the right type. + if len(array) > 0 and cls.__name__ is array[0].__class__.__name__: + cls = array[0].__class__ + + array = numpy.select( + [array == item for item in cls], + [item.index for item in cls], + ).astype(ENUM_ARRAY_DTYPE) + + return EnumArray(array, cls) diff --git a/openfisca_core/indexed_enums/tests/test_enum.py b/openfisca_core/indexed_enums/tests/test_enum.py index 2e49c1742..aee2e8b39 100644 --- a/openfisca_core/indexed_enums/tests/test_enum.py +++ b/openfisca_core/indexed_enums/tests/test_enum.py @@ -133,3 +133,17 @@ def test_enum_encode_with_any_sequence(): sequence = memoryview(b"DOG") with pytest.raises(IndexError): Animal.encode(sequence) + + +# Benchmarking + + +def test_benchmark_enum_eq(benchmark): + """Benchmark the `__eq__` method.""" + + def test(): + for animal in Animal: + for colour in Colour: + assert animal != colour + + benchmark.pedantic(test, iterations=100, rounds=1000) diff --git a/setup.cfg b/setup.cfg index e6b37ba7e..32a3f962a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -72,7 +72,11 @@ py_version = 39 sections = FUTURE, TYPING, TYPES, STDLIB, THIRDPARTY, OPENFISCA, FIRSTPARTY, LOCALFOLDER [tool:pytest] -addopts = --disable-pytest-warnings --doctest-modules --showlocals +addopts = + --benchmark-skip + --disable-pytest-warnings + --doctest-modules + --showlocals doctest_optionflags = ELLIPSIS IGNORE_EXCEPTION_DETAIL NUMBER NORMALIZE_WHITESPACE python_files = **/*.py testpaths = tests diff --git a/tests/core/test_big_dataset.py b/tests/core/test_big_dataset.py index 2b6ac4e4c..020894438 100644 --- a/tests/core/test_big_dataset.py +++ b/tests/core/test_big_dataset.py @@ -57,5 +57,6 @@ def run_simulation(tax_benefit_system) -> None: def test_speed(tax_benefit_system, benchmark) -> None: def run() -> None: run_simulation(tax_benefit_system) + result = benchmark.pedantic(run, iterations=1, rounds=10) assert not result From 5dec58b6978faeca02d3b68bb8ca62cfb9a543ba Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Wed, 20 Nov 2024 03:00:29 +0100 Subject: [PATCH 03/13] revert: Enum to v43.0.0 --- openfisca_core/indexed_enums/enum.py | 289 ++++++++++++++++++++------- 1 file changed, 218 insertions(+), 71 deletions(-) diff --git a/openfisca_core/indexed_enums/enum.py b/openfisca_core/indexed_enums/enum.py index ec1afa45a..d116a56ba 100644 --- a/openfisca_core/indexed_enums/enum.py +++ b/openfisca_core/indexed_enums/enum.py @@ -1,90 +1,237 @@ from __future__ import annotations -import enum +from collections.abc import Sequence import numpy -from .config import ENUM_ARRAY_DTYPE +from . import types as t +from ._enum_type import EnumType +from ._errors import EnumEncodingError, EnumMemberNotFoundError +from ._guards import ( + _is_enum_array, + _is_enum_array_like, + _is_int_array, + _is_int_array_like, + _is_str_array, + _is_str_array_like, +) +from ._utils import _enum_to_index, _int_to_index, _str_to_index from .enum_array import EnumArray -class Enum(enum.Enum): - """Enum based on `enum34 `_, whose items - have an index. +class Enum(t.Enum, metaclass=EnumType): + """Enum based on `enum34 `_. + + Its items have an :class:`int` index, useful and performant when running + :mod:`~openfisca_core.simulations` on large :mod:`~openfisca_core.populations`. + + Examples: + >>> from openfisca_core import indexed_enums as enum + + >>> class Housing(enum.Enum): + ... OWNER = "Owner" + ... TENANT = "Tenant" + ... FREE_LODGER = "Free lodger" + ... HOMELESS = "Homeless" + + >>> repr(Housing) + "" + + >>> repr(Housing.TENANT) + 'Housing.TENANT' + + >>> str(Housing.TENANT) + 'Housing.TENANT' + + >>> dict([(Housing.TENANT, Housing.TENANT.value)]) + {Housing.TENANT: 'Tenant'} + + >>> list(Housing) + [Housing.OWNER, Housing.TENANT, Housing.FREE_LODGER, Housing.HOMELESS] + + >>> Housing["TENANT"] + Housing.TENANT + + >>> Housing("Tenant") + Housing.TENANT + + >>> Housing.TENANT in Housing + True + + >>> len(Housing) + 4 + + >>> Housing.TENANT == Housing.TENANT + True + + >>> Housing.TENANT != Housing.TENANT + False + + >>> Housing.TENANT.index + 1 + + >>> Housing.TENANT.name + 'TENANT' + + >>> Housing.TENANT.value + 'Tenant' + """ - # Tweak enums to add an index attribute to each enum item - def __init__(self, name: str) -> None: - # When the enum item is initialized, self._member_names_ contains the - # names of the previously initialized items, so its length is the index - # of this item. + #: The :attr:`index` of the :class:`.Enum` member. + index: int + + def __init__(self, *__args: object, **__kwargs: object) -> None: + """Tweak :class:`enum.Enum` to add an :attr:`.index` to each enum item. + + When the enum is initialised, ``_member_names_`` contains the names of + the already initialized items, so its length is the index of this item. + + Args: + *__args: Positional arguments. + **__kwargs: Keyword arguments. + + Examples: + >>> import numpy + + >>> from openfisca_core import indexed_enums as enum + + >>> Housing = enum.Enum("Housing", "owner tenant") + >>> Housing.tenant.index + 1 + + >>> class Housing(enum.Enum): + ... OWNER = "Owner" + ... TENANT = "Tenant" + + >>> Housing.TENANT.index + 1 + + >>> array = numpy.array([[1, 2], [3, 4]]) + >>> array[Housing.TENANT.index] + array([3, 4]) + + Note: + ``_member_names_`` is undocumented in upstream :class:`enum.Enum`. + + """ self.index = len(self._member_names_) - # Bypass the slow Enum.__eq__ - __eq__ = object.__eq__ + def __repr__(self) -> str: + return f"{self.__class__.__name__}.{self.name}" - # In Python 3, __hash__ must be defined if __eq__ is defined to stay - # hashable. - __hash__ = object.__hash__ + def __hash__(self) -> int: + return object.__hash__(self.__class__.__name__ + self.name) + + def __eq__(self, other: object) -> bool: + if ( + isinstance(other, Enum) + and self.__class__.__name__ == other.__class__.__name__ + ): + return self.index == other.index + return NotImplemented + + def __ne__(self, other: object) -> bool: + if ( + isinstance(other, Enum) + and self.__class__.__name__ == other.__class__.__name__ + ): + return self.index != other.index + return NotImplemented @classmethod - def encode( - cls, - array: EnumArray | numpy.int32 | numpy.float32 | numpy.object_, - ) -> EnumArray: - """Encode a string numpy array, an enum item numpy array, or an int numpy - array into an :any:`EnumArray`. See :any:`EnumArray.decode` for - decoding. - - :param numpy.ndarray array: Array of string identifiers, or of enum - items, to encode. - - :returns: An :any:`EnumArray` encoding the input array values. - :rtype: :any:`EnumArray` - - For instance: - - >>> string_identifier_array = asarray(["free_lodger", "owner"]) - >>> encoded_array = HousingOccupancyStatus.encode(string_identifier_array) - >>> encoded_array[0] - 2 # Encoded value - - >>> free_lodger = HousingOccupancyStatus.free_lodger - >>> owner = HousingOccupancyStatus.owner - >>> enum_item_array = asarray([free_lodger, owner]) - >>> encoded_array = HousingOccupancyStatus.encode(enum_item_array) - >>> encoded_array[0] - 2 # Encoded value + def encode(cls, array: t.VarArray | t.ArrayLike[object]) -> t.EnumArray: + """Encode an encodable array into an :class:`.EnumArray`. + + Args: + array: :class:`~numpy.ndarray` to encode. + + Returns: + EnumArray: An :class:`.EnumArray` with the encoded input values. + + Examples: + >>> import numpy + + >>> from openfisca_core import indexed_enums as enum + + >>> class Housing(enum.Enum): + ... OWNER = "Owner" + ... TENANT = "Tenant" + + # EnumArray + + >>> array = numpy.array([1]) + >>> enum_array = enum.EnumArray(array, Housing) + >>> Housing.encode(enum_array) + EnumArray([Housing.TENANT]) + + # Array of Enum + + >>> array = numpy.array([Housing.TENANT]) + >>> enum_array = Housing.encode(array) + >>> enum_array == Housing.TENANT + array([ True]) + + # Array of integers + + >>> array = numpy.array([1]) + >>> enum_array = Housing.encode(array) + >>> enum_array == Housing.TENANT + array([ True]) + + # Array of strings + + >>> array = numpy.array(["TENANT"]) + >>> enum_array = Housing.encode(array) + >>> enum_array == Housing.TENANT + array([ True]) + + # Array of bytes + + >>> array = numpy.array([b"TENANT"]) + >>> enum_array = Housing.encode(array) + Traceback (most recent call last): + EnumEncodingError: Failed to encode "[b'TENANT']" of type 'bytes... + + .. seealso:: + :meth:`.EnumArray.decode` for decoding. + """ if isinstance(array, EnumArray): return array + if len(array) == 0: + return EnumArray(numpy.asarray(array, t.EnumDType), cls) + if isinstance(array, Sequence): + return cls._encode_array_like(array) + return cls._encode_array(array) - # String array - if isinstance(array, numpy.ndarray) and array.dtype.kind in {"U", "S"}: - array = numpy.select( - [array == item.name for item in cls], - [item.index for item in cls], - ).astype(ENUM_ARRAY_DTYPE) - - # Enum items arrays - elif isinstance(array, numpy.ndarray) and array.dtype.kind == "O": - # Ensure we are comparing the comparable. The problem this fixes: - # On entering this method "cls" will generally come from - # variable.possible_values, while the array values may come from - # directly importing a module containing an Enum class. However, - # variables (and hence their possible_values) are loaded by a call - # to load_module, which gives them a different identity from the - # ones imported in the usual way. - # - # So, instead of relying on the "cls" passed in, we use only its - # name to check that the values in the array, if non-empty, are of - # the right type. - if len(array) > 0 and cls.__name__ is array[0].__class__.__name__: - cls = array[0].__class__ - - array = numpy.select( - [array == item for item in cls], - [item.index for item in cls], - ).astype(ENUM_ARRAY_DTYPE) - - return EnumArray(array, cls) + @classmethod + def _encode_array(cls, value: t.VarArray) -> t.EnumArray: + if _is_int_array(value): + indices = _int_to_index(cls, value) + elif _is_str_array(value): # type: ignore[unreachable] + indices = _str_to_index(cls, value) + elif _is_enum_array(value) and cls.__name__ is value[0].__class__.__name__: + indices = _enum_to_index(value) + else: + raise EnumEncodingError(cls, value) + if indices.size != len(value): + raise EnumMemberNotFoundError(cls) + return EnumArray(indices, cls) + + @classmethod + def _encode_array_like(cls, value: t.ArrayLike[object]) -> t.EnumArray: + if _is_int_array_like(value): + indices = _int_to_index(cls, value) + elif _is_str_array_like(value): # type: ignore[unreachable] + indices = _str_to_index(cls, value) + elif _is_enum_array_like(value): + indices = _enum_to_index(value) + else: + raise EnumEncodingError(cls, value) + if indices.size != len(value): + raise EnumMemberNotFoundError(cls) + return EnumArray(indices, cls) + + +__all__ = ["Enum"] From d5060fc80a79f6e24ab6a06b757d01ceab1e1b30 Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Wed, 20 Nov 2024 03:41:25 +0100 Subject: [PATCH 04/13] revert: Enum to v42.0.0 --- ...ac0111829c4cd62c11ce5_20241120_015637.json | 78 ----- ...9622680b51da6758aa1a3_20241120_024209.json | 113 +++++++ openfisca_core/indexed_enums/enum.py | 289 +++++------------- .../indexed_enums/tests/test_enum.py | 13 +- 4 files changed, 196 insertions(+), 297 deletions(-) delete mode 100644 .benchmarks/Darwin-CPython-3.11-64bit/0001_25aaa479b0743330431ac0111829c4cd62c11ce5_20241120_015637.json create mode 100644 .benchmarks/Darwin-CPython-3.11-64bit/0001_8e9da042ae83a00d3d49622680b51da6758aa1a3_20241120_024209.json diff --git a/.benchmarks/Darwin-CPython-3.11-64bit/0001_25aaa479b0743330431ac0111829c4cd62c11ce5_20241120_015637.json b/.benchmarks/Darwin-CPython-3.11-64bit/0001_25aaa479b0743330431ac0111829c4cd62c11ce5_20241120_015637.json deleted file mode 100644 index 1bf1cf009..000000000 --- a/.benchmarks/Darwin-CPython-3.11-64bit/0001_25aaa479b0743330431ac0111829c4cd62c11ce5_20241120_015637.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "machine_info": { - "node": "diotimac2.local", - "processor": "arm", - "machine": "arm64", - "python_compiler": "Clang 18.1.8 ", - "python_implementation": "CPython", - "python_implementation_version": "3.11.10", - "python_version": "3.11.10", - "python_build": [ - "main", - "Oct 1 2024 00:26:49" - ], - "release": "23.4.0", - "system": "Darwin", - "cpu": { - "python_version": "3.11.10.final.0 (64 bit)", - "cpuinfo_version": [ - 9, - 0, - 0 - ], - "cpuinfo_version_string": "9.0.0", - "arch": "ARM_8", - "bits": 64, - "count": 8, - "arch_string_raw": "arm64", - "brand_raw": "Apple M1" - } - }, - "commit_info": { - "id": "25aaa479b0743330431ac0111829c4cd62c11ce5", - "time": "2024-11-20T02:56:24+01:00", - "author_time": "2024-11-20T02:25:25+01:00", - "dirty": false, - "project": "openfisca-core", - "branch": "perf/add-benchmark-to-perf-test" - }, - "benchmarks": [ - { - "group": null, - "name": "test_benchmark_enum_eq", - "fullname": "openfisca_core/indexed_enums/tests/test_enum.py::test_benchmark_enum_eq", - "params": null, - "param": null, - "extra_info": {}, - "options": { - "disable_gc": false, - "timer": "perf_counter", - "min_rounds": 5, - "max_time": 1.0, - "min_time": 5e-06, - "warmup": false - }, - "stats": { - "min": 1.5187502140179277e-06, - "max": 1.8091697711497545e-06, - "mean": 1.5324318880448118e-06, - "stddev": 2.427385849594817e-08, - "rounds": 1000, - "median": 1.5245797112584115e-06, - "iqr": 5.215406417846595e-09, - "q1": 1.522909733466804e-06, - "q3": 1.5281251398846506e-06, - "iqr_outliers": 189, - "stddev_outliers": 79, - "outliers": "79;189", - "ld15iqr": 1.5187502140179277e-06, - "hd15iqr": 1.5362497651949525e-06, - "ops": 652557.5510412231, - "total": 0.0015324318880448114, - "iterations": 100 - } - } - ], - "datetime": "2024-11-20T01:56:38.051253+00:00", - "version": "5.1.0" -} \ No newline at end of file diff --git a/.benchmarks/Darwin-CPython-3.11-64bit/0001_8e9da042ae83a00d3d49622680b51da6758aa1a3_20241120_024209.json b/.benchmarks/Darwin-CPython-3.11-64bit/0001_8e9da042ae83a00d3d49622680b51da6758aa1a3_20241120_024209.json new file mode 100644 index 000000000..22a52e83e --- /dev/null +++ b/.benchmarks/Darwin-CPython-3.11-64bit/0001_8e9da042ae83a00d3d49622680b51da6758aa1a3_20241120_024209.json @@ -0,0 +1,113 @@ +{ + "machine_info": { + "node": "diotimac2.local", + "processor": "arm", + "machine": "arm64", + "python_compiler": "Clang 18.1.8 ", + "python_implementation": "CPython", + "python_implementation_version": "3.11.10", + "python_version": "3.11.10", + "python_build": [ + "main", + "Oct 1 2024 00:26:49" + ], + "release": "23.4.0", + "system": "Darwin", + "cpu": { + "python_version": "3.11.10.final.0 (64 bit)", + "cpuinfo_version": [ + 9, + 0, + 0 + ], + "cpuinfo_version_string": "9.0.0", + "arch": "ARM_8", + "bits": 64, + "count": 8, + "arch_string_raw": "arm64", + "brand_raw": "Apple M1" + } + }, + "commit_info": { + "id": "8e9da042ae83a00d3d49622680b51da6758aa1a3", + "time": "2024-11-20T03:41:52+01:00", + "author_time": "2024-11-20T03:41:25+01:00", + "dirty": false, + "project": "openfisca-core", + "branch": "perf/add-benchmark-to-perf-test" + }, + "benchmarks": [ + { + "group": null, + "name": "test_benchmark_enum_eq", + "fullname": "openfisca_core/indexed_enums/tests/test_enum.py::test_benchmark_enum_eq", + "params": null, + "param": null, + "extra_info": {}, + "options": { + "disable_gc": false, + "timer": "perf_counter", + "min_rounds": 5, + "max_time": 1.0, + "min_time": 5e-06, + "warmup": false + }, + "stats": { + "min": 1.4624965842813253e-06, + "max": 5.641701864078641e-06, + "mean": 1.530727793695405e-06, + "stddev": 1.3133383248007482e-07, + "rounds": 50000, + "median": 1.5124969650059938e-06, + "iqr": 1.6595004126429473e-08, + "q1": 1.5042023733258247e-06, + "q3": 1.5207973774522542e-06, + "iqr_outliers": 4656, + "stddev_outliers": 956, + "outliers": "956;4656", + "ld15iqr": 1.4833000022917987e-06, + "hd15iqr": 1.5457975678145885e-06, + "ops": 653284.0156941818, + "total": 0.07653638968476802, + "iterations": 10 + } + }, + { + "group": null, + "name": "test_benchmark_enum_encode", + "fullname": "openfisca_core/indexed_enums/tests/test_enum.py::test_benchmark_enum_encode", + "params": null, + "param": null, + "extra_info": {}, + "options": { + "disable_gc": false, + "timer": "perf_counter", + "min_rounds": 5, + "max_time": 1.0, + "min_time": 5e-06, + "warmup": false + }, + "stats": { + "min": 3.5170797491446135e-05, + "max": 0.0013176416046917438, + "mean": 3.781159137981013e-05, + "stddev": 1.4478194055223582e-05, + "rounds": 50000, + "median": 3.6212499253451824e-05, + "iqr": 7.58402165956802e-07, + "q1": 3.602079814299941e-05, + "q3": 3.677920030895621e-05, + "iqr_outliers": 4857, + "stddev_outliers": 870, + "outliers": "870;4857", + "ld15iqr": 3.5170797491446135e-05, + "hd15iqr": 3.792079514823854e-05, + "ops": 26446.916501217667, + "total": 1.890579568990506, + "iterations": 10 + } + } + ], + "datetime": "2024-11-20T02:42:30.223904+00:00", + "version": "5.1.0" +} \ No newline at end of file diff --git a/openfisca_core/indexed_enums/enum.py b/openfisca_core/indexed_enums/enum.py index d116a56ba..ec1afa45a 100644 --- a/openfisca_core/indexed_enums/enum.py +++ b/openfisca_core/indexed_enums/enum.py @@ -1,237 +1,90 @@ from __future__ import annotations -from collections.abc import Sequence +import enum import numpy -from . import types as t -from ._enum_type import EnumType -from ._errors import EnumEncodingError, EnumMemberNotFoundError -from ._guards import ( - _is_enum_array, - _is_enum_array_like, - _is_int_array, - _is_int_array_like, - _is_str_array, - _is_str_array_like, -) -from ._utils import _enum_to_index, _int_to_index, _str_to_index +from .config import ENUM_ARRAY_DTYPE from .enum_array import EnumArray -class Enum(t.Enum, metaclass=EnumType): - """Enum based on `enum34 `_. - - Its items have an :class:`int` index, useful and performant when running - :mod:`~openfisca_core.simulations` on large :mod:`~openfisca_core.populations`. - - Examples: - >>> from openfisca_core import indexed_enums as enum - - >>> class Housing(enum.Enum): - ... OWNER = "Owner" - ... TENANT = "Tenant" - ... FREE_LODGER = "Free lodger" - ... HOMELESS = "Homeless" - - >>> repr(Housing) - "" - - >>> repr(Housing.TENANT) - 'Housing.TENANT' - - >>> str(Housing.TENANT) - 'Housing.TENANT' - - >>> dict([(Housing.TENANT, Housing.TENANT.value)]) - {Housing.TENANT: 'Tenant'} - - >>> list(Housing) - [Housing.OWNER, Housing.TENANT, Housing.FREE_LODGER, Housing.HOMELESS] - - >>> Housing["TENANT"] - Housing.TENANT - - >>> Housing("Tenant") - Housing.TENANT - - >>> Housing.TENANT in Housing - True - - >>> len(Housing) - 4 - - >>> Housing.TENANT == Housing.TENANT - True - - >>> Housing.TENANT != Housing.TENANT - False - - >>> Housing.TENANT.index - 1 - - >>> Housing.TENANT.name - 'TENANT' - - >>> Housing.TENANT.value - 'Tenant' - +class Enum(enum.Enum): + """Enum based on `enum34 `_, whose items + have an index. """ - #: The :attr:`index` of the :class:`.Enum` member. - index: int - - def __init__(self, *__args: object, **__kwargs: object) -> None: - """Tweak :class:`enum.Enum` to add an :attr:`.index` to each enum item. - - When the enum is initialised, ``_member_names_`` contains the names of - the already initialized items, so its length is the index of this item. - - Args: - *__args: Positional arguments. - **__kwargs: Keyword arguments. - - Examples: - >>> import numpy - - >>> from openfisca_core import indexed_enums as enum - - >>> Housing = enum.Enum("Housing", "owner tenant") - >>> Housing.tenant.index - 1 - - >>> class Housing(enum.Enum): - ... OWNER = "Owner" - ... TENANT = "Tenant" - - >>> Housing.TENANT.index - 1 - - >>> array = numpy.array([[1, 2], [3, 4]]) - >>> array[Housing.TENANT.index] - array([3, 4]) - - Note: - ``_member_names_`` is undocumented in upstream :class:`enum.Enum`. - - """ + # Tweak enums to add an index attribute to each enum item + def __init__(self, name: str) -> None: + # When the enum item is initialized, self._member_names_ contains the + # names of the previously initialized items, so its length is the index + # of this item. self.index = len(self._member_names_) - def __repr__(self) -> str: - return f"{self.__class__.__name__}.{self.name}" + # Bypass the slow Enum.__eq__ + __eq__ = object.__eq__ - def __hash__(self) -> int: - return object.__hash__(self.__class__.__name__ + self.name) - - def __eq__(self, other: object) -> bool: - if ( - isinstance(other, Enum) - and self.__class__.__name__ == other.__class__.__name__ - ): - return self.index == other.index - return NotImplemented - - def __ne__(self, other: object) -> bool: - if ( - isinstance(other, Enum) - and self.__class__.__name__ == other.__class__.__name__ - ): - return self.index != other.index - return NotImplemented + # In Python 3, __hash__ must be defined if __eq__ is defined to stay + # hashable. + __hash__ = object.__hash__ @classmethod - def encode(cls, array: t.VarArray | t.ArrayLike[object]) -> t.EnumArray: - """Encode an encodable array into an :class:`.EnumArray`. - - Args: - array: :class:`~numpy.ndarray` to encode. - - Returns: - EnumArray: An :class:`.EnumArray` with the encoded input values. - - Examples: - >>> import numpy - - >>> from openfisca_core import indexed_enums as enum - - >>> class Housing(enum.Enum): - ... OWNER = "Owner" - ... TENANT = "Tenant" - - # EnumArray - - >>> array = numpy.array([1]) - >>> enum_array = enum.EnumArray(array, Housing) - >>> Housing.encode(enum_array) - EnumArray([Housing.TENANT]) - - # Array of Enum - - >>> array = numpy.array([Housing.TENANT]) - >>> enum_array = Housing.encode(array) - >>> enum_array == Housing.TENANT - array([ True]) - - # Array of integers - - >>> array = numpy.array([1]) - >>> enum_array = Housing.encode(array) - >>> enum_array == Housing.TENANT - array([ True]) - - # Array of strings - - >>> array = numpy.array(["TENANT"]) - >>> enum_array = Housing.encode(array) - >>> enum_array == Housing.TENANT - array([ True]) - - # Array of bytes - - >>> array = numpy.array([b"TENANT"]) - >>> enum_array = Housing.encode(array) - Traceback (most recent call last): - EnumEncodingError: Failed to encode "[b'TENANT']" of type 'bytes... - - .. seealso:: - :meth:`.EnumArray.decode` for decoding. - + def encode( + cls, + array: EnumArray | numpy.int32 | numpy.float32 | numpy.object_, + ) -> EnumArray: + """Encode a string numpy array, an enum item numpy array, or an int numpy + array into an :any:`EnumArray`. See :any:`EnumArray.decode` for + decoding. + + :param numpy.ndarray array: Array of string identifiers, or of enum + items, to encode. + + :returns: An :any:`EnumArray` encoding the input array values. + :rtype: :any:`EnumArray` + + For instance: + + >>> string_identifier_array = asarray(["free_lodger", "owner"]) + >>> encoded_array = HousingOccupancyStatus.encode(string_identifier_array) + >>> encoded_array[0] + 2 # Encoded value + + >>> free_lodger = HousingOccupancyStatus.free_lodger + >>> owner = HousingOccupancyStatus.owner + >>> enum_item_array = asarray([free_lodger, owner]) + >>> encoded_array = HousingOccupancyStatus.encode(enum_item_array) + >>> encoded_array[0] + 2 # Encoded value """ if isinstance(array, EnumArray): return array - if len(array) == 0: - return EnumArray(numpy.asarray(array, t.EnumDType), cls) - if isinstance(array, Sequence): - return cls._encode_array_like(array) - return cls._encode_array(array) - @classmethod - def _encode_array(cls, value: t.VarArray) -> t.EnumArray: - if _is_int_array(value): - indices = _int_to_index(cls, value) - elif _is_str_array(value): # type: ignore[unreachable] - indices = _str_to_index(cls, value) - elif _is_enum_array(value) and cls.__name__ is value[0].__class__.__name__: - indices = _enum_to_index(value) - else: - raise EnumEncodingError(cls, value) - if indices.size != len(value): - raise EnumMemberNotFoundError(cls) - return EnumArray(indices, cls) - - @classmethod - def _encode_array_like(cls, value: t.ArrayLike[object]) -> t.EnumArray: - if _is_int_array_like(value): - indices = _int_to_index(cls, value) - elif _is_str_array_like(value): # type: ignore[unreachable] - indices = _str_to_index(cls, value) - elif _is_enum_array_like(value): - indices = _enum_to_index(value) - else: - raise EnumEncodingError(cls, value) - if indices.size != len(value): - raise EnumMemberNotFoundError(cls) - return EnumArray(indices, cls) - - -__all__ = ["Enum"] + # String array + if isinstance(array, numpy.ndarray) and array.dtype.kind in {"U", "S"}: + array = numpy.select( + [array == item.name for item in cls], + [item.index for item in cls], + ).astype(ENUM_ARRAY_DTYPE) + + # Enum items arrays + elif isinstance(array, numpy.ndarray) and array.dtype.kind == "O": + # Ensure we are comparing the comparable. The problem this fixes: + # On entering this method "cls" will generally come from + # variable.possible_values, while the array values may come from + # directly importing a module containing an Enum class. However, + # variables (and hence their possible_values) are loaded by a call + # to load_module, which gives them a different identity from the + # ones imported in the usual way. + # + # So, instead of relying on the "cls" passed in, we use only its + # name to check that the values in the array, if non-empty, are of + # the right type. + if len(array) > 0 and cls.__name__ is array[0].__class__.__name__: + cls = array[0].__class__ + + array = numpy.select( + [array == item for item in cls], + [item.index for item in cls], + ).astype(ENUM_ARRAY_DTYPE) + + return EnumArray(array, cls) diff --git a/openfisca_core/indexed_enums/tests/test_enum.py b/openfisca_core/indexed_enums/tests/test_enum.py index aee2e8b39..7966269e1 100644 --- a/openfisca_core/indexed_enums/tests/test_enum.py +++ b/openfisca_core/indexed_enums/tests/test_enum.py @@ -146,4 +146,15 @@ def test(): for colour in Colour: assert animal != colour - benchmark.pedantic(test, iterations=100, rounds=1000) + benchmark.pedantic(test, iterations=10, rounds=50000) + + +def test_benchmark_enum_encode(benchmark): + """Benchmark the `Enum.encode` method.""" + array = numpy.array([Animal.DOG, Animal.CAT, Animal.DOG]) + + def test(): + enum_array = Animal.encode(array) + assert_array_equal(enum_array, numpy.array([1, 0, 1])) + + benchmark.pedantic(test, iterations=10, rounds=50000) From 82f0055fc648633f86aaccfe3df383ca51101743 Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Wed, 20 Nov 2024 03:44:19 +0100 Subject: [PATCH 05/13] revert: Enum to v43.2.2 --- openfisca_core/indexed_enums/enum.py | 289 ++++++++++++++++++++------- 1 file changed, 218 insertions(+), 71 deletions(-) diff --git a/openfisca_core/indexed_enums/enum.py b/openfisca_core/indexed_enums/enum.py index ec1afa45a..d116a56ba 100644 --- a/openfisca_core/indexed_enums/enum.py +++ b/openfisca_core/indexed_enums/enum.py @@ -1,90 +1,237 @@ from __future__ import annotations -import enum +from collections.abc import Sequence import numpy -from .config import ENUM_ARRAY_DTYPE +from . import types as t +from ._enum_type import EnumType +from ._errors import EnumEncodingError, EnumMemberNotFoundError +from ._guards import ( + _is_enum_array, + _is_enum_array_like, + _is_int_array, + _is_int_array_like, + _is_str_array, + _is_str_array_like, +) +from ._utils import _enum_to_index, _int_to_index, _str_to_index from .enum_array import EnumArray -class Enum(enum.Enum): - """Enum based on `enum34 `_, whose items - have an index. +class Enum(t.Enum, metaclass=EnumType): + """Enum based on `enum34 `_. + + Its items have an :class:`int` index, useful and performant when running + :mod:`~openfisca_core.simulations` on large :mod:`~openfisca_core.populations`. + + Examples: + >>> from openfisca_core import indexed_enums as enum + + >>> class Housing(enum.Enum): + ... OWNER = "Owner" + ... TENANT = "Tenant" + ... FREE_LODGER = "Free lodger" + ... HOMELESS = "Homeless" + + >>> repr(Housing) + "" + + >>> repr(Housing.TENANT) + 'Housing.TENANT' + + >>> str(Housing.TENANT) + 'Housing.TENANT' + + >>> dict([(Housing.TENANT, Housing.TENANT.value)]) + {Housing.TENANT: 'Tenant'} + + >>> list(Housing) + [Housing.OWNER, Housing.TENANT, Housing.FREE_LODGER, Housing.HOMELESS] + + >>> Housing["TENANT"] + Housing.TENANT + + >>> Housing("Tenant") + Housing.TENANT + + >>> Housing.TENANT in Housing + True + + >>> len(Housing) + 4 + + >>> Housing.TENANT == Housing.TENANT + True + + >>> Housing.TENANT != Housing.TENANT + False + + >>> Housing.TENANT.index + 1 + + >>> Housing.TENANT.name + 'TENANT' + + >>> Housing.TENANT.value + 'Tenant' + """ - # Tweak enums to add an index attribute to each enum item - def __init__(self, name: str) -> None: - # When the enum item is initialized, self._member_names_ contains the - # names of the previously initialized items, so its length is the index - # of this item. + #: The :attr:`index` of the :class:`.Enum` member. + index: int + + def __init__(self, *__args: object, **__kwargs: object) -> None: + """Tweak :class:`enum.Enum` to add an :attr:`.index` to each enum item. + + When the enum is initialised, ``_member_names_`` contains the names of + the already initialized items, so its length is the index of this item. + + Args: + *__args: Positional arguments. + **__kwargs: Keyword arguments. + + Examples: + >>> import numpy + + >>> from openfisca_core import indexed_enums as enum + + >>> Housing = enum.Enum("Housing", "owner tenant") + >>> Housing.tenant.index + 1 + + >>> class Housing(enum.Enum): + ... OWNER = "Owner" + ... TENANT = "Tenant" + + >>> Housing.TENANT.index + 1 + + >>> array = numpy.array([[1, 2], [3, 4]]) + >>> array[Housing.TENANT.index] + array([3, 4]) + + Note: + ``_member_names_`` is undocumented in upstream :class:`enum.Enum`. + + """ self.index = len(self._member_names_) - # Bypass the slow Enum.__eq__ - __eq__ = object.__eq__ + def __repr__(self) -> str: + return f"{self.__class__.__name__}.{self.name}" - # In Python 3, __hash__ must be defined if __eq__ is defined to stay - # hashable. - __hash__ = object.__hash__ + def __hash__(self) -> int: + return object.__hash__(self.__class__.__name__ + self.name) + + def __eq__(self, other: object) -> bool: + if ( + isinstance(other, Enum) + and self.__class__.__name__ == other.__class__.__name__ + ): + return self.index == other.index + return NotImplemented + + def __ne__(self, other: object) -> bool: + if ( + isinstance(other, Enum) + and self.__class__.__name__ == other.__class__.__name__ + ): + return self.index != other.index + return NotImplemented @classmethod - def encode( - cls, - array: EnumArray | numpy.int32 | numpy.float32 | numpy.object_, - ) -> EnumArray: - """Encode a string numpy array, an enum item numpy array, or an int numpy - array into an :any:`EnumArray`. See :any:`EnumArray.decode` for - decoding. - - :param numpy.ndarray array: Array of string identifiers, or of enum - items, to encode. - - :returns: An :any:`EnumArray` encoding the input array values. - :rtype: :any:`EnumArray` - - For instance: - - >>> string_identifier_array = asarray(["free_lodger", "owner"]) - >>> encoded_array = HousingOccupancyStatus.encode(string_identifier_array) - >>> encoded_array[0] - 2 # Encoded value - - >>> free_lodger = HousingOccupancyStatus.free_lodger - >>> owner = HousingOccupancyStatus.owner - >>> enum_item_array = asarray([free_lodger, owner]) - >>> encoded_array = HousingOccupancyStatus.encode(enum_item_array) - >>> encoded_array[0] - 2 # Encoded value + def encode(cls, array: t.VarArray | t.ArrayLike[object]) -> t.EnumArray: + """Encode an encodable array into an :class:`.EnumArray`. + + Args: + array: :class:`~numpy.ndarray` to encode. + + Returns: + EnumArray: An :class:`.EnumArray` with the encoded input values. + + Examples: + >>> import numpy + + >>> from openfisca_core import indexed_enums as enum + + >>> class Housing(enum.Enum): + ... OWNER = "Owner" + ... TENANT = "Tenant" + + # EnumArray + + >>> array = numpy.array([1]) + >>> enum_array = enum.EnumArray(array, Housing) + >>> Housing.encode(enum_array) + EnumArray([Housing.TENANT]) + + # Array of Enum + + >>> array = numpy.array([Housing.TENANT]) + >>> enum_array = Housing.encode(array) + >>> enum_array == Housing.TENANT + array([ True]) + + # Array of integers + + >>> array = numpy.array([1]) + >>> enum_array = Housing.encode(array) + >>> enum_array == Housing.TENANT + array([ True]) + + # Array of strings + + >>> array = numpy.array(["TENANT"]) + >>> enum_array = Housing.encode(array) + >>> enum_array == Housing.TENANT + array([ True]) + + # Array of bytes + + >>> array = numpy.array([b"TENANT"]) + >>> enum_array = Housing.encode(array) + Traceback (most recent call last): + EnumEncodingError: Failed to encode "[b'TENANT']" of type 'bytes... + + .. seealso:: + :meth:`.EnumArray.decode` for decoding. + """ if isinstance(array, EnumArray): return array + if len(array) == 0: + return EnumArray(numpy.asarray(array, t.EnumDType), cls) + if isinstance(array, Sequence): + return cls._encode_array_like(array) + return cls._encode_array(array) - # String array - if isinstance(array, numpy.ndarray) and array.dtype.kind in {"U", "S"}: - array = numpy.select( - [array == item.name for item in cls], - [item.index for item in cls], - ).astype(ENUM_ARRAY_DTYPE) - - # Enum items arrays - elif isinstance(array, numpy.ndarray) and array.dtype.kind == "O": - # Ensure we are comparing the comparable. The problem this fixes: - # On entering this method "cls" will generally come from - # variable.possible_values, while the array values may come from - # directly importing a module containing an Enum class. However, - # variables (and hence their possible_values) are loaded by a call - # to load_module, which gives them a different identity from the - # ones imported in the usual way. - # - # So, instead of relying on the "cls" passed in, we use only its - # name to check that the values in the array, if non-empty, are of - # the right type. - if len(array) > 0 and cls.__name__ is array[0].__class__.__name__: - cls = array[0].__class__ - - array = numpy.select( - [array == item for item in cls], - [item.index for item in cls], - ).astype(ENUM_ARRAY_DTYPE) - - return EnumArray(array, cls) + @classmethod + def _encode_array(cls, value: t.VarArray) -> t.EnumArray: + if _is_int_array(value): + indices = _int_to_index(cls, value) + elif _is_str_array(value): # type: ignore[unreachable] + indices = _str_to_index(cls, value) + elif _is_enum_array(value) and cls.__name__ is value[0].__class__.__name__: + indices = _enum_to_index(value) + else: + raise EnumEncodingError(cls, value) + if indices.size != len(value): + raise EnumMemberNotFoundError(cls) + return EnumArray(indices, cls) + + @classmethod + def _encode_array_like(cls, value: t.ArrayLike[object]) -> t.EnumArray: + if _is_int_array_like(value): + indices = _int_to_index(cls, value) + elif _is_str_array_like(value): # type: ignore[unreachable] + indices = _str_to_index(cls, value) + elif _is_enum_array_like(value): + indices = _enum_to_index(value) + else: + raise EnumEncodingError(cls, value) + if indices.size != len(value): + raise EnumMemberNotFoundError(cls) + return EnumArray(indices, cls) + + +__all__ = ["Enum"] From 5fc125d1888f0f60de9eae84116846f5068df69e Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Wed, 20 Nov 2024 03:44:19 +0100 Subject: [PATCH 06/13] revert: Enum to v43.2.2 --- ...accfe3df383ca51101743_20241120_025231.json | 113 ++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 .benchmarks/Darwin-CPython-3.11-64bit/0002_82f0055fc648633f86aaccfe3df383ca51101743_20241120_025231.json diff --git a/.benchmarks/Darwin-CPython-3.11-64bit/0002_82f0055fc648633f86aaccfe3df383ca51101743_20241120_025231.json b/.benchmarks/Darwin-CPython-3.11-64bit/0002_82f0055fc648633f86aaccfe3df383ca51101743_20241120_025231.json new file mode 100644 index 000000000..63be1d537 --- /dev/null +++ b/.benchmarks/Darwin-CPython-3.11-64bit/0002_82f0055fc648633f86aaccfe3df383ca51101743_20241120_025231.json @@ -0,0 +1,113 @@ +{ + "machine_info": { + "node": "diotimac2.local", + "processor": "arm", + "machine": "arm64", + "python_compiler": "Clang 18.1.8 ", + "python_implementation": "CPython", + "python_implementation_version": "3.11.10", + "python_version": "3.11.10", + "python_build": [ + "main", + "Oct 1 2024 00:26:49" + ], + "release": "23.4.0", + "system": "Darwin", + "cpu": { + "python_version": "3.11.10.final.0 (64 bit)", + "cpuinfo_version": [ + 9, + 0, + 0 + ], + "cpuinfo_version_string": "9.0.0", + "arch": "ARM_8", + "bits": 64, + "count": 8, + "arch_string_raw": "arm64", + "brand_raw": "Apple M1" + } + }, + "commit_info": { + "id": "82f0055fc648633f86aaccfe3df383ca51101743", + "time": "2024-11-20T03:44:19+01:00", + "author_time": "2024-11-20T03:44:19+01:00", + "dirty": false, + "project": "openfisca-core", + "branch": "perf/add-benchmark-to-perf-test" + }, + "benchmarks": [ + { + "group": null, + "name": "test_benchmark_enum_eq", + "fullname": "openfisca_core/indexed_enums/tests/test_enum.py::test_benchmark_enum_eq", + "params": null, + "param": null, + "extra_info": {}, + "options": { + "disable_gc": false, + "timer": "perf_counter", + "min_rounds": 5, + "max_time": 1.0, + "min_time": 5e-06, + "warmup": false + }, + "stats": { + "min": 3.875000402331352e-06, + "max": 1.7379102064296604e-05, + "mean": 4.058212952688336e-06, + "stddev": 2.664279038707121e-07, + "rounds": 50000, + "median": 3.9874983485788105e-06, + "iqr": 9.170034900307621e-08, + "q1": 3.975001163780689e-06, + "q3": 4.0667015127837655e-06, + "iqr_outliers": 4861, + "stddev_outliers": 2098, + "outliers": "2098;4861", + "ld15iqr": 3.875000402331352e-06, + "hd15iqr": 4.208297468721867e-06, + "ops": 246413.88011379747, + "total": 0.20291064763441605, + "iterations": 10 + } + }, + { + "group": null, + "name": "test_benchmark_enum_encode", + "fullname": "openfisca_core/indexed_enums/tests/test_enum.py::test_benchmark_enum_encode", + "params": null, + "param": null, + "extra_info": {}, + "options": { + "disable_gc": false, + "timer": "perf_counter", + "min_rounds": 5, + "max_time": 1.0, + "min_time": 5e-06, + "warmup": false + }, + "stats": { + "min": 1.9433395937085152e-05, + "max": 0.0001467082998715341, + "mean": 2.0089991154964083e-05, + "stddev": 9.781553270547485e-07, + "rounds": 50000, + "median": 1.996250357478857e-05, + "iqr": 1.6250414773822102e-07, + "q1": 1.991249737329781e-05, + "q3": 2.007500152103603e-05, + "iqr_outliers": 5036, + "stddev_outliers": 1594, + "outliers": "1594;5036", + "ld15iqr": 1.967080170288682e-05, + "hd15iqr": 2.0320795010775326e-05, + "ops": 49776.02987908308, + "total": 1.0044995577481972, + "iterations": 10 + } + } + ], + "datetime": "2024-11-20T02:52:44.676031+00:00", + "version": "5.1.0" +} \ No newline at end of file From 572da3be0971bfcdceeeea20737fab6c046cca74 Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Wed, 20 Nov 2024 04:13:42 +0100 Subject: [PATCH 07/13] test: add benchmark groups --- ...accfe3df383ca51101743_20241120_025231.json | 113 ------------------ .../indexed_enums/tests/test_enum.py | 2 + 2 files changed, 2 insertions(+), 113 deletions(-) delete mode 100644 .benchmarks/Darwin-CPython-3.11-64bit/0002_82f0055fc648633f86aaccfe3df383ca51101743_20241120_025231.json diff --git a/.benchmarks/Darwin-CPython-3.11-64bit/0002_82f0055fc648633f86aaccfe3df383ca51101743_20241120_025231.json b/.benchmarks/Darwin-CPython-3.11-64bit/0002_82f0055fc648633f86aaccfe3df383ca51101743_20241120_025231.json deleted file mode 100644 index 63be1d537..000000000 --- a/.benchmarks/Darwin-CPython-3.11-64bit/0002_82f0055fc648633f86aaccfe3df383ca51101743_20241120_025231.json +++ /dev/null @@ -1,113 +0,0 @@ -{ - "machine_info": { - "node": "diotimac2.local", - "processor": "arm", - "machine": "arm64", - "python_compiler": "Clang 18.1.8 ", - "python_implementation": "CPython", - "python_implementation_version": "3.11.10", - "python_version": "3.11.10", - "python_build": [ - "main", - "Oct 1 2024 00:26:49" - ], - "release": "23.4.0", - "system": "Darwin", - "cpu": { - "python_version": "3.11.10.final.0 (64 bit)", - "cpuinfo_version": [ - 9, - 0, - 0 - ], - "cpuinfo_version_string": "9.0.0", - "arch": "ARM_8", - "bits": 64, - "count": 8, - "arch_string_raw": "arm64", - "brand_raw": "Apple M1" - } - }, - "commit_info": { - "id": "82f0055fc648633f86aaccfe3df383ca51101743", - "time": "2024-11-20T03:44:19+01:00", - "author_time": "2024-11-20T03:44:19+01:00", - "dirty": false, - "project": "openfisca-core", - "branch": "perf/add-benchmark-to-perf-test" - }, - "benchmarks": [ - { - "group": null, - "name": "test_benchmark_enum_eq", - "fullname": "openfisca_core/indexed_enums/tests/test_enum.py::test_benchmark_enum_eq", - "params": null, - "param": null, - "extra_info": {}, - "options": { - "disable_gc": false, - "timer": "perf_counter", - "min_rounds": 5, - "max_time": 1.0, - "min_time": 5e-06, - "warmup": false - }, - "stats": { - "min": 3.875000402331352e-06, - "max": 1.7379102064296604e-05, - "mean": 4.058212952688336e-06, - "stddev": 2.664279038707121e-07, - "rounds": 50000, - "median": 3.9874983485788105e-06, - "iqr": 9.170034900307621e-08, - "q1": 3.975001163780689e-06, - "q3": 4.0667015127837655e-06, - "iqr_outliers": 4861, - "stddev_outliers": 2098, - "outliers": "2098;4861", - "ld15iqr": 3.875000402331352e-06, - "hd15iqr": 4.208297468721867e-06, - "ops": 246413.88011379747, - "total": 0.20291064763441605, - "iterations": 10 - } - }, - { - "group": null, - "name": "test_benchmark_enum_encode", - "fullname": "openfisca_core/indexed_enums/tests/test_enum.py::test_benchmark_enum_encode", - "params": null, - "param": null, - "extra_info": {}, - "options": { - "disable_gc": false, - "timer": "perf_counter", - "min_rounds": 5, - "max_time": 1.0, - "min_time": 5e-06, - "warmup": false - }, - "stats": { - "min": 1.9433395937085152e-05, - "max": 0.0001467082998715341, - "mean": 2.0089991154964083e-05, - "stddev": 9.781553270547485e-07, - "rounds": 50000, - "median": 1.996250357478857e-05, - "iqr": 1.6250414773822102e-07, - "q1": 1.991249737329781e-05, - "q3": 2.007500152103603e-05, - "iqr_outliers": 5036, - "stddev_outliers": 1594, - "outliers": "1594;5036", - "ld15iqr": 1.967080170288682e-05, - "hd15iqr": 2.0320795010775326e-05, - "ops": 49776.02987908308, - "total": 1.0044995577481972, - "iterations": 10 - } - } - ], - "datetime": "2024-11-20T02:52:44.676031+00:00", - "version": "5.1.0" -} \ No newline at end of file diff --git a/openfisca_core/indexed_enums/tests/test_enum.py b/openfisca_core/indexed_enums/tests/test_enum.py index 7966269e1..03614203a 100644 --- a/openfisca_core/indexed_enums/tests/test_enum.py +++ b/openfisca_core/indexed_enums/tests/test_enum.py @@ -138,6 +138,7 @@ def test_enum_encode_with_any_sequence(): # Benchmarking +@pytest.mark.benchmark(group="Enum.__eq__") def test_benchmark_enum_eq(benchmark): """Benchmark the `__eq__` method.""" @@ -149,6 +150,7 @@ def test(): benchmark.pedantic(test, iterations=10, rounds=50000) +@pytest.mark.benchmark(group="Enum.encode") def test_benchmark_enum_encode(benchmark): """Benchmark the `Enum.encode` method.""" array = numpy.array([Animal.DOG, Animal.CAT, Animal.DOG]) From 196145a9b9890a3ab1482ca1706b577a2559a527 Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Wed, 20 Nov 2024 05:45:18 +0100 Subject: [PATCH 08/13] perf: improve Enum.__eq__ --- openfisca_core/indexed_enums/_enum_type.py | 9 +++++++++ openfisca_core/indexed_enums/enum.py | 21 +-------------------- openfisca_core/indexed_enums/enum_array.py | 10 ++-------- 3 files changed, 12 insertions(+), 28 deletions(-) diff --git a/openfisca_core/indexed_enums/_enum_type.py b/openfisca_core/indexed_enums/_enum_type.py index 8083a6d49..9b95364a2 100644 --- a/openfisca_core/indexed_enums/_enum_type.py +++ b/openfisca_core/indexed_enums/_enum_type.py @@ -66,5 +66,14 @@ def __new__( def __dir__(cls) -> list[str]: return sorted({"indices", "names", "enums", *super().__dir__()}) + def __hash__(cls) -> int: + return object.__hash__(cls.__name__) + + def __eq__(cls, other: object) -> bool: + return hash(cls) == hash(other) + + def __ne__(cls, other: object) -> bool: + return hash(cls) != hash(other) + __all__ = ["EnumType"] diff --git a/openfisca_core/indexed_enums/enum.py b/openfisca_core/indexed_enums/enum.py index d116a56ba..43a893e85 100644 --- a/openfisca_core/indexed_enums/enum.py +++ b/openfisca_core/indexed_enums/enum.py @@ -120,25 +120,6 @@ def __init__(self, *__args: object, **__kwargs: object) -> None: def __repr__(self) -> str: return f"{self.__class__.__name__}.{self.name}" - def __hash__(self) -> int: - return object.__hash__(self.__class__.__name__ + self.name) - - def __eq__(self, other: object) -> bool: - if ( - isinstance(other, Enum) - and self.__class__.__name__ == other.__class__.__name__ - ): - return self.index == other.index - return NotImplemented - - def __ne__(self, other: object) -> bool: - if ( - isinstance(other, Enum) - and self.__class__.__name__ == other.__class__.__name__ - ): - return self.index != other.index - return NotImplemented - @classmethod def encode(cls, array: t.VarArray | t.ArrayLike[object]) -> t.EnumArray: """Encode an encodable array into an :class:`.EnumArray`. @@ -211,7 +192,7 @@ def _encode_array(cls, value: t.VarArray) -> t.EnumArray: indices = _int_to_index(cls, value) elif _is_str_array(value): # type: ignore[unreachable] indices = _str_to_index(cls, value) - elif _is_enum_array(value) and cls.__name__ is value[0].__class__.__name__: + elif _is_enum_array(value) and cls == value[0].__class__: indices = _enum_to_index(value) else: raise EnumEncodingError(cls, value) diff --git a/openfisca_core/indexed_enums/enum_array.py b/openfisca_core/indexed_enums/enum_array.py index 98f9b4c6a..65bc209a7 100644 --- a/openfisca_core/indexed_enums/enum_array.py +++ b/openfisca_core/indexed_enums/enum_array.py @@ -153,10 +153,7 @@ def __eq__(self, other: object) -> t.BoolArray: # type: ignore[override] return NotImplemented if other is None: return NotImplemented - if ( - isinstance(other, type(t.Enum)) - and other.__name__ is self.possible_values.__name__ - ): + if isinstance(other, type(t.Enum)) and other == self.possible_values: result = ( self.view(numpy.ndarray) == self.possible_values.indices[ @@ -164,10 +161,7 @@ def __eq__(self, other: object) -> t.BoolArray: # type: ignore[override] ] ) return result - if ( - isinstance(other, t.Enum) - and other.__class__.__name__ is self.possible_values.__name__ - ): + if isinstance(other, t.Enum) and other.__class__ == self.possible_values: result = self.view(numpy.ndarray) == other.index return result # For NumPy >=1.26.x. From e8b0acf18413d142a4778eb63f40c518f32b94bc Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Wed, 20 Nov 2024 10:27:23 +0100 Subject: [PATCH 09/13] test: add perf tests to EnumArray --- ...70_20241120_093422_uncommited-changes.json | 218 ++++++++++++++++++ ...9622680b51da6758aa1a3_20241120_024209.json | 113 --------- .../indexed_enums/tests/test_enum.py | 16 +- .../indexed_enums/tests/test_enum_array.py | 42 ++++ 4 files changed, 268 insertions(+), 121 deletions(-) create mode 100644 .benchmarks/Darwin-CPython-3.11-64bit/0001_3f253dbc281cabc8fc3a234037c880da28954a70_20241120_093422_uncommited-changes.json delete mode 100644 .benchmarks/Darwin-CPython-3.11-64bit/0001_8e9da042ae83a00d3d49622680b51da6758aa1a3_20241120_024209.json diff --git a/.benchmarks/Darwin-CPython-3.11-64bit/0001_3f253dbc281cabc8fc3a234037c880da28954a70_20241120_093422_uncommited-changes.json b/.benchmarks/Darwin-CPython-3.11-64bit/0001_3f253dbc281cabc8fc3a234037c880da28954a70_20241120_093422_uncommited-changes.json new file mode 100644 index 000000000..7af71c464 --- /dev/null +++ b/.benchmarks/Darwin-CPython-3.11-64bit/0001_3f253dbc281cabc8fc3a234037c880da28954a70_20241120_093422_uncommited-changes.json @@ -0,0 +1,218 @@ +{ + "machine_info": { + "node": "diotimac2.local", + "processor": "arm", + "machine": "arm64", + "python_compiler": "Clang 18.1.8 ", + "python_implementation": "CPython", + "python_implementation_version": "3.11.10", + "python_version": "3.11.10", + "python_build": [ + "main", + "Oct 1 2024 00:26:49" + ], + "release": "23.4.0", + "system": "Darwin", + "cpu": { + "python_version": "3.11.10.final.0 (64 bit)", + "cpuinfo_version": [ + 9, + 0, + 0 + ], + "cpuinfo_version_string": "9.0.0", + "arch": "ARM_8", + "bits": 64, + "count": 8, + "arch_string_raw": "arm64", + "brand_raw": "Apple M1" + } + }, + "commit_info": { + "id": "3f253dbc281cabc8fc3a234037c880da28954a70", + "time": "2024-11-20T10:27:33+01:00", + "author_time": "2024-11-20T10:27:23+01:00", + "dirty": true, + "project": "openfisca-core", + "branch": "perf/add-benchmark-to-perf-test" + }, + "benchmarks": [ + { + "group": "Enum.__eq__", + "name": "test_benchmark_enum_eq", + "fullname": "openfisca_core/indexed_enums/tests/test_enum.py::test_benchmark_enum_eq", + "params": null, + "param": null, + "extra_info": {}, + "options": { + "disable_gc": false, + "timer": "perf_counter", + "min_rounds": 5, + "max_time": 1.0, + "min_time": 5e-06, + "warmup": false + }, + "stats": { + "min": 6.088025000099151e-06, + "max": 6.279845799872419e-06, + "mean": 6.152403132000472e-06, + "stddev": 3.371609720219699e-08, + "rounds": 100, + "median": 6.1525833501036685e-06, + "iqr": 4.103329993085945e-08, + "q1": 6.129739600055473e-06, + "q3": 6.170772899986332e-06, + "iqr_outliers": 3, + "stddev_outliers": 29, + "outliers": "29;3", + "ld15iqr": 6.088025000099151e-06, + "hd15iqr": 6.234904200027813e-06, + "ops": 162538.11373294174, + "total": 0.0006152403132000474, + "iterations": 10000 + } + }, + { + "group": "Enum.encode", + "name": "test_benchmark_enum_encode", + "fullname": "openfisca_core/indexed_enums/tests/test_enum.py::test_benchmark_enum_encode", + "params": null, + "param": null, + "extra_info": {}, + "options": { + "disable_gc": false, + "timer": "perf_counter", + "min_rounds": 5, + "max_time": 1.0, + "min_time": 5e-06, + "warmup": false + }, + "stats": { + "min": 0.0011622333000559592, + "max": 0.001228883299882, + "mean": 0.0011774051709980994, + "stddev": 1.044335238367826e-05, + "rounds": 100, + "median": 0.0011762395999539877, + "iqr": 9.822950050874819e-06, + "q1": 0.001170379149971268, + "q3": 0.001180202100022143, + "iqr_outliers": 6, + "stddev_outliers": 20, + "outliers": "20;6", + "ld15iqr": 0.0011622333000559592, + "hd15iqr": 0.0011961624999457855, + "ops": 849.3252999324686, + "total": 0.11774051709980991, + "iterations": 10 + } + }, + { + "group": "EnumArray.__eq__", + "name": "test_benchmark_enum_array_eq", + "fullname": "openfisca_core/indexed_enums/tests/test_enum_array.py::test_benchmark_enum_array_eq", + "params": null, + "param": null, + "extra_info": {}, + "options": { + "disable_gc": false, + "timer": "perf_counter", + "min_rounds": 5, + "max_time": 1.0, + "min_time": 5e-06, + "warmup": false + }, + "stats": { + "min": 8.837080004013843e-06, + "max": 9.454999999434221e-06, + "mean": 8.931775500968797e-06, + "stddev": 9.783811489338906e-08, + "rounds": 100, + "median": 8.893335007087444e-06, + "iqr": 1.1583500054257429e-07, + "q1": 8.863125003699679e-06, + "q3": 8.978960004242253e-06, + "iqr_outliers": 3, + "stddev_outliers": 10, + "outliers": "10;3", + "ld15iqr": 8.837080004013843e-06, + "hd15iqr": 9.189590000460158e-06, + "ops": 111959.82253377662, + "total": 0.0008931775500968795, + "iterations": 100 + } + }, + { + "group": "EnumArray.decode", + "name": "test_benchmark_enum_array_decode", + "fullname": "openfisca_core/indexed_enums/tests/test_enum_array.py::test_benchmark_enum_array_decode", + "params": null, + "param": null, + "extra_info": {}, + "options": { + "disable_gc": false, + "timer": "perf_counter", + "min_rounds": 5, + "max_time": 1.0, + "min_time": 5e-06, + "warmup": false + }, + "stats": { + "min": 0.0007508641700042062, + "max": 0.0013628329199855216, + "mean": 0.0007727886379994743, + "stddev": 7.314420787878281e-05, + "rounds": 100, + "median": 0.0007575662499948522, + "iqr": 1.1167909988216802e-05, + "q1": 0.0007547029200031829, + "q3": 0.0007658708299913997, + "iqr_outliers": 9, + "stddev_outliers": 2, + "outliers": "2;9", + "ld15iqr": 0.0007508641700042062, + "hd15iqr": 0.0007828712500122493, + "ops": 1294.0148843139184, + "total": 0.0772788637999474, + "iterations": 100 + } + }, + { + "group": "EnumArray.decode_to_str", + "name": "test_benchmark_enum_array_decode_to_str", + "fullname": "openfisca_core/indexed_enums/tests/test_enum_array.py::test_benchmark_enum_array_decode_to_str", + "params": null, + "param": null, + "extra_info": {}, + "options": { + "disable_gc": false, + "timer": "perf_counter", + "min_rounds": 5, + "max_time": 1.0, + "min_time": 5e-06, + "warmup": false + }, + "stats": { + "min": 0.0008599420899918186, + "max": 0.000904484170005162, + "mean": 0.000868757312199159, + "stddev": 8.210009405774617e-06, + "rounds": 100, + "median": 0.000866150829997423, + "iqr": 4.8420749953947875e-06, + "q1": 0.0008642458350004745, + "q3": 0.0008690879099958693, + "iqr_outliers": 12, + "stddev_outliers": 13, + "outliers": "13;12", + "ld15iqr": 0.0008599420899918186, + "hd15iqr": 0.0008774329200059583, + "ops": 1151.069448231308, + "total": 0.0868757312199159, + "iterations": 100 + } + } + ], + "datetime": "2024-11-20T09:34:47.304646+00:00", + "version": "5.1.0" +} \ No newline at end of file diff --git a/.benchmarks/Darwin-CPython-3.11-64bit/0001_8e9da042ae83a00d3d49622680b51da6758aa1a3_20241120_024209.json b/.benchmarks/Darwin-CPython-3.11-64bit/0001_8e9da042ae83a00d3d49622680b51da6758aa1a3_20241120_024209.json deleted file mode 100644 index 22a52e83e..000000000 --- a/.benchmarks/Darwin-CPython-3.11-64bit/0001_8e9da042ae83a00d3d49622680b51da6758aa1a3_20241120_024209.json +++ /dev/null @@ -1,113 +0,0 @@ -{ - "machine_info": { - "node": "diotimac2.local", - "processor": "arm", - "machine": "arm64", - "python_compiler": "Clang 18.1.8 ", - "python_implementation": "CPython", - "python_implementation_version": "3.11.10", - "python_version": "3.11.10", - "python_build": [ - "main", - "Oct 1 2024 00:26:49" - ], - "release": "23.4.0", - "system": "Darwin", - "cpu": { - "python_version": "3.11.10.final.0 (64 bit)", - "cpuinfo_version": [ - 9, - 0, - 0 - ], - "cpuinfo_version_string": "9.0.0", - "arch": "ARM_8", - "bits": 64, - "count": 8, - "arch_string_raw": "arm64", - "brand_raw": "Apple M1" - } - }, - "commit_info": { - "id": "8e9da042ae83a00d3d49622680b51da6758aa1a3", - "time": "2024-11-20T03:41:52+01:00", - "author_time": "2024-11-20T03:41:25+01:00", - "dirty": false, - "project": "openfisca-core", - "branch": "perf/add-benchmark-to-perf-test" - }, - "benchmarks": [ - { - "group": null, - "name": "test_benchmark_enum_eq", - "fullname": "openfisca_core/indexed_enums/tests/test_enum.py::test_benchmark_enum_eq", - "params": null, - "param": null, - "extra_info": {}, - "options": { - "disable_gc": false, - "timer": "perf_counter", - "min_rounds": 5, - "max_time": 1.0, - "min_time": 5e-06, - "warmup": false - }, - "stats": { - "min": 1.4624965842813253e-06, - "max": 5.641701864078641e-06, - "mean": 1.530727793695405e-06, - "stddev": 1.3133383248007482e-07, - "rounds": 50000, - "median": 1.5124969650059938e-06, - "iqr": 1.6595004126429473e-08, - "q1": 1.5042023733258247e-06, - "q3": 1.5207973774522542e-06, - "iqr_outliers": 4656, - "stddev_outliers": 956, - "outliers": "956;4656", - "ld15iqr": 1.4833000022917987e-06, - "hd15iqr": 1.5457975678145885e-06, - "ops": 653284.0156941818, - "total": 0.07653638968476802, - "iterations": 10 - } - }, - { - "group": null, - "name": "test_benchmark_enum_encode", - "fullname": "openfisca_core/indexed_enums/tests/test_enum.py::test_benchmark_enum_encode", - "params": null, - "param": null, - "extra_info": {}, - "options": { - "disable_gc": false, - "timer": "perf_counter", - "min_rounds": 5, - "max_time": 1.0, - "min_time": 5e-06, - "warmup": false - }, - "stats": { - "min": 3.5170797491446135e-05, - "max": 0.0013176416046917438, - "mean": 3.781159137981013e-05, - "stddev": 1.4478194055223582e-05, - "rounds": 50000, - "median": 3.6212499253451824e-05, - "iqr": 7.58402165956802e-07, - "q1": 3.602079814299941e-05, - "q3": 3.677920030895621e-05, - "iqr_outliers": 4857, - "stddev_outliers": 870, - "outliers": "870;4857", - "ld15iqr": 3.5170797491446135e-05, - "hd15iqr": 3.792079514823854e-05, - "ops": 26446.916501217667, - "total": 1.890579568990506, - "iterations": 10 - } - } - ], - "datetime": "2024-11-20T02:42:30.223904+00:00", - "version": "5.1.0" -} \ No newline at end of file diff --git a/openfisca_core/indexed_enums/tests/test_enum.py b/openfisca_core/indexed_enums/tests/test_enum.py index 03614203a..6d355f06c 100644 --- a/openfisca_core/indexed_enums/tests/test_enum.py +++ b/openfisca_core/indexed_enums/tests/test_enum.py @@ -141,22 +141,22 @@ def test_enum_encode_with_any_sequence(): @pytest.mark.benchmark(group="Enum.__eq__") def test_benchmark_enum_eq(benchmark): """Benchmark the `__eq__` method.""" + array = numpy.random.choice([*list(Animal), *list(Colour)], size=50000) def test(): - for animal in Animal: - for colour in Colour: - assert animal != colour + animal_1, animal_2 = numpy.random.choice(array, size=2) + animal_1 == animal_2 + animal_1 != animal_2 - benchmark.pedantic(test, iterations=10, rounds=50000) + benchmark.pedantic(test, iterations=10000, rounds=100) @pytest.mark.benchmark(group="Enum.encode") def test_benchmark_enum_encode(benchmark): """Benchmark the `Enum.encode` method.""" - array = numpy.array([Animal.DOG, Animal.CAT, Animal.DOG]) + array = numpy.random.choice(["INCARNADINE", "TURQUOISE", "AMARANTH"], size=50000) def test(): - enum_array = Animal.encode(array) - assert_array_equal(enum_array, numpy.array([1, 0, 1])) + Colour.encode(array) - benchmark.pedantic(test, iterations=10, rounds=50000) + benchmark.pedantic(test, iterations=10, rounds=100) diff --git a/openfisca_core/indexed_enums/tests/test_enum_array.py b/openfisca_core/indexed_enums/tests/test_enum_array.py index 1ab247468..854c65ddf 100644 --- a/openfisca_core/indexed_enums/tests/test_enum_array.py +++ b/openfisca_core/indexed_enums/tests/test_enum_array.py @@ -28,3 +28,45 @@ def test_enum_array_any_other_operation(enum_array): """Only equality and non-equality operations are permitted.""" with pytest.raises(TypeError, match="Forbidden operation."): enum_array * 1 + + +# Benchmarking + + +@pytest.mark.benchmark(group="EnumArray.__eq__") +def test_benchmark_enum_array_eq(benchmark): + """Benchmark the `EnumArray.__eq__` method.""" + array_1 = numpy.random.choice(list(Fruit), size=50000) + array_2 = numpy.random.choice(list(Fruit), size=50000) + enum_array_1 = Fruit.encode(array_1) + enum_array_2 = Fruit.encode(array_2) + + def test(): + enum_array_1 == enum_array_2 + enum_array_1 != enum_array_2 + + benchmark.pedantic(test, iterations=100, rounds=100) + + +@pytest.mark.benchmark(group="EnumArray.decode") +def test_benchmark_enum_array_decode(benchmark): + """Benchmark the `EnumArray.decode` method.""" + array = numpy.random.choice(list(Fruit), size=50000) + enum_array = Fruit.encode(array) + + def test(): + enum_array.decode() + + benchmark.pedantic(test, iterations=100, rounds=100) + + +@pytest.mark.benchmark(group="EnumArray.decode_to_str") +def test_benchmark_enum_array_decode_to_str(benchmark): + """Benchmark the `EnumArray.decode_to_str` method.""" + array = numpy.random.choice(list(Fruit), size=50000) + enum_array = Fruit.encode(array) + + def test(): + enum_array.decode_to_str() + + benchmark.pedantic(test, iterations=100, rounds=100) From 0ef333faea386d088f386469ccf8e5bc94b67d94 Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Wed, 20 Nov 2024 11:28:46 +0100 Subject: [PATCH 10/13] test: add missing perf tests --- ...70_20241120_093422_uncommited-changes.json | 218 ------------- ...bc_20241120_094745_uncommited-changes.json | 288 ++++++++++++++++++ .../indexed_enums/tests/test_enum.py | 26 +- 3 files changed, 312 insertions(+), 220 deletions(-) delete mode 100644 .benchmarks/Darwin-CPython-3.11-64bit/0001_3f253dbc281cabc8fc3a234037c880da28954a70_20241120_093422_uncommited-changes.json create mode 100644 .benchmarks/Darwin-CPython-3.11-64bit/0001_e8b0acf18413d142a4778eb63f40c518f32b94bc_20241120_094745_uncommited-changes.json diff --git a/.benchmarks/Darwin-CPython-3.11-64bit/0001_3f253dbc281cabc8fc3a234037c880da28954a70_20241120_093422_uncommited-changes.json b/.benchmarks/Darwin-CPython-3.11-64bit/0001_3f253dbc281cabc8fc3a234037c880da28954a70_20241120_093422_uncommited-changes.json deleted file mode 100644 index 7af71c464..000000000 --- a/.benchmarks/Darwin-CPython-3.11-64bit/0001_3f253dbc281cabc8fc3a234037c880da28954a70_20241120_093422_uncommited-changes.json +++ /dev/null @@ -1,218 +0,0 @@ -{ - "machine_info": { - "node": "diotimac2.local", - "processor": "arm", - "machine": "arm64", - "python_compiler": "Clang 18.1.8 ", - "python_implementation": "CPython", - "python_implementation_version": "3.11.10", - "python_version": "3.11.10", - "python_build": [ - "main", - "Oct 1 2024 00:26:49" - ], - "release": "23.4.0", - "system": "Darwin", - "cpu": { - "python_version": "3.11.10.final.0 (64 bit)", - "cpuinfo_version": [ - 9, - 0, - 0 - ], - "cpuinfo_version_string": "9.0.0", - "arch": "ARM_8", - "bits": 64, - "count": 8, - "arch_string_raw": "arm64", - "brand_raw": "Apple M1" - } - }, - "commit_info": { - "id": "3f253dbc281cabc8fc3a234037c880da28954a70", - "time": "2024-11-20T10:27:33+01:00", - "author_time": "2024-11-20T10:27:23+01:00", - "dirty": true, - "project": "openfisca-core", - "branch": "perf/add-benchmark-to-perf-test" - }, - "benchmarks": [ - { - "group": "Enum.__eq__", - "name": "test_benchmark_enum_eq", - "fullname": "openfisca_core/indexed_enums/tests/test_enum.py::test_benchmark_enum_eq", - "params": null, - "param": null, - "extra_info": {}, - "options": { - "disable_gc": false, - "timer": "perf_counter", - "min_rounds": 5, - "max_time": 1.0, - "min_time": 5e-06, - "warmup": false - }, - "stats": { - "min": 6.088025000099151e-06, - "max": 6.279845799872419e-06, - "mean": 6.152403132000472e-06, - "stddev": 3.371609720219699e-08, - "rounds": 100, - "median": 6.1525833501036685e-06, - "iqr": 4.103329993085945e-08, - "q1": 6.129739600055473e-06, - "q3": 6.170772899986332e-06, - "iqr_outliers": 3, - "stddev_outliers": 29, - "outliers": "29;3", - "ld15iqr": 6.088025000099151e-06, - "hd15iqr": 6.234904200027813e-06, - "ops": 162538.11373294174, - "total": 0.0006152403132000474, - "iterations": 10000 - } - }, - { - "group": "Enum.encode", - "name": "test_benchmark_enum_encode", - "fullname": "openfisca_core/indexed_enums/tests/test_enum.py::test_benchmark_enum_encode", - "params": null, - "param": null, - "extra_info": {}, - "options": { - "disable_gc": false, - "timer": "perf_counter", - "min_rounds": 5, - "max_time": 1.0, - "min_time": 5e-06, - "warmup": false - }, - "stats": { - "min": 0.0011622333000559592, - "max": 0.001228883299882, - "mean": 0.0011774051709980994, - "stddev": 1.044335238367826e-05, - "rounds": 100, - "median": 0.0011762395999539877, - "iqr": 9.822950050874819e-06, - "q1": 0.001170379149971268, - "q3": 0.001180202100022143, - "iqr_outliers": 6, - "stddev_outliers": 20, - "outliers": "20;6", - "ld15iqr": 0.0011622333000559592, - "hd15iqr": 0.0011961624999457855, - "ops": 849.3252999324686, - "total": 0.11774051709980991, - "iterations": 10 - } - }, - { - "group": "EnumArray.__eq__", - "name": "test_benchmark_enum_array_eq", - "fullname": "openfisca_core/indexed_enums/tests/test_enum_array.py::test_benchmark_enum_array_eq", - "params": null, - "param": null, - "extra_info": {}, - "options": { - "disable_gc": false, - "timer": "perf_counter", - "min_rounds": 5, - "max_time": 1.0, - "min_time": 5e-06, - "warmup": false - }, - "stats": { - "min": 8.837080004013843e-06, - "max": 9.454999999434221e-06, - "mean": 8.931775500968797e-06, - "stddev": 9.783811489338906e-08, - "rounds": 100, - "median": 8.893335007087444e-06, - "iqr": 1.1583500054257429e-07, - "q1": 8.863125003699679e-06, - "q3": 8.978960004242253e-06, - "iqr_outliers": 3, - "stddev_outliers": 10, - "outliers": "10;3", - "ld15iqr": 8.837080004013843e-06, - "hd15iqr": 9.189590000460158e-06, - "ops": 111959.82253377662, - "total": 0.0008931775500968795, - "iterations": 100 - } - }, - { - "group": "EnumArray.decode", - "name": "test_benchmark_enum_array_decode", - "fullname": "openfisca_core/indexed_enums/tests/test_enum_array.py::test_benchmark_enum_array_decode", - "params": null, - "param": null, - "extra_info": {}, - "options": { - "disable_gc": false, - "timer": "perf_counter", - "min_rounds": 5, - "max_time": 1.0, - "min_time": 5e-06, - "warmup": false - }, - "stats": { - "min": 0.0007508641700042062, - "max": 0.0013628329199855216, - "mean": 0.0007727886379994743, - "stddev": 7.314420787878281e-05, - "rounds": 100, - "median": 0.0007575662499948522, - "iqr": 1.1167909988216802e-05, - "q1": 0.0007547029200031829, - "q3": 0.0007658708299913997, - "iqr_outliers": 9, - "stddev_outliers": 2, - "outliers": "2;9", - "ld15iqr": 0.0007508641700042062, - "hd15iqr": 0.0007828712500122493, - "ops": 1294.0148843139184, - "total": 0.0772788637999474, - "iterations": 100 - } - }, - { - "group": "EnumArray.decode_to_str", - "name": "test_benchmark_enum_array_decode_to_str", - "fullname": "openfisca_core/indexed_enums/tests/test_enum_array.py::test_benchmark_enum_array_decode_to_str", - "params": null, - "param": null, - "extra_info": {}, - "options": { - "disable_gc": false, - "timer": "perf_counter", - "min_rounds": 5, - "max_time": 1.0, - "min_time": 5e-06, - "warmup": false - }, - "stats": { - "min": 0.0008599420899918186, - "max": 0.000904484170005162, - "mean": 0.000868757312199159, - "stddev": 8.210009405774617e-06, - "rounds": 100, - "median": 0.000866150829997423, - "iqr": 4.8420749953947875e-06, - "q1": 0.0008642458350004745, - "q3": 0.0008690879099958693, - "iqr_outliers": 12, - "stddev_outliers": 13, - "outliers": "13;12", - "ld15iqr": 0.0008599420899918186, - "hd15iqr": 0.0008774329200059583, - "ops": 1151.069448231308, - "total": 0.0868757312199159, - "iterations": 100 - } - } - ], - "datetime": "2024-11-20T09:34:47.304646+00:00", - "version": "5.1.0" -} \ No newline at end of file diff --git a/.benchmarks/Darwin-CPython-3.11-64bit/0001_e8b0acf18413d142a4778eb63f40c518f32b94bc_20241120_094745_uncommited-changes.json b/.benchmarks/Darwin-CPython-3.11-64bit/0001_e8b0acf18413d142a4778eb63f40c518f32b94bc_20241120_094745_uncommited-changes.json new file mode 100644 index 000000000..d96f71f7c --- /dev/null +++ b/.benchmarks/Darwin-CPython-3.11-64bit/0001_e8b0acf18413d142a4778eb63f40c518f32b94bc_20241120_094745_uncommited-changes.json @@ -0,0 +1,288 @@ +{ + "machine_info": { + "node": "diotimac2.local", + "processor": "arm", + "machine": "arm64", + "python_compiler": "Clang 18.1.8 ", + "python_implementation": "CPython", + "python_implementation_version": "3.11.10", + "python_version": "3.11.10", + "python_build": [ + "main", + "Oct 1 2024 00:26:49" + ], + "release": "23.4.0", + "system": "Darwin", + "cpu": { + "python_version": "3.11.10.final.0 (64 bit)", + "cpuinfo_version": [ + 9, + 0, + 0 + ], + "cpuinfo_version_string": "9.0.0", + "arch": "ARM_8", + "bits": 64, + "count": 8, + "arch_string_raw": "arm64", + "brand_raw": "Apple M1" + } + }, + "commit_info": { + "id": "e8b0acf18413d142a4778eb63f40c518f32b94bc", + "time": "2024-11-20T10:36:21+01:00", + "author_time": "2024-11-20T10:27:23+01:00", + "dirty": true, + "project": "openfisca-core", + "branch": "perf/add-benchmark-to-perf-test" + }, + "benchmarks": [ + { + "group": "Enum.__eq__", + "name": "test_benchmark_enum_eq", + "fullname": "openfisca_core/indexed_enums/tests/test_enum.py::test_benchmark_enum_eq", + "params": null, + "param": null, + "extra_info": {}, + "options": { + "disable_gc": false, + "timer": "perf_counter", + "min_rounds": 5, + "max_time": 1.0, + "min_time": 5e-06, + "warmup": false + }, + "stats": { + "min": 6.116424999891024e-06, + "max": 6.239233299857005e-06, + "mean": 6.1712051209888155e-06, + "stddev": 2.3556546098919103e-08, + "rounds": 100, + "median": 6.170224949983095e-06, + "iqr": 2.8364549962134198e-08, + "q1": 6.158277100075793e-06, + "q3": 6.186641650037927e-06, + "iqr_outliers": 2, + "stddev_outliers": 30, + "outliers": "30;2", + "ld15iqr": 6.116424999891024e-06, + "hd15iqr": 6.2380624998695565e-06, + "ops": 162042.90416452225, + "total": 0.0006171205120988818, + "iterations": 10000 + } + }, + { + "group": "Enum.encode (int)", + "name": "test_benchmark_enum_encode_int", + "fullname": "openfisca_core/indexed_enums/tests/test_enum.py::test_benchmark_enum_encode_int", + "params": null, + "param": null, + "extra_info": {}, + "options": { + "disable_gc": false, + "timer": "perf_counter", + "min_rounds": 5, + "max_time": 1.0, + "min_time": 5e-06, + "warmup": false + }, + "stats": { + "min": 6.000000212225132e-07, + "max": 4.333400102041196e-06, + "mean": 6.603360034205253e-07, + "stddev": 3.7118752804099704e-07, + "rounds": 100, + "median": 6.209000275703147e-07, + "iqr": 1.2449982023099353e-08, + "q1": 6.166999810375274e-07, + "q3": 6.291499630606267e-07, + "iqr_outliers": 3, + "stddev_outliers": 1, + "outliers": "1;3", + "ld15iqr": 6.000000212225132e-07, + "hd15iqr": 6.584001312148757e-07, + "ops": 1514380.5499321925, + "total": 6.603360034205245e-05, + "iterations": 10 + } + }, + { + "group": "Enum.encode (str)", + "name": "test_benchmark_enum_encode_str", + "fullname": "openfisca_core/indexed_enums/tests/test_enum.py::test_benchmark_enum_encode_str", + "params": null, + "param": null, + "extra_info": {}, + "options": { + "disable_gc": false, + "timer": "perf_counter", + "min_rounds": 5, + "max_time": 1.0, + "min_time": 5e-06, + "warmup": false + }, + "stats": { + "min": 0.0011514084000737058, + "max": 0.005457095800011302, + "mean": 0.0013338218729913934, + "stddev": 0.0005570327348628046, + "rounds": 100, + "median": 0.0011803895499724604, + "iqr": 3.827289992841543e-05, + "q1": 0.0011704437500156927, + "q3": 0.0012087166499441082, + "iqr_outliers": 16, + "stddev_outliers": 6, + "outliers": "6;16", + "ld15iqr": 0.0011514084000737058, + "hd15iqr": 0.0013195624998843414, + "ops": 749.7252970947887, + "total": 0.13338218729913934, + "iterations": 10 + } + }, + { + "group": "Enum.encode (Enum)", + "name": "test_benchmark_enum_encode_enum", + "fullname": "openfisca_core/indexed_enums/tests/test_enum.py::test_benchmark_enum_encode_enum", + "params": null, + "param": null, + "extra_info": {}, + "options": { + "disable_gc": false, + "timer": "perf_counter", + "min_rounds": 5, + "max_time": 1.0, + "min_time": 5e-06, + "warmup": false + }, + "stats": { + "min": 0.0019466166999336566, + "max": 0.0024945540999397053, + "mean": 0.0020038005400019755, + "stddev": 8.117530790455684e-05, + "rounds": 100, + "median": 0.001972604149977997, + "iqr": 5.918540000493565e-05, + "q1": 0.0019553521000489123, + "q3": 0.002014537500053848, + "iqr_outliers": 11, + "stddev_outliers": 15, + "outliers": "15;11", + "ld15iqr": 0.0019466166999336566, + "hd15iqr": 0.002103391700075008, + "ops": 499.05166708808946, + "total": 0.2003800540001976, + "iterations": 10 + } + }, + { + "group": "EnumArray.__eq__", + "name": "test_benchmark_enum_array_eq", + "fullname": "openfisca_core/indexed_enums/tests/test_enum_array.py::test_benchmark_enum_array_eq", + "params": null, + "param": null, + "extra_info": {}, + "options": { + "disable_gc": false, + "timer": "perf_counter", + "min_rounds": 5, + "max_time": 1.0, + "min_time": 5e-06, + "warmup": false + }, + "stats": { + "min": 8.86542000444024e-06, + "max": 9.384160002809948e-06, + "mean": 8.935446600662544e-06, + "stddev": 1.0087850596955905e-07, + "rounds": 100, + "median": 8.896874996935366e-06, + "iqr": 4.874999831372528e-08, + "q1": 8.881044996087438e-06, + "q3": 8.929794994401163e-06, + "iqr_outliers": 15, + "stddev_outliers": 10, + "outliers": "10;15", + "ld15iqr": 8.86542000444024e-06, + "hd15iqr": 9.002920014609117e-06, + "ops": 111913.82419831734, + "total": 0.0008935446600662543, + "iterations": 100 + } + }, + { + "group": "EnumArray.decode", + "name": "test_benchmark_enum_array_decode", + "fullname": "openfisca_core/indexed_enums/tests/test_enum_array.py::test_benchmark_enum_array_decode", + "params": null, + "param": null, + "extra_info": {}, + "options": { + "disable_gc": false, + "timer": "perf_counter", + "min_rounds": 5, + "max_time": 1.0, + "min_time": 5e-06, + "warmup": false + }, + "stats": { + "min": 0.0007523354099976131, + "max": 0.0015013620799982164, + "mean": 0.0007657737745999838, + "stddev": 7.457900895775381e-05, + "rounds": 100, + "median": 0.0007576514599986695, + "iqr": 4.303959995013377e-06, + "q1": 0.0007556031250078377, + "q3": 0.0007599070850028511, + "iqr_outliers": 3, + "stddev_outliers": 1, + "outliers": "1;3", + "ld15iqr": 0.0007523354099976131, + "hd15iqr": 0.000770621250012482, + "ops": 1305.8686953890117, + "total": 0.07657737745999839, + "iterations": 100 + } + }, + { + "group": "EnumArray.decode_to_str", + "name": "test_benchmark_enum_array_decode_to_str", + "fullname": "openfisca_core/indexed_enums/tests/test_enum_array.py::test_benchmark_enum_array_decode_to_str", + "params": null, + "param": null, + "extra_info": {}, + "options": { + "disable_gc": false, + "timer": "perf_counter", + "min_rounds": 5, + "max_time": 1.0, + "min_time": 5e-06, + "warmup": false + }, + "stats": { + "min": 0.0008616308300042875, + "max": 0.0009163904200067918, + "mean": 0.0008716657909002606, + "stddev": 9.943622783295886e-06, + "rounds": 100, + "median": 0.0008683410449975782, + "iqr": 6.7525050053518325e-06, + "q1": 0.00086608999499731, + "q3": 0.0008728425000026619, + "iqr_outliers": 11, + "stddev_outliers": 12, + "outliers": "12;11", + "ld15iqr": 0.0008616308300042875, + "hd15iqr": 0.0008845904199915822, + "ops": 1147.2286860852892, + "total": 0.08716657909002602, + "iterations": 100 + } + } + ], + "datetime": "2024-11-20T09:48:12.053939+00:00", + "version": "5.1.0" +} \ No newline at end of file diff --git a/openfisca_core/indexed_enums/tests/test_enum.py b/openfisca_core/indexed_enums/tests/test_enum.py index 6d355f06c..c6708a05c 100644 --- a/openfisca_core/indexed_enums/tests/test_enum.py +++ b/openfisca_core/indexed_enums/tests/test_enum.py @@ -151,8 +151,19 @@ def test(): benchmark.pedantic(test, iterations=10000, rounds=100) -@pytest.mark.benchmark(group="Enum.encode") -def test_benchmark_enum_encode(benchmark): +@pytest.mark.benchmark(group="Enum.encode (int)") +def test_benchmark_enum_encode_int(benchmark): + """Benchmark the `Enum.encode` method.""" + array = numpy.random.choice([0, 1, 2], size=50000) + + def test(): + Colour.encode(array) + + benchmark.pedantic(test, iterations=10, rounds=100) + + +@pytest.mark.benchmark(group="Enum.encode (str)") +def test_benchmark_enum_encode_str(benchmark): """Benchmark the `Enum.encode` method.""" array = numpy.random.choice(["INCARNADINE", "TURQUOISE", "AMARANTH"], size=50000) @@ -160,3 +171,14 @@ def test(): Colour.encode(array) benchmark.pedantic(test, iterations=10, rounds=100) + + +@pytest.mark.benchmark(group="Enum.encode (Enum)") +def test_benchmark_enum_encode_enum(benchmark): + """Benchmark the `Enum.encode` method.""" + array = numpy.random.choice(list(Colour), size=50000) + + def test(): + Colour.encode(array) + + benchmark.pedantic(test, iterations=10, rounds=100) From 84668d3d36d7a372ffcac4a3728fa6f47e6c4a07 Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Wed, 20 Nov 2024 11:29:16 +0100 Subject: [PATCH 11/13] perf: improve Enum.encode(str) by 15x --- openfisca_core/indexed_enums/_utils.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/openfisca_core/indexed_enums/_utils.py b/openfisca_core/indexed_enums/_utils.py index aa676b92f..f5af758bb 100644 --- a/openfisca_core/indexed_enums/_utils.py +++ b/openfisca_core/indexed_enums/_utils.py @@ -155,14 +155,13 @@ def _str_to_index( ... ) >>> _str_to_index(Road, "AVENUE") - array([], dtype=uint8) + array([1], dtype=uint8) >>> _str_to_index(Road, ["AVENUE"]) array([1], dtype=uint8) >>> _str_to_index(Road, numpy.array("AVENUE")) - Traceback (most recent call last): - TypeError: iteration over a 0-d array + array([1], dtype=uint8) >>> _str_to_index(Road, numpy.array(["AVENUE"])) array([1], dtype=uint8) @@ -174,14 +173,12 @@ def _str_to_index( array([1, 1], dtype=uint8) """ - return numpy.array( - [ - enum_class.__members__[name].index - for name in value - if name in enum_class._member_names_ - ], - t.EnumDType, - ) + values = numpy.array(value, copy=False) + names = enum_class.names + mask = numpy.isin(values, enum_class.names) + sorter = numpy.argsort(names) + result = sorter[numpy.searchsorted(names, values[mask], sorter=sorter)] + return result.astype(t.EnumDType) __all__ = ["_enum_to_index", "_int_to_index", "_str_to_index"] From d95a1cefb7725771cfcbbe97c9ea27af46955e54 Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Wed, 20 Nov 2024 13:24:15 +0100 Subject: [PATCH 12/13] perf: improve Enum.encode(int) by 339x --- openfisca_core/indexed_enums/_utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/openfisca_core/indexed_enums/_utils.py b/openfisca_core/indexed_enums/_utils.py index f5af758bb..8025b4ed6 100644 --- a/openfisca_core/indexed_enums/_utils.py +++ b/openfisca_core/indexed_enums/_utils.py @@ -118,9 +118,9 @@ def _int_to_index( array([1, 1], dtype=uint8) """ - return numpy.array( - [index for index in value if index < len(enum_class.__members__)], t.EnumDType - ) + indices = enum_class.indices + values = numpy.array(value, copy=False) + return values[values < indices.size].astype(t.EnumDType) def _str_to_index( @@ -175,7 +175,7 @@ def _str_to_index( """ values = numpy.array(value, copy=False) names = enum_class.names - mask = numpy.isin(values, enum_class.names) + mask = numpy.isin(values, names) sorter = numpy.argsort(names) result = sorter[numpy.searchsorted(names, values[mask], sorter=sorter)] return result.astype(t.EnumDType) From 84b1ba4c82f7cdbc624f2e6966d45606635a794f Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Thu, 21 Nov 2024 07:24:49 +0100 Subject: [PATCH 13/13] test: fix failing test --- openfisca_core/indexed_enums/_utils.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/openfisca_core/indexed_enums/_utils.py b/openfisca_core/indexed_enums/_utils.py index 8025b4ed6..67c9e741b 100644 --- a/openfisca_core/indexed_enums/_utils.py +++ b/openfisca_core/indexed_enums/_utils.py @@ -92,8 +92,7 @@ def _int_to_index( ... ) >>> _int_to_index(Road, 1) - Traceback (most recent call last): - TypeError: 'int' object is not iterable + array([1], dtype=uint8) >>> _int_to_index(Road, [1]) array([1], dtype=uint8) @@ -105,8 +104,7 @@ def _int_to_index( array([1], dtype=uint8) >>> _int_to_index(Road, numpy.array(1)) - Traceback (most recent call last): - TypeError: iteration over a 0-d array + array([1], dtype=uint8) >>> _int_to_index(Road, numpy.array([1])) array([1], dtype=uint8)