Skip to content
27 changes: 15 additions & 12 deletions Lib/dataclasses.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,8 @@ def __repr__(self):
# The name of an attribute on the class where we store the Field
# objects. Also used to check if a class is a Data Class.
_FIELDS = '__dataclass_fields__'
# The name of an attribute on the class where we store the field names.
_FIELD_NAMES = '__dataclass_field_names__'

# The name of an attribute on the class that stores the parameters to
# @dataclass.
Expand Down Expand Up @@ -1052,6 +1054,9 @@ def _process_class(cls, init, repr, eq, order, unsafe_hash, frozen,
# Remember all of the fields on our class (including bases). This
# also marks this class as being a dataclass.
setattr(cls, _FIELDS, fields)
# Store field names. Excludes pseudo-fields.
setattr(cls, _FIELD_NAMES, tuple(f.name for f in fields.values()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you want, you can also do cls.__dataclass_field_names__ = ... directly (I would also advise you to consider this as part of the "inline" commit, so you can also force-push this one).

An other alternative is to only inline getattr(x, _FIELDS_NAMES) instead of having a standalone function for that. It could also improve readability a bit and justify the needs of the global _FIELD_NAMES.

IOW, choose among the following:

  • Hardcode __dataclass_field_names__ everywhere, without having using getattr/setattr and _FIELD_NAMES.
  • Use _FIELD_NAMES + getattr/setattr(..., _FIELD_NAMES) directly.
  • Use _FIELD_NAMES + a module-wide function defined to be the partialization _get_names(x) ~ getattr(x, _FIELD_NAMES).

My preference is (1) or (2) but (3) is an overkill IMO.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, going for option (1)

if f._field_type is _FIELD))

# Was this class defined with an explicit __hash__? Note that if
# __eq__ is defined in this class, then python will automatically
Expand Down Expand Up @@ -1196,13 +1201,13 @@ def _process_class(cls, init, repr, eq, order, unsafe_hash, frozen,
# the code instead of iterating over fields. But that can be a project for
# another day, if performance becomes an issue.
def _dataclass_getstate(self):
return [getattr(self, f.name) for f in fields(self)]
return [getattr(self, name) for name in self.__dataclass_field_names__]


def _dataclass_setstate(self, state):
for field, value in zip(fields(self), state):
for field_name, value in zip(self.__dataclass_field_names__, state):
# use setattr because dataclass may be frozen
object.__setattr__(self, field.name, value)
object.__setattr__(self, field_name, value)


def _get_slots(cls):
Expand Down Expand Up @@ -1285,7 +1290,7 @@ def _add_slots(cls, is_frozen, weakref_slot, defined_fields):

# Create a new dict for our new class.
cls_dict = dict(cls.__dict__)
field_names = tuple(f.name for f in fields(cls))
field_names = cls.__dataclass_field_names__
# Make sure slots don't overlap with those in base classes.
inherited_slots = set(
itertools.chain.from_iterable(map(_get_slots, cls.__mro__[1:-1]))
Expand Down Expand Up @@ -1377,8 +1382,6 @@ def fields(class_or_instance):
Accepts a dataclass or an instance of one. Tuple elements are of
type Field.
"""

# Might it be worth caching this, per class?
try:
fields = getattr(class_or_instance, _FIELDS)
except AttributeError:
Expand Down Expand Up @@ -1433,13 +1436,13 @@ def _asdict_inner(obj, dict_factory):
# dataclass instance: fast path for the common case
if dict_factory is dict:
return {
f.name: _asdict_inner(getattr(obj, f.name), dict)
for f in fields(obj)
name: _asdict_inner(getattr(obj, name), dict)
for name in obj_type.__dataclass_field_names__
}
else:
return dict_factory([
(f.name, _asdict_inner(getattr(obj, f.name), dict_factory))
for f in fields(obj)
(name, _asdict_inner(getattr(obj, name), dict_factory))
for name in obj_type.__dataclass_field_names__
])
# handle the builtin types first for speed; subclasses handled below
elif obj_type is list:
Expand Down Expand Up @@ -1522,8 +1525,8 @@ def _astuple_inner(obj, tuple_factory):
return obj
elif _is_dataclass_instance(obj):
return tuple_factory([
_astuple_inner(getattr(obj, f.name), tuple_factory)
for f in fields(obj)
_astuple_inner(getattr(obj, name), tuple_factory)
for name in obj.__dataclass_field_names__
])
elif isinstance(obj, tuple) and hasattr(obj, '_fields'):
# obj is a namedtuple. Recurse into it, but the returned
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Improve performance of :func:`dataclasses.asdict` up to 40% by caching the field names on dataclass classes.
Loading