Skip to content
33 changes: 21 additions & 12 deletions Lib/dataclasses.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,8 @@ def __repr__(self):
# The name of an attribute on the class where we store the Field
# objects. Also used to check if a class is a Data Class.
_FIELDS = '__dataclass_fields__'
# The name of an attribute on the class where we store the field names.
_FIELD_NAMES = '__dataclass_field_names__'

# The name of an attribute on the class that stores the parameters to
# @dataclass.
Expand Down Expand Up @@ -1052,6 +1054,8 @@ def _process_class(cls, init, repr, eq, order, unsafe_hash, frozen,
# Remember all of the fields on our class (including bases). This
# also marks this class as being a dataclass.
setattr(cls, _FIELDS, fields)
setattr(cls, _FIELD_NAMES, tuple(f.name for f in fields.values()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you want, you can also do cls.__dataclass_field_names__ = ... directly (I would also advise you to consider this as part of the "inline" commit, so you can also force-push this one).

An other alternative is to only inline getattr(x, _FIELDS_NAMES) instead of having a standalone function for that. It could also improve readability a bit and justify the needs of the global _FIELD_NAMES.

IOW, choose among the following:

  • Hardcode __dataclass_field_names__ everywhere, without having using getattr/setattr and _FIELD_NAMES.
  • Use _FIELD_NAMES + getattr/setattr(..., _FIELD_NAMES) directly.
  • Use _FIELD_NAMES + a module-wide function defined to be the partialization _get_names(x) ~ getattr(x, _FIELD_NAMES).

My preference is (1) or (2) but (3) is an overkill IMO.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, going for option (1)

if f._field_type is _FIELD))

# Was this class defined with an explicit __hash__? Note that if
# __eq__ is defined in this class, then python will automatically
Expand Down Expand Up @@ -1196,13 +1200,13 @@ def _process_class(cls, init, repr, eq, order, unsafe_hash, frozen,
# the code instead of iterating over fields. But that can be a project for
# another day, if performance becomes an issue.
def _dataclass_getstate(self):
return [getattr(self, f.name) for f in fields(self)]
return [getattr(self, name) for name in _field_names(self)]


def _dataclass_setstate(self, state):
for field, value in zip(fields(self), state):
for field_name, value in zip(_field_names(self), state):
# use setattr because dataclass may be frozen
object.__setattr__(self, field.name, value)
object.__setattr__(self, field_name, value)


def _get_slots(cls):
Expand Down Expand Up @@ -1285,7 +1289,7 @@ def _add_slots(cls, is_frozen, weakref_slot, defined_fields):

# Create a new dict for our new class.
cls_dict = dict(cls.__dict__)
field_names = tuple(f.name for f in fields(cls))
field_names = _field_names(cls)
# Make sure slots don't overlap with those in base classes.
inherited_slots = set(
itertools.chain.from_iterable(map(_get_slots, cls.__mro__[1:-1]))
Expand Down Expand Up @@ -1377,8 +1381,6 @@ def fields(class_or_instance):
Accepts a dataclass or an instance of one. Tuple elements are of
type Field.
"""

# Might it be worth caching this, per class?
try:
fields = getattr(class_or_instance, _FIELDS)
except AttributeError:
Expand All @@ -1388,6 +1390,13 @@ def fields(class_or_instance):
# order, so the order of the tuple is as the fields were defined.
return tuple(f for f in fields.values() if f._field_type is _FIELD)

def _field_names(class_or_instance):
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Inlining this method gives a bit of performance gain:

%timeit _field_names(s)
%timeit s.__dataclass_field_names__

Results in

41.8 ns ± 1.59 ns per loop (mean ± std. dev. of 7 runs, 10,000,000 loops each)
17.8 ns ± 0.188 ns per loop (mean ± std. dev. of 7 runs, 100,000,000 loops each)

"""Return a tuple describing the field names of this dataclass.

Accepts a dataclass or an instance of one. Excludes pseudo-fields
"""

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My suggestion here is to remove the blank line after """. And I however think that it would be better to actually inline the function but considering we also use this approach for _PARAMS, it would be more consistent to keep it.

return getattr(class_or_instance, _FIELD_NAMES)

def _is_dataclass_instance(obj):
"""Returns True if obj is an instance of a dataclass."""
Expand Down Expand Up @@ -1433,13 +1442,13 @@ def _asdict_inner(obj, dict_factory):
# dataclass instance: fast path for the common case
if dict_factory is dict:
return {
f.name: _asdict_inner(getattr(obj, f.name), dict)
for f in fields(obj)
name: _asdict_inner(getattr(obj, name), dict)
for name in _field_names(obj_type)
}
else:
return dict_factory([
(f.name, _asdict_inner(getattr(obj, f.name), dict_factory))
for f in fields(obj)
(name, _asdict_inner(getattr(obj, name), dict_factory))
for name in _field_names(obj_type)
])
# handle the builtin types first for speed; subclasses handled below
elif obj_type is list:
Expand Down Expand Up @@ -1522,8 +1531,8 @@ def _astuple_inner(obj, tuple_factory):
return obj
elif _is_dataclass_instance(obj):
return tuple_factory([
_astuple_inner(getattr(obj, f.name), tuple_factory)
for f in fields(obj)
_astuple_inner(getattr(obj, name), tuple_factory)
for name in _field_names(obj)
])
elif isinstance(obj, tuple) and hasattr(obj, '_fields'):
# obj is a namedtuple. Recurse into it, but the returned
Expand Down
Loading