diff --git a/Lib/dataclasses.py b/Lib/dataclasses.py
index 86d29df0639184..decd97129460eb 100644
--- a/Lib/dataclasses.py
+++ b/Lib/dataclasses.py
@@ -219,7 +219,7 @@ def __repr__(self):
 # String regex that string annotations for ClassVar or InitVar must match.
-# Allows "identifier.identifier[" or "identifier[".
+# Allows a dotted identifier path of any depth: "a[", "a.b[", "a.b.c[", ...
 # https://bugs.python.org/issue33453 for details.
-_MODULE_IDENTIFIER_RE = re.compile(r'^(?:\s*(\w+)\s*\.)?\s*(\w+)')
+_MODULE_IDENTIFIER_RE = re.compile(r'^\s*(\w+(?:\s*\.\s*\w+)*)')
 
 # Atomic immutable types which don't require any recursive handling and for which deepcopy
 # returns the same object. We can provide a fast-path for these types in asdict and astuple.
@@ -711,22 +711,16 @@ def _is_kw_only(a_type, dataclasses):
     return a_type is dataclasses.KW_ONLY
 
 
-def _is_type(annotation, cls, a_module, a_type, is_type_predicate):
-    # Given a type annotation string, does it refer to a_type in
-    # a_module?  For example, when checking that annotation denotes a
-    # ClassVar, then a_module is typing, and a_type is
-    # typing.ClassVar.
+def _get_type_from_annotation(annotation, cls):
+    # Loosely parse a string annotation and return the object it names.
 
-    # It's possible to look up a_module given a_type, but it involves
-    # looking in sys.modules (again!), and seems like a waste since
-    # the caller already knows a_module.
+    # A full type hint evaluation is not possible at the point where
+    # @dataclass is applied, because the class's module is not fully
+    # initialized yet.  So the dotted name that precedes the first square
+    # bracket is extracted with a regex and looked up; None means not found.
 
     # - annotation is a string type annotation
     # - cls is the class that this annotation was found in
-    # - a_module is the module we want to match
-    # - a_type is the type in that module we want to match
-    # - is_type_predicate is a function called with (obj, a_module)
-    #   that determines if obj is of the desired type.
 
     # Since this test does not do a local namespace lookup (and
     # instead only a module (global) lookup), there are some things it
@@ -753,21 +747,18 @@ def _is_type(annotation, cls, a_module, a_type, is_type_predicate):
     # that's defined.  It was judged not worth it.
 
     match = _MODULE_IDENTIFIER_RE.match(annotation)
-    if match:
-        ns = None
-        module_name = match.group(1)
-        if not module_name:
-            # No module name, assume the class's module did
-            # "from dataclasses import InitVar".
-            ns = sys.modules.get(cls.__module__).__dict__
-        else:
-            # Look up module_name in the class's module.
-            module = sys.modules.get(cls.__module__)
-            if module and module.__dict__.get(module_name) is a_module:
-                ns = sys.modules.get(a_type.__module__).__dict__
-        if ns and is_type_predicate(ns.get(match.group(2)), a_module):
-            return True
-    return False
+    module = sys.modules.get(cls.__module__)
+    if not match or module is None:
+        return None
+    # Resolve the first name through the module's __dict__: getattr() would
+    # invoke a PEP 562 module __getattr__ for every unresolved name ("int").
+    head, *tail = (name.strip() for name in match.group(1).split("."))
+    root = module.__dict__.get(head)
+    for name in tail:
+        if root is None:
+            return None
+        root = getattr(root, name, None)
+    return root
 
 
 def _get_field(cls, a_name, a_type, default_kw_only):
@@ -805,6 +796,10 @@ def _get_field(cls, a_name, a_type, default_kw_only):
     # is actually of the correct type.
 
     # For the complete discussion, see https://bugs.python.org/issue33453
+    if isinstance(a_type, str):
+        a_type_annotation = _get_type_from_annotation(a_type, cls)
+    else:
+        a_type_annotation = a_type
 
     # If typing has not been imported, then it's impossible for any
     # annotation to be a ClassVar.  So, only look for ClassVar if
@@ -812,10 +807,7 @@ def _get_field(cls, a_name, a_type, default_kw_only):
     # module).
     typing = sys.modules.get('typing')
     if typing:
-        if (_is_classvar(a_type, typing)
-            or (isinstance(f.type, str)
-                and _is_type(f.type, cls, typing, typing.ClassVar,
-                             _is_classvar))):
+        if _is_classvar(a_type_annotation, typing):
             f._field_type = _FIELD_CLASSVAR
 
     # If the type is InitVar, or if it's a matching string annotation,
@@ -824,10 +816,7 @@ def _get_field(cls, a_name, a_type, default_kw_only):
         # The module we're checking against is the module we're
         # currently in (dataclasses.py).
         dataclasses = sys.modules[__name__]
-        if (_is_initvar(a_type, dataclasses)
-            or (isinstance(f.type, str)
-                and _is_type(f.type, cls, dataclasses, dataclasses.InitVar,
-                             _is_initvar))):
+        if _is_initvar(a_type_annotation, dataclasses):
             f._field_type = _FIELD_INITVAR
 
     # Validations for individual fields.  This is delayed until now,
@@ -998,10 +987,11 @@ def _process_class(cls, init, repr, eq, order, unsafe_hash, frozen,
     dataclasses = sys.modules[__name__]
     for name, type in cls_annotations.items():
         # See if this is a marker to change the value of kw_only.
-        if (_is_kw_only(type, dataclasses)
-            or (isinstance(type, str)
-                and _is_type(type, cls, dataclasses, dataclasses.KW_ONLY,
-                             _is_kw_only))):
+        if isinstance(type, str):
+            a_type_annotation = _get_type_from_annotation(type, cls)
+        else:
+            a_type_annotation = type
+        if _is_kw_only(a_type_annotation, dataclasses):
             # Switch the default to kw_only=True, and ignore this
             # annotation: it's not a real field.
             if KW_ONLY_seen:
diff --git a/Lib/test/test_dataclasses/__init__.py b/Lib/test/test_dataclasses/__init__.py
index ac78f8327b808e..c678ea94761420 100644
--- a/Lib/test/test_dataclasses/__init__.py
+++ b/Lib/test/test_dataclasses/__init__.py
@@ -4093,10 +4093,17 @@ def test_classvar_module_level_import(self):
         from test.test_dataclasses import dataclass_module_1_str
         from test.test_dataclasses import dataclass_module_2
         from test.test_dataclasses import dataclass_module_2_str
-
-        for m in (dataclass_module_1, dataclass_module_1_str,
-                  dataclass_module_2, dataclass_module_2_str,
-                  ):
+        from test.test_dataclasses import dataclass_module_3
+        from test.test_dataclasses import dataclass_module_3_str
+        from test.test_dataclasses import dataclass_module_4
+        from test.test_dataclasses import dataclass_module_4_str
+
+        for m in (
+            dataclass_module_1, dataclass_module_1_str,
+            dataclass_module_2, dataclass_module_2_str,
+            dataclass_module_3, dataclass_module_3_str,
+            dataclass_module_4, dataclass_module_4_str,
+        ):
             with self.subTest(m=m):
                 # There's a difference in how the ClassVars are
                 # interpreted when using string annotations or
@@ -4400,6 +4407,14 @@ def custom_dataclass(cls, *args, **kwargs):
         self.assertEqual(c.x, 10)
         self.assertEqual(c.__custom__, True)
 
+    def test_empty_annotation_string(self):
+        @dataclass
+        class DataclassWithEmptyTypeAnnotation:
+            x: ""
+
+        c = DataclassWithEmptyTypeAnnotation(10)
+        self.assertEqual(c.x, 10)
+
 
 class TestReplace(unittest.TestCase):
     def test(self):
diff --git a/Lib/test/test_dataclasses/_types_proxy.py b/Lib/test/test_dataclasses/_types_proxy.py
new file mode 100644
index 00000000000000..f4aaeef7aec59d
--- /dev/null
+++ b/Lib/test/test_dataclasses/_types_proxy.py
@@ -0,0 +1,8 @@
+# We need this to test a case when a type
+# is imported via some other package,
+# like ClassVar from typing_extensions instead of typing.
+# https://github.com/python/cpython/issues/133956
+from typing import ClassVar
+from dataclasses import InitVar
+
+__all__ = ["ClassVar", "InitVar"]
diff --git a/Lib/test/test_dataclasses/dataclass_module_3.py b/Lib/test/test_dataclasses/dataclass_module_3.py
new file mode 100644
index 00000000000000..74abc091f35acd
--- /dev/null
+++ b/Lib/test/test_dataclasses/dataclass_module_3.py
@@ -0,0 +1,32 @@
+#from __future__ import annotations
+USING_STRINGS = False
+
+# dataclass_module_3.py and dataclass_module_3_str.py are identical
+# except only the latter uses string annotations.
+
+from dataclasses import dataclass
+import test.test_dataclasses._types_proxy as tp
+
+T_CV2 = tp.ClassVar[int]
+T_CV3 = tp.ClassVar
+
+T_IV2 = tp.InitVar[int]
+T_IV3 = tp.InitVar
+
+@dataclass
+class CV:
+    T_CV4 = tp.ClassVar
+    cv0: tp.ClassVar[int] = 20
+    cv1: tp.ClassVar = 30
+    cv2: T_CV2
+    cv3: T_CV3
+    not_cv4: T_CV4 # When using string annotations, this field is not recognized as a ClassVar.
+
+@dataclass
+class IV:
+    T_IV4 = tp.InitVar
+    iv0: tp.InitVar[int]
+    iv1: tp.InitVar
+    iv2: T_IV2
+    iv3: T_IV3
+    not_iv4: T_IV4 # When using string annotations, this field is not recognized as an InitVar.
diff --git a/Lib/test/test_dataclasses/dataclass_module_3_str.py b/Lib/test/test_dataclasses/dataclass_module_3_str.py
new file mode 100644
index 00000000000000..49e5fca61831b6
--- /dev/null
+++ b/Lib/test/test_dataclasses/dataclass_module_3_str.py
@@ -0,0 +1,32 @@
+from __future__ import annotations
+USING_STRINGS = True
+
+# dataclass_module_3.py and dataclass_module_3_str.py are identical
+# except only the latter uses string annotations.
+
+from dataclasses import dataclass
+import test.test_dataclasses._types_proxy as tp
+
+T_CV2 = tp.ClassVar[int]
+T_CV3 = tp.ClassVar
+
+T_IV2 = tp.InitVar[int]
+T_IV3 = tp.InitVar
+
+@dataclass
+class CV:
+    T_CV4 = tp.ClassVar
+    cv0: tp.ClassVar[int] = 20
+    cv1: tp.ClassVar = 30
+    cv2: T_CV2
+    cv3: T_CV3
+    not_cv4: T_CV4 # When using string annotations, this field is not recognized as a ClassVar.
+
+@dataclass
+class IV:
+    T_IV4 = tp.InitVar
+    iv0: tp.InitVar[int]
+    iv1: tp.InitVar
+    iv2: T_IV2
+    iv3: T_IV3
+    not_iv4: T_IV4 # When using string annotations, this field is not recognized as an InitVar.
diff --git a/Lib/test/test_dataclasses/dataclass_module_4.py b/Lib/test/test_dataclasses/dataclass_module_4.py
new file mode 100644
index 00000000000000..7e0c8a18356590
--- /dev/null
+++ b/Lib/test/test_dataclasses/dataclass_module_4.py
@@ -0,0 +1,38 @@
+#from __future__ import annotations
+USING_STRINGS = False
+
+# dataclass_module_4.py and dataclass_module_4_str.py are identical
+# except only the latter uses string annotations.
+
+from dataclasses import dataclass
+import dataclasses
+import typing
+
+class TypingProxy:
+    class Nested:
+        ClassVar = typing.ClassVar
+        InitVar = dataclasses.InitVar
+
+T_CV2 = TypingProxy.Nested.ClassVar[int]
+T_CV3 = TypingProxy.Nested.ClassVar
+
+T_IV2 = TypingProxy.Nested.InitVar[int]
+T_IV3 = TypingProxy.Nested.InitVar
+
+@dataclass
+class CV:
+    T_CV4 = TypingProxy.Nested.ClassVar
+    cv0: TypingProxy.Nested.ClassVar[int] = 20
+    cv1: TypingProxy.Nested.ClassVar = 30
+    cv2: T_CV2
+    cv3: T_CV3
+    not_cv4: T_CV4 # When using string annotations, this field is not recognized as a ClassVar.
+
+@dataclass
+class IV:
+    T_IV4 = TypingProxy.Nested.InitVar
+    iv0: TypingProxy.Nested.InitVar[int]
+    iv1: TypingProxy.Nested.InitVar
+    iv2: T_IV2
+    iv3: T_IV3
+    not_iv4: T_IV4 # When using string annotations, this field is not recognized as an InitVar.
diff --git a/Lib/test/test_dataclasses/dataclass_module_4_str.py b/Lib/test/test_dataclasses/dataclass_module_4_str.py
new file mode 100644
index 00000000000000..876f3dcf7c88fa
--- /dev/null
+++ b/Lib/test/test_dataclasses/dataclass_module_4_str.py
@@ -0,0 +1,38 @@
+from __future__ import annotations
+USING_STRINGS = True
+
+# dataclass_module_4.py and dataclass_module_4_str.py are identical
+# except only the latter uses string annotations.
+
+from dataclasses import dataclass
+import dataclasses
+import typing
+
+class TypingProxy:
+    class Nested:
+        ClassVar = typing.ClassVar
+        InitVar = dataclasses.InitVar
+
+T_CV2 = TypingProxy.Nested.ClassVar[int]
+T_CV3 = TypingProxy.Nested.ClassVar
+
+T_IV2 = TypingProxy.Nested.InitVar[int]
+T_IV3 = TypingProxy.Nested.InitVar
+
+@dataclass
+class CV:
+    T_CV4 = TypingProxy.Nested.ClassVar
+    cv0: TypingProxy.Nested.ClassVar[int] = 20
+    cv1: TypingProxy.Nested.ClassVar = 30
+    cv2: T_CV2
+    cv3: T_CV3
+    not_cv4: T_CV4 # When using string annotations, this field is not recognized as a ClassVar.
+
+@dataclass
+class IV:
+    T_IV4 = TypingProxy.Nested.InitVar
+    iv0: TypingProxy.Nested.InitVar[int]
+    iv1: TypingProxy.Nested.InitVar
+    iv2: T_IV2
+    iv3: T_IV3
+    not_iv4: T_IV4 # When using string annotations, this field is not recognized as an InitVar.
diff --git a/Misc/NEWS.d/next/Library/2025-05-16-01-43-58.gh-issue-133956.5kWDYd.rst b/Misc/NEWS.d/next/Library/2025-05-16-01-43-58.gh-issue-133956.5kWDYd.rst
new file mode 100644
index 00000000000000..5923e12d55964c
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-05-16-01-43-58.gh-issue-133956.5kWDYd.rst
@@ -0,0 +1,4 @@
+Fix bug where :func:`@dataclass <dataclasses.dataclass>`
+wouldn't detect ``ClassVar`` fields
+if ``ClassVar`` was re-exported from a module
+other than :mod:`typing`.