Skip to content

Commit 6ba393a

Browse files
Added function for deterministic ID for class definitions by hashing. (#36793)
* Added function for deterministic ID for class definitions by hashing. * Trigger CI: Rerun checks * addressing reviewer comments
1 parent e51998b commit 6ba393a

File tree

1 file changed

+24
-4
lines changed

1 file changed

+24
-4
lines changed

sdks/python/apache_beam/internal/cloudpickle/cloudpickle.py

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@
6262
import dis
6363
from enum import Enum
6464
import functools
65+
import hashlib
6566
import io
6667
import itertools
6768
import logging
@@ -98,7 +99,7 @@
9899
_DYNAMIC_CLASS_TRACKER_BY_CLASS = weakref.WeakKeyDictionary()
99100
_DYNAMIC_CLASS_TRACKER_BY_ID = weakref.WeakValueDictionary()
100101
_DYNAMIC_CLASS_STATE_TRACKER_BY_CLASS = weakref.WeakKeyDictionary()
101-
_DYNAMIC_CLASS_TRACKER_LOCK = threading.Lock()
102+
_DYNAMIC_CLASS_TRACKER_LOCK = threading.RLock()
102103

103104
PYPY = platform.python_implementation() == "PyPy"
104105

@@ -168,6 +169,7 @@ class CloudPickleConfig:
168169

169170

170171
DEFAULT_CONFIG = CloudPickleConfig()
172+
_GENERATING_SENTINEL = object()
171173
builtin_code_type = None
172174
if PYPY:
173175
# builtin-code objects only exist in pypy
@@ -179,10 +181,21 @@ class CloudPickleConfig:
179181
def _get_or_create_tracker_id(class_def, id_generator):
  """Return the tracker ID recorded for ``class_def``, creating one if needed.

  Args:
    class_def: the class whose tracker ID is looked up or created.
    id_generator: callable that takes ``class_def`` and returns its ID, or
      ``None`` to only look up whatever is currently recorded.

  Returns:
    The ID stored for ``class_def``. When ``id_generator`` is ``None`` this
    is the raw stored value: ``None`` if nothing is recorded, or
    ``_GENERATING_SENTINEL`` if an ID is currently being generated.

  Raises:
    RuntimeError: if ``id_generator`` is given while an ID for ``class_def``
      is already being generated (the generator re-entered this function
      for the same class).
  """
  with _DYNAMIC_CLASS_TRACKER_LOCK:
    class_tracker_id = _DYNAMIC_CLASS_TRACKER_BY_CLASS.get(class_def)

    # Lookup-only mode: hand back whatever is stored, even the sentinel.
    if id_generator is None:
      return class_tracker_id

    if class_tracker_id is _GENERATING_SENTINEL:
      raise RuntimeError(
          f"Recursive ID generation detected for {class_def}. "
          "The id_generator cannot recursively request an ID for the same class."
      )

    if class_tracker_id is None:
      # Mark the class as "in progress" so that a re-entrant request for the
      # same class made from inside id_generator is detected above.
      _DYNAMIC_CLASS_TRACKER_BY_CLASS[class_def] = _GENERATING_SENTINEL
      try:
        class_tracker_id = id_generator(class_def)
        _DYNAMIC_CLASS_TRACKER_BY_CLASS[class_def] = class_tracker_id
        _DYNAMIC_CLASS_TRACKER_BY_ID[class_tracker_id] = class_def
      except BaseException:
        # Catch BaseException, not just Exception: a KeyboardInterrupt or
        # SystemExit raised by the generator must not leave the sentinel
        # behind, or later calls would report a bogus recursion.
        _DYNAMIC_CLASS_TRACKER_BY_CLASS.pop(class_def, None)
        raise
    return class_tracker_id
187200

188201

@@ -1720,3 +1733,10 @@ def dumps(
17201733

17211734
# Backward compat alias.
17221735
CloudPickler = Pickler
1736+
1737+
1738+
def hash_dynamic_classdef(classdef):
  """Return a deterministic ID for ``classdef`` derived from its pickle.

  The class definition is serialized with ``dumps`` using a
  ``CloudPickleConfig`` whose ``id_generator`` is ``None``, and the SHA-256
  hex digest of the resulting bytes is returned.

  Args:
    classdef: the class definition to hash.

  Returns:
    The hexadecimal SHA-256 digest string of the pickled class definition.
  """
  # NOTE(review): id_generator=None presumably keeps this call from asking
  # an ID generator for an ID while one is being computed -- confirm against
  # how dumps() uses the config.
  no_id_config = CloudPickleConfig(id_generator=None)
  pickled_classdef = dumps(classdef, config=no_id_config)
  return hashlib.sha256(pickled_classdef).hexdigest()

0 commit comments

Comments
 (0)