Skip to content

Commit 77c545e

Browse files
committed
Don't create dynamic module, add more tests for locally defined models
Signed-off-by: Nijat Khanbabayev <nijat.khanbabayev@cubistsystematic.com>
1 parent ff6acf1 commit 77c545e

File tree

8 files changed

+969
-367
lines changed

8 files changed

+969
-367
lines changed

ccflow/base.py

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
from typing_extensions import Self
3131

3232
from .exttypes.pyobjectpath import PyObjectPath
33-
from .local_persistence import _register_local_subclass
33+
from .local_persistence import _register, _register_local_subclass_if_needed, _sync_to_module
3434

3535
log = logging.getLogger(__name__)
3636

@@ -157,14 +157,29 @@ class BaseModel(PydanticBaseModel, _RegistryMixin, metaclass=_SerializeAsAnyMeta
157157
- Registration by name, and coercion from string name to allow for object re-use from the configs
158158
"""
159159

160-
__ccflow_local_registration_kind__: ClassVar[str] = "model"
161-
162160
@classmethod
163161
def __pydantic_init_subclass__(cls, **kwargs):
164-
# __pydantic_init_subclass__ is the supported hook point once Pydantic finishes wiring the subclass.
165162
super().__pydantic_init_subclass__(**kwargs)
166-
kind = getattr(cls, "__ccflow_local_registration_kind__", "model")
167-
_register_local_subclass(cls, kind=kind)
163+
# Register local-scope classes so they're importable via PyObjectPath.
164+
# At definition time, we can only detect <locals> in qualname - full check deferred.
165+
if "__ccflow_import_path__" in cls.__dict__:
166+
_sync_to_module(cls) # Cross-process unpickle case
167+
elif "<locals>" in cls.__qualname__:
168+
_register(cls)
169+
170+
def __reduce_ex__(self, protocol):
171+
# Ensure the class is registered before pickling. This is necessary for classes
172+
# created dynamically (e.g., via pydantic's create_model) that don't have '<locals>'
173+
# in their __qualname__. Without this, such classes would get different UUIDs in
174+
# different processes if pickled before type_ is accessed, breaking cross-process
175+
# consistency. By registering here, we guarantee __ccflow_import_path__ is set
176+
# before cloudpickle serializes the class definition.
177+
#
178+
# We use __reduce_ex__ instead of __reduce__ because it's called first, allowing us
179+
# to perform the registration side effect. We then return NotImplemented to let
180+
# pydantic's default pickling mechanism take over, preserving the pickle format.
181+
_register_local_subclass_if_needed(type(self))
182+
return super().__reduce_ex__(protocol)
168183

169184
@computed_field(
170185
alias="_target_",
@@ -830,8 +845,6 @@ class ContextBase(ResultBase):
830845
that is an input into another CallableModel.
831846
"""
832847

833-
__ccflow_local_registration_kind__: ClassVar[str] = "context"
834-
835848
model_config = ConfigDict(
836849
frozen=True,
837850
arbitrary_types_allowed=False,

ccflow/callable.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,6 @@ class _CallableModel(BaseModel, abc.ABC):
7474
The purpose of this class is to provide type definitions of context_type and return_type.
7575
"""
7676

77-
__ccflow_local_registration_kind__: ClassVar[str] = "callable_model"
78-
7977
model_config = ConfigDict(
8078
ignored_types=(property,),
8179
)

ccflow/exttypes/pyobjectpath.py

Lines changed: 85 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,44 +1,86 @@
11
"""This module contains extension types for pydantic."""
22

3+
import importlib
34
from functools import cached_property, lru_cache
45
from types import FunctionType, MethodType, ModuleType
56
from typing import Any, Type, get_origin
67

7-
from pydantic import ImportString, TypeAdapter
8+
from pydantic import TypeAdapter
89
from pydantic_core import core_schema
910
from typing_extensions import Self
1011

11-
from ccflow.local_persistence import _ensure_registered_at_import_path
12-
13-
_import_string_adapter = TypeAdapter(ImportString)
12+
from ccflow.local_persistence import _register_local_subclass_if_needed
1413

1514

1615
@lru_cache(maxsize=None)
17-
def import_string(input_string: str):
18-
return _import_string_adapter.validate_python(input_string)
16+
def import_string(dotted_path: str) -> Any:
17+
"""Import an object from a dotted path string.
18+
19+
Handles nested class paths like 'module.OuterClass.InnerClass' by progressively
20+
trying shorter module paths and using getattr for the remaining parts.
21+
22+
This is more flexible than pydantic's ImportString which can fail on nested classes.
23+
"""
24+
if not dotted_path:
25+
raise ImportError("Empty path")
26+
27+
parts = dotted_path.split(".")
28+
29+
# Try progressively shorter module paths
30+
# e.g., for 'a.b.C.D', try 'a.b.C', then 'a.b', then 'a'
31+
for i in range(len(parts), 0, -1):
32+
module_path = ".".join(parts[:i])
33+
try:
34+
obj = importlib.import_module(module_path)
35+
# Successfully imported module, now getattr for remaining parts
36+
for attr_name in parts[i:]:
37+
obj = getattr(obj, attr_name)
38+
return obj
39+
except ImportError:
40+
continue
41+
except AttributeError:
42+
# Module imported but attribute not found - keep trying shorter paths
43+
continue
44+
45+
raise ImportError(f"No module named '{dotted_path}'")
46+
47+
48+
def _build_standard_import_path(obj: Any) -> str:
49+
"""Build 'module.qualname' path from an object with __module__ and __qualname__."""
50+
qualname = obj.__qualname__
51+
# Strip generic type parameters (e.g., "MyClass[int]" -> "MyClass")
52+
# This happens with Generic types in pydantic. Type info is lost but
53+
# at least the base class remains importable.
54+
# TODO: Find a way of capturing the underlying type info
55+
if "[" in qualname:
56+
qualname = qualname.split("[", 1)[0]
57+
return f"{obj.__module__}.{qualname}" if obj.__module__ else qualname
1958

2059

2160
class PyObjectPath(str):
22-
"""Similar to pydantic's ImportString (formerly PyObject in v1), this class represents the path to the object as a string.
61+
"""A string representing an importable Python object path (e.g., "module.ClassName").
2362
24-
In pydantic v1, PyObject could not be serialized to json, whereas in v2, ImportString can.
25-
However, the round trip is not always consistent, i.e.
63+
Similar to pydantic's ImportString, but with consistent serialization behavior:
64+
- ImportString deserializes to the actual object
65+
- PyObjectPath deserializes back to the string path
66+
67+
Example:
2668
>>> ta = TypeAdapter(ImportString)
2769
>>> ta.validate_json(ta.dump_json("math.pi"))
2870
3.141592653589793
2971
>>> ta = TypeAdapter(PyObjectPath)
3072
>>> ta.validate_json(ta.dump_json("math.pi"))
3173
'math.pi'
3274
33-
Other differences are that ImportString can contain other arbitrary python values, whereas PyObjectPath is always a string
75+
PyObjectPath also only accepts importable objects, not arbitrary values:
3476
>>> TypeAdapter(ImportString).validate_python(0)
3577
0
3678
>>> TypeAdapter(PyObjectPath).validate_python(0)
3779
raises
3880
"""
3981

4082
# TODO: It would be nice to make this also derive from Generic[T],
41-
# where T could then by used for type checking in validate.
83+
# where T could then be used for type checking in validate.
4284
# However, this doesn't work: https://github.com/python/typing/issues/629
4385

4486
@cached_property
@@ -52,41 +94,41 @@ def __get_pydantic_core_schema__(cls, source_type, handler):
5294

5395
@classmethod
5496
def _validate(cls, value: Any):
97+
"""Convert value (string path or object) to PyObjectPath, verifying it's importable."""
5598
if isinstance(value, str):
56-
value = cls(value)
57-
else: # Try to construct a string from the object that can then be used to import the object
99+
path = cls(value)
100+
else:
101+
# Unwrap generic types (e.g., List[int] -> list)
58102
origin = get_origin(value)
59103
if origin:
60104
value = origin
105+
path = cls._path_from_object(value)
61106

62-
# Check for ccflow's import path override first (used for local-scope classes)
63-
# This allows classes with '<locals>' in __qualname__ to remain importable
64-
# while preserving cloudpickle's ability to serialize the class definition
65-
if hasattr(value, "__ccflow_import_path__"):
66-
_ensure_registered_at_import_path(value)
67-
value = cls(value.__ccflow_import_path__)
68-
elif hasattr(value, "__module__") and hasattr(value, "__qualname__"):
69-
if value.__module__ == "__builtin__":
70-
module = "builtins"
71-
else:
72-
module = value.__module__
73-
qualname = value.__qualname__
74-
if "[" in qualname:
75-
# This happens with Generic types in pydantic. We strip out the info for now.
76-
# TODO: Find a way of capturing the underlying type info
77-
qualname = qualname.split("[", 1)[0]
78-
if not module:
79-
value = cls(qualname)
80-
else:
81-
value = cls(module + "." + qualname)
82-
else:
83-
raise ValueError(f"ensure this value contains valid import path or importable object: unable to import path for {value}")
107+
# Verify the path is actually importable
84108
try:
85-
value.object
109+
path.object
86110
except ImportError as e:
87111
raise ValueError(f"ensure this value contains valid import path or importable object: {str(e)}")
88112

89-
return value
113+
return path
114+
115+
@classmethod
116+
def _path_from_object(cls, value: Any) -> "PyObjectPath":
117+
"""Build import path from an object. Triggers ccflow registration for local classes."""
118+
if isinstance(value, type):
119+
# For ccflow BaseModel subclasses that aren't normally importable (defined in
120+
# functions or via create_model), this registers them on ccflow.base
121+
_register_local_subclass_if_needed(value)
122+
123+
# Use __ccflow_import_path__ if set (check __dict__ to avoid inheriting from parents)
124+
if "__ccflow_import_path__" in value.__dict__:
125+
return cls(value.__ccflow_import_path__)
126+
return cls(_build_standard_import_path(value))
127+
128+
if hasattr(value, "__module__") and hasattr(value, "__qualname__"):
129+
return cls(_build_standard_import_path(value))
130+
131+
raise ValueError(f"ensure this value contains valid import path or importable object: unable to import path for {value}")
90132

91133
@classmethod
92134
@lru_cache(maxsize=None)
@@ -95,10 +137,12 @@ def _validate_cached(cls, value: str) -> Self:
95137

96138
@classmethod
97139
def validate(cls, value) -> Self:
98-
"""Try to convert/validate an arbitrary value to a PyObjectPath."""
99-
if isinstance(
100-
value, (str, type, FunctionType, ModuleType, MethodType)
101-
): # If the value is trivial, we cache it here to avoid the overhead of validation
140+
"""Try to convert/validate an arbitrary value to a PyObjectPath.
141+
142+
Uses caching for common value types to improve performance.
143+
"""
144+
# Cache validation for common immutable types to avoid repeated work
145+
if isinstance(value, (str, type, FunctionType, ModuleType, MethodType)):
102146
return cls._validate_cached(value)
103147
return _TYPE_ADAPTER.validate_python(value)
104148

0 commit comments

Comments
 (0)