Skip to content

Commit 9258ecb

Browse files
committed
feat(convert): cache type information to avoid recomputation during encoding
1 parent fc1c113 commit 9258ecb

File tree

2 files changed

+46
-47
lines changed

2 files changed

+46
-47
lines changed

python/cocoindex/convert.py

Lines changed: 45 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -64,17 +64,18 @@ def _is_type_kind_convertible_to(src_type_kind: str, dst_type_kind: str) -> bool
6464

6565
def _encode_engine_value_core(
6666
value: Any,
67-
in_struct: bool = False,
6867
type_hint: Type[Any] | str | None = None,
6968
type_variant: AnalyzedTypeInfo | None = None,
69+
_elem_type_cache: dict[Any, AnalyzedTypeInfo] | None = None,
7070
) -> Any:
7171
"""Core encoding logic for converting Python values to engine values."""
72+
_elem_type_cache = _elem_type_cache or {}
73+
7274
if dataclasses.is_dataclass(value):
7375
fields = dataclasses.fields(value)
7476
return [
7577
encode_engine_value(
7678
getattr(value, f.name),
77-
in_struct=True,
7879
type_hint=f.type,
7980
)
8081
for f in fields
@@ -85,7 +86,6 @@ def _encode_engine_value_core(
8586
return [
8687
encode_engine_value(
8788
getattr(value, name),
88-
in_struct=True,
8989
type_hint=annotations.get(name),
9090
)
9191
for name in value._fields
@@ -103,34 +103,44 @@ def _encode_engine_value_core(
103103
and isinstance(type_variant.variant, AnalyzedListType)
104104
and type_variant.variant.elem_type
105105
):
106-
elem_encoder = make_engine_value_encoder(type_variant.variant.elem_type)
107-
return [elem_encoder(v) for v in value]
106+
# Cache the analyzed element type
107+
elem_type = type_variant.variant.elem_type
108+
if elem_type not in _elem_type_cache:
109+
_elem_type_cache[elem_type] = analyze_type_info(elem_type)
110+
elem_type_info = _elem_type_cache[elem_type]
111+
return [
112+
_encode_engine_value_core(
113+
v,
114+
type_hint=None,
115+
type_variant=elem_type_info,
116+
_elem_type_cache=_elem_type_cache,
117+
)
118+
for v in value
119+
]
108120
else:
109-
return [encode_engine_value(v, in_struct) for v in value]
121+
return [encode_engine_value(v, type_hint) for v in value]
110122

111123
if isinstance(value, dict):
112124
# Determine if this is a JSON type
113125
is_json_type = False
114126
if type_variant and isinstance(type_variant.variant, AnalyzedBasicType):
115127
is_json_type = type_variant.variant.kind == "Json"
116128
elif type_hint:
117-
hint_type_info = analyze_type_info(type_hint)
129+
hint_type_info = type_variant or analyze_type_info(type_hint)
118130
is_json_type = (
119131
isinstance(hint_type_info.variant, AnalyzedBasicType)
120132
and hint_type_info.variant.kind == "Json"
121133
)
122134

123135
# Handle empty dict
124136
if not value:
125-
if in_struct:
126-
return value if is_json_type else []
127-
return {} if is_json_type else value
137+
return value if (not type_hint or is_json_type) else []
128138

129139
# Handle KTable
130140
first_val = next(iter(value.values()))
131141
if is_struct_type(type(first_val)):
132142
return [
133-
[encode_engine_value(k, in_struct)] + encode_engine_value(v, in_struct)
143+
[encode_engine_value(k, type_hint)] + encode_engine_value(v, type_hint)
134144
for k, v in value.items()
135145
]
136146

@@ -140,55 +150,44 @@ def _encode_engine_value_core(
140150
and isinstance(type_variant.variant, AnalyzedDictType)
141151
and type_variant.variant.value_type
142152
):
143-
value_encoder = make_engine_value_encoder(type_variant.variant.value_type)
144-
return {k: value_encoder(v) for k, v in value.items()}
153+
# Cache the analyzed value type
154+
value_type = type_variant.variant.value_type
155+
if value_type not in _elem_type_cache:
156+
_elem_type_cache[value_type] = analyze_type_info(value_type)
157+
value_type_info = _elem_type_cache[value_type]
158+
return {
159+
k: _encode_engine_value_core(
160+
v,
161+
type_hint=None,
162+
type_variant=value_type_info,
163+
_elem_type_cache=_elem_type_cache,
164+
)
165+
for k, v in value.items()
166+
}
145167

146168
return value
147169

148170

149-
def make_engine_value_encoder(type_annotation: Any) -> Callable[[Any], Any]:
150-
"""
151-
Make an encoder from a Python value to an engine value.
152-
153-
Args:
154-
type_annotation: The type annotation of the Python value.
155-
156-
Returns:
157-
An encoder from a Python value to an engine value.
158-
"""
159-
type_info = analyze_type_info(type_annotation)
160-
161-
if isinstance(type_info.variant, AnalyzedUnknownType):
162-
raise ValueError(f"Type annotation `{type_info.core_type}` is unsupported")
163-
164-
def encode_value(value: Any, in_struct: bool = False) -> Any:
165-
return _encode_engine_value_core(
166-
value, in_struct=in_struct, type_variant=type_info
167-
)
168-
169-
return lambda value: encode_value(value, in_struct=False)
170-
171-
172-
def encode_engine_value(
173-
value: Any, in_struct: bool = False, type_hint: Type[Any] | str | None = None
174-
) -> Any:
171+
def encode_engine_value(value: Any, type_hint: Type[Any] | str | None = None) -> Any:
175172
"""
176173
Encode a Python value to an engine value.
177174
178175
Args:
179176
value: The Python value to encode
180-
in_struct: Whether this value is being encoded within a struct context
181-
type_hint: Type annotation for the value. When provided, enables optimized
182-
type-aware encoding. For top-level calls, this should always be provided.
177+
type_hint: Type annotation for the value. This should always be provided.
183178
184179
Returns:
185180
The encoded engine value
186181
"""
187-
if type_hint is not None:
188-
encoder = make_engine_value_encoder(type_hint)
189-
return encoder(value)
182+
if type_hint is None:
183+
return _encode_engine_value_core(value)
184+
185+
# Analyze type once and reuse the result
186+
type_info = analyze_type_info(type_hint)
187+
if isinstance(type_info.variant, AnalyzedUnknownType):
188+
raise ValueError(f"Type annotation `{type_info.core_type}` is unsupported")
190189

191-
return _encode_engine_value_core(value, in_struct=in_struct)
190+
return _encode_engine_value_core(value, type_hint=type_hint, type_variant=type_info)
192191

193192

194193
def make_engine_value_decoder(

python/cocoindex/tests/test_convert.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ def eq(a: Any, b: Any) -> bool:
9595
return np.array_equal(a, b)
9696
return type(a) is type(b) and not not (a == b)
9797

98-
encoded_value = encode_engine_value(value)
98+
encoded_value = encode_engine_value(value, value_type)
9999
value_type = value_type or type(value)
100100
encoded_output_type = encode_enriched_type(value_type)["type"]
101101
value_from_engine = _engine.testutil.seder_roundtrip(

0 commit comments

Comments
 (0)