From 8a9855e4ff2ddf855ca9ced53b7ed9dea68f845d Mon Sep 17 00:00:00 2001 From: vumichien Date: Fri, 11 Jul 2025 15:26:20 +0900 Subject: [PATCH 1/4] feat(convert): implement struct to dict binding with enhanced type handling --- python/cocoindex/convert.py | 44 +++++++++- python/cocoindex/tests/test_convert.py | 112 +++++++++++++++++++++++++ python/cocoindex/typing.py | 8 +- 3 files changed, 162 insertions(+), 2 deletions(-) diff --git a/python/cocoindex/convert.py b/python/cocoindex/convert.py index 52422d295..3df37d242 100644 --- a/python/cocoindex/convert.py +++ b/python/cocoindex/convert.py @@ -89,7 +89,9 @@ def make_engine_value_decoder( if dst_is_any: if src_type_kind == "Union": return lambda value: value[1] - if src_type_kind == "Struct" or src_type_kind in TABLE_TYPES: + if src_type_kind == "Struct": + return _make_engine_struct_to_dict_decoder(field_path, src_type["fields"]) + if src_type_kind in TABLE_TYPES: raise ValueError( f"Missing type annotation for `{''.join(field_path)}`." f"It's required for {src_type_kind} type." @@ -98,6 +100,18 @@ def make_engine_value_decoder( dst_type_info = analyze_type_info(dst_annotation) + # Handle struct -> dict binding for explicit dict annotations + if ( + src_type_kind == "Struct" + and dst_type_info.kind == "KTable" + and dst_type_info.elem_type + and isinstance(dst_type_info.elem_type, tuple) + and len(dst_type_info.elem_type) == 2 + and dst_type_info.elem_type[0] is str + and dst_type_info.elem_type[1] is Any + ): + return _make_engine_struct_to_dict_decoder(field_path, src_type["fields"]) + if src_type_kind == "Union": dst_type_variants = ( dst_type_info.union_variant_types @@ -294,6 +308,34 @@ def make_closure_for_value( ) +def _make_engine_struct_to_dict_decoder( + field_path: list[str], + src_fields: list[dict[str, Any]], +) -> Callable[[list[Any]], dict[str, Any]]: + """Make a decoder from engine field values to a Python dict.""" + + field_decoders = [] + for i, field_schema in enumerate(src_fields): + field_name = field_schema["name"] + field_path.append(f".{field_name}") + field_decoder = make_engine_value_decoder( + field_path, + field_schema["type"], + Any, # Use Any for recursive decoding + ) + field_path.pop() + field_decoders.append((field_name, field_decoder)) + + def decode_to_dict(values: list[Any]) -> dict[str, Any]: + result = {} + for i, (field_name, field_decoder) in enumerate(field_decoders): + if i < len(values): + result[field_name] = field_decoder(values[i]) + return result + + return decode_to_dict + + def dump_engine_object(v: Any) -> Any: """Recursively dump an object for engine. Engine side uses `Pythonized` to catch.""" if v is None: diff --git a/python/cocoindex/tests/test_convert.py b/python/cocoindex/tests/test_convert.py index e71676398..622b5c880 100644 --- a/python/cocoindex/tests/test_convert.py +++ b/python/cocoindex/tests/test_convert.py @@ -1229,3 +1229,115 @@ class MixedStruct: annotated_float=2.0, ) validate_full_roundtrip(instance, MixedStruct) + + +def test_roundtrip_struct_to_dict_binding() -> None: + """Test struct -> dict binding with Any annotation.""" + + @dataclass + class SimpleStruct: + name: str + value: int + price: float + + instance = SimpleStruct("test", 42, 3.14) + expected_dict = {"name": "test", "value": 42, "price": 3.14} + + # Test Any annotation + validate_full_roundtrip(instance, SimpleStruct, (expected_dict, Any)) + + +def test_roundtrip_struct_to_dict_explicit() -> None: + """Test struct -> dict binding with explicit dict annotations.""" + + @dataclass + class Product: + id: str + name: str + price: float + active: bool + + instance = Product("P1", "Widget", 29.99, True) + expected_dict = {"id": "P1", "name": "Widget", "price": 29.99, "active": True} + + # Test explicit dict annotations + validate_full_roundtrip( + instance, Product, (expected_dict, dict), (expected_dict, dict[str, Any]) + ) + + +def test_roundtrip_struct_to_dict_with_none_annotation() -> None: + """Test struct -> dict binding with None annotation.""" + + @dataclass + class Config: + host: str + port: int + debug: bool + + instance = Config("localhost", 8080, True) + expected_dict = {"host": "localhost", "port": 8080, "debug": True} + + # Test None annotation (should be treated as Any) + validate_full_roundtrip(instance, Config, (expected_dict, None)) + + +def test_roundtrip_struct_to_dict_nested() -> None: + """Test struct -> dict binding with nested structs.""" + + @dataclass + class Address: + street: str + city: str + + @dataclass + class Person: + name: str + age: int + address: Address + + address = Address("123 Main St", "Anytown") + person = Person("John", 30, address) + expected_dict = { + "name": "John", + "age": 30, + "address": {"street": "123 Main St", "city": "Anytown"}, + } + + # Test nested struct conversion + validate_full_roundtrip(person, Person, (expected_dict, dict[str, Any])) + + +def test_roundtrip_struct_to_dict_with_list() -> None: + """Test struct -> dict binding with list fields.""" + + @dataclass + class Team: + name: str + members: list[str] + active: bool + + instance = Team("Dev Team", ["Alice", "Bob", "Charlie"], True) + expected_dict = { + "name": "Dev Team", + "members": ["Alice", "Bob", "Charlie"], + "active": True, + } + + validate_full_roundtrip(instance, Team, (expected_dict, dict)) + + +def test_roundtrip_namedtuple_to_dict_binding() -> None: + """Test NamedTuple -> dict binding.""" + + class Point(NamedTuple): + x: float + y: float + z: float + + instance = Point(1.0, 2.0, 3.0) + expected_dict = {"x": 1.0, "y": 2.0, "z": 3.0} + + validate_full_roundtrip( + instance, Point, (expected_dict, dict), (expected_dict, Any) + ) diff --git a/python/cocoindex/typing.py b/python/cocoindex/typing.py index eb5fc9088..0acefddcd 100644 --- a/python/cocoindex/typing.py +++ b/python/cocoindex/typing.py @@ -241,7 +241,10 @@ def analyze_type_info(t: Any) -> AnalyzedTypeInfo: elif base_type is collections.abc.Mapping or base_type is dict: args = typing.get_args(t) - elem_type = (args[0], args[1]) + if len(args) == 0: # Handle untyped dict + elem_type = (str, Any) + else: + elem_type = (args[0], args[1]) kind = "KTable" elif base_type in (types.UnionType, typing.Union): possible_types = typing.get_args(t) @@ -282,6 +285,9 @@ def analyze_type_info(t: Any) -> AnalyzedTypeInfo: kind = "OffsetDateTime" elif t is datetime.timedelta: kind = "TimeDelta" + elif t is dict: + elem_type = (str, Any) + kind = "KTable" else: raise ValueError(f"type unsupported yet: {t}") From dfd522e37379c286af8aa2b1170775e78aaa6c0f Mon Sep 17 00:00:00 2001 From: vumichien Date: Sun, 13 Jul 2025 11:01:15 +0900 Subject: [PATCH 2/4] feat(convert): enhance struct to dict binding and improve type validation for annotations --- python/cocoindex/convert.py | 41 ++++++++++++++++++++----------------- python/cocoindex/typing.py | 12 +++++++---- 2 files changed, 30 insertions(+), 23 deletions(-) diff --git a/python/cocoindex/convert.py b/python/cocoindex/convert.py index 3df37d242..8325ddd27 100644 --- a/python/cocoindex/convert.py +++ b/python/cocoindex/convert.py @@ -86,6 +86,14 @@ def make_engine_value_decoder( or dst_annotation is inspect.Parameter.empty or dst_annotation is Any ) + # Handle struct -> dict binding for explicit dict annotations + is_dict_annotation = False + if dst_annotation is dict: + is_dict_annotation = True + elif getattr(dst_annotation, "__origin__", None) is dict: + args = getattr(dst_annotation, "__args__", ()) + if args == (str, Any): + is_dict_annotation = True if dst_is_any: if src_type_kind == "Union": return lambda value: value[1] @@ -97,21 +105,11 @@ def make_engine_value_decoder( f"It's required for {src_type_kind} type." ) return lambda value: value + if is_dict_annotation and src_type_kind == "Struct": + return _make_engine_struct_to_dict_decoder(field_path, src_type["fields"]) dst_type_info = analyze_type_info(dst_annotation) - # Handle struct -> dict binding for explicit dict annotations - if ( - src_type_kind == "Struct" - and dst_type_info.kind == "KTable" - and dst_type_info.elem_type - and isinstance(dst_type_info.elem_type, tuple) - and len(dst_type_info.elem_type) == 2 - and dst_type_info.elem_type[0] is str - and dst_type_info.elem_type[1] is Any - ): - return _make_engine_struct_to_dict_decoder(field_path, src_type["fields"]) - if src_type_kind == "Union": dst_type_variants = ( dst_type_info.union_variant_types @@ -311,7 +309,7 @@ def make_closure_for_value( def _make_engine_struct_to_dict_decoder( field_path: list[str], src_fields: list[dict[str, Any]], -) -> Callable[[list[Any]], dict[str, Any]]: +) -> Callable[[list[Any] | None], dict[str, Any] | None]: """Make a decoder from engine field values to a Python dict.""" field_decoders = [] @@ -326,12 +324,17 @@ def _make_engine_struct_to_dict_decoder( field_path.pop() field_decoders.append((field_name, field_decoder)) - def decode_to_dict(values: list[Any]) -> dict[str, Any]: - result = {} - for i, (field_name, field_decoder) in enumerate(field_decoders): - if i < len(values): - result[field_name] = field_decoder(values[i]) - return result + def decode_to_dict(values: list[Any] | None) -> dict[str, Any] | None: + if values is None: + return None + if len(field_decoders) != len(values): + raise ValueError( + f"Field count mismatch: expected {len(field_decoders)}, got {len(values)}" + ) + return { + field_name: field_decoder(value) + for value, (field_name, field_decoder) in zip(values, field_decoders) + } return decode_to_dict diff --git a/python/cocoindex/typing.py b/python/cocoindex/typing.py index 0acefddcd..4540b97e8 100644 --- a/python/cocoindex/typing.py +++ b/python/cocoindex/typing.py @@ -168,7 +168,8 @@ class AnalyzedTypeInfo: def analyze_type_info(t: Any) -> AnalyzedTypeInfo: """ - Analyze a Python type and return the analyzed info. + Analyze a Python type annotation and extract CocoIndex-specific type information. + Only concrete CocoIndex type annotations are supported. Raises ValueError for Any, empty, or untyped dict types. """ if isinstance(t, tuple) and len(t) == 2: kt, vt = t @@ -242,7 +243,9 @@ def analyze_type_info(t: Any) -> AnalyzedTypeInfo: elif base_type is collections.abc.Mapping or base_type is dict: args = typing.get_args(t) if len(args) == 0: # Handle untyped dict - elem_type = (str, Any) + raise ValueError( + "Untyped dict is not supported; please provide a concrete type, e.g., dict[str, Any]." + ) else: elem_type = (args[0], args[1]) kind = "KTable" @@ -286,8 +289,9 @@ def analyze_type_info(t: Any) -> AnalyzedTypeInfo: elif t is datetime.timedelta: kind = "TimeDelta" elif t is dict: - elem_type = (str, Any) - kind = "KTable" + raise ValueError( + "Untyped dict is not supported; please provide a concrete type, e.g., dict[str, Any]." + ) else: raise ValueError(f"type unsupported yet: {t}") From ff4c090cf14513f98fa9260dcafe0e06ac5c93cd Mon Sep 17 00:00:00 2001 From: vumichien Date: Sun, 13 Jul 2025 16:12:57 +0900 Subject: [PATCH 3/4] refactor(convert): reorganize struct to dict binding logic for clarity and maintainability --- python/cocoindex/convert.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/python/cocoindex/convert.py b/python/cocoindex/convert.py index 8325ddd27..392abd15f 100644 --- a/python/cocoindex/convert.py +++ b/python/cocoindex/convert.py @@ -86,14 +86,6 @@ def make_engine_value_decoder( or dst_annotation is inspect.Parameter.empty or dst_annotation is Any ) - # Handle struct -> dict binding for explicit dict annotations - is_dict_annotation = False - if dst_annotation is dict: - is_dict_annotation = True - elif getattr(dst_annotation, "__origin__", None) is dict: - args = getattr(dst_annotation, "__args__", ()) - if args == (str, Any): - is_dict_annotation = True if dst_is_any: if src_type_kind == "Union": return lambda value: value[1] @@ -105,6 +97,15 @@ def make_engine_value_decoder( f"It's required for {src_type_kind} type." ) return lambda value: value + + # Handle struct -> dict binding for explicit dict annotations + is_dict_annotation = False + if dst_annotation is dict: + is_dict_annotation = True + elif getattr(dst_annotation, "__origin__", None) is dict: + args = getattr(dst_annotation, "__args__", ()) + if args == (str, Any): + is_dict_annotation = True if is_dict_annotation and src_type_kind == "Struct": return _make_engine_struct_to_dict_decoder(field_path, src_type["fields"]) From ce1743c3baee4545cafd740d8539678878e750bc Mon Sep 17 00:00:00 2001 From: vumichien Date: Sun, 13 Jul 2025 16:19:15 +0900 Subject: [PATCH 4/4] remove unnecessary whitespace for ruff check --- python/cocoindex/convert.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cocoindex/convert.py b/python/cocoindex/convert.py index 392abd15f..1a254ac28 100644 --- a/python/cocoindex/convert.py +++ b/python/cocoindex/convert.py @@ -97,7 +97,7 @@ def make_engine_value_decoder( f"It's required for {src_type_kind} type." ) return lambda value: value - + # Handle struct -> dict binding for explicit dict annotations is_dict_annotation = False if dst_annotation is dict: