Skip to content

Commit fb03c1c

Browse files
kkutrowski and ekzhu authored
Fix: Handle nested objects in array items for JSON schema conversion (#6993)
Co-authored-by: Eric Zhu <ekzhu@users.noreply.github.com>
1 parent 17d3aef commit fb03c1c

File tree

2 files changed

+224
-0
lines changed

2 files changed

+224
-0
lines changed

python/packages/autogen-core/src/autogen_core/utils/_json_to_pydantic.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,17 @@ def get_ref(self, ref_name: str) -> Any:
128128

129129
return self._model_cache[ref_name]
130130

131+
def _get_item_model_name(self, array_field_name: str, parent_model_name: str) -> str:
    """Build a short, deterministic model name for the object items of an array field.

    The name is the array field name followed by the first six hex digits of
    the MD5 digest of ``"{parent_model_name}_{array_field_name}"``. Including
    the parent name in the hashed path makes it very unlikely that equally
    named array fields under different parent models share an item-model name.

    Args:
        array_field_name: Name of the array property whose items need a model.
        parent_model_name: Name of the model that declares the array property.

    Returns:
        ``"{array_field_name}_{suffix}"`` where ``suffix`` is six hex characters.
    """
    import hashlib

    full_path = f"{parent_model_name}_{array_field_name}"
    # The digest is only a naming aid, not a security primitive. Saying so
    # keeps this call usable on FIPS-restricted Python builds, where a plain
    # hashlib.md5() is refused.
    digest = hashlib.md5(full_path.encode(), usedforsecurity=False).hexdigest()

    # Field name as-is, plus a short hash suffix.
    return f"{array_field_name}_{digest[:6]}"
141+
131142
def _process_definitions(self, root_schema: Dict[str, Any]) -> None:
132143
if "$defs" in root_schema:
133144
for model_name in root_schema["$defs"]:
@@ -253,6 +264,11 @@ def _extract_field_type(self, key: str, value: Dict[str, Any], model_name: str,
253264
item_schema = value.get("items", {"type": "string"})
254265
if "$ref" in item_schema:
255266
item_type = self.get_ref(item_schema["$ref"].split("/")[-1])
267+
elif item_schema.get("type") == "object" and "properties" in item_schema:
268+
# Handle array items that are objects with properties - create a nested model
269+
# Use hash-based naming to keep names short and unique
270+
item_model_name = self._get_item_model_name(key, model_name)
271+
item_type = self._json_schema_to_model(item_schema, item_model_name, root_schema)
256272
else:
257273
item_type_name = item_schema.get("type")
258274
if item_type_name is None:

python/packages/autogen-core/tests/test_json_to_pydantic.py

Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -834,3 +834,211 @@ def test_unknown_format_raises() -> None:
834834
converter = _JSONSchemaToPydantic()
835835
with pytest.raises(FormatNotSupportedError):
836836
converter.json_schema_to_pydantic(schema, "UnknownFormatModel")
837+
838+
839+
def test_array_items_with_object_schema_properties() -> None:
    """Array items declared as inline object schemas must become nested Pydantic models."""
    from typing import Union, get_args, get_origin

    user_item_schema = {
        "type": "object",
        "properties": {"name": {"type": "string"}, "email": {"type": "string"}, "age": {"type": "integer"}},
        "required": ["name", "email"],
    }
    schema = {
        "type": "object",
        "properties": {
            "users": {
                "type": "array",
                "items": user_item_schema,
            }
        },
    }

    Model = _JSONSchemaToPydantic().json_schema_to_pydantic(schema, "UserListModel")

    # The field may be annotated as List[...] or as a Union containing a
    # List[...]; in the latter case pick the first list member.
    users_annotation = Model.model_fields["users"].annotation
    list_annotation = users_annotation
    if get_origin(users_annotation) is Union:
        list_annotation = next(
            (member for member in get_args(users_annotation) if get_origin(member) is list),
            users_annotation,
        )
    assert get_origin(list_annotation) is list
    item_model = get_args(list_annotation)[0]

    # Items must be model classes rather than plain dicts.
    assert item_model is not dict
    assert hasattr(item_model, "model_fields")

    # Every declared property shows up as a model field.
    assert {"name", "email", "age"} <= set(item_model.model_fields.keys())

    # Build an instance: the second user omits the optional "age".
    instance = Model(
        users=[
            {"name": "Alice", "email": "alice@example.com", "age": 30},
            {"name": "Bob", "email": "bob@example.com"},
        ]
    )
    assert len(instance.users) == 2  # type: ignore[attr-defined]

    alice = instance.users[0]  # type: ignore[attr-defined]
    assert hasattr(alice, "model_fields")  # type: ignore[reportUnknownArgumentType]
    assert not isinstance(alice, dict)

    # Attribute access works because items are BaseModel instances.
    assert alice.name == "Alice"  # type: ignore[attr-defined]
    assert alice.email == "alice@example.com"  # type: ignore[attr-defined]
    assert alice.age == 30  # type: ignore[attr-defined]
902+
903+
904+
def test_nested_arrays_with_object_schemas() -> None:
    """Test deeply nested arrays with object schemas create proper Pydantic models.

    The schema nests companies -> departments -> employees, each level being an
    array of inline object schemas. Every level must be converted to a model
    class (not ``dict``), and nested data must validate into model instances.
    """
    from typing import Any, Union, get_args, get_origin

    def list_item_type(annotation: Any) -> Any:
        """Return ``T`` for a ``List[T]`` annotation, unwrapping an enclosing Union first.

        When the annotation is a Union, its first list member is used. The
        resulting annotation is asserted to be a list before ``T`` is returned.
        """
        list_type = annotation
        if get_origin(annotation) is Union:
            for arg in get_args(annotation):
                if get_origin(arg) is list:
                    list_type = arg
                    break
        assert get_origin(list_type) is list
        return get_args(list_type)[0]

    schema = {
        "type": "object",
        "properties": {
            "companies": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "name": {"type": "string"},
                        "departments": {
                            "type": "array",
                            "items": {
                                "type": "object",
                                "properties": {
                                    "name": {"type": "string"},
                                    "employees": {
                                        "type": "array",
                                        "items": {
                                            "type": "object",
                                            "properties": {
                                                "name": {"type": "string"},
                                                "role": {"type": "string"},
                                                "skills": {"type": "array", "items": {"type": "string"}},
                                            },
                                            "required": ["name", "role"],
                                        },
                                    },
                                },
                                "required": ["name"],
                            },
                        },
                    },
                    "required": ["name"],
                },
            }
        },
    }

    converter = _JSONSchemaToPydantic()
    Model = converter.json_schema_to_pydantic(schema, "CompanyListModel")

    # Level 1: companies are BaseModel subclasses
    company_type = list_item_type(Model.model_fields["companies"].annotation)
    assert company_type is not dict
    assert hasattr(company_type, "model_fields")
    assert "name" in company_type.model_fields
    assert "departments" in company_type.model_fields

    # Level 2: departments are BaseModel subclasses
    department_type = list_item_type(company_type.model_fields["departments"].annotation)
    assert department_type is not dict
    assert hasattr(department_type, "model_fields")
    assert "name" in department_type.model_fields
    assert "employees" in department_type.model_fields

    # Level 3: employees are BaseModel subclasses
    employee_type = list_item_type(department_type.model_fields["employees"].annotation)
    assert employee_type is not dict
    assert hasattr(employee_type, "model_fields")
    expected_emp_fields = {"name", "role", "skills"}
    actual_emp_fields = set(employee_type.model_fields.keys())
    assert expected_emp_fields.issubset(actual_emp_fields)

    # Test instantiation with nested data
    test_data = {
        "companies": [
            {
                "name": "TechCorp",
                "departments": [
                    {
                        "name": "Engineering",
                        "employees": [
                            {"name": "Alice", "role": "Senior Developer", "skills": ["Python", "JavaScript", "Docker"]},
                            {"name": "Bob", "role": "DevOps Engineer", "skills": ["Kubernetes", "AWS"]},
                        ],
                    },
                    {"name": "Marketing", "employees": [{"name": "Carol", "role": "Marketing Manager"}]},
                ],
            }
        ]
    }

    instance = Model(**test_data)
    assert len(instance.companies) == 1  # type: ignore[attr-defined]

    company = instance.companies[0]  # type: ignore[attr-defined]
    assert hasattr(company, "model_fields")  # type: ignore[reportUnknownArgumentType]
    assert company.name == "TechCorp"  # type: ignore[attr-defined]
    assert len(company.departments) == 2  # type: ignore[attr-defined]

    engineering_dept = company.departments[0]  # type: ignore[attr-defined]
    assert hasattr(engineering_dept, "model_fields")  # type: ignore[reportUnknownArgumentType]
    assert engineering_dept.name == "Engineering"  # type: ignore[attr-defined]
    assert len(engineering_dept.employees) == 2  # type: ignore[attr-defined]

    alice = engineering_dept.employees[0]  # type: ignore[attr-defined]
    assert hasattr(alice, "model_fields")  # type: ignore[reportUnknownArgumentType]
    assert alice.name == "Alice"  # type: ignore[attr-defined]
    assert alice.role == "Senior Developer"  # type: ignore[attr-defined]
    assert alice.skills == ["Python", "JavaScript", "Docker"]  # type: ignore[attr-defined]

0 commit comments

Comments
 (0)