Skip to content

Commit 99ef439

Browse files
committed
feat: Add field-level descriptions to FieldSchema
- Add description field to Rust FieldSchema struct (Arc<str>)
- Update Python FieldSchema to include description field
- Extract Pydantic field descriptions during schema creation
- Update JSON schema builder to include field descriptions
- Add comprehensive tests for field description functionality

Resolves cocoindex-io#1074
1 parent 0dc1a48 commit 99ef439

File tree

4 files changed

+115
-3
lines changed

4 files changed

+115
-3
lines changed

python/cocoindex/tests/test_convert.py

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1715,3 +1715,81 @@ class MixedStruct:
17151715
order = OrderPydantic(order_id="O1", name="item1", price=10.0)
17161716
mixed = MixedStruct(name="test", pydantic_order=order)
17171717
validate_full_roundtrip(mixed, MixedStruct)
1718+
1719+
1720+
@pytest.mark.skipif(not PYDANTIC_AVAILABLE, reason="Pydantic not available")
1721+
def test_pydantic_field_descriptions() -> None:
1722+
"""Test that Pydantic field descriptions are extracted and included in schema."""
1723+
from pydantic import BaseModel, Field
1724+
1725+
class UserWithDescriptions(BaseModel):
1726+
"""A user model with field descriptions."""
1727+
name: str = Field(description="The user's full name")
1728+
age: int = Field(description="The user's age in years", ge=0, le=150)
1729+
email: str = Field(description="The user's email address")
1730+
is_active: bool = Field(description="Whether the user account is active", default=True)
1731+
1732+
# Test that field descriptions are extracted
1733+
encoded_schema = dump_engine_object(UserWithDescriptions)
1734+
1735+
# Check that the schema contains field descriptions
1736+
assert "fields" in encoded_schema["type"]
1737+
fields = encoded_schema["type"]["fields"]
1738+
1739+
# Find fields by name and check descriptions
1740+
field_descriptions = {field["name"]: field.get("description") for field in fields}
1741+
1742+
assert field_descriptions["name"] == "The user's full name"
1743+
assert field_descriptions["age"] == "The user's age in years"
1744+
assert field_descriptions["email"] == "The user's email address"
1745+
assert field_descriptions["is_active"] == "Whether the user account is active"
1746+
1747+
1748+
@pytest.mark.skipif(not PYDANTIC_AVAILABLE, reason="Pydantic not available")
1749+
def test_pydantic_field_descriptions_without_field() -> None:
1750+
"""Test that Pydantic models without field descriptions work correctly."""
1751+
from pydantic import BaseModel
1752+
1753+
class UserWithoutDescriptions(BaseModel):
1754+
"""A user model without field descriptions."""
1755+
name: str
1756+
age: int
1757+
email: str
1758+
1759+
# Test that the schema works without descriptions
1760+
encoded_schema = dump_engine_object(UserWithoutDescriptions)
1761+
1762+
# Check that the schema contains fields but no descriptions
1763+
assert "fields" in encoded_schema["type"]
1764+
fields = encoded_schema["type"]["fields"]
1765+
1766+
# Verify no descriptions are present
1767+
for field in fields:
1768+
assert "description" not in field or field["description"] is None
1769+
1770+
1771+
@pytest.mark.skipif(not PYDANTIC_AVAILABLE, reason="Pydantic not available")
1772+
def test_pydantic_mixed_descriptions() -> None:
1773+
"""Test Pydantic model with some fields having descriptions and others not."""
1774+
from pydantic import BaseModel, Field
1775+
1776+
class MixedDescriptions(BaseModel):
1777+
"""A model with mixed field descriptions."""
1778+
name: str = Field(description="The name field")
1779+
age: int # No description
1780+
email: str = Field(description="The email field")
1781+
active: bool # No description
1782+
1783+
# Test that only fields with descriptions have them in the schema
1784+
encoded_schema = dump_engine_object(MixedDescriptions)
1785+
1786+
assert "fields" in encoded_schema["type"]
1787+
fields = encoded_schema["type"]["fields"]
1788+
1789+
# Find fields by name and check descriptions
1790+
field_descriptions = {field["name"]: field.get("description") for field in fields}
1791+
1792+
assert field_descriptions["name"] == "The name field"
1793+
assert field_descriptions["age"] is None
1794+
assert field_descriptions["email"] == "The email field"
1795+
assert field_descriptions["active"] is None

python/cocoindex/typing.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -359,7 +359,7 @@ def _encode_struct_schema(
359359
) -> tuple[dict[str, Any], int | None]:
360360
fields = []
361361

362-
def add_field(name: str, analyzed_type: AnalyzedTypeInfo) -> None:
362+
def add_field(name: str, analyzed_type: AnalyzedTypeInfo, description: str | None = None) -> None:
363363
try:
364364
type_info = encode_enriched_type_info(analyzed_type)
365365
except ValueError as e:
@@ -369,6 +369,8 @@ def add_field(name: str, analyzed_type: AnalyzedTypeInfo) -> None:
369369
)
370370
raise
371371
type_info["name"] = name
372+
if description is not None:
373+
type_info["description"] = description
372374
fields.append(type_info)
373375

374376
def add_fields_from_struct(struct_type: type) -> None:
@@ -384,7 +386,9 @@ def add_fields_from_struct(struct_type: type) -> None:
384386
for name, field_info in struct_type.model_fields.items(): # type: ignore[attr-defined]
385387
# Get the annotation from the field info
386388
field_type = field_info.annotation
387-
add_field(name, analyze_type_info(field_type))
389+
# Extract description from Pydantic field info
390+
description = getattr(field_info, 'description', None)
391+
add_field(name, analyze_type_info(field_type), description)
388392
else:
389393
raise ValueError(f"Invalid Pydantic model: {struct_type}")
390394
else:
@@ -624,14 +628,21 @@ def encode(self) -> dict[str, Any]:
624628
class FieldSchema:
625629
name: str
626630
value_type: EnrichedValueType
631+
description: str | None = None
627632

628633
@staticmethod
629634
def decode(obj: dict[str, Any]) -> "FieldSchema":
630-
return FieldSchema(name=obj["name"], value_type=EnrichedValueType.decode(obj))
635+
return FieldSchema(
636+
name=obj["name"],
637+
value_type=EnrichedValueType.decode(obj),
638+
description=obj.get("description")
639+
)
631640

632641
def encode(self) -> dict[str, Any]:
633642
result = self.value_type.encode()
634643
result["name"] = self.name
644+
if self.description is not None:
645+
result["description"] = self.description
635646
return result
636647

637648

src/base/json_schema.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,10 @@ impl JsonSchemaBuilder {
219219
*instance_type = SingleOrVec::Vec(types);
220220
}
221221
}
222+
// Set field description if available
223+
if let Some(description) = &f.description {
224+
self.set_description(&mut schema, description, field_path.prepend(&f.name));
225+
}
222226
(f.name.to_string(), schema.into())
223227
})
224228
.collect(),

src/base/schema.rs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -325,20 +325,38 @@ pub struct FieldSchema<DataType = ValueType> {
325325

326326
#[serde(flatten)]
327327
pub value_type: EnrichedValueType<DataType>,
328+
329+
/// Optional description for the field.
330+
#[serde(default, skip_serializing_if = "Option::is_none")]
331+
pub description: Option<Arc<str>>,
328332
}
329333

330334
impl FieldSchema {
331335
pub fn new(name: impl ToString, value_type: EnrichedValueType) -> Self {
332336
Self {
333337
name: name.to_string(),
334338
value_type,
339+
description: None,
340+
}
341+
}
342+
343+
pub fn new_with_description(
344+
name: impl ToString,
345+
value_type: EnrichedValueType,
346+
description: Option<impl ToString>,
347+
) -> Self {
348+
Self {
349+
name: name.to_string(),
350+
value_type,
351+
description: description.map(|d| d.to_string().into()),
335352
}
336353
}
337354

338355
pub fn without_attrs(&self) -> Self {
339356
Self {
340357
name: self.name.clone(),
341358
value_type: self.value_type.without_attrs(),
359+
description: None,
342360
}
343361
}
344362
}
@@ -351,6 +369,7 @@ impl<DataType> FieldSchema<DataType> {
351369
Ok(Self {
352370
name: field.name.clone(),
353371
value_type: EnrichedValueType::from_alternative(&field.value_type)?,
372+
description: field.description.clone(),
354373
})
355374
}
356375
}

0 commit comments

Comments
 (0)