Skip to content

Conversation

@Davda-James
Copy link
Contributor

Description

CocoIndex now supports binding Pydantic model classes to CocoIndex struct types, and the docs have been updated to cover the new feature.

Closes Issue: #795

Tests passed
image

I also wrote a script (not pushed) to check that Pydantic models work; the code and its output are below.
Code:

"""
Example demonstrating Pydantic model support in CocoIndex.

This example shows how Pydantic models can be used as struct types
in CocoIndex, alongside traditional dataclasses and NamedTuples.
"""

import sys
import os
from pathlib import Path

# Put the sibling "python" directory at the front of sys.path so that
# `import cocoindex` below can be resolved from it.
project_root = Path(__file__).parent.parent
python_dir = project_root.joinpath("python")
sys.path.insert(0, f"{python_dir}")

# Pydantic is a hard requirement for this demo: without it there is nothing
# to show, so abort with a non-zero exit status.
try:
    from pydantic import BaseModel
except ImportError:
    PYDANTIC_AVAILABLE = False
    # sys.exit() with a string writes the message to stderr and exits with
    # status 1, keeping the diagnostic out of the demo's stdout.
    sys.exit("Pydantic not available - install it with: pip install pydantic")
else:
    PYDANTIC_AVAILABLE = True

from dataclasses import dataclass
from typing import NamedTuple
import datetime

# Import CocoIndex functions
import cocoindex.typing as ci_typing
import cocoindex.convert as ci_convert


# Define the same struct using different approaches
# Person record declared as a standard-library dataclass.
# NOTE(review): documented with comments rather than a docstring on purpose --
# the recorded demo output suggests the schema "description" is taken from the
# class's __doc__, so adding a docstring would change that output.
@dataclass
class PersonDataclass:
    name: str
    age: int
    score: float


# Person record declared as a typing.NamedTuple, with the same three fields
# (name, age, score) as the other two variants.
# NOTE(review): documented with comments rather than a docstring on purpose --
# the recorded demo output suggests the schema "description" is taken from the
# class's __doc__, so adding a docstring would change that output.
class PersonNamedTuple(NamedTuple):
    name: str
    age: int
    score: float


class PersonPydantic(BaseModel):
    """Person record declared as a Pydantic model."""

    # The explicit docstring above matters: without one the class inherits
    # BaseModel.__doc__, and the recorded demo output shows that entire text
    # being emitted as the struct schema's "description".
    name: str
    age: int
    score: float


def demonstrate_pydantic_support():
    """Print a walkthrough of Pydantic models used as CocoIndex struct types.

    The same three-field person record is declared as a dataclass, a
    NamedTuple and a Pydantic model.  For each declaration the demo prints
    the struct-type detection result, the encoded struct schema and the
    encoded engine value, and then exercises a few Pydantic-only features
    (validation, ``model_dump`` and ``model_validate``).

    Everything is written to stdout; nothing is returned.
    """
    struct_types = (PersonDataclass, PersonNamedTuple, PersonPydantic)
    labels = ("Dataclass", "NamedTuple", "Pydantic")

    print("=== CocoIndex Pydantic Support Demo ===\n")

    # 1. Detection helpers
    print("1. Type Detection:")
    for struct_type in struct_types:
        detected = ci_typing.is_struct_type(struct_type)
        print(f"   is_struct_type({struct_type.__name__}): {detected}")
    print(
        "   is_pydantic_model(PersonPydantic): "
        f"{ci_typing.is_pydantic_model(PersonPydantic)}"
    )
    print()

    # 2. Schema encoding
    print("2. Schema Encoding:")
    schemas = [ci_typing._encode_struct_schema(t) for t in struct_types]
    for label, schema in zip(labels, schemas):
        print(f"   {label} schema: {schema}")
    print(f"   All schemas equal: {all(s == schemas[0] for s in schemas)}")
    # Whole-schema equality also compares the free-text "description", which
    # differs per class, so report the field layout separately.
    # NOTE(review): assumes each schema is a (dict, ...) tuple whose dict has
    # a "fields" key, as in the recorded output -- confirm against
    # _encode_struct_schema.
    field_lists = [schema[0]["fields"] for schema in schemas]
    fields_equal = all(fields == field_lists[0] for fields in field_lists)
    print(f"   Field schemas equal (ignoring description): {fields_equal}")
    print()

    # 3. Value encoding
    print("3. Value Encoding:")
    people = [t(name="Alice", age=30, score=95.5) for t in struct_types]
    person_py = people[-1]  # the Pydantic instance, reused in sections 5 and 6
    encoders = [
        ci_convert.make_engine_value_encoder(ci_typing.analyze_type_info(t))
        for t in struct_types
    ]
    encoded_values = [encode(person) for encode, person in zip(encoders, people)]
    for label, encoded in zip(labels, encoded_values):
        print(f"   {label} encoded: {encoded}")
    print(
        "   All encodings equal: "
        f"{all(v == encoded_values[0] for v in encoded_values)}"
    )
    print()

    # 4. Compare every encoding (not only the dataclass one) with the
    #    expected positional list.
    print("4. Encoded Values Verification:")
    expected = ["Alice", 30, 95.5]
    print(f"   Expected: {expected}")
    print(f"   All match expected: {all(v == expected for v in encoded_values)}")
    print()

    # 5. Ordinary Pydantic attribute access and serialization
    print("5. Pydantic Model Features:")
    print(f"   person_py.name: {person_py.name}")
    print(f"   person_py.age: {person_py.age}")
    print(f"   person_py.score: {person_py.score}")
    print(f"   person_py.model_dump(): {person_py.model_dump()}")
    print()

    print("All Pydantic support tests passed!")
    print()
    print("Summary:")
    print("- Pydantic models are detected as struct types")
    print("- Schema encoding works correctly")
    print("- Value encoding produces expected results")
    print("- Pydantic models work alongside dataclasses and NamedTuples")
    print()

    # 6. Pydantic-specific behaviour
    print("6. Pydantic-Specific Features:")

    # Validation: a non-numeric age must be rejected.  Pydantic v2's
    # ValidationError derives from ValueError, so catch that rather than
    # every Exception.
    try:
        invalid_person = PersonPydantic(name="Bob", age="not_a_number", score=85.0)
    except ValueError as e:
        print(f"   Pydantic validation caught error: {type(e).__name__}: {e}")
    else:
        print(f"   Created invalid person: {invalid_person}")

    # Round-trip through a plain dict
    person_dict = person_py.model_dump()
    print(f"   Pydantic model_dump(): {person_dict}")

    person_from_dict = PersonPydantic.model_validate(person_dict)
    print(f"   Pydantic from dict: {person_from_dict}")
    print()

    print("=== Demo Complete ===")


# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demonstrate_pydantic_support()

Output:

=== CocoIndex Pydantic Support Demo ===

1. Type Detection:
   is_struct_type(PersonDataclass): True
   is_struct_type(PersonNamedTuple): True
   is_struct_type(PersonPydantic): True
   is_pydantic_model(PersonPydantic): True

2. Schema Encoding:
   Dataclass schema: ({'fields': [{'type': {'kind': 'Str'}, 'name': 'name'}, {'type': {'kind': 'Int64'}, 'name': 'age'}, {'type': {'kind': 'Float64'}, 'name': 'score'}], 'description': 'PersonDataclass(name: str, age: int, score: float)'}, None)
   NamedTuple schema: ({'fields': [{'type': {'kind': 'Str'}, 'name': 'name'}, {'type': {'kind': 'Int64'}, 'name': 'age'}, {'type': {'kind': 'Float64'}, 'name': 'score'}], 'description': 'PersonNamedTuple(name, age, score)'}, None)
   Pydantic schema: ({'fields': [{'type': {'kind': 'Str'}, 'name': 'name'}, {'type': {'kind': 'Int64'}, 'name': 'age'}, {'type': {'kind': 'Float64'}, 'name': 'score'}], 'description': '!!! abstract "Usage Documentation"\n    [Models](../concepts/models.md)\n\nA base class for creating Pydantic models.\n\nAttributes:\n    __class_vars__: The names of the class variables defined on the model.\n    __private_attributes__: Metadata about the private attributes of the model.\n    __signature__: The synthesized `__init__` [`Signature`][inspect.Signature] of the model.\n\n    __pydantic_complete__: Whether model building is completed, or if there are still undefined fields.\n    __pydantic_core_schema__: The core schema of the model.\n    __pydantic_custom_init__: Whether the model has a custom `__init__` function.\n    __pydantic_decorators__: Metadata containing the decorators defined on the model.\n        This replaces `Model.__validators__` and `Model.__root_validators__` from Pydantic V1.\n    __pydantic_generic_metadata__: Metadata for generic models; contains data used for a similar purpose to\n        __args__, __origin__, __parameters__ in typing-module generics. 
May eventually be replaced by these.\n    __pydantic_parent_namespace__: Parent namespace of the model, used for automatic rebuilding of models.\n    __pydantic_post_init__: The name of the post-init method for the model, if defined.\n    __pydantic_root_model__: Whether the model is a [`RootModel`][pydantic.root_model.RootModel].\n    __pydantic_serializer__: The `pydantic-core` `SchemaSerializer` used to dump instances of the model.\n    __pydantic_validator__: The `pydantic-core` `SchemaValidator` used to validate instances of the model.\n\n    __pydantic_fields__: A dictionary of field names and their corresponding [`FieldInfo`][pydantic.fields.FieldInfo] objects.\n    __pydantic_computed_fields__: A dictionary of computed field names and their corresponding [`ComputedFieldInfo`][pydantic.fields.ComputedFieldInfo] objects.\n\n    __pydantic_extra__: A dictionary containing extra values, if [`extra`][pydantic.config.ConfigDict.extra]\n        is set to `\'allow\'`.\n    __pydantic_fields_set__: The names of fields explicitly set during instantiation.\n    __pydantic_private__: Values of private attributes set on the model instance.'}, None)       
   All schemas equal: False

3. Value Encoding:
   Dataclass encoded: ['Alice', 30, 95.5]
   NamedTuple encoded: ['Alice', 30, 95.5]
   Pydantic encoded: ['Alice', 30, 95.5]
   All encodings equal: True

4. Encoded Values Verification:
   Expected: ['Alice', 30, 95.5]
   All match expected: True

5. Pydantic Model Features:
   person_py.name: Alice
   person_py.age: 30
   person_py.score: 95.5
   person_py.model_dump(): {'name': 'Alice', 'age': 30, 'score': 95.5}

All Pydantic support tests passed!

Summary:
- Pydantic models are detected as struct types
- Schema encoding works correctly
- Value encoding produces expected results
- Pydantic models work alongside dataclasses and NamedTuples

6. Pydantic-Specific Features:
   Pydantic validation caught error: ValidationError: 1 validation error for PersonPydantic
age
  Input should be a valid integer, unable to parse string as an integer [type=int_parsing, input_value='not_a_number', input_type=str]
    For further information visit https://errors.pydantic.dev/2.11/v/int_parsing
   Pydantic model_dump(): {'name': 'Alice', 'age': 30, 'score': 95.5}
   Pydantic from dict: name='Alice' age=30 score=95.5

=== Demo Complete ===

Copy link
Member

@georgeh0 georgeh0 left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

very high quality implementation! thanks!

@georgeh0 georgeh0 merged commit d309076 into cocoindex-io:main Oct 1, 2025
9 checks passed
@badmonster0
Copy link
Member

Hi @Davda-James, the latest release notes are out and we made a section for you: https://cocoindex.io/blogs/cocoindex-changelog-2025-10-19#davda-james. Thanks for your contribution!

@Davda-James
Copy link
Contributor Author

Thanks @badmonster0 @georgeh0

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants