Skip to content

Commit daf37e0

Browse files
committed
pre-commit pass
1 parent d7db89c commit daf37e0

File tree

5 files changed

+75
-39
lines changed

5 files changed

+75
-39
lines changed

pyproject.toml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,3 +84,11 @@ strict = true
8484
files = "python/cocoindex"
8585
exclude = "(\\.venv|site-packages)"
8686
disable_error_code = ["unused-ignore"]
87+
[[tool.mypy.overrides]]
88+
module = [
89+
"sentence_transformers",
90+
"torch",
91+
"colpali_engine",
92+
"PIL",
93+
]
94+
ignore_missing_imports = true

python/cocoindex/tests/test_convert.py

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1724,21 +1724,24 @@ def test_pydantic_field_descriptions() -> None:
17241724

17251725
class UserWithDescriptions(BaseModel):
17261726
"""A user model with field descriptions."""
1727+
17271728
name: str = Field(description="The user's full name")
17281729
age: int = Field(description="The user's age in years", ge=0, le=150)
17291730
email: str = Field(description="The user's email address")
1730-
is_active: bool = Field(description="Whether the user account is active", default=True)
1731+
is_active: bool = Field(
1732+
description="Whether the user account is active", default=True
1733+
)
17311734

17321735
# Test that field descriptions are extracted
17331736
encoded_schema = dump_engine_object(UserWithDescriptions)
1734-
1737+
17351738
# Check that the schema contains field descriptions
17361739
assert "fields" in encoded_schema["type"]
17371740
fields = encoded_schema["type"]["fields"]
1738-
1741+
17391742
# Find fields by name and check descriptions
17401743
field_descriptions = {field["name"]: field.get("description") for field in fields}
1741-
1744+
17421745
assert field_descriptions["name"] == "The user's full name"
17431746
assert field_descriptions["age"] == "The user's age in years"
17441747
assert field_descriptions["email"] == "The user's email address"
@@ -1752,17 +1755,18 @@ def test_pydantic_field_descriptions_without_field() -> None:
17521755

17531756
class UserWithoutDescriptions(BaseModel):
17541757
"""A user model without field descriptions."""
1758+
17551759
name: str
17561760
age: int
17571761
email: str
17581762

17591763
# Test that the schema works without descriptions
17601764
encoded_schema = dump_engine_object(UserWithoutDescriptions)
1761-
1765+
17621766
# Check that the schema contains fields but no descriptions
17631767
assert "fields" in encoded_schema["type"]
17641768
fields = encoded_schema["type"]["fields"]
1765-
1769+
17661770
# Verify no descriptions are present
17671771
for field in fields:
17681772
assert "description" not in field or field["description"] is None
@@ -1775,20 +1779,21 @@ def test_pydantic_mixed_descriptions() -> None:
17751779

17761780
class MixedDescriptions(BaseModel):
17771781
"""A model with mixed field descriptions."""
1782+
17781783
name: str = Field(description="The name field")
17791784
age: int # No description
17801785
email: str = Field(description="The email field")
17811786
active: bool # No description
17821787

17831788
# Test that only fields with descriptions have them in the schema
17841789
encoded_schema = dump_engine_object(MixedDescriptions)
1785-
1790+
17861791
assert "fields" in encoded_schema["type"]
17871792
fields = encoded_schema["type"]["fields"]
1788-
1793+
17891794
# Find fields by name and check descriptions
17901795
field_descriptions = {field["name"]: field.get("description") for field in fields}
1791-
1796+
17921797
assert field_descriptions["name"] == "The name field"
17931798
assert field_descriptions["age"] is None
17941799
assert field_descriptions["email"] == "The email field"

python/cocoindex/typing.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -359,7 +359,9 @@ def _encode_struct_schema(
359359
) -> tuple[dict[str, Any], int | None]:
360360
fields = []
361361

362-
def add_field(name: str, analyzed_type: AnalyzedTypeInfo, description: str | None = None) -> None:
362+
def add_field(
363+
name: str, analyzed_type: AnalyzedTypeInfo, description: str | None = None
364+
) -> None:
363365
try:
364366
type_info = encode_enriched_type_info(analyzed_type)
365367
except ValueError as e:
@@ -387,7 +389,7 @@ def add_fields_from_struct(struct_type: type) -> None:
387389
# Get the annotation from the field info
388390
field_type = field_info.annotation
389391
# Extract description from Pydantic field info
390-
description = getattr(field_info, 'description', None)
392+
description = getattr(field_info, "description", None)
391393
add_field(name, analyze_type_info(field_type), description)
392394
else:
393395
raise ValueError(f"Invalid Pydantic model: {struct_type}")
@@ -633,9 +635,9 @@ class FieldSchema:
633635
@staticmethod
634636
def decode(obj: dict[str, Any]) -> "FieldSchema":
635637
return FieldSchema(
636-
name=obj["name"],
638+
name=obj["name"],
637639
value_type=EnrichedValueType.decode(obj),
638-
description=obj.get("description")
640+
description=obj.get("description"),
639641
)
640642

641643
def encode(self) -> dict[str, Any]:

src/base/json_schema.rs

Lines changed: 42 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -46,11 +46,14 @@ impl JsonSchemaBuilder {
4646
let mut fields: Vec<_> = field_path.iter().map(|f| f.as_str()).collect();
4747
fields.reverse();
4848
let field_path_str = fields.join(".");
49-
49+
5050
// Check if we already have a description for this field
51-
if let Some(existing_description) = self.extra_instructions_per_field.get(&field_path_str) {
51+
if let Some(existing_description) =
52+
self.extra_instructions_per_field.get(&field_path_str)
53+
{
5254
// Concatenate descriptions with newline separator
53-
let combined_description = format!("{}\n{}", existing_description, description.to_string());
55+
let combined_description =
56+
format!("{}\n{}", existing_description, description.to_string());
5457
self.extra_instructions_per_field
5558
.insert(field_path_str, combined_description);
5659
} else {
@@ -61,7 +64,8 @@ impl JsonSchemaBuilder {
6164
let metadata = schema.metadata.get_or_insert_default();
6265
if let Some(existing_description) = &metadata.description {
6366
// Concatenate descriptions with newline separator
64-
let combined_description = format!("{}\n{}", existing_description, description.to_string());
67+
let combined_description =
68+
format!("{}\n{}", existing_description, description.to_string());
6569
metadata.description = Some(combined_description);
6670
} else {
6771
metadata.description = Some(description.to_string());
@@ -378,25 +382,25 @@ pub fn build_json_schema(
378382
#[cfg(test)]
379383
mod tests {
380384
use super::*;
381-
use crate::base::schema::{BasicValueType, EnrichedValueType, FieldSchema, StructSchema, ValueType};
385+
use crate::base::schema::{
386+
BasicValueType, EnrichedValueType, FieldSchema, StructSchema, ValueType,
387+
};
382388
use std::sync::Arc;
383389

384390
#[test]
385391
fn test_description_concatenation() {
386392
// Create a struct with a field that has both field-level and type-level descriptions
387393
let struct_schema = StructSchema {
388394
description: Some(Arc::from("Test struct description")),
389-
fields: Arc::new(vec![
390-
FieldSchema {
391-
name: "uuid_field".to_string(),
392-
value_type: EnrichedValueType {
393-
typ: ValueType::Basic(BasicValueType::Uuid),
394-
nullable: false,
395-
attrs: Default::default(),
396-
},
397-
description: Some(Arc::from("This is a field-level description for UUID")),
395+
fields: Arc::new(vec![FieldSchema {
396+
name: "uuid_field".to_string(),
397+
value_type: EnrichedValueType {
398+
typ: ValueType::Basic(BasicValueType::Uuid),
399+
nullable: false,
400+
attrs: Default::default(),
398401
},
399-
]),
402+
description: Some(Arc::from("This is a field-level description for UUID")),
403+
}]),
400404
};
401405

402406
let enriched_value_type = EnrichedValueType {
@@ -413,21 +417,35 @@ mod tests {
413417
};
414418

415419
let result = build_json_schema(enriched_value_type, options).unwrap();
416-
420+
417421
// Check if the description contains both field and type descriptions
418422
if let Some(properties) = &result.schema.object {
419423
if let Some(uuid_field_schema) = properties.properties.get("uuid_field") {
420424
if let Schema::Object(schema_object) = uuid_field_schema {
421-
if let Some(description) = &schema_object.metadata.as_ref().and_then(|m| m.description.as_ref()) {
425+
if let Some(description) = &schema_object
426+
.metadata
427+
.as_ref()
428+
.and_then(|m| m.description.as_ref())
429+
{
422430
// Check if both descriptions are present
423-
assert!(description.contains("This is a field-level description for UUID"),
424-
"Field-level description not found in: {}", description);
425-
assert!(description.contains("A UUID, e.g. 123e4567-e89b-12d3-a456-426614174000"),
426-
"Type-level description not found in: {}", description);
427-
431+
assert!(
432+
description.contains("This is a field-level description for UUID"),
433+
"Field-level description not found in: {}",
434+
description
435+
);
436+
assert!(
437+
description
438+
.contains("A UUID, e.g. 123e4567-e89b-12d3-a456-426614174000"),
439+
"Type-level description not found in: {}",
440+
description
441+
);
442+
428443
// Check that they are separated by a newline
429-
assert!(description.contains("\n"),
430-
"Descriptions should be separated by newline: {}", description);
444+
assert!(
445+
description.contains("\n"),
446+
"Descriptions should be separated by newline: {}",
447+
description
448+
);
431449
} else {
432450
panic!("No description found in the schema");
433451
}

src/builder/flow_builder.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -364,8 +364,11 @@ impl FlowBuilder {
364364
.into_py_result()?;
365365
}
366366
let result = Self::last_field_to_data_slice(&self.root_op_scope).into_py_result()?;
367-
self.direct_input_fields
368-
.push(FieldSchema { name, value_type, description: None });
367+
self.direct_input_fields.push(FieldSchema {
368+
name,
369+
value_type,
370+
description: None,
371+
});
369372
Ok(result)
370373
}
371374

0 commit comments

Comments
 (0)