Skip to content

Commit 5871b37

Browse files
committed
UPDATE
1 parent c59e207 commit 5871b37

File tree

9 files changed

+108
-67
lines changed

9 files changed

+108
-67
lines changed

examples/job_description.txt

Whitespace-only changes.

examples/news_article.txt

Whitespace-only changes.

examples/news_prompt.txt

Whitespace-only changes.

examples/news_schema.json

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
{
2+
"type": "object",
3+
"properties": {
4+
"headline": {
5+
"type": "string",
6+
"description": "Main headline of the news article"
7+
},
8+
"summary": {
9+
"type": "string",
10+
"description": "Brief summary of the article"
11+
},
12+
"publication_date": {
13+
"type": ["string", "null"],
14+
"description": "Publication date if mentioned"
15+
},
16+
"author": {
17+
"type": ["string", "null"],
18+
"description": "Author name if mentioned"
19+
},
20+
"location": {
21+
"type": ["string", "null"],
22+
"description": "Geographic location mentioned in the news"
23+
},
24+
"key_people": {
25+
"type": "array",
26+
"items": {
27+
"type": "string"
28+
},
29+
"description": "Names of key people mentioned in the article"
30+
},
31+
"organizations": {
32+
"type": "array",
33+
"items": {
34+
"type": "string"
35+
},
36+
"description": "Organizations or companies mentioned"
37+
},
38+
"category": {
39+
"type": ["string", "null"],
40+
"description": "News category (politics, technology, sports, etc.)"
41+
},
42+
"sentiment": {
43+
"type": ["string", "null"],
44+
"description": "Overall sentiment of the article (positive, negative, neutral)"
45+
}
46+
},
47+
"required": ["headline", "summary", "key_people", "organizations"],
48+
"additionalProperties": false
49+
}

examples/recipe.txt

Whitespace-only changes.

src/structured_output_cookbook/extractor.py

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,10 @@ def extract(
4444
# Use custom prompt or schema default
4545
prompt = system_prompt or schema.get_extraction_prompt()
4646

47+
# Generate schema and ensure additionalProperties is false
48+
schema_dict = schema.model_json_schema()
49+
self._ensure_additional_properties_false(schema_dict)
50+
4751
response = self.client.chat.completions.create(
4852
model=self.config.openai_model,
4953
messages=[
@@ -53,9 +57,9 @@ def extract(
5357
response_format={
5458
"type": "json_schema",
5559
"json_schema": {
56-
"name": schema.get_schema_name().lower(),
60+
"name": schema.get_schema_name().lower().replace(" ", "_"),
5761
"strict": True,
58-
"schema": schema.model_json_schema()
62+
"schema": schema_dict
5963
}
6064
},
6165
timeout=self.config.timeout_seconds
@@ -87,6 +91,24 @@ def extract(
8791
self.logger.error(f"Extraction failed: {e}")
8892
return ExtractionResult.error_result(str(e))
8993

94+
def _ensure_additional_properties_false(self, schema_dict: Dict[str, Any]) -> None:
    """Set ``additionalProperties: false`` on every object schema, in place.

    Walks ``schema_dict`` and every sub-schema reachable from it and marks
    each schema whose ``type`` is ``"object"`` -- or a type list that
    contains ``"object"``, e.g. ``["object", "null"]`` -- as closed to
    extra keys. The result is sent with ``"strict": True``, which is why
    every object has to be closed.

    Args:
        schema_dict: JSON Schema document to modify in place. Anything
            that is not a dict is ignored.

    Returns:
        None. The caller's dictionary is mutated.
    """
    if not isinstance(schema_dict, dict):
        return

    # Keywords whose value maps arbitrary names to sub-schemas. The map
    # itself is not a schema, and its keys are user-chosen names, so they
    # must never be interpreted as schema keywords.
    schema_maps = frozenset(
        ("properties", "patternProperties", "$defs", "definitions", "dependentSchemas")
    )
    # Keywords that carry literal data. A default such as
    # {"type": "object"} is a value, not a schema, and must stay untouched.
    literal_keys = frozenset(("default", "const", "enum", "examples"))

    # Explicit stack of (node, node_is_schema) pairs instead of recursion.
    pending = [(schema_dict, True)]
    while pending:
        node, is_schema = pending.pop()

        if isinstance(node, list):
            # e.g. anyOf / allOf / oneOf / prefixItems: each entry keeps
            # the role of the list that holds it.
            pending.extend((child, is_schema) for child in node)
            continue
        if not isinstance(node, dict):
            continue

        if is_schema:
            declared = node.get("type")
            if declared == "object" or (
                isinstance(declared, list) and "object" in declared
            ):
                node["additionalProperties"] = False

        for key, value in node.items():
            if is_schema and key in literal_keys:
                continue
            # Children of a name->schema map are schemas; the value of a
            # map keyword found on a schema is such a map.
            child_is_schema = not (is_schema and key in schema_maps)
            pending.append((value, child_is_schema))
111+
90112
def extract_with_custom_schema(
91113
self,
92114
text: str,
@@ -107,6 +129,9 @@ def extract_with_custom_schema(
107129
try:
108130
self.logger.info("Starting extraction with custom schema")
109131

132+
# Ensure custom schema has additionalProperties: false
133+
self._ensure_additional_properties_false(schema_dict)
134+
110135
response = self.client.chat.completions.create(
111136
model=self.config.openai_model,
112137
messages=[

src/structured_output_cookbook/schemas/base.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,19 @@
33
from abc import ABC, abstractmethod
44
from typing import Any, Dict, Type
55

6-
from pydantic import BaseModel
6+
from pydantic import BaseModel, ConfigDict
77

88

99
class BaseSchema(BaseModel, ABC):
1010
"""Abstract base class for all extraction schemas."""
1111

12+
model_config = ConfigDict(
13+
extra="forbid", # This generates additionalProperties: false
14+
validate_assignment=True,
15+
str_strip_whitespace=True
16+
)
17+
"""Abstract base class for all extraction schemas."""
18+
1219
@classmethod
1320
@abstractmethod
1421
def get_extraction_prompt(cls) -> str:

src/structured_output_cookbook/templates/job_description.py

Lines changed: 6 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
"""Job description extraction schema."""
22

3-
from typing import List, Optional
3+
from typing import List, Union
44
from pydantic import Field
55
from ..schemas.base import BaseSchema
66

@@ -10,41 +10,32 @@ class JobDescriptionSchema(BaseSchema):
1010

1111
title: str = Field(description="Job title or position name")
1212
company: str = Field(description="Company name")
13-
location: Optional[str] = Field(default=None, description="Job location")
14-
employment_type: Optional[str] = Field(
15-
default=None,
13+
location: Union[str, None] = Field(description="Job location")
14+
employment_type: Union[str, None] = Field(
1615
description="Employment type (full-time, part-time, contract, etc.)"
1716
)
18-
experience_level: Optional[str] = Field(
19-
default=None,
17+
experience_level: Union[str, None] = Field(
2018
description="Required experience level (entry, mid, senior, etc.)"
2119
)
22-
salary_range: Optional[str] = Field(
23-
default=None,
20+
salary_range: Union[str, None] = Field(
2421
description="Salary range or compensation information"
2522
)
2623
required_skills: List[str] = Field(
27-
default_factory=list,
2824
description="Required technical skills and technologies"
2925
)
3026
preferred_skills: List[str] = Field(
31-
default_factory=list,
3227
description="Preferred or nice-to-have skills"
3328
)
3429
responsibilities: List[str] = Field(
35-
default_factory=list,
3630
description="Key job responsibilities and duties"
3731
)
3832
requirements: List[str] = Field(
39-
default_factory=list,
4033
description="Job requirements and qualifications"
4134
)
4235
benefits: List[str] = Field(
43-
default_factory=list,
4436
description="Benefits and perks offered"
4537
)
46-
remote_work: Optional[bool] = Field(
47-
default=None,
38+
remote_work: Union[bool, None] = Field(
4839
description="Whether remote work is available"
4940
)
5041

src/structured_output_cookbook/templates/recipe.py

Lines changed: 18 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1,67 +1,36 @@
11
"""Recipe extraction schema."""
22

3-
from typing import List, Optional
4-
from pydantic import BaseModel, Field
3+
from typing import List, Union, Dict, Any
4+
from pydantic import BaseModel, Field, ConfigDict
55
from ..schemas.base import BaseSchema
66

77

88
class Ingredient(BaseModel):
99
"""Single ingredient with quantity and unit."""
1010

11+
model_config = ConfigDict(extra="forbid")
12+
1113
name: str = Field(description="Ingredient name")
12-
quantity: Optional[str] = Field(default=None, description="Amount needed")
13-
unit: Optional[str] = Field(default=None, description="Unit of measurement")
14-
notes: Optional[str] = Field(default=None, description="Additional notes")
14+
quantity: Union[str, None] = Field(description="Amount needed")
15+
unit: Union[str, None] = Field(description="Unit of measurement")
16+
notes: Union[str, None] = Field(description="Additional notes")
1517

1618

1719
class RecipeSchema(BaseSchema):
1820
"""Extract structured information from recipes."""
1921

2022
name: str = Field(description="Recipe name or title")
21-
description: Optional[str] = Field(
22-
default=None,
23-
description="Brief description of the dish"
24-
)
25-
cuisine: Optional[str] = Field(
26-
default=None,
27-
description="Cuisine type (Italian, Asian, etc.)"
28-
)
29-
difficulty: Optional[str] = Field(
30-
default=None,
31-
description="Difficulty level (easy, medium, hard)"
32-
)
33-
prep_time: Optional[str] = Field(
34-
default=None,
35-
description="Preparation time"
36-
)
37-
cook_time: Optional[str] = Field(
38-
default=None,
39-
description="Cooking time"
40-
)
41-
total_time: Optional[str] = Field(
42-
default=None,
43-
description="Total time required"
44-
)
45-
servings: Optional[int] = Field(
46-
default=None,
47-
description="Number of servings"
48-
)
49-
ingredients: List[Ingredient] = Field(
50-
default_factory=list,
51-
description="List of ingredients with quantities"
52-
)
53-
instructions: List[str] = Field(
54-
default_factory=list,
55-
description="Step-by-step cooking instructions"
56-
)
57-
tags: List[str] = Field(
58-
default_factory=list,
59-
description="Recipe tags (vegetarian, gluten-free, etc.)"
60-
)
61-
nutrition: Optional[dict] = Field(
62-
default=None,
63-
description="Nutritional information if available"
64-
)
23+
description: Union[str, None] = Field(description="Brief description of the dish")
24+
cuisine: Union[str, None] = Field(description="Cuisine type (Italian, Asian, etc.)")
25+
difficulty: Union[str, None] = Field(description="Difficulty level (easy, medium, hard)")
26+
prep_time: Union[str, None] = Field(description="Preparation time")
27+
cook_time: Union[str, None] = Field(description="Cooking time")
28+
total_time: Union[str, None] = Field(description="Total time required")
29+
servings: Union[int, None] = Field(description="Number of servings")
30+
ingredients: List[Ingredient] = Field(description="List of ingredients with quantities")
31+
instructions: List[str] = Field(description="Step-by-step cooking instructions")
32+
tags: List[str] = Field(description="Recipe tags (vegetarian, gluten-free, etc.)")
33+
nutrition: Union[Dict[str, Any], None] = Field(description="Nutritional information if available")
6534

6635
@classmethod
6736
def get_extraction_prompt(cls) -> str:

0 commit comments

Comments
 (0)