11import json
22import logging
3- from copy import deepcopy
4- from typing import Any , Dict , Type
5-
6- import json_repair
73import litellm
84import pydantic
9- from pydantic import create_model
5+ import json_repair
6+
7+ from typing import Any , Dict , Type , get_origin
108from pydantic .fields import FieldInfo
119
12- from dspy .adapters . chat_adapter import ChatAdapter , FieldInfoWithName
10+ from dspy .clients . lm import LM
1311from dspy .adapters .utils import (
1412 format_field_value ,
1513 get_annotation_name ,
1614 parse_value ,
1715 serialize_for_json ,
1816 translate_field_type ,
1917)
20- from dspy .clients .lm import LM
2118from dspy .signatures .signature import Signature , SignatureMeta
19+ from dspy .adapters .chat_adapter import ChatAdapter , FieldInfoWithName
2220
2321logger = logging .getLogger (__name__ )
2422
2523
24+ def _has_open_ended_mapping (signature : SignatureMeta ) -> bool :
25+ """
26+ Check whether any output field in the signature has an open-ended mapping type,
27+ such as dict[str, Any]. Structured Outputs require explicit properties, so such fields
28+ are incompatible.
29+ """
30+ for name , field in signature .output_fields .items ():
31+ annotation = field .annotation
32+ if get_origin (annotation ) is dict :
33+ return True
34+ return False
35+
36+
2637class JSONAdapter (ChatAdapter ):
2738 def __call__ (
2839 self ,
@@ -35,14 +46,20 @@ def __call__(
3546 provider = lm .model .split ("/" , 1 )[0 ] or "openai"
3647 params = litellm .get_supported_openai_params (model = lm .model , custom_llm_provider = provider )
3748
38- # If response_format is not supported, use basic call
49+ # If response_format is not supported, use basic call.
3950 if not params or "response_format" not in params :
4051 return super ().__call__ (lm , lm_kwargs , signature , demos , inputs )
4152
42- # Try structured output first, fall back to basic json if it fails
53+ # Check early for open-ended mapping types before trying structured outputs.
54+ if _has_open_ended_mapping (signature ):
55+ lm_kwargs ["response_format" ] = {"type" : "json_object" }
56+ return super ().__call__ (lm , lm_kwargs , signature , demos , inputs )
57+
58+ # Try structured output first, fall back to basic JSON if it fails.
4359 try :
44- structured_output_format = self ._get_structured_outputs_response_format (signature )
45- lm_kwargs ["response_format" ] = structured_output_format
60+ structured_output_model = _get_structured_outputs_response_format (signature )
61+ print (structured_output_model .schema_json (indent = 2 ))
62+ lm_kwargs ["response_format" ] = structured_output_model
4663 return super ().__call__ (lm , lm_kwargs , signature , demos , inputs )
4764 except Exception as e :
4865 logger .warning (f"Failed to use structured output format. Falling back to JSON mode. Error: { e } " )
@@ -102,7 +119,7 @@ def parse(self, signature: Type[Signature], completion: str) -> dict[str, Any]:
102119 fields = json_repair .loads (completion )
103120 fields = {k : v for k , v in fields .items () if k in signature .output_fields }
104121
105- # attempt to cast each value to type signature.output_fields[k].annotation
122+ # Attempt to cast each value to type signature.output_fields[k].annotation.
106123 for k , v in fields .items ():
107124 if k in signature .output_fields :
108125 fields [k ] = parse_value (v , signature .output_fields [k ].annotation )
@@ -116,12 +133,12 @@ def format_field_with_value(self, fields_with_values: Dict[FieldInfoWithName, An
116133 """
117134 Formats the values of the specified fields according to the field's DSPy type (input or output),
118135 annotation (e.g. str, int, etc.), and the type of the value itself. Joins the formatted values
119- into a single string, which is is a multiline string if there are multiple fields.
136+ into a single string, which is a multiline string if there are multiple fields.
120137
121138 Args:
122- fields_with_values: A dictionary mapping information about a field to its corresponding value.
139+ fields_with_values: A dictionary mapping information about a field to its corresponding value.
123140 Returns:
124- The joined formatted values of the fields, represented as a string
141+ The joined formatted values of the fields, represented as a string.
125142 """
126143 if role == "user" :
127144 output = []
@@ -140,49 +157,73 @@ def format_finetune_data(
140157 # TODO: implement format_finetune_data method in JSONAdapter
141158 raise NotImplementedError
142159
def _get_structured_outputs_response_format(signature: SignatureMeta) -> type[pydantic.BaseModel]:
    """
    Build a Pydantic model from a DSPy signature's output fields whose JSON schema
    is compatible with OpenAI Structured Outputs: every object schema carries a
    "required" list naming all of its properties, and "additionalProperties" is
    always false.

    Args:
        signature: The DSPy signature whose output fields define the response format.
    Returns:
        A Pydantic model class whose ``model_json_schema()`` returns the sanitized schema.
    Raises:
        ValueError: If any output field is an open-ended mapping (e.g. dict[str, Any]).
            Structured Outputs require all properties to be declared explicitly, so the
            caller should fall back to a plain {"type": "json_object"} response format.
    """
    # Final guard: callers normally check this first via _has_open_ended_mapping,
    # but keep it here so this function is safe to call directly.
    for name, field in signature.output_fields.items():
        if get_origin(field.annotation) is dict:
            raise ValueError(
                f"Field '{name}' has an open-ended mapping type which is not supported by Structured Outputs."
            )

    # (annotation, default) pairs for create_model; a missing default (Ellipsis)
    # marks the field as required.
    fields = {
        name: (field.annotation, getattr(field, "default", ...))
        for name, field in signature.output_fields.items()
    }

    # Forbid extra fields so the generated schema declares additionalProperties=False.
    # Pydantic v2's create_model expects a ConfigDict here, not a v1-style Config class.
    model = pydantic.create_model(
        "DSPyProgramOutputs",
        **fields,
        __config__=pydantic.ConfigDict(extra="forbid"),
    )

    # Generate the schema once; we sanitize it below and serve the cached copy.
    schema = model.model_json_schema()

    # Strip DSPy-specific metadata that would otherwise leak into the schema.
    for prop in schema.get("properties", {}).values():
        prop.pop("json_schema_extra", None)

    def enforce_required(schema_part: dict) -> None:
        """
        Recursively pin down object schemas for Structured Outputs:
        - every object schema gets a "required" list covering all of its
          properties (or [] when it has none) and additionalProperties=False;
        - array items, anyOf/oneOf/allOf branches (produced e.g. by Optional
          fields), and $defs/definitions are visited so nested objects are
          covered as well.
        """
        if schema_part.get("type") == "object":
            props = schema_part.get("properties")
            if props is None:
                # Fallback for object schemas with no declared properties
                # (should not happen for models built above, but be safe).
                props = schema_part["properties"] = {}
            schema_part["required"] = list(props)
            schema_part["additionalProperties"] = False
            for sub_schema in props.values():
                if isinstance(sub_schema, dict):
                    enforce_required(sub_schema)
        items = schema_part.get("items")
        if schema_part.get("type") == "array" and isinstance(items, dict):
            enforce_required(items)
        # Union/Optional annotations surface as combinators — recurse into each branch.
        for combinator in ("anyOf", "oneOf", "allOf"):
            for sub_schema in schema_part.get(combinator, []):
                if isinstance(sub_schema, dict):
                    enforce_required(sub_schema)
        # Nested model definitions live under $defs (v2) or definitions (v1-style).
        for key in ("$defs", "definitions"):
            for def_schema in schema_part.get(key, {}).values():
                enforce_required(def_schema)

    enforce_required(schema)

    # Serve the precomputed, sanitized schema whenever LiteLLM asks for it.
    # *args/**kwargs absorb the implicit receiver regardless of how it is called.
    model.model_json_schema = lambda *args, **kwargs: schema

    return model
0 commit comments