22import json
33import re
44import textwrap
5- from typing import get_args , get_origin
5+ from typing import Any , Dict , KeysView , List , Literal , NamedTuple , get_args , get_origin
66
77import pydantic
88from pydantic import TypeAdapter
9+ from pydantic .fields import FieldInfo
910
11+ from ..signatures .field import OutputField
12+ from ..signatures .signature import SignatureMeta
13+ from ..signatures .utils import get_dspy_field_type
1014from .base import Adapter
1115
# Matches a field header such as "[[ ## answer ## ]]" and captures the field name
# (one or more word characters) in group 1.
field_header_pattern = re.compile(r"\[\[ ## (\w+) ## \]\]")
1317
1418
class FieldInfoWithName(NamedTuple):
    """
    A tuple containing a field name and its corresponding FieldInfo object.

    Instances are used as dictionary keys when fields are paired with their values for
    formatting, so both the name and the field metadata travel together with each value.
    """

    # The name of the field.
    name: str
    # The pydantic FieldInfo describing the field.
    info: FieldInfo
26+
27+
# Built-in field indicating that a chat turn (i.e. a user or assistant reply to a chat
# thread) has been completed. It is declared as an output field named "completed".
BuiltInCompletedOutputFieldInfo = FieldInfoWithName(name="completed", info=OutputField())
31+
32+
1533class ChatAdapter (Adapter ):
1634 def __init__ (self ):
1735 pass
@@ -79,29 +97,68 @@ def format_blob(blob):
7997 return f"«««\n { modified_blob } \n »»»"
8098
8199
def format_input_list_field_value(value: List[Any]) -> str:
    """
    Formats the value of an input field of type List[Any].

    An empty list is rendered as "N/A", a single-item list as that item's formatted blob, and
    a longer list as one "[i] <blob>" entry per item, numbered from 1 and separated by newlines.

    Args:
        value: The value of the list-type input field.
    Returns:
        A string representation of the input field's list value.
    """
    if not value:
        return "N/A"
    if len(value) == 1:
        # A lone item needs no numbering.
        return format_blob(value[0])

    # NOTE(review): whitespace inside these literals was reconstructed from a garbled diff
    # rendering — confirm the separator and entry format against the repository.
    return "\n".join(f"[{idx}] {format_blob(txt)}" for idx, txt in enumerate(value, start=1))
89115
90116
def _json_default(obj: Any) -> Any:
    """
    Fallback serializer for ``json.dumps``: converts pydantic models into JSON-compatible data and
    rejects every other unsupported type, as ``json.dumps`` itself would.
    """
    if isinstance(obj, pydantic.BaseModel):
        return obj.model_dump(mode="json")
    raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")


def _format_field_value(field_info: FieldInfo, value: Any) -> str:
    """
    Formats the value of the specified field according to the field's DSPy type (input or output),
    annotation (e.g. str, int, etc.), and the type of the value itself.

    Args:
        field_info: Information about the field, including its DSPy field type and annotation.
        value: The value of the field.
    Returns:
        The formatted value of the field, represented as a string.
    Raises:
        TypeError: If a list destined for JSON output contains an item that is neither
            JSON-serializable nor a pydantic model.
    """
    dspy_field_type: Literal["input", "output"] = get_dspy_field_type(field_info)

    if isinstance(value, list):
        if dspy_field_type == "input" or field_info.annotation is str:
            # If the field is an input field or has no special type requirements, format it as
            # numbered list so that it's organized in a way suitable for presenting long context
            # to an LLM (i.e. not JSON)
            return format_input_list_field_value(value)
        # If the field is an output field that has strict parsing requirements, format the
        # value as a stringified JSON Array. This ensures that downstream routines can parse
        # the field value correctly using methods from the `ujson` or `json` packages.
        # Pydantic models inside the list are dumped via the fallback instead of raising.
        return json.dumps(value, default=_json_default)

    if isinstance(value, pydantic.BaseModel):
        return value.model_dump_json()

    return str(value)
98144
99145
def format_fields(fields_with_values: Dict[FieldInfoWithName, Any]) -> str:
    """
    Formats the values of the specified fields according to the field's DSPy type (input or output),
    annotation (e.g. str, int, etc.), and the type of the value itself. Joins the formatted values
    into a single string, which is a multiline string if there are multiple fields.

    Each field is rendered as a "[[ ## <name> ## ]]" header line followed by its formatted value;
    consecutive fields are separated by a blank line, and the result is stripped of leading and
    trailing whitespace.

    Args:
        fields_with_values: A dictionary mapping information about a field to its corresponding
            value.
    Returns:
        The joined formatted values of the fields, represented as a string.
    """
    # NOTE(review): whitespace inside these literals was reconstructed from a garbled diff
    # rendering — confirm the header/value layout against the repository.
    sections = [
        f"[[ ## {field.name} ## ]]\n{_format_field_value(field_info=field.info, value=field_value)}"
        for field, field_value in fields_with_values.items()
    ]

    return "\n\n".join(sections).strip()
107164
@@ -121,21 +178,48 @@ def parse_value(value, annotation):
121178 return TypeAdapter (annotation ).validate_python (parsed_value )
122179
123180
124- def format_turn (signature , values , role , incomplete = False ):
181+ def format_turn (signature : SignatureMeta , values : Dict [str , Any ], role , incomplete = False ) -> Dict [str , str ]:
182+ """
183+ Constructs a new message ("turn") to append to a chat thread. The message is carefully formatted
184+ so that it can instruct an LLM to generate responses conforming to the specified DSPy signature.
185+
186+ Args:
187+ signature: The DSPy signature to which future LLM responses should conform.
188+ values: A dictionary mapping field names (from the DSPy signature) to corresponding values
189+ that should be included in the message.
190+ role: The role of the message, which can be either "user" or "assistant".
191+ incomplete: If True, indicates that output field values are present in the set of specified
192+ ``values``. If False, indicates that ``values`` only contains input field values.
193+ Returns:
194+ A chat message that can be appended to a chat thread. The message contains two string fields:
195+ ``role`` ("user" or "assistant") and ``content`` (the message text).
196+ """
125197 content = []
126198
127199 if role == "user" :
128- field_names = signature .input_fields . keys ()
200+ fields : Dict [ str , FieldInfo ] = signature .input_fields
129201 if incomplete :
130202 content .append ("This is an example of the task, though some input or output fields are not supplied." )
131203 else :
132- field_names , values = list (signature .output_fields .keys ()) + ["completed" ], {** values , "completed" : "" }
204+ fields : Dict [str , FieldInfo ] = signature .output_fields
205+ # Add the built-in field indicating that the chat turn has been completed
206+ fields [BuiltInCompletedOutputFieldInfo .name ] = BuiltInCompletedOutputFieldInfo .info
207+ values = {** values , BuiltInCompletedOutputFieldInfo .name : "" }
133208
134209 if not incomplete :
210+ field_names : KeysView = fields .keys ()
135211 if not set (values ).issuperset (set (field_names )):
136212 raise ValueError (f"Expected { field_names } but got { values .keys ()} " )
137213
138- content .append (format_fields ({k : values .get (k , "Not supplied for this particular example." ) for k in field_names }))
214+ formatted_fields = format_fields (
215+ fields_with_values = {
216+ FieldInfoWithName (name = field_name , info = field_info ): values .get (
217+ field_name , "Not supplied for this particular example."
218+ )
219+ for field_name , field_info in fields .items ()
220+ }
221+ )
222+ content .append (formatted_fields )
139223
140224 if role == "user" :
141225 content .append (
@@ -170,15 +254,23 @@ def enumerate_fields(fields):
170254 return "\n " .join (parts ).strip ()
171255
172256
173- def prepare_instructions (signature ):
257+ def prepare_instructions (signature : SignatureMeta ):
174258 parts = []
175259 parts .append ("Your input fields are:\n " + enumerate_fields (signature .input_fields ))
176260 parts .append ("Your output fields are:\n " + enumerate_fields (signature .output_fields ))
177261 parts .append ("All interactions will be structured in the following way, with the appropriate values filled in." )
178262
179- parts .append (format_fields ({f : f"{{{ f } }}" for f in signature .input_fields }))
180- parts .append (format_fields ({f : f"{{{ f } }}" for f in signature .output_fields }))
181- parts .append (format_fields ({"completed" : "" }))
263+ def format_signature_fields_for_instructions (fields : Dict [str , FieldInfo ]):
264+ return format_fields (
265+ fields_with_values = {
266+ FieldInfoWithName (name = field_name , info = field_info ): f"{{{ field_name } }}"
267+ for field_name , field_info in fields .items ()
268+ }
269+ )
270+
271+ parts .append (format_signature_fields_for_instructions (signature .input_fields ))
272+ parts .append (format_signature_fields_for_instructions (signature .output_fields ))
273+ parts .append (format_fields ({BuiltInCompletedOutputFieldInfo : "" }))
182274
183275 instructions = textwrap .dedent (signature .instructions )
184276 objective = ("\n " + " " * 8 ).join (["" ] + instructions .splitlines ())
0 commit comments