@@ -93,38 +93,65 @@ def get_history(self) -> List[Dict]:
93
93
94
94
Returns:
95
95
List[Dict]: The list of messages in the chat history as dictionaries.
96
- Each dictionary has 'role' and 'content' keys, where 'content' is a list
97
- that may contain strings ( JSON) or multimodal objects.
96
+ Each dictionary has 'role' and 'content' keys, where 'content' contains
97
+ either a single JSON string or a mixed array of JSON and multimodal objects.
98
98
99
99
Note:
100
- This method does not support nested multimodal content. If your schema
101
- contains nested objects that themselves contain multimodal content,
102
- only the top-level multimodal content will be properly processed.
103
-
100
+ This method supports multimodal content by keeping multimodal objects
101
+ separate while generating cohesive JSON for text-based fields.
104
102
"""
105
103
history = []
106
104
for message in self .history :
107
105
input_content = message .content
108
- processed_content = []
109
- for field_name , field in input_content .__class__ .model_fields .items ():
110
106
107
+ # Check if content has any multimodal fields
108
+ multimodal_objects = []
109
+ has_multimodal = False
110
+
111
+ # Extract multimodal content first
112
+ for field_name , field in input_content .__class__ .model_fields .items ():
111
113
field_value = getattr (input_content , field_name )
112
114
113
115
if isinstance (field_value , list ):
114
- has_multimodal_in_list = False
115
116
for item in field_value :
116
117
if isinstance (item , INSTRUCTOR_MULTIMODAL_TYPES ):
117
- processed_content .append (item )
118
- has_multimodal_in_list = True
119
- if not has_multimodal_in_list :
120
- processed_content .append (input_content .model_dump_json (include = {field_name }))
121
- else :
122
- if isinstance (field_value , INSTRUCTOR_MULTIMODAL_TYPES ):
123
- processed_content .append (field_value )
124
- else :
125
- processed_content .append (input_content .model_dump_json (include = {field_name }))
126
-
127
- history .append ({"role" : message .role , "content" : processed_content })
118
+ multimodal_objects .append (item )
119
+ has_multimodal = True
120
+ elif isinstance (field_value , INSTRUCTOR_MULTIMODAL_TYPES ):
121
+ multimodal_objects .append (field_value )
122
+ has_multimodal = True
123
+
124
+ if has_multimodal :
125
+ # For multimodal content: create mixed array with JSON + multimodal objects
126
+ processed_content = []
127
+
128
+ # Add single cohesive JSON for all non-multimodal fields
129
+ non_multimodal_data = {}
130
+ for field_name , field in input_content .__class__ .model_fields .items ():
131
+ field_value = getattr (input_content , field_name )
132
+
133
+ if isinstance (field_value , list ):
134
+ # Only include non-multimodal items from lists
135
+ non_multimodal_items = [
136
+ item for item in field_value if not isinstance (item , INSTRUCTOR_MULTIMODAL_TYPES )
137
+ ]
138
+ if non_multimodal_items :
139
+ non_multimodal_data [field_name ] = non_multimodal_items
140
+ elif not isinstance (field_value , INSTRUCTOR_MULTIMODAL_TYPES ):
141
+ non_multimodal_data [field_name ] = field_value
142
+
143
+ # Add single JSON string if there are non-multimodal fields
144
+ if non_multimodal_data :
145
+ processed_content .append (json .dumps (non_multimodal_data , ensure_ascii = False ))
146
+
147
+ # Add all multimodal objects
148
+ processed_content .extend (multimodal_objects )
149
+
150
+ history .append ({"role" : message .role , "content" : processed_content })
151
+ else :
152
+ # No multimodal content: generate single cohesive JSON string
153
+ content_json = input_content .model_dump_json ()
154
+ history .append ({"role" : message .role , "content" : content_json })
128
155
129
156
return history
130
157
0 commit comments