@@ -163,30 +163,74 @@ def _parse_content(content: Any, job: Optional['Job']) -> Tuple[str, List[Tuple[
163163 return "" .join (text_parts ), citation_blocks
164164
165165
def _extract_json_model(text: str, response_model: Type[BaseModel]) -> BaseModel | Dict | None:
    """Extract a JSON object from text and parse it into a Pydantic model.

    The JSON is located by trying, in order, a ```json fenced block, a fenced
    block whose fence is followed by a newline, any fenced block, and finally
    the span from the first ``{`` to the last ``}`` in the text.

    Args:
        text: Response text that may contain JSON, optionally inside a
            markdown code fence.
        response_model: Pydantic model class to instantiate from the JSON
            object's keys.

    Returns:
        - Pydantic model instance if validation succeeds
        - Dict with raw JSON data if JSON parsing succeeds but building the
          model fails
        - None if no JSON is found, it cannot be decoded, or the decoded
          value is not a JSON object
    """
    import re
    from pydantic import ValidationError

    json_str = None

    # Ordered from most to least specific; the first pattern that matches wins.
    patterns = [
        r'```json\s*([\s\S]*?)\s*```',         # More flexible: allows any whitespace
        r'```(?:json)?\s*\n([\s\S]*?)\n```',   # Original pattern
        r'```\s*([\s\S]*?)\s*```',             # Any code block
    ]

    for pattern_number, pattern in enumerate(patterns, start=1):
        match = re.search(pattern, text)
        if match:
            json_str = match.group(1).strip()
            logger.debug("Extracted JSON using pattern %d: %s", pattern_number, pattern)
            break

    if not json_str:
        # No fenced block (or an empty one): fall back to the outermost braces.
        start_idx = text.find('{')
        end_idx = text.rfind('}') + 1

        if start_idx == -1 or end_idx <= start_idx:
            logger.warning("No JSON structure found in response text")
            return None

        json_str = text[start_idx:end_idx]
        logger.debug("Extracted JSON by finding braces")

    # Try to parse JSON
    try:
        json_data = json.loads(json_str)
    except json.JSONDecodeError as e:
        logger.error("JSON decode failed at position %s: %s", e.pos, e.msg)
        logger.error("Attempted JSON string: %s...", json_str[:200])
        return None

    # json.loads may legally return a list, string, number, bool or None.
    # Only an object can be unpacked into the model or returned as the
    # documented raw-dict fallback, so anything else counts as a failure
    # instead of crashing on .keys() below.
    if not isinstance(json_data, dict):
        logger.warning(
            "Parsed JSON is a %s, not an object; cannot build %s",
            type(json_data).__name__,
            response_model.__name__,
        )
        return None

    logger.debug("Successfully parsed JSON with keys: %s", list(json_data))

    # Try to create Pydantic model
    try:
        model_instance = response_model(**json_data)
    except ValidationError as e:
        # Log validation errors but return the raw dict
        error_details = [
            f"{'.'.join(str(part) for part in error['loc'])}: {error['msg']}"
            for error in e.errors()
        ]
        logger.warning(
            "Pydantic validation failed for %s: %s",
            response_model.__name__,
            '; '.join(error_details),
        )
        logger.warning("Returning raw JSON data instead: %s", list(json_data))
        return json_data
    except Exception as e:
        # Deliberate best-effort: any other construction failure still hands
        # the caller the parsed data rather than discarding it.
        logger.error(
            "Unexpected error creating %s: %s: %s",
            response_model.__name__,
            type(e).__name__,
            e,
        )
        logger.warning("Returning raw JSON data instead: %s", list(json_data))
        return json_data

    logger.debug("Successfully created %s instance", response_model.__name__)
    return model_instance
191235
192236def _save_raw_response (result : Any , job_id : str , raw_files_dir : str ) -> None :
0 commit comments