@@ -45,61 +45,111 @@ def _exponential_backoff(self, retry_count: int) -> None:
4545
def _extract_json_from_text(self, text: str) -> List[Dict[str, Any]]:
    """
    Extract a list of JSON objects from a text response.

    Handles both QA-pair and evaluation (score/justification) responses.
    Strategies are tried in order and the first usable result wins:

    1. ``json.loads`` on the entire text.
    2. The span from the first ``[`` to the last ``]``, parsed with
       ``json.loads``, then ``ast.literal_eval`` (Python-literal style,
       e.g. single-quoted strings), then ``json.loads`` again after a
       lossy clean-up (newlines/tabs become spaces, single quotes become
       double quotes).
    3. Regex extraction of ``"score"``/``"justification"`` pairs followed
       by ``"question"``/``"solution"`` pairs.

    Args:
        text: The text to parse. A list or dict is also accepted and is
            passed through (a dict is wrapped in a list).

    Returns:
        The parsed list; a top-level dict is wrapped in a one-element
        list. ``[]`` is returned when the input is not a str/list/dict,
        when the whole text is valid JSON but a scalar, or when an
        unexpected error occurs. ``[{"text": text}]`` is returned when no
        strategy produces a result.
    """

    def _as_list(value: Any):
        # Lists pass through, dicts are wrapped; None means "not usable".
        if isinstance(value, list):
            return value
        if isinstance(value, dict):
            return [value]
        return None

    # Non-string input: pass lists/dicts through, reject everything else.
    if not isinstance(text, str):
        passthrough = _as_list(text)
        return [] if passthrough is None else passthrough

    try:
        # First attempt: the entire text is valid JSON.
        try:
            whole = json.loads(text)
        except json.JSONDecodeError:
            pass  # Not pure JSON; fall through to the looser strategies.
        else:
            normalised = _as_list(whole)
            # Valid JSON that is a scalar (number, string, ...) has no items.
            return [] if normalised is None else normalised

        # Second attempt: the outermost [...] span embedded in the text.
        start_idx = text.find('[')
        end_idx = text.rfind(']') + 1
        # rfind() yields -1 when ']' is absent, which makes end_idx 0, so
        # comparing against start_idx is what detects a missing or
        # misplaced closing bracket.
        if start_idx != -1 and end_idx > start_idx:
            json_text = text[start_idx:end_idx]
            found = None
            try:
                found = _as_list(json.loads(json_text))
            except json.JSONDecodeError:
                try:
                    # literal_eval only evaluates literals, so it is safe
                    # on model output, but it raises TypeError as well as
                    # SyntaxError/ValueError on malformed input (e.g. an
                    # unhashable dict key).
                    found = _as_list(ast.literal_eval(json_text))
                except (SyntaxError, ValueError, TypeError):
                    # Lossy clean-up: may alter apostrophes inside values,
                    # so it is only used as a last structured attempt.
                    cleaned = (
                        json_text.replace('\n', ' ')
                        .replace('\\n', ' ')
                        .replace("'", '"')
                        .replace('\t', ' ')
                        .strip()
                    )
                    try:
                        found = _as_list(json.loads(cleaned))
                    except json.JSONDecodeError:
                        pass
            if found is not None:
                return found

        # Third attempt: pull known field pairs straight out of the text.
        score_pattern = r'"score":\s*(\d+\.?\d*),\s*"justification":\s*"([^"]*)"'
        qa_pattern = r'"question":\s*"([^"]*)",\s*"solution":\s*"([^"]*)"'

        results = [
            {"score": float(score), "justification": justification.strip()}
            for score, justification in re.findall(score_pattern, text, re.DOTALL)
        ]
        results.extend(
            {"question": question.strip(), "solution": solution.strip()}
            for question, solution in re.findall(qa_pattern, text, re.DOTALL)
        )
        if results:
            return results

        # Nothing matched: hand the raw text back so the caller can decide.
        return [{"text": text}]

    except Exception as e:
        # Best-effort boundary: parsing problems must not crash the caller.
        print(f"ERROR: JSON extraction failed: {str(e)}")
        print(f"Raw text: {text}")
        return []
101152
102- #raise JSONParsingError("No valid JSON structure found", text)
103153
104154
105155 def generate_response (self , prompt : str , retry_with_reduced_tokens : bool = True ) -> List [Dict [str , str ]]:
0 commit comments