Skip to content

Commit 4ef46fb

Browse files
committed
fix json parsing
1 parent 42c5293 commit 4ef46fb

File tree

3 files changed

+158
-32
lines changed

3 files changed

+158
-32
lines changed

optillm/plugins/deepthink/self_discover.py

Lines changed: 130 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -83,10 +83,11 @@ def _select_modules(self, task_description: str, task_examples: List[str] = None
8383
2. Select 3-7 reasoning modules that are most relevant for this task
8484
3. Consider both the complexity of the task and the complementary nature of different modules
8585
4. Avoid selecting too many similar modules
86+
5. IMPORTANT: Respond ONLY with a valid JSON array of numbers
8687
87-
Respond with a JSON list containing the numbers of the selected modules. For example: [1, 5, 9, 15, 23]
88+
Example response format: [1, 5, 9, 15, 23]
8889
89-
Selected modules:"""
90+
Selected modules (JSON array only):"""
9091

9192
response = self.client.chat.completions.create(
9293
model=self.model,
@@ -209,7 +210,10 @@ def _implement_structure(self, adapted_modules: List[str], task_description: str
209210
5. Ensure the structure flows logically from problem understanding to final answer
210211
6. The structure should be comprehensive enough to handle the complexity of the task
211212
212-
Create the reasoning structure in valid JSON format:"""
213+
7. IMPORTANT: Return ONLY valid JSON with double quotes around all property names and string values
214+
8. Do not include any text before or after the JSON structure
215+
216+
Valid JSON reasoning structure:"""
213217

214218
response = self.client.chat.completions.create(
215219
model=self.model,
@@ -222,33 +226,131 @@ def _implement_structure(self, adapted_modules: List[str], task_description: str
222226

223227
response_text = response.choices[0].message.content.strip()
224228

225-
# Extract JSON from response
229+
# Extract and parse JSON from response with improved error handling
230+
return self._parse_json_structure(response_text)
231+
232+
def _parse_json_structure(self, response_text: str) -> Dict[str, Any]:
    """Turn a raw model response into a reasoning-structure dict.

    Each extraction strategy is tried in turn; the first one that yields a
    non-empty ``dict`` wins. A strategy that raises, or that returns
    anything other than a non-empty dict, is skipped. When every strategy
    is exhausted a generic five-step structure is returned instead, so this
    method itself does not raise on unparseable input.

    Args:
        response_text: The text returned by the model.

    Returns:
        The parsed structure, or the built-in fallback structure.
    """
    parsers = (
        self._extract_json_strategy_1,
        self._extract_json_strategy_2,
        self._extract_json_strategy_3,
        self._clean_and_parse_strategy,
    )

    for attempt, parser in enumerate(parsers, start=1):
        try:
            parsed = parser(response_text)
        except Exception as e:
            # Any failure simply means "try the next strategy".
            logger.debug(f"Strategy {attempt} failed: {e}")
            continue

        if isinstance(parsed, dict) and parsed:
            logger.debug(f"Successfully parsed JSON using strategy {attempt}")
            return parsed

    logger.warning("All JSON parsing strategies failed. Using fallback structure.")
    logger.debug(f"Raw response that failed to parse: {response_text[:500]}...")

    # Generic structure used when nothing could be parsed.
    return {
        "problem_understanding": "Analyze and understand the problem requirements",
        "solution_approach": "Determine the best approach based on problem characteristics",
        "step_by_step_reasoning": "Work through the problem systematically",
        "verification": "Verify the solution is correct and complete",
        "final_answer": "State the final answer clearly",
    }
265+
266+
def _extract_json_strategy_1(self, text: str) -> Dict[str, Any]:
267+
"""Strategy 1: Find first complete JSON object with balanced braces."""
268+
start_idx = text.find('{')
269+
if start_idx == -1:
270+
raise ValueError("No opening brace found")
271+
272+
brace_count = 0
273+
end_idx = start_idx
274+
275+
for i in range(start_idx, len(text)):
276+
if text[i] == '{':
277+
brace_count += 1
278+
elif text[i] == '}':
279+
brace_count -= 1
280+
if brace_count == 0:
281+
end_idx = i + 1
282+
break
283+
284+
if brace_count != 0:
285+
raise ValueError("Unbalanced braces")
286+
287+
json_str = text[start_idx:end_idx]
288+
return json.loads(json_str)
289+
290+
def _extract_json_strategy_2(self, text: str) -> Dict[str, Any]:
291+
"""Strategy 2: Use regex with non-greedy matching."""
292+
# Look for JSON object with non-greedy matching
293+
json_match = re.search(r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}', text)
294+
if not json_match:
295+
raise ValueError("No JSON object found with regex")
296+
297+
json_str = json_match.group(0)
298+
return json.loads(json_str)
299+
300+
def _extract_json_strategy_3(self, text: str) -> Dict[str, Any]:
301+
"""Strategy 3: Extract between ```json``` code blocks."""
302+
patterns = [
303+
r'```json\s*([^`]+)```',
304+
r'```\s*([^`]+)```',
305+
r'`([^`]+)`'
306+
]
307+
308+
for pattern in patterns:
309+
match = re.search(pattern, text, re.DOTALL)
310+
if match:
311+
json_str = match.group(1).strip()
312+
try:
313+
return json.loads(json_str)
314+
except:
315+
continue
316+
317+
raise ValueError("No valid JSON found in code blocks")
318+
319+
def _clean_and_parse_strategy(self, text: str) -> Dict[str, Any]:
320+
"""Strategy 4: Clean common formatting issues and parse."""
321+
# Find JSON-like content
322+
json_match = re.search(r'\{.*\}', text, re.DOTALL)
323+
if not json_match:
324+
raise ValueError("No JSON-like content found")
325+
326+
json_str = json_match.group(0)
327+
328+
# Common cleanup operations
329+
cleanups = [
330+
# Fix single quotes to double quotes (but be careful about apostrophes)
331+
(r"(?<!\\)'([^']*)'(?=\s*[,}])", r'"\1"'),
332+
# Fix unquoted property names
333+
(r'([{,]\s*)([a-zA-Z_][a-zA-Z0-9_]*)\s*:', r'\1"\2":'),
334+
# Fix trailing commas
335+
(r',\s*([}\]])', r'\1'),
336+
# Fix extra commas
337+
(r',,+', r','),
338+
]
339+
340+
for pattern, replacement in cleanups:
341+
json_str = re.sub(pattern, replacement, json_str)
342+
343+
# Try parsing the cleaned JSON
226344
try:
227-
# Look for JSON object in the response
228-
json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
229-
if json_match:
230-
structure = json.loads(json_match.group(0))
345+
return json.loads(json_str)
346+
except json.JSONDecodeError as e:
347+
# One more attempt: try to fix the specific error location
348+
if "line 1 column 2" in str(e):
349+
# Common issue: extra characters at start
350+
json_str = re.sub(r'^[^{]*', '', json_str)
351+
return json.loads(json_str)
231352
else:
232-
# Fallback structure
233-
structure = {
234-
"problem_understanding": "Analyze and understand the problem requirements",
235-
"solution_approach": "Determine the best approach based on problem characteristics",
236-
"step_by_step_reasoning": "Work through the problem systematically",
237-
"verification": "Verify the solution is correct and complete",
238-
"final_answer": "State the final answer clearly"
239-
}
240-
241-
return structure
242-
243-
except Exception as e:
244-
logger.warning(f"Error parsing reasoning structure: {e}")
245-
# Return fallback structure
246-
return {
247-
"analysis": "Analyze the problem systematically",
248-
"approach": "Select appropriate solution method",
249-
"reasoning": "Apply step-by-step logical reasoning",
250-
"conclusion": "Draw final conclusion with supporting evidence"
251-
}
353+
raise e
252354

253355
def solve_with_structure(self, problem: str, reasoning_structure: Dict[str, Any]) -> str:
254356
"""

optillm/plugins/deepthink/uncertainty_cot.py

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -78,17 +78,21 @@ def generate_with_uncertainty_routing(
7878
# Evaluate confidence through consistency
7979
confidence_score = self._evaluate_confidence(sample_data)
8080

81+
# Log confidence evaluation details
82+
logger.debug(f"Confidence evaluation completed: {confidence_score:.3f}")
83+
logger.debug(f"Sample answers: {[sample['answer'][:50] + '...' if len(sample['answer']) > 50 else sample['answer'] for sample in sample_data if sample['answer']]}")
84+
8185
# Route decision based on confidence
8286
if confidence_score >= confidence_threshold:
8387
# High confidence: use majority vote
8488
final_response = self._majority_vote_response(sample_data)
8589
routing_decision = "majority_vote"
86-
logger.info(f"High confidence ({confidence_score:.3f}) - using majority vote")
90+
logger.info(f"High confidence ({confidence_score:.3f} >= {confidence_threshold}) - using majority vote")
8791
else:
8892
# Low confidence: use greedy sample
8993
final_response = greedy_sample
9094
routing_decision = "greedy"
91-
logger.info(f"Low confidence ({confidence_score:.3f}) - using greedy sample")
95+
logger.info(f"Low confidence ({confidence_score:.3f} < {confidence_threshold}) - using greedy sample")
9296

9397
return {
9498
"final_response": final_response,
@@ -206,10 +210,19 @@ def _evaluate_confidence(self, sample_data: List[Dict[str, Any]]) -> float:
206210
# Combine metrics (weighted average)
207211
confidence = (0.6 * answer_consistency + 0.4 * reasoning_consistency)
208212

209-
logger.debug(f"Answer consistency: {answer_consistency:.3f}")
210-
logger.debug(f"Reasoning consistency: {reasoning_consistency:.3f}")
213+
logger.debug(f"Answer consistency: {answer_consistency:.3f} (weight: 0.6)")
214+
logger.debug(f"Reasoning consistency: {reasoning_consistency:.3f} (weight: 0.4)")
211215
logger.debug(f"Combined confidence: {confidence:.3f}")
212216

217+
# Log additional details for debugging low confidence
218+
if confidence < 0.5:
219+
logger.debug(f"Low confidence detected. Sample count: {len(sample_data)}")
220+
logger.debug(f"Answers found: {len(answers)}, Thinking texts: {len(thinking_texts)}")
221+
if answers:
222+
logger.debug(f"Sample answers: {answers}")
223+
if len(answers) >= 2:
224+
logger.debug(f"Most common answer appears {max(Counter(answers).values())} times out of {len(answers)}")
225+
213226
return confidence
214227

215228
def _calculate_answer_consistency(self, answers: List[str]) -> float:
@@ -233,6 +246,9 @@ def _calculate_answer_consistency(self, answers: List[str]) -> float:
233246
# Calculate agreement ratio
234247
agreement_ratio = most_common_count / total_answers
235248

249+
logger.debug(f"Answer distribution: {dict(answer_counts)}")
250+
logger.debug(f"Agreement ratio: {agreement_ratio:.3f} ({most_common_count}/{total_answers})")
251+
236252
# Also consider semantic similarity for non-identical answers
237253
max_similarity = 0.0
238254
for i, ans1 in enumerate(normalized_answers):
@@ -264,6 +280,9 @@ def _calculate_reasoning_consistency(self, thinking_texts: List[str]) -> float:
264280
# Return average similarity
265281
avg_similarity = sum(similarities) / len(similarities)
266282

283+
logger.debug(f"Reasoning similarity pairs: {[f'{s:.3f}' for s in similarities]}")
284+
logger.debug(f"Average reasoning similarity: {avg_similarity:.3f}")
285+
267286
return min(avg_similarity, 1.0)
268287

269288
def _majority_vote_response(self, sample_data: List[Dict[str, Any]]) -> str:

optillm/plugins/deepthink_plugin.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,8 +110,11 @@ def run(
110110

111111
except Exception as e:
112112
logger.error(f"Error in Deep Think plugin: {str(e)}")
113+
logger.debug(f"Exception traceback:", exc_info=True)
114+
113115
# Fallback to simple generation
114116
try:
117+
logger.info("Attempting fallback to simple generation")
115118
response = client.chat.completions.create(
116119
model=model,
117120
messages=[
@@ -123,10 +126,12 @@ def run(
123126
top_p=config["top_p"]
124127
)
125128

129+
logger.info("Fallback generation successful")
126130
return response.choices[0].message.content.strip(), response.usage.completion_tokens
127131

128132
except Exception as fallback_error:
129133
logger.error(f"Fallback generation also failed: {str(fallback_error)}")
134+
logger.debug(f"Fallback exception traceback:", exc_info=True)
130135
return f"Error in Deep Think plugin: {str(e)}", 0
131136

132137
def _parse_config(request_config: Dict[str, Any]) -> Dict[str, Any]:

0 commit comments

Comments
 (0)