fix json parsing

codelion · codelion · commit 4ef46fb6c7f2 · 2025-05-25T07:26:08.000+08:00
diff --git a/optillm/plugins/deepthink/self_discover.py b/optillm/plugins/deepthink/self_discover.py
@@ -83,10 +83,11 @@ def _select_modules(self, task_description: str, task_examples: List[str] = None
 2. Select 3-7 reasoning modules that are most relevant for this task
 3. Consider both the complexity of the task and the complementary nature of different modules
 4. Avoid selecting too many similar modules
+5. IMPORTANT: Respond ONLY with a valid JSON array of numbers
 
-Respond with a JSON list containing the numbers of the selected modules. For example: [1, 5, 9, 15, 23]
+Example response format: [1, 5, 9, 15, 23]
 
-Selected modules:"""
+Selected modules (JSON array only):"""
 
         response = self.client.chat.completions.create(
             model=self.model,
@@ -209,7 +210,10 @@ def _implement_structure(self, adapted_modules: List[str], task_description: str
 5. Ensure the structure flows logically from problem understanding to final answer
 6. The structure should be comprehensive enough to handle the complexity of the task
 
-Create the reasoning structure in valid JSON format:"""
+7. IMPORTANT: Return ONLY valid JSON with double quotes around all property names and string values
+8. Do not include any text before or after the JSON structure
+
+Valid JSON reasoning structure:"""
 
         response = self.client.chat.completions.create(
             model=self.model,
@@ -222,33 +226,131 @@ def _implement_structure(self, adapted_modules: List[str], task_description: str
         
         response_text = response.choices[0].message.content.strip()
         
-        # Extract JSON from response
+        # Extract and parse JSON from response with improved error handling
+        return self._parse_json_structure(response_text)
+    
+    def _parse_json_structure(self, response_text: str) -> Dict[str, Any]:
+        """Parse JSON structure with robust error handling and cleanup."""
+        
+        # Define fallback structure
+        fallback_structure = {
+            "problem_understanding": "Analyze and understand the problem requirements",
+            "solution_approach": "Determine the best approach based on problem characteristics", 
+            "step_by_step_reasoning": "Work through the problem systematically",
+            "verification": "Verify the solution is correct and complete",
+            "final_answer": "State the final answer clearly"
+        }
+        
+        # Try multiple JSON extraction and parsing strategies
+        strategies = [
+            self._extract_json_strategy_1,
+            self._extract_json_strategy_2,
+            self._extract_json_strategy_3,
+            self._clean_and_parse_strategy
+        ]
+        
+        for i, strategy in enumerate(strategies, 1):
+            try:
+                structure = strategy(response_text)
+                if structure and isinstance(structure, dict) and len(structure) > 0:
+                    logger.debug(f"Successfully parsed JSON using strategy {i}")
+                    return structure
+            except Exception as e:
+                logger.debug(f"Strategy {i} failed: {e}")
+                continue
+        
+        logger.warning(f"All JSON parsing strategies failed. Using fallback structure.")
+        logger.debug(f"Raw response that failed to parse: {response_text[:500]}...")
+        return fallback_structure
+    
+    def _extract_json_strategy_1(self, text: str) -> Dict[str, Any]:
+        """Strategy 1: Find first complete JSON object with balanced braces."""
+        start_idx = text.find('{')
+        if start_idx == -1:
+            raise ValueError("No opening brace found")
+        
+        brace_count = 0
+        end_idx = start_idx
+        
+        for i in range(start_idx, len(text)):
+            if text[i] == '{':
+                brace_count += 1
+            elif text[i] == '}':
+                brace_count -= 1
+                if brace_count == 0:
+                    end_idx = i + 1
+                    break
+        
+        if brace_count != 0:
+            raise ValueError("Unbalanced braces")
+        
+        json_str = text[start_idx:end_idx]
+        return json.loads(json_str)
+    
+    def _extract_json_strategy_2(self, text: str) -> Dict[str, Any]:
+        """Strategy 2: Use regex with non-greedy matching."""
+        # Look for JSON object with non-greedy matching
+        json_match = re.search(r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}', text)
+        if not json_match:
+            raise ValueError("No JSON object found with regex")
+        
+        json_str = json_match.group(0)
+        return json.loads(json_str)
+    
+    def _extract_json_strategy_3(self, text: str) -> Dict[str, Any]:
+        """Strategy 3: Extract between ```json``` code blocks."""
+        patterns = [
+            r'```json\s*([^`]+)```',
+            r'```\s*([^`]+)```',
+            r'`([^`]+)`'
+        ]
+        
+        for pattern in patterns:
+            match = re.search(pattern, text, re.DOTALL)
+            if match:
+                json_str = match.group(1).strip()
+                try:
+                    return json.loads(json_str)
+                except:
+                    continue
+        
+        raise ValueError("No valid JSON found in code blocks")
+    
+    def _clean_and_parse_strategy(self, text: str) -> Dict[str, Any]:
+        """Strategy 4: Clean common formatting issues and parse."""
+        # Find JSON-like content
+        json_match = re.search(r'\{.*\}', text, re.DOTALL)
+        if not json_match:
+            raise ValueError("No JSON-like content found")
+        
+        json_str = json_match.group(0)
+        
+        # Common cleanup operations
+        cleanups = [
+            # Fix single quotes to double quotes (but be careful about apostrophes)
+            (r"(?<!\\)'([^']*)'(?=\s*[,}])", r'"\1"'),
+            # Fix unquoted property names
+            (r'([{,]\s*)([a-zA-Z_][a-zA-Z0-9_]*)\s*:', r'\1"\2":'),
+            # Fix trailing commas
+            (r',\s*([}\]])', r'\1'),
+            # Fix extra commas
+            (r',,+', r','),
+        ]
+        
+        for pattern, replacement in cleanups:
+            json_str = re.sub(pattern, replacement, json_str)
+        
+        # Try parsing the cleaned JSON
         try:
-            # Look for JSON object in the response
-            json_match = re.search(r'\{.*\}', response_text, re.DOTALL)
-            if json_match:
-                structure = json.loads(json_match.group(0))
+            return json.loads(json_str)
+        except json.JSONDecodeError as e:
+            # One more attempt: try to fix the specific error location
+            if "line 1 column 2" in str(e):
+                # Common issue: extra characters at start
+                json_str = re.sub(r'^[^{]*', '', json_str)
+                return json.loads(json_str)
             else:
-                # Fallback structure
-                structure = {
-                    "problem_understanding": "Analyze and understand the problem requirements",
-                    "solution_approach": "Determine the best approach based on problem characteristics",
-                    "step_by_step_reasoning": "Work through the problem systematically",
-                    "verification": "Verify the solution is correct and complete",
-                    "final_answer": "State the final answer clearly"
-                }
-            
-            return structure
-            
-        except Exception as e:
-            logger.warning(f"Error parsing reasoning structure: {e}")
-            # Return fallback structure
-            return {
-                "analysis": "Analyze the problem systematically",
-                "approach": "Select appropriate solution method",
-                "reasoning": "Apply step-by-step logical reasoning",
-                "conclusion": "Draw final conclusion with supporting evidence"
-            }
+                raise e
     
     def solve_with_structure(self, problem: str, reasoning_structure: Dict[str, Any]) -> str:
         """
diff --git a/optillm/plugins/deepthink/uncertainty_cot.py b/optillm/plugins/deepthink/uncertainty_cot.py
@@ -78,17 +78,21 @@ def generate_with_uncertainty_routing(
         # Evaluate confidence through consistency
         confidence_score = self._evaluate_confidence(sample_data)
         
+        # Log confidence evaluation details
+        logger.debug(f"Confidence evaluation completed: {confidence_score:.3f}")
+        logger.debug(f"Sample answers: {[sample['answer'][:50] + '...' if len(sample['answer']) > 50 else sample['answer'] for sample in sample_data if sample['answer']]}")
+        
         # Route decision based on confidence
         if confidence_score >= confidence_threshold:
             # High confidence: use majority vote
             final_response = self._majority_vote_response(sample_data)
             routing_decision = "majority_vote"
-            logger.info(f"High confidence ({confidence_score:.3f}) - using majority vote")
+            logger.info(f"High confidence ({confidence_score:.3f} >= {confidence_threshold}) - using majority vote")
         else:
             # Low confidence: use greedy sample
             final_response = greedy_sample
             routing_decision = "greedy"
-            logger.info(f"Low confidence ({confidence_score:.3f}) - using greedy sample")
+            logger.info(f"Low confidence ({confidence_score:.3f} < {confidence_threshold}) - using greedy sample")
         
         return {
             "final_response": final_response,
@@ -206,10 +210,19 @@ def _evaluate_confidence(self, sample_data: List[Dict[str, Any]]) -> float:
         # Combine metrics (weighted average)
         confidence = (0.6 * answer_consistency + 0.4 * reasoning_consistency)
         
-        logger.debug(f"Answer consistency: {answer_consistency:.3f}")
-        logger.debug(f"Reasoning consistency: {reasoning_consistency:.3f}")
+        logger.debug(f"Answer consistency: {answer_consistency:.3f} (weight: 0.6)")
+        logger.debug(f"Reasoning consistency: {reasoning_consistency:.3f} (weight: 0.4)")
         logger.debug(f"Combined confidence: {confidence:.3f}")
         
+        # Log additional details for debugging low confidence
+        if confidence < 0.5:
+            logger.debug(f"Low confidence detected. Sample count: {len(sample_data)}")
+            logger.debug(f"Answers found: {len(answers)}, Thinking texts: {len(thinking_texts)}")
+            if answers:
+                logger.debug(f"Sample answers: {answers}")
+            if len(answers) >= 2:
+                logger.debug(f"Most common answer appears {max(Counter(answers).values())} times out of {len(answers)}")
+        
         return confidence
     
     def _calculate_answer_consistency(self, answers: List[str]) -> float:
@@ -233,6 +246,9 @@ def _calculate_answer_consistency(self, answers: List[str]) -> float:
         # Calculate agreement ratio
         agreement_ratio = most_common_count / total_answers
         
+        logger.debug(f"Answer distribution: {dict(answer_counts)}")
+        logger.debug(f"Agreement ratio: {agreement_ratio:.3f} ({most_common_count}/{total_answers})")
+        
         # Also consider semantic similarity for non-identical answers
         max_similarity = 0.0
         for i, ans1 in enumerate(normalized_answers):
@@ -264,6 +280,9 @@ def _calculate_reasoning_consistency(self, thinking_texts: List[str]) -> float:
         # Return average similarity
         avg_similarity = sum(similarities) / len(similarities)
         
+        logger.debug(f"Reasoning similarity pairs: {[f'{s:.3f}' for s in similarities]}")
+        logger.debug(f"Average reasoning similarity: {avg_similarity:.3f}")
+        
         return min(avg_similarity, 1.0)
     
     def _majority_vote_response(self, sample_data: List[Dict[str, Any]]) -> str:
diff --git a/optillm/plugins/deepthink_plugin.py b/optillm/plugins/deepthink_plugin.py
@@ -110,8 +110,11 @@ def run(
         
     except Exception as e:
         logger.error(f"Error in Deep Think plugin: {str(e)}")
+        logger.debug(f"Exception traceback:", exc_info=True)
+        
         # Fallback to simple generation
         try:
+            logger.info("Attempting fallback to simple generation")
             response = client.chat.completions.create(
                 model=model,
                 messages=[
@@ -123,10 +126,12 @@ def run(
                 top_p=config["top_p"]
             )
             
+            logger.info("Fallback generation successful")
             return response.choices[0].message.content.strip(), response.usage.completion_tokens
             
         except Exception as fallback_error:
             logger.error(f"Fallback generation also failed: {str(fallback_error)}")
+            logger.debug(f"Fallback exception traceback:", exc_info=True)
             return f"Error in Deep Think plugin: {str(e)}", 0
 
 def _parse_config(request_config: Dict[str, Any]) -> Dict[str, Any]: