Commit 7448d36

Enhance placeholder tag cleanup and gap analysis
Added a robust cleanup function to remove all research placeholder tags from final reports. Improved gap analysis to prioritize placeholder tags and updated search logic to address high-priority gaps first. Increased default max_iterations and max_sources for more thorough research. Updated final report synthesis to ensure no placeholder tags remain.
1 parent d19eec2 commit 7448d36

File tree

2 files changed: +102 -11 lines changed

optillm/plugins/deep_research/research_engine.py

Lines changed: 98 additions & 7 deletions
```diff
@@ -61,6 +61,60 @@ def clean_reasoning_tags(text: str) -> str:
     return cleaned_text
 
 
+def cleanup_placeholder_tags(text: str) -> str:
+    """
+    Remove any remaining placeholder tags from the final report.
+
+    This is a final cleanup step to ensure no incomplete research tags remain
+    in the published report.
+
+    Args:
+        text: Research report text
+
+    Returns:
+        Text with all placeholder tags removed
+    """
+    if not text:
+        return text
+
+    # Patterns for research placeholder tags
+    placeholder_patterns = [
+        r'\[NEEDS RESEARCH[^\]]*\]',
+        r'\[SOURCE NEEDED[^\]]*\]',
+        r'\[RESEARCH NEEDED[^\]]*\]',
+        r'\[CITATION NEEDED[^\]]*\]',
+        r'\[MORE RESEARCH NEEDED[^\]]*\]',
+        r'\[REQUIRES INVESTIGATION[^\]]*\]',
+        r'\[TO BE RESEARCHED[^\]]*\]',
+        r'\[VERIFY[^\]]*\]',
+        r'\[CHECK[^\]]*\]',
+    ]
+
+    cleaned_text = text
+    for pattern in placeholder_patterns:
+        # Remove the placeholder tags
+        cleaned_text = re.sub(pattern, '', cleaned_text, flags=re.IGNORECASE)
+
+    # Also remove any lines that were entirely placeholder-based
+    lines = cleaned_text.split('\n')
+    filtered_lines = []
+
+    for line in lines:
+        # Skip lines that are mostly just removed placeholders (now empty or just punctuation)
+        stripped = line.strip()
+        if stripped and not re.match(r'^[\s\-\*\.\,\;\:]*$', stripped):
+            filtered_lines.append(line)
+        elif not stripped:  # Keep empty lines for formatting
+            filtered_lines.append(line)
+
+    # Rejoin and clean up extra whitespace
+    result = '\n'.join(filtered_lines)
+    result = re.sub(r'\n\s*\n\s*\n+', '\n\n', result)  # Collapse multiple empty lines to double
+    result = result.strip()
+
+    return result
+
+
 class DeepResearcher:
     """
     Implementation of Test-Time Diffusion Deep Researcher (TTD-DR) algorithm
```
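
For a quick sense of what the new helper does, here is an illustrative snippet exercising it; the import path follows the file shown above, but the snippet itself is not part of the commit and the draft text is made up:

```python
# Illustrative only, not part of the commit.
from optillm.plugins.deep_research.research_engine import cleanup_placeholder_tags

draft = (
    "Surface codes remain the leading approach. [CITATION NEEDED]\n"
    "- [SOURCE NEEDED]\n"
    "Logical qubit demos accelerated in 2024. [NEEDS RESEARCH: vendor claims]"
)

print(cleanup_placeholder_tags(draft))
# The tags are stripped, and the middle line (reduced to "- " after removal)
# is dropped; the two remaining sentences are joined by a single newline.
```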
```diff
@@ -71,7 +125,7 @@ class DeepResearcher:
     Based on: https://arxiv.org/abs/2507.16075v1
     """
 
-    def __init__(self, client, model: str, max_iterations: int = 5, max_sources: int = 10):
+    def __init__(self, client, model: str, max_iterations: int = 8, max_sources: int = 15):
         self.client = client
         self.model = model
         self.max_iterations = max_iterations
```
```diff
@@ -99,6 +153,21 @@ def __init__(self, client, model: str, max_iterations: int = 5, max_sources: int
         }
         self.gap_analysis_history = []  # Track identified gaps over time
 
+    def cleanup_placeholder_tags(self, text: str) -> str:
+        """
+        Remove any remaining placeholder tags from the final report.
+
+        This is a final cleanup step to ensure no incomplete research tags remain
+        in the published report.
+
+        Args:
+            text: Research report text
+
+        Returns:
+            Text with all placeholder tags removed
+        """
+        return cleanup_placeholder_tags(text)
+
     def decompose_query(self, system_prompt: str, initial_query: str) -> List[str]:
         """
         Decompose complex research query into focused sub-queries
```
```diff
@@ -394,26 +463,33 @@ def analyze_draft_gaps(self, current_draft: str, original_query: str) -> List[Di
         """
         gap_analysis_prompt = f"""
         Analyze the following research draft to identify specific gaps and areas that need external research.
+        Pay special attention to any placeholder tags like [NEEDS RESEARCH], [SOURCE NEEDED], etc.
 
         Original Query: {original_query}
 
         Current Draft:
         {current_draft}
 
+        PRIORITY ANALYSIS:
+        1. First, identify any [NEEDS RESEARCH], [SOURCE NEEDED], [CITATION NEEDED] or similar placeholder tags
+        2. Then identify other substantial gaps in content, evidence, or depth
+
        For each gap you identify, provide:
        1. SECTION: Which section has the gap
-        2. GAP_TYPE: [MISSING_INFO, OUTDATED_INFO, NEEDS_EVIDENCE, LACKS_DEPTH, NEEDS_EXAMPLES]
+        2. GAP_TYPE: [PLACEHOLDER_TAG, MISSING_INFO, OUTDATED_INFO, NEEDS_EVIDENCE, LACKS_DEPTH, NEEDS_EXAMPLES]
        3. SPECIFIC_NEED: Exactly what information is needed
        4. SEARCH_QUERY: A specific search query to address this gap
+        5. PRIORITY: [HIGH, MEDIUM, LOW] - HIGH for placeholder tags that need immediate resolution
 
        Format each gap as:
        GAP_ID: [number]
        SECTION: [section name]
        GAP_TYPE: [type]
        SPECIFIC_NEED: [what's missing]
        SEARCH_QUERY: [search query to find this info]
+        PRIORITY: [priority level]
 
-        Identify 3-5 most critical gaps.
+        Identify 3-6 most critical gaps, prioritizing any placeholder tags that need resolution.
        """
 
        try:
```
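
The commit does not touch the code that parses this response, but a hypothetical parser for the requested block format only needs one more key for the new PRIORITY field. The sketch below is not part of the commit; note that the lowercased keys line up with the `gap.get('priority', ...)` lookups in the next hunk:

```python
# A sketch only: the real parsing logic lives elsewhere in analyze_draft_gaps.
import re
from typing import Dict, List

def parse_gaps(response_text: str) -> List[Dict[str, str]]:
    gaps = []
    # Each chunk after a "GAP_ID: <n>" marker describes one gap
    for chunk in re.split(r'GAP_ID:\s*\d+', response_text)[1:]:
        gap = {}
        for key in ('SECTION', 'GAP_TYPE', 'SPECIFIC_NEED', 'SEARCH_QUERY', 'PRIORITY'):
            match = re.search(rf'{key}:\s*(.+)', chunk)
            if match:
                # Lowercased keys match the gap.get('priority', ...) lookups
                gap[key.lower()] = match.group(1).strip()
        if gap:
            gaps.append(gap)
    return gaps
```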
```diff
@@ -468,10 +544,17 @@ def analyze_draft_gaps(self, current_draft: str, original_query: str) -> List[Di
     def perform_gap_targeted_search(self, gaps: List[Dict[str, str]]) -> str:
         """
         Perform targeted searches based on identified gaps in the current draft
+        Prioritizes HIGH priority gaps (placeholder tags) first
         """
         all_results = []
 
-        for gap in gaps:
+        # Sort gaps by priority - HIGH priority first (placeholder tags)
+        sorted_gaps = sorted(gaps, key=lambda g: (
+            0 if g.get('priority', '').upper() == 'HIGH' else
+            1 if g.get('priority', '').upper() == 'MEDIUM' else 2
+        ))
+
+        for gap in sorted_gaps:
             search_query = gap.get('search_query', '')
             if not search_query:
                 continue
```
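
The sort key maps HIGH to 0, MEDIUM to 1, and anything else (including a missing priority field) to 2, so placeholder-tag gaps are searched first, and since `sorted` is stable, the model's ordering is preserved within each tier. A standalone illustration with made-up gaps:

```python
# Priorities are matched case-insensitively via .upper(); a missing field sorts last.
gaps = [
    {'search_query': 'surface code error thresholds', 'priority': 'medium'},
    {'search_query': 'TTD-DR benchmark results'},  # no priority: sorts last
    {'search_query': 'resolve [NEEDS RESEARCH] on 2024 demos', 'priority': 'HIGH'},
]

sorted_gaps = sorted(gaps, key=lambda g: (
    0 if g.get('priority', '').upper() == 'HIGH' else
    1 if g.get('priority', '').upper() == 'MEDIUM' else 2
))

print([g['search_query'] for g in sorted_gaps])
# ['resolve [NEEDS RESEARCH] on 2024 demos',
#  'surface code error thresholds',
#  'TTD-DR benchmark results']
```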
```diff
@@ -807,7 +890,8 @@ def research(self, system_prompt: str, initial_query: str) -> Tuple[str, int]:
             print(f"   - Quality scores: Completeness={completeness:.2f}, Improvement={improvement:.2f}")
 
             # Terminate if high quality achieved or minimal improvement
-            if completeness > 0.85 or improvement < 0.05:
+            # More lenient termination to ensure complete research
+            if completeness > 0.9 or (improvement < 0.03 and completeness > 0.7):
                 print("   - Quality threshold reached, research complete")
                 break
```
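
The practical effect of the new predicate is that a stalled draft no longer terminates unless it is already reasonably complete. A side-by-side sketch of the two conditions, with illustrative values:

```python
# Illustrative values, not from the codebase: the old predicate stopped a
# stalled draft regardless of completeness; the new one requires at least
# 70% completeness before treating a small improvement as convergence.
def should_stop_old(completeness: float, improvement: float) -> bool:
    return completeness > 0.85 or improvement < 0.05

def should_stop_new(completeness: float, improvement: float) -> bool:
    return completeness > 0.9 or (improvement < 0.03 and completeness > 0.7)

print(should_stop_old(0.60, 0.04))  # True: stopped despite a 60%-complete draft
print(should_stop_new(0.60, 0.04))  # False: keeps iterating
print(should_stop_new(0.92, 0.04))  # True: high completeness still terminates
```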

```diff
@@ -839,8 +923,11 @@ def finalize_research_report(self, system_prompt: str, original_query: str, fina
     3. Add a compelling title and executive summary
     4. Ensure smooth transitions between sections
     5. Add conclusion that directly addresses the original query
-    6. Remove any remaining [NEEDS RESEARCH] tags
-    7. Polish language and style for clarity and impact
+    6. **CRITICAL**: Remove ALL [NEEDS RESEARCH], [SOURCE NEEDED], and similar placeholder tags
+    7. Replace any remaining placeholders with actual content or remove incomplete sections
+    8. Polish language and style for clarity and impact
+
+    **IMPORTANT**: The final report must NOT contain any [NEEDS RESEARCH], [SOURCE NEEDED], [RESEARCH NEEDED], [CITATION NEEDED], or similar placeholder tags. If any placeholders remain, replace them with available information or remove the incomplete statements.
 
     Return the final polished research report.
     """
```
```diff
@@ -858,6 +945,10 @@ def finalize_research_report(self, system_prompt: str, original_query: str, fina
 
         polished_report = response.choices[0].message.content.strip()
         polished_report = clean_reasoning_tags(polished_report)
+
+        # Final cleanup: Remove any remaining placeholder tags
+        polished_report = self.cleanup_placeholder_tags(polished_report)
+
         self.total_tokens += response.usage.completion_tokens
 
         # Add references section
```
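
Since the LLM is only instructed to drop the tags, the programmatic `cleanup_placeholder_tags` pass above is the actual guarantee. A hypothetical sanity check, not part of the commit, that one could run on the finalized report:

```python
import re

def assert_no_placeholders(report: str) -> None:
    """Hypothetical post-condition check, not part of the commit."""
    leftovers = re.findall(
        r'\[(?:NEEDS RESEARCH|SOURCE NEEDED|RESEARCH NEEDED|CITATION NEEDED)[^\]]*\]',
        report,
        flags=re.IGNORECASE,
    )
    assert not leftovers, f"Placeholder tags survived finalization: {leftovers}"
```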

optillm/plugins/deep_research_plugin.py

Lines changed: 4 additions & 4 deletions
```diff
@@ -32,16 +32,16 @@ def run(system_prompt: str, initial_query: str, client, model: str, request_conf
         client: OpenAI client for LLM calls
         model: Model name to use for synthesis
         request_config: Optional configuration dict with keys:
-            - max_iterations: Maximum research iterations (default: 5)
-            - max_sources: Maximum web sources per search (default: 10)
+            - max_iterations: Maximum research iterations (default: 8)
+            - max_sources: Maximum web sources per search (default: 15)
 
     Returns:
         Tuple of (comprehensive_research_response, total_completion_tokens)
     """
     # Parse configuration
     config = request_config or {}
-    max_iterations = config.get("max_iterations", 5)
-    max_sources = config.get("max_sources", 10)
+    max_iterations = config.get("max_iterations", 8)  # Increased to 8 for thorough research
+    max_sources = config.get("max_sources", 15)  # Increased to 15 for comprehensive coverage
 
     # Validate inputs
     if not initial_query.strip():
```
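
Callers can still override the new defaults through `request_config`. A hedged usage sketch: it assumes an OpenAI-compatible endpoint, and the query and model name are placeholders:

```python
from openai import OpenAI
from optillm.plugins.deep_research_plugin import run

client = OpenAI()  # reads OPENAI_API_KEY from the environment

report, completion_tokens = run(
    system_prompt="You are a thorough research assistant.",
    initial_query="What changed in quantum error correction during 2024?",
    client=client,
    model="gpt-4o-mini",  # any model your endpoint accepts
    request_config={"max_iterations": 4, "max_sources": 8},  # override the 8/15 defaults
)
print(f"{completion_tokens} completion tokens used")
```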
