@@ -61,6 +61,60 @@ def clean_reasoning_tags(text: str) -> str:
6161 return cleaned_text
6262
6363
def cleanup_placeholder_tags(text: str) -> str:
    """
    Remove any remaining placeholder tags from the final report.

    This is a final cleanup step to ensure no incomplete research tags remain
    in the published report. Matching is case-insensitive and a tag may carry
    free text after the keyword, e.g. ``[NEEDS RESEARCH: market size]``.

    Behaviour:
      * The keyword must end at a word boundary, so ``[Checklist]`` or
        ``[Checked]`` are left alone.
      * The generic ``[VERIFY ...]`` / ``[CHECK ...]`` tags are NOT removed
        when immediately followed by ``(``, because that is a Markdown link
        such as ``[Check the docs](https://...)``.
      * Horizontal whitespace directly before a removed tag is removed with
        it when it follows visible text, so ``5% [CITATION NEEDED].``
        becomes ``5%.``. Leading indentation is never touched.
      * A line is dropped only if a tag was removed from it and nothing but
        punctuation remains (e.g. a bullet ``- [SOURCE NEEDED]``). Lines that
        were punctuation-only to begin with, such as ``---`` rules, are kept.
      * Runs of three or more newlines are collapsed to one blank line and
        the result is stripped of leading/trailing whitespace.

    Args:
        text: Research report text

    Returns:
        Text with all placeholder tags removed. Falsy input (``""`` or
        ``None``) is returned unchanged.
    """
    if not text:
        return text

    # Multi-word phrases are unambiguous research placeholders.
    phrase_tags = (
        'NEEDS RESEARCH',
        'SOURCE NEEDED',
        'RESEARCH NEEDED',
        'CITATION NEEDED',
        'MORE RESEARCH NEEDED',
        'REQUIRES INVESTIGATION',
        'TO BE RESEARCHED',
    )
    # Single common words: these collide with ordinary Markdown link text,
    # so they get an extra "not a link" guard below.
    generic_tags = ('VERIFY', 'CHECK')

    phrase_alt = '|'.join(phrase_tags)
    generic_alt = '|'.join(generic_tags)
    tag_pattern = re.compile(
        # Swallow the gap before the tag, but only after visible text so
        # that leading indentation survives.
        r'(?:(?<=\S)[ \t]+)?'
        r'\[(?:'
        rf'(?:{phrase_alt})\b[^\]]*\]'
        rf'|(?:{generic_alt})\b[^\]]*\](?!\()'
        r')',
        flags=re.IGNORECASE,
    )

    # Rebuild the text without the tags while recording which *output* lines
    # had a tag removed; only those lines may later be dropped.
    kept_segments = []
    touched_lines = set()
    line_no = 0
    cursor = 0
    for match in tag_pattern.finditer(text):
        segment = text[cursor:match.start()]
        kept_segments.append(segment)
        line_no += segment.count('\n')
        touched_lines.add(line_no)
        cursor = match.end()
    kept_segments.append(text[cursor:])
    cleaned_text = ''.join(kept_segments)

    # Drop lines that were reduced to bare punctuation by a removal. Empty
    # lines never match (the pattern needs at least one character), so they
    # are kept for formatting.
    punctuation_only = re.compile(r'[\s\-*.,;:]+')
    filtered_lines = [
        line
        for index, line in enumerate(cleaned_text.split('\n'))
        if not (
            index in touched_lines
            and punctuation_only.fullmatch(line.strip())
        )
    ]

    # Rejoin and clean up extra whitespace
    result = '\n'.join(filtered_lines)
    result = re.sub(r'\n\s*\n\s*\n+', '\n\n', result)  # Multiple empty lines to double
    return result.strip()
116+
117+
64118class DeepResearcher :
65119 """
66120 Implementation of Test-Time Diffusion Deep Researcher (TTD-DR) algorithm
@@ -71,7 +125,7 @@ class DeepResearcher:
71125 Based on: https://arxiv.org/abs/2507.16075v1
72126 """
73127
74- def __init__ (self , client , model : str , max_iterations : int = 5 , max_sources : int = 10 ):
128+ def __init__ (self , client , model : str , max_iterations : int = 8 , max_sources : int = 15 ):
75129 self .client = client
76130 self .model = model
77131 self .max_iterations = max_iterations
@@ -99,6 +153,21 @@ def __init__(self, client, model: str, max_iterations: int = 5, max_sources: int
99153 }
100154 self .gap_analysis_history = [] # Track identified gaps over time
101155
def cleanup_placeholder_tags(self, text: str) -> str:
    """
    Strip leftover research placeholder tags from a report.

    Instance-level convenience wrapper: the work is delegated unchanged to
    the module-level ``cleanup_placeholder_tags`` function, and no instance
    state is read or modified.

    Args:
        text: Research report text

    Returns:
        Whatever the module-level ``cleanup_placeholder_tags`` returns for
        ``text``
    """
    # Inside the method body this name resolves to the module-level
    # function, not to this method, so the call does not recurse.
    cleaned_report = cleanup_placeholder_tags(text)
    return cleaned_report
170+
102171 def decompose_query (self , system_prompt : str , initial_query : str ) -> List [str ]:
103172 """
104173 Decompose complex research query into focused sub-queries
@@ -394,26 +463,33 @@ def analyze_draft_gaps(self, current_draft: str, original_query: str) -> List[Di
394463 """
395464 gap_analysis_prompt = f"""
396465 Analyze the following research draft to identify specific gaps and areas that need external research.
466+ Pay special attention to any placeholder tags like [NEEDS RESEARCH], [SOURCE NEEDED], etc.
397467
398468 Original Query: { original_query }
399469
400470 Current Draft:
401471 { current_draft }
402472
473+ PRIORITY ANALYSIS:
474+ 1. First, identify any [NEEDS RESEARCH], [SOURCE NEEDED], [CITATION NEEDED] or similar placeholder tags
475+ 2. Then identify other substantial gaps in content, evidence, or depth
476+
403477 For each gap you identify, provide:
404478 1. SECTION: Which section has the gap
405- 2. GAP_TYPE: [MISSING_INFO, OUTDATED_INFO, NEEDS_EVIDENCE, LACKS_DEPTH, NEEDS_EXAMPLES]
479+ 2. GAP_TYPE: [PLACEHOLDER_TAG, MISSING_INFO, OUTDATED_INFO, NEEDS_EVIDENCE, LACKS_DEPTH, NEEDS_EXAMPLES]
406480 3. SPECIFIC_NEED: Exactly what information is needed
407481 4. SEARCH_QUERY: A specific search query to address this gap
482+ 5. PRIORITY: [HIGH, MEDIUM, LOW] - HIGH for placeholder tags that need immediate resolution
408483
409484 Format each gap as:
410485 GAP_ID: [number]
411486 SECTION: [section name]
412487 GAP_TYPE: [type]
413488 SPECIFIC_NEED: [what's missing]
414489 SEARCH_QUERY: [search query to find this info]
490+ PRIORITY: [priority level]
415491
416- Identify 3-5 most critical gaps.
492+ Identify 3-6 most critical gaps, prioritizing any placeholder tags that need resolution .
417493 """
418494
419495 try :
@@ -468,10 +544,17 @@ def analyze_draft_gaps(self, current_draft: str, original_query: str) -> List[Di
468544 def perform_gap_targeted_search (self , gaps : List [Dict [str , str ]]) -> str :
469545 """
470546 Perform targeted searches based on identified gaps in the current draft
547+ Prioritizes HIGH priority gaps (placeholder tags) first
471548 """
472549 all_results = []
473550
474- for gap in gaps :
551+ # Sort gaps by priority - HIGH priority first (placeholder tags)
552+ sorted_gaps = sorted (gaps , key = lambda g : (
553+ 0 if g .get ('priority' , '' ).upper () == 'HIGH' else
554+ 1 if g .get ('priority' , '' ).upper () == 'MEDIUM' else 2
555+ ))
556+
557+ for gap in sorted_gaps :
475558 search_query = gap .get ('search_query' , '' )
476559 if not search_query :
477560 continue
@@ -807,7 +890,8 @@ def research(self, system_prompt: str, initial_query: str) -> Tuple[str, int]:
807890 print (f" - Quality scores: Completeness={ completeness :.2f} , Improvement={ improvement :.2f} " )
808891
809892 # Terminate if high quality achieved or minimal improvement
810- if completeness > 0.85 or improvement < 0.05 :
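893+ # Stricter termination criteria so research is not cut short: stop only at very high completeness, or when improvement stalls on an already reasonably complete draft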
894+ if completeness > 0.9 or (improvement < 0.03 and completeness > 0.7 ):
811895 print (" - Quality threshold reached, research complete" )
812896 break
813897
@@ -839,8 +923,11 @@ def finalize_research_report(self, system_prompt: str, original_query: str, fina
839923 3. Add a compelling title and executive summary
840924 4. Ensure smooth transitions between sections
841925 5. Add conclusion that directly addresses the original query
842- 6. Remove any remaining [NEEDS RESEARCH] tags
843- 7. Polish language and style for clarity and impact
926+ 6. **CRITICAL**: Remove ALL [NEEDS RESEARCH], [SOURCE NEEDED], and similar placeholder tags
927+ 7. Replace any remaining placeholders with actual content or remove incomplete sections
928+ 8. Polish language and style for clarity and impact
929+
930+ **IMPORTANT**: The final report must NOT contain any [NEEDS RESEARCH], [SOURCE NEEDED], [RESEARCH NEEDED], [CITATION NEEDED], or similar placeholder tags. If any placeholders remain, replace them with available information or remove the incomplete statements.
844931
845932 Return the final polished research report.
846933 """
@@ -858,6 +945,10 @@ def finalize_research_report(self, system_prompt: str, original_query: str, fina
858945
859946 polished_report = response .choices [0 ].message .content .strip ()
860947 polished_report = clean_reasoning_tags (polished_report )
948+
949+ # Final cleanup: Remove any remaining placeholder tags
950+ polished_report = self .cleanup_placeholder_tags (polished_report )
951+
861952 self .total_tokens += response .usage .completion_tokens
862953
863954 # Add references section
0 commit comments