@@ -77,8 +77,9 @@ def cleanup_placeholder_tags(text: str) -> str:
7777 if not text :
7878 return text
7979
80- # Patterns for research placeholder tags
80+ # Comprehensive patterns for research placeholder tags
8181 placeholder_patterns = [
82+ # Research placeholders
8283 r'\[NEEDS RESEARCH[^\]]*\]' ,
8384 r'\[SOURCE NEEDED[^\]]*\]' ,
8485 r'\[RESEARCH NEEDED[^\]]*\]' ,
@@ -88,6 +89,24 @@ def cleanup_placeholder_tags(text: str) -> str:
8889 r'\[TO BE RESEARCHED[^\]]*\]' ,
8990 r'\[VERIFY[^\]]*\]' ,
9091 r'\[CHECK[^\]]*\]' ,
92+
93+ # Citation placeholders, e.g. "[1] [Placeholder for specific research citation]"
94+ r'\[Placeholder for[^\]]+\]' ,
95+ r'\[\d+\]\s*\[Placeholder[^\]]+\]' ,
96+ r'\[Insert citation[^\]]*\]' ,
97+ r'\[Add reference[^\]]*\]' ,
98+ r'\[Reference needed[^\]]*\]' ,
99+
100+ # Content placeholders
101+ r'\[To be completed[^\]]*\]' ,
102+ r'\[Under development[^\]]*\]' ,
103+ r'\[Coming soon[^\]]*\]' ,
104+ r'\[TBD[^\]]*\]' ,
105+ r'\[TODO[^\]]*\]' ,
106+
107+ # Question placeholders and incomplete sections
108+ r'\[Question \d+[^\]]*\]' ,
109+ r'\[Research question[^\]]*\]' ,
91110 ]
92111
93112 cleaned_text = text
@@ -115,6 +134,85 @@ def cleanup_placeholder_tags(text: str) -> str:
115134 return result
116135
117136
# Title of the open-questions section that should not survive into a final report.
_QUESTIONS_HEADING = "Research Questions for Investigation"
# A questions section with more bullet lines than this is reported as unanswered.
_MAX_QUESTION_BULLETS = 3
# Reports with fewer words than this are reported as possibly incomplete.
_MIN_REPORT_WORDS = 500

# One alternation instead of four separate patterns: the numbered form is listed
# first so "[1] [Placeholder for ...]" is consumed as a single match and is not
# counted a second time by the bare "[Placeholder for ...]" alternative.
_PLACEHOLDER_CITATION_RE = re.compile(
    r'\[\d+\]\s*\[Placeholder[^\]]+\]'
    r'|\[Placeholder for[^\]]+\]'
    r'|\[Insert citation[^\]]*\]'
    r'|\[Reference needed[^\]]*\]',
    re.IGNORECASE,
)

# Any bracketed tag whose text mentions one of the placeholder keywords.
_PLACEHOLDER_TAG_RE = re.compile(
    r'\[[^\]]*(?:placeholder|needed|research|todo|tbd)[^\]]*\]',
    re.IGNORECASE,
)

# A heading of level 2 or deeper at the start of a line, followed by its body up
# to the next such heading (or the end of the text). Anchoring on line starts
# keeps a "##" in the middle of a sentence from being read as a heading.
_SECTION_RE = re.compile(
    r'^#{2,}[ \t]+([^\n]+)(?:\n|\Z)(.*?)(?=^#{2,}[ \t]|\Z)',
    re.MULTILINE | re.DOTALL,
)


def _find_questions_section(text: str) -> Optional[str]:
    """Return the open-questions section including nested subsections, or None."""
    heading = re.search(
        r'^(#{2,})[ \t]+' + re.escape(_QUESTIONS_HEADING) + r'[^\n]*\n?',
        text,
        re.MULTILINE,
    )
    if heading is None:
        return None

    # The section ends at the next heading of the same or a higher level, so
    # deeper subheadings (e.g. "### Theme") stay part of the section.
    level = len(heading.group(1))
    closing = re.compile(r'^#{1,%d}[ \t]' % level, re.MULTILINE).search(
        text, heading.end()
    )
    end = closing.start() if closing else len(text)
    return text[heading.start():end]


def validate_report_completeness(text: str) -> Dict[str, Any]:
    """
    Validate that the research report is complete and ready for publication.

    Checks for:
    - Placeholder citations
    - An unanswered "Research Questions for Investigation" section
    - Sections whose content is mostly placeholder tags
    - Unbalanced square brackets
    - A very low overall word count

    Args:
        text: The report as markdown text.

    Returns:
        Dict with the keys "is_complete" (True when no issue was found),
        "issues" (human-readable problem descriptions), "suggestions"
        (one remediation hint per issue), "word_count" and "section_count".
        All five keys are present for empty input as well.
    """
    if not text:
        return {
            "is_complete": False,
            "issues": ["Empty report"],
            "suggestions": [],
            "word_count": 0,
            "section_count": 0,
        }

    issues: List[str] = []
    suggestions: List[str] = []

    # Placeholder citations: one issue and one suggestion for the whole report.
    matches = _PLACEHOLDER_CITATION_RE.findall(text)
    if matches:
        issues.append(f"Found {len(matches)} placeholder citations: {matches[:3]}")
        suggestions.append("Replace placeholder citations with actual sources or remove incomplete claims")

    # A long bullet list under the questions heading means the questions were
    # listed but never turned into findings.
    question_content = _find_questions_section(text)
    if question_content is not None:
        question_lines = [
            line
            for line in question_content.splitlines()
            if line.strip().startswith(('*', '-'))
        ]
        if len(question_lines) > _MAX_QUESTION_BULLETS:
            issues.append("Report contains unanswered research questions section")
            suggestions.append("Convert research questions into answered findings or remove incomplete section")

    # Sections in which placeholder tags outnumber a third of the non-blank lines.
    sections = _SECTION_RE.findall(text)
    for section_title, section_content in sections:
        placeholder_count = len(_PLACEHOLDER_TAG_RE.findall(section_content))
        content_lines = [line for line in section_content.splitlines() if line.strip()]

        if placeholder_count > len(content_lines) / 3:
            title = section_title.strip()
            issues.append(f"Section '{title}' is mostly placeholders")
            suggestions.append(f"Complete content for '{title}' section or remove it")

    # Unbalanced brackets usually indicate a truncated citation or link.
    if text.count('[') != text.count(']'):
        issues.append("Unmatched brackets detected - possible incomplete citations")
        suggestions.append("Review and fix citation formatting")

    word_count = len(text.split())
    if word_count < _MIN_REPORT_WORDS:
        issues.append("Report appears to be very short, possibly incomplete")
        suggestions.append("Ensure all research areas are adequately covered")

    return {
        "is_complete": not issues,
        "issues": issues,
        "suggestions": suggestions,
        "word_count": word_count,
        "section_count": len(sections),
    }
214+
215+
118216class DeepResearcher :
119217 """
120218 Implementation of Test-Time Diffusion Deep Researcher (TTD-DR) algorithm
@@ -168,6 +266,70 @@ def cleanup_placeholder_tags(self, text: str) -> str:
168266 """
169267 return cleanup_placeholder_tags (text )
170268
def fix_incomplete_report(self, report: str, validation: Dict[str, Any], original_query: str) -> str:
    """
    Attempt to fix an incomplete report by removing problematic sections
    and ensuring a coherent final document.

    This is a fallback when the report contains placeholders or incomplete sections.
    It is a method of DeepResearcher but reads no instance state.

    Steps, in order:
    1. Strip placeholder tags via cleanup_placeholder_tags.
    2. Remove the "Research Questions for Investigation" section together
       with any subsections nested under it.
    3. Remove numbered citation placeholders such as "[1] [Placeholder ...]".
    4. Remove headings that are left without any content.
    5. Add a completion note when fewer than 300 words remain.
    6. Collapse runs of blank lines and re-validate the result.

    Args:
        report: The report text to repair.
        validation: Result of a previous validate_report_completeness call.
            Accepted for interface compatibility; the repair steps are applied
            unconditionally and do not read it.
        original_query: The research query, quoted in the completion note.

    Returns:
        The repaired report text. Progress is reported on stdout.
    """
    print("🔧 Attempting to fix incomplete report...")

    # Start with the basic cleanup
    fixed_report = cleanup_placeholder_tags(report)

    # Remove the unanswered research questions section. The section runs to the
    # next heading of the same or a higher level, so "###" subsections inside
    # it are removed with it instead of being left behind as orphans.
    questions_heading = re.search(
        r'^(#{2,})[ \t]+Research Questions for Investigation[^\n]*\n?',
        fixed_report,
        flags=re.MULTILINE,
    )
    if questions_heading:
        level = len(questions_heading.group(1))
        closing = re.compile(r'^#{1,%d}[ \t]' % level, re.MULTILINE).search(
            fixed_report, questions_heading.end()
        )
        section_end = closing.start() if closing else len(fixed_report)
        fixed_report = fixed_report[:questions_heading.start()] + fixed_report[section_end:]
        print(" - Removed incomplete research questions section")

    # Remove citation placeholders from reference section
    fixed_report = re.sub(
        r'\[\d+\]\s*\[Placeholder[^\]]+\]\n?',
        '',
        fixed_report
    )

    # Clean up empty sections: a heading is empty only when the next thing
    # after it is a heading of the same or a higher level. A heading followed
    # by a deeper subsection is a parent with content and must be kept.
    def _drop_if_empty(match):
        own_level = len(match.group(1))
        next_level = len(match.group(2))
        return '' if next_level <= own_level else match.group(0)

    empty_heading = re.compile(
        r'^(#{2,})[ \t]+[^\n]*\n\s*(?=(#+)[ \t])',
        re.MULTILINE,
    )
    # Repeat until stable: removing an empty subsection can leave its parent empty.
    while True:
        pruned = empty_heading.sub(_drop_if_empty, fixed_report)
        if pruned == fixed_report:
            break
        fixed_report = pruned

    # If report is still very short, add a completion note
    if len(fixed_report.split()) < 300:
        completion_note = (
            "\n\n"
            "## Note on Report Completion\n\n"
            "This research report represents the findings gathered during the available research time. "
            "While comprehensive coverage was the goal, some areas may require additional investigation "
            "for complete analysis.\n\n"
            f"For more detailed information on specific aspects of {original_query}, "
            "additional focused research sessions may be beneficial.\n"
        )
        # Insert once, before the first top-of-line "## References" heading.
        references_heading = re.search(r'^## References', fixed_report, flags=re.MULTILINE)
        if references_heading:
            cut = references_heading.start()
            fixed_report = fixed_report[:cut] + completion_note + "\n" + fixed_report[cut:]
        else:
            fixed_report += completion_note

        print(" - Added completion note due to short report length")

    # Final cleanup: collapse three or more line breaks into one blank line
    fixed_report = re.sub(r'\n\s*\n\s*\n+', '\n\n', fixed_report)
    fixed_report = fixed_report.strip()

    # Validate the fix (reported only; the repaired text is returned either way)
    new_validation = validate_report_completeness(fixed_report)
    if new_validation["is_complete"]:
        print("✅ Report successfully fixed and validated")
    else:
        print(f"⚠️ Report still has {len(new_validation['issues'])} issues after fixing")

    return fixed_report
332+
171333 def decompose_query (self , system_prompt : str , initial_query : str ) -> List [str ]:
172334 """
173335 Decompose complex research query into focused sub-queries
@@ -235,7 +397,7 @@ def perform_web_search(self, queries: List[str]) -> str:
235397
236398 enhanced_query , _ = web_search_run ("" , search_query , None , None , {
237399 "num_results" : results_per_query ,
238- "delay_seconds" : 2 if i == 0 else 1 , # Shorter delay for subsequent queries
400+ "delay_seconds" : None , # Use default random delay (4-32 seconds)
239401 "headless" : False # Allow CAPTCHA solving if needed
240402 })
241403
@@ -566,7 +728,7 @@ def perform_gap_targeted_search(self, gaps: List[Dict[str, str]]) -> str:
566728 # Perform search with context about what gap we're filling
567729 enhanced_query , _ = web_search_run ("" , search_query , None , None , {
568730 "num_results" : max (1 , self .max_sources // len (gaps )),
569- "delay_seconds" : 2 ,
731+ "delay_seconds" : None , # Use default random delay (4-32 seconds)
570732 "headless" : False
571733 })
572734
@@ -927,7 +1089,13 @@ def finalize_research_report(self, system_prompt: str, original_query: str, fina
9271089 7. Replace any remaining placeholders with actual content or remove incomplete sections
9281090 8. Polish language and style for clarity and impact
9291091
930- **IMPORTANT**: The final report must NOT contain any [NEEDS RESEARCH], [SOURCE NEEDED], [RESEARCH NEEDED], [CITATION NEEDED], or similar placeholder tags. If any placeholders remain, replace them with available information or remove the incomplete statements.
1092+ **CRITICAL REQUIREMENTS**:
1093+ - The final report must NOT contain ANY placeholder tags: [NEEDS RESEARCH], [SOURCE NEEDED], [Placeholder for...], etc.
1094+ - Remove incomplete "Research Questions for Investigation" sections with unanswered questions
1095+ - Do not include citation placeholders like "[1] [Placeholder for specific research citation]"
1096+ - If sections are incomplete, either complete them with available information or remove them entirely
1097+ - Ensure all statements are backed by available evidence or are clearly marked as preliminary findings
1098+ - The report must be publication-ready with no incomplete elements
9311099
9321100 Return the final polished research report.
9331101 """
@@ -949,6 +1117,19 @@ def finalize_research_report(self, system_prompt: str, original_query: str, fina
9491117 # Final cleanup: Remove any remaining placeholder tags
9501118 polished_report = self .cleanup_placeholder_tags (polished_report )
9511119
1120+ # Validate report completeness
1121+ validation = validate_report_completeness (polished_report )
1122+
1123+ if not validation ["is_complete" ]:
1124+ print (f"⚠️ Report validation found { len (validation ['issues' ])} issues:" )
1125+ for issue in validation ['issues' ]:
1126+ print (f" - { issue } " )
1127+
1128+ # Attempt to fix incomplete report
1129+ polished_report = self .fix_incomplete_report (polished_report , validation , original_query )
1130+ else :
1131+ print ("✅ Report validation passed - report is complete" )
1132+
9521133 self .total_tokens += response .usage .completion_tokens
9531134
9541135 # Add references section
0 commit comments