@@ -225,7 +225,7 @@ class DeepResearcher:
225225 Based on: https://arxiv.org/abs/2507.16075v1
226226 """
227227
228- def __init__ (self , client , model : str , max_iterations : int = 8 , max_sources : int = 15 ):
228+ def __init__ (self , client , model : str , max_iterations : int = 5 , max_sources : int = 30 ):
229229 self .client = client
230230 self .model = model
231231 self .max_iterations = max_iterations
@@ -606,10 +606,17 @@ def generate_preliminary_draft(self, system_prompt: str, initial_query: str) ->
606606 5. Research Questions for Investigation
607607 6. Conclusion (preliminary thoughts)
608608
609- Mark sections that need external research with [NEEDS RESEARCH] tags.
610- Use placeholder citations like [SOURCE NEEDED] where external evidence is required.
609+ IMPORTANT: You MUST mark multiple areas that need external research with [NEEDS RESEARCH] tags.
610+ Every claim that would benefit from external evidence should have [SOURCE NEEDED].
611+ This is a preliminary draft - it should have many gaps for iterative improvement.
611612
612- This is an initial draft - it should be substantive but acknowledge limitations.
613+ Example of proper marking:
614+ - "Recent studies show [SOURCE NEEDED] that quantum computing..."
615+ - "The economic impact [NEEDS RESEARCH: current market data] is significant..."
616+ - "Historical context [NEEDS RESEARCH: specific timeline and events] shows..."
617+
618+ Include AT LEAST 5-10 [NEEDS RESEARCH] or [SOURCE NEEDED] tags throughout the draft.
619+ Be explicit about what you don't know and what needs external validation.
613620 """
614621
615622 try :
@@ -639,23 +646,27 @@ def analyze_draft_gaps(self, current_draft: str, original_query: str) -> List[Di
639646 """
640647 gap_analysis_prompt = f"""
641648 Analyze the following research draft to identify specific gaps and areas that need external research.
642- Pay special attention to any placeholder tags like [NEEDS RESEARCH], [SOURCE NEEDED], etc .
649+ Be thorough and aggressive in finding areas for improvement - even good drafts can be enhanced .
643650
644651 Original Query: { original_query }
645652
646653 Current Draft:
647654 { current_draft }
648655
649- PRIORITY ANALYSIS:
650- 1. First, identify any [NEEDS RESEARCH], [SOURCE NEEDED], [CITATION NEEDED] or similar placeholder tags
651- 2. Then identify other substantial gaps in content, evidence, or depth
656+ CRITICAL ANALYSIS REQUIRED:
657+ 1. MANDATORY: Find ALL [NEEDS RESEARCH], [SOURCE NEEDED], [CITATION NEEDED] tags
658+ 2. Identify claims lacking evidence (even if not explicitly marked)
659+ 3. Find areas that could benefit from recent data or statistics
660+ 4. Spot generalizations that need specific examples
661+ 5. Locate outdated information or areas needing current updates
662+ 6. Identify missing perspectives or counterarguments
652663
653664 For each gap you identify, provide:
654665 1. SECTION: Which section has the gap
655- 2. GAP_TYPE: [PLACEHOLDER_TAG, MISSING_INFO, OUTDATED_INFO, NEEDS_EVIDENCE, LACKS_DEPTH, NEEDS_EXAMPLES]
666+ 2. GAP_TYPE: [PLACEHOLDER_TAG, MISSING_INFO, OUTDATED_INFO, NEEDS_EVIDENCE, LACKS_DEPTH, NEEDS_EXAMPLES, MISSING_PERSPECTIVE ]
656667 3. SPECIFIC_NEED: Exactly what information is needed
657- 4. SEARCH_QUERY: A specific search query to address this gap
658- 5. PRIORITY: [HIGH, MEDIUM, LOW] - HIGH for placeholder tags that need immediate resolution
668+ 4. SEARCH_QUERY: A specific, targeted search query to address this gap
669+ 5. PRIORITY: [HIGH, MEDIUM, LOW] - HIGH for placeholder tags and critical missing info
659670
660671 Format each gap as:
661672 GAP_ID: [number]
@@ -665,7 +676,9 @@ def analyze_draft_gaps(self, current_draft: str, original_query: str) -> List[Di
665676 SEARCH_QUERY: [search query to find this info]
666677 PRIORITY: [priority level]
667678
668- Identify 3-6 most critical gaps, prioritizing any placeholder tags that need resolution.
679+ IMPORTANT: Identify AT LEAST 3-8 gaps. Be critical and thorough.
680+ Even well-written sections can benefit from additional evidence, examples, or perspectives.
681+ Push for depth, accuracy, and comprehensiveness in the research.
669682 """
670683
671684 try :
@@ -701,6 +714,8 @@ def analyze_draft_gaps(self, current_draft: str, original_query: str) -> List[Di
701714 current_gap ['specific_need' ] = line .split (':' , 1 )[1 ].strip ()
702715 elif line .startswith ('SEARCH_QUERY:' ):
703716 current_gap ['search_query' ] = line .split (':' , 1 )[1 ].strip ()
717+ elif line .startswith ('PRIORITY:' ):
718+ current_gap ['priority' ] = line .split (':' , 1 )[1 ].strip ()
704719
705720 if current_gap :
706721 gaps .append (current_gap )
@@ -960,6 +975,7 @@ def generate_structured_report(self, system_prompt: str, original_query: str, sy
960975 8. Group related citations together when appropriate [1,2,3]
961976 9. Ensure the Executive Summary captures the essence of the entire report
962977 10. Make recommendations specific and actionable
978+ 11. DO NOT create a References section - it will be added automatically
963979 """
964980
965981 try :
@@ -978,6 +994,12 @@ def generate_structured_report(self, system_prompt: str, original_query: str, sy
978994 report_content = clean_reasoning_tags (report_content )
979995 self .total_tokens += response .usage .completion_tokens
980996
997+ # Remove any References section the LLM might have created
998+ # This prevents duplicate reference sections
999+ report_content = re .sub (r'##\s*References.*?(?=##|\Z)' , '' , report_content , flags = re .DOTALL )
1000+ report_content = re .sub (r'(?m)^References\s*\n\s*(?:\[\d+\]\s*\n)+' , '' , report_content )
1001+ report_content = re .sub (r'\n\s*\n\s*\n+' , '\n \n ' , report_content ) # Clean up extra newlines
1002+
9811003 # Add references section with proper formatting
9821004 references = "\n \n ## References\n \n "
9831005 for num , source in sorted (self .citations .items ()):
@@ -1132,6 +1154,7 @@ def finalize_research_report(self, system_prompt: str, original_query: str, fina
11321154 - If sections are incomplete, either complete them with available information or remove them entirely
11331155 - Ensure all statements are backed by available evidence or are clearly marked as preliminary findings
11341156 - The report must be publication-ready with no incomplete elements
1157+ - DO NOT create a References section - it will be added automatically
11351158
11361159 Return the final polished research report.
11371160 """
@@ -1168,6 +1191,11 @@ def finalize_research_report(self, system_prompt: str, original_query: str, fina
11681191
11691192 self .total_tokens += response .usage .completion_tokens
11701193
1194+ # Remove any References section the LLM might have created
1195+ polished_report = re .sub (r'##\s*References.*?(?=##|\Z)' , '' , polished_report , flags = re .DOTALL )
1196+ polished_report = re .sub (r'(?m)^References\s*\n\s*(?:\[\d+\]\s*\n)+' , '' , polished_report )
1197+ polished_report = re .sub (r'\n\s*\n\s*\n+' , '\n \n ' , polished_report ) # Clean up extra newlines
1198+
11711199 # Add references section
11721200 references = "\n \n ## References\n \n "
11731201 for num , source in sorted (self .citations .items ()):
@@ -1179,7 +1207,7 @@ def finalize_research_report(self, system_prompt: str, original_query: str, fina
11791207 # Add TTD-DR metadata
11801208 metadata = "\n ---\n \n **TTD-DR Research Metadata:**\n "
11811209 metadata += f"- Algorithm: Test-Time Diffusion Deep Researcher\n "
1182- metadata += f"- Denoising iterations: { len (self .draft_history )} \n "
1210+ metadata += f"- Denoising iterations: { len (self .draft_history ) - 1 } \n "
11831211 metadata += f"- Total gaps addressed: { sum (len (gaps ) for gaps in self .gap_analysis_history )} \n "
11841212 metadata += f"- Component fitness: { self .component_fitness } \n "
11851213 metadata += f"- Total sources consulted: { len (self .citations )} \n "
0 commit comments