@@ -14,7 +14,7 @@
 from typing import Tuple, List, Dict, Optional, Any
 from datetime import datetime
 from collections import defaultdict
-from optillm.plugins.web_search_plugin import run as web_search_run
+from optillm.plugins.web_search_plugin import run as web_search_run, BrowserSessionManager
 from optillm.plugins.readurls_plugin import run as readurls_run
 from optillm.plugins.memory_plugin import run as memory_run
 
@@ -250,6 +250,7 @@ def __init__(self, client, model: str, max_iterations: int = 8, max_sources: int
250250 "integration_ability" : 1.0
251251 }
252252 self .gap_analysis_history = [] # Track identified gaps over time
253+ self .session_manager = None # Browser session manager for web searches
253254
254255 def cleanup_placeholder_tags (self , text : str ) -> str :
255256 """
@@ -386,6 +387,10 @@ def perform_web_search(self, queries: List[str]) -> str:
386387 """
387388 all_results = []
388389
390+ # Check if session manager is available
391+ if not hasattr (self , 'session_manager' ):
392+ self .session_manager = None
393+
389394 # Perform individual searches for each query to avoid truncation issues
390395 for i , query in enumerate (queries ):
391396 try :
@@ -398,7 +403,8 @@ def perform_web_search(self, queries: List[str]) -> str:
                 enhanced_query, _ = web_search_run("", search_query, None, None, {
                     "num_results": results_per_query,
                     "delay_seconds": None,  # Use default random delay (4-32 seconds)
-                    "headless": False  # Allow CAPTCHA solving if needed
+                    "headless": False,  # Allow CAPTCHA solving if needed
+                    "session_manager": self.session_manager  # Use shared browser session
                 })
 
                 if enhanced_query and "Web Search Results" in enhanced_query:
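The `hasattr` guard added above makes the method safe on instances created before `session_manager` existed in `__init__` (for example, objects restored from an older pickle). An equivalent read-only idiom, shown here only for comparison and not part of this PR, would be:

    session_manager = getattr(self, 'session_manager', None)

which supplies the `None` fallback in one step without mutating the instance.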
@@ -710,6 +716,10 @@ def perform_gap_targeted_search(self, gaps: List[Dict[str, str]]) -> str:
710716 """
711717 all_results = []
712718
719+ # Check if session manager is available
720+ if not hasattr (self , 'session_manager' ):
721+ self .session_manager = None
722+
713723 # Sort gaps by priority - HIGH priority first (placeholder tags)
714724 sorted_gaps = sorted (gaps , key = lambda g : (
715725 0 if g .get ('priority' , '' ).upper () == 'HIGH' else
@@ -729,7 +739,8 @@ def perform_gap_targeted_search(self, gaps: List[Dict[str, str]]) -> str:
                 enhanced_query, _ = web_search_run("", search_query, None, None, {
                     "num_results": max(1, self.max_sources // len(gaps)),
                     "delay_seconds": None,  # Use default random delay (4-32 seconds)
-                    "headless": False
+                    "headless": False,
+                    "session_manager": self.session_manager  # Use shared browser session
                 })
 
                 if enhanced_query and "Web Search Results" in enhanced_query:
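The per-gap budget `max(1, self.max_sources // len(gaps))` splits the overall source budget evenly across gaps while guaranteeing at least one result per query. A quick check with made-up numbers (not values from this PR):

    >>> max(1, 10 // 4)   # max_sources=10, four gaps -> 2 results per gap
    2
    >>> max(1, 3 // 8)    # max_sources=3, eight gaps -> the floor of 1 applies
    1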
@@ -995,77 +1006,87 @@ def research(self, system_prompt: str, initial_query: str) -> Tuple[str, int]:
         4. Quality-guided termination
         """
 
-        # PHASE 1: INITIALIZATION - Generate preliminary draft (updatable skeleton)
-        print("TTD-DR: Generating preliminary draft...")
-        self.current_draft = self.generate_preliminary_draft(system_prompt, initial_query)
-        self.draft_history.append(self.current_draft)
-
-        # PHASE 2: ITERATIVE DENOISING LOOP
-        for iteration in range(self.max_iterations):
-            self.research_state["iteration"] = iteration + 1
-            print(f"TTD-DR: Denoising iteration {iteration + 1}/{self.max_iterations}")
-
-            # STEP 1: Analyze current draft for gaps (draft-guided search)
-            print(" - Analyzing draft gaps...")
-            gaps = self.analyze_draft_gaps(self.current_draft, initial_query)
-            self.gap_analysis_history.append(gaps)
-
-            if not gaps:
-                print(" - No significant gaps found, research complete")
-                break
-
-            # STEP 2: Perform gap-targeted retrieval
-            print(f" - Performing targeted search for {len(gaps)} gaps...")
-            retrieval_content = self.perform_gap_targeted_search(gaps)
-
-            # STEP 3: Extract and fetch URLs from search results
-            print(" - Extracting and fetching content...")
-            content_with_urls, sources = self.extract_and_fetch_urls(retrieval_content)
-
-            # Register sources for citations
-            for source in sources:
-                if 'url' in source:
-                    self.citation_counter += 1
-                    self.citations[self.citation_counter] = source
-
-            # STEP 4: DENOISING - Integrate retrieved info with current draft
-            print(" - Performing denoising step...")
-            previous_draft = self.current_draft
-            self.current_draft = self.denoise_draft_with_retrieval(
-                self.current_draft, content_with_urls, initial_query
-            )
-            self.draft_history.append(self.current_draft)
-
-            # STEP 5: Evaluate quality improvement
-            print(" - Evaluating draft quality...")
-            quality_scores = self.evaluate_draft_quality(
-                self.current_draft, previous_draft, initial_query
-            )
-
-            # STEP 6: Component self-evolution based on feedback
-            self.update_component_fitness(quality_scores)
-
-            # STEP 7: Check termination conditions
-            completeness = quality_scores.get('completeness', 0.0)
-            improvement = quality_scores.get('improvement', 0.0)
-
-            print(f" - Quality scores: Completeness={completeness:.2f}, Improvement={improvement:.2f}")
-
-            # Terminate if high quality achieved or minimal improvement
-            # More lenient termination to ensure complete research
-            if completeness > 0.9 or (improvement < 0.03 and completeness > 0.7):
-                print(" - Quality threshold reached, research complete")
-                break
+        # Use a single browser session for all searches in this research
+        with BrowserSessionManager(headless=False, timeout=30) as session_manager:
+            print("🔬 Starting deep research with single browser session")
+            self.session_manager = session_manager  # Store for use in search methods
 
-            # Store current state for tracking
-            self.research_state["content"].append(content_with_urls)
-            self.research_state["sources"].extend([s['url'] for s in sources if 'url' in s])
-
-        # PHASE 3: FINALIZATION - Polish the final draft
-        print("TTD-DR: Finalizing research report...")
-        final_report = self.finalize_research_report(system_prompt, initial_query, self.current_draft)
-
-        return final_report, self.total_tokens
+            try:
+                # PHASE 1: INITIALIZATION - Generate preliminary draft (updatable skeleton)
+                print("TTD-DR: Generating preliminary draft...")
+                self.current_draft = self.generate_preliminary_draft(system_prompt, initial_query)
+                self.draft_history.append(self.current_draft)
+
+                # PHASE 2: ITERATIVE DENOISING LOOP
+                for iteration in range(self.max_iterations):
+                    self.research_state["iteration"] = iteration + 1
+                    print(f"TTD-DR: Denoising iteration {iteration + 1}/{self.max_iterations}")
+
+                    # STEP 1: Analyze current draft for gaps (draft-guided search)
+                    print(" - Analyzing draft gaps...")
+                    gaps = self.analyze_draft_gaps(self.current_draft, initial_query)
+                    self.gap_analysis_history.append(gaps)
+
+                    if not gaps:
+                        print(" - No significant gaps found, research complete")
+                        break
+
+                    # STEP 2: Perform gap-targeted retrieval
+                    print(f" - Performing targeted search for {len(gaps)} gaps...")
+                    retrieval_content = self.perform_gap_targeted_search(gaps)
+
+                    # STEP 3: Extract and fetch URLs from search results
+                    print(" - Extracting and fetching content...")
+                    content_with_urls, sources = self.extract_and_fetch_urls(retrieval_content)
+
+                    # Register sources for citations
+                    for source in sources:
+                        if 'url' in source:
+                            self.citation_counter += 1
+                            self.citations[self.citation_counter] = source
+
+                    # STEP 4: DENOISING - Integrate retrieved info with current draft
+                    print(" - Performing denoising step...")
+                    previous_draft = self.current_draft
+                    self.current_draft = self.denoise_draft_with_retrieval(
+                        self.current_draft, content_with_urls, initial_query
+                    )
+                    self.draft_history.append(self.current_draft)
+
+                    # STEP 5: Evaluate quality improvement
+                    print(" - Evaluating draft quality...")
+                    quality_scores = self.evaluate_draft_quality(
+                        self.current_draft, previous_draft, initial_query
+                    )
+
+                    # STEP 6: Component self-evolution based on feedback
+                    self.update_component_fitness(quality_scores)
+
+                    # STEP 7: Check termination conditions
+                    completeness = quality_scores.get('completeness', 0.0)
+                    improvement = quality_scores.get('improvement', 0.0)
+
+                    print(f" - Quality scores: Completeness={completeness:.2f}, Improvement={improvement:.2f}")
+
+                    # Terminate if high quality achieved or minimal improvement
+                    # More lenient termination to ensure complete research
+                    if completeness > 0.9 or (improvement < 0.03 and completeness > 0.7):
+                        print(" - Quality threshold reached, research complete")
+                        break
+
+                    # Store current state for tracking
+                    self.research_state["content"].append(content_with_urls)
+                    self.research_state["sources"].extend([s['url'] for s in sources if 'url' in s])
+
+                # PHASE 3: FINALIZATION - Polish the final draft
+                print("TTD-DR: Finalizing research report...")
+                final_report = self.finalize_research_report(system_prompt, initial_query, self.current_draft)
+
+                return final_report, self.total_tokens
+
+            finally:
+                # Clean up session manager reference
+                self.session_manager = None
 
     def finalize_research_report(self, system_prompt: str, original_query: str, final_draft: str) -> str:
         """