Skip to content

Commit e08d4ab

Browse files
committed
Add browser session manager for web searches
Introduces BrowserSessionManager to enable reuse of a single browser session across multiple web searches, improving efficiency and reliability. DeepResearcher now uses a shared browser session for all search operations within a research run, and web_search_plugin's run function supports session reuse via the new manager.
1 parent 239cdc7 commit e08d4ab

File tree

2 files changed

+174
-78
lines changed

2 files changed

+174
-78
lines changed

optillm/plugins/deep_research/research_engine.py

Lines changed: 94 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from typing import Tuple, List, Dict, Optional, Any
1515
from datetime import datetime
1616
from collections import defaultdict
17-
from optillm.plugins.web_search_plugin import run as web_search_run
17+
from optillm.plugins.web_search_plugin import run as web_search_run, BrowserSessionManager
1818
from optillm.plugins.readurls_plugin import run as readurls_run
1919
from optillm.plugins.memory_plugin import run as memory_run
2020

@@ -250,6 +250,7 @@ def __init__(self, client, model: str, max_iterations: int = 8, max_sources: int
250250
"integration_ability": 1.0
251251
}
252252
self.gap_analysis_history = [] # Track identified gaps over time
253+
self.session_manager = None # Browser session manager for web searches
253254

254255
def cleanup_placeholder_tags(self, text: str) -> str:
255256
"""
@@ -386,6 +387,10 @@ def perform_web_search(self, queries: List[str]) -> str:
386387
"""
387388
all_results = []
388389

390+
# Check if session manager is available
391+
if not hasattr(self, 'session_manager'):
392+
self.session_manager = None
393+
389394
# Perform individual searches for each query to avoid truncation issues
390395
for i, query in enumerate(queries):
391396
try:
@@ -398,7 +403,8 @@ def perform_web_search(self, queries: List[str]) -> str:
398403
enhanced_query, _ = web_search_run("", search_query, None, None, {
399404
"num_results": results_per_query,
400405
"delay_seconds": None, # Use default random delay (4-32 seconds)
401-
"headless": False # Allow CAPTCHA solving if needed
406+
"headless": False, # Allow CAPTCHA solving if needed
407+
"session_manager": self.session_manager # Use shared browser session
402408
})
403409

404410
if enhanced_query and "Web Search Results" in enhanced_query:
@@ -710,6 +716,10 @@ def perform_gap_targeted_search(self, gaps: List[Dict[str, str]]) -> str:
710716
"""
711717
all_results = []
712718

719+
# Check if session manager is available
720+
if not hasattr(self, 'session_manager'):
721+
self.session_manager = None
722+
713723
# Sort gaps by priority - HIGH priority first (placeholder tags)
714724
sorted_gaps = sorted(gaps, key=lambda g: (
715725
0 if g.get('priority', '').upper() == 'HIGH' else
@@ -729,7 +739,8 @@ def perform_gap_targeted_search(self, gaps: List[Dict[str, str]]) -> str:
729739
enhanced_query, _ = web_search_run("", search_query, None, None, {
730740
"num_results": max(1, self.max_sources // len(gaps)),
731741
"delay_seconds": None, # Use default random delay (4-32 seconds)
732-
"headless": False
742+
"headless": False,
743+
"session_manager": self.session_manager # Use shared browser session
733744
})
734745

735746
if enhanced_query and "Web Search Results" in enhanced_query:
@@ -995,77 +1006,87 @@ def research(self, system_prompt: str, initial_query: str) -> Tuple[str, int]:
9951006
4. Quality-guided termination
9961007
"""
9971008

998-
# PHASE 1: INITIALIZATION - Generate preliminary draft (updatable skeleton)
999-
print("TTD-DR: Generating preliminary draft...")
1000-
self.current_draft = self.generate_preliminary_draft(system_prompt, initial_query)
1001-
self.draft_history.append(self.current_draft)
1002-
1003-
# PHASE 2: ITERATIVE DENOISING LOOP
1004-
for iteration in range(self.max_iterations):
1005-
self.research_state["iteration"] = iteration + 1
1006-
print(f"TTD-DR: Denoising iteration {iteration + 1}/{self.max_iterations}")
1007-
1008-
# STEP 1: Analyze current draft for gaps (draft-guided search)
1009-
print(" - Analyzing draft gaps...")
1010-
gaps = self.analyze_draft_gaps(self.current_draft, initial_query)
1011-
self.gap_analysis_history.append(gaps)
1012-
1013-
if not gaps:
1014-
print(" - No significant gaps found, research complete")
1015-
break
1016-
1017-
# STEP 2: Perform gap-targeted retrieval
1018-
print(f" - Performing targeted search for {len(gaps)} gaps...")
1019-
retrieval_content = self.perform_gap_targeted_search(gaps)
1020-
1021-
# STEP 3: Extract and fetch URLs from search results
1022-
print(" - Extracting and fetching content...")
1023-
content_with_urls, sources = self.extract_and_fetch_urls(retrieval_content)
1024-
1025-
# Register sources for citations
1026-
for source in sources:
1027-
if 'url' in source:
1028-
self.citation_counter += 1
1029-
self.citations[self.citation_counter] = source
1030-
1031-
# STEP 4: DENOISING - Integrate retrieved info with current draft
1032-
print(" - Performing denoising step...")
1033-
previous_draft = self.current_draft
1034-
self.current_draft = self.denoise_draft_with_retrieval(
1035-
self.current_draft, content_with_urls, initial_query
1036-
)
1037-
self.draft_history.append(self.current_draft)
1038-
1039-
# STEP 5: Evaluate quality improvement
1040-
print(" - Evaluating draft quality...")
1041-
quality_scores = self.evaluate_draft_quality(
1042-
self.current_draft, previous_draft, initial_query
1043-
)
1044-
1045-
# STEP 6: Component self-evolution based on feedback
1046-
self.update_component_fitness(quality_scores)
1047-
1048-
# STEP 7: Check termination conditions
1049-
completeness = quality_scores.get('completeness', 0.0)
1050-
improvement = quality_scores.get('improvement', 0.0)
1051-
1052-
print(f" - Quality scores: Completeness={completeness:.2f}, Improvement={improvement:.2f}")
1053-
1054-
# Terminate if high quality achieved or minimal improvement
1055-
# More lenient termination to ensure complete research
1056-
if completeness > 0.9 or (improvement < 0.03 and completeness > 0.7):
1057-
print(" - Quality threshold reached, research complete")
1058-
break
1009+
# Use a single browser session for all searches in this research
1010+
with BrowserSessionManager(headless=False, timeout=30) as session_manager:
1011+
print("🔬 Starting deep research with single browser session")
1012+
self.session_manager = session_manager # Store for use in search methods
10591013

1060-
# Store current state for tracking
1061-
self.research_state["content"].append(content_with_urls)
1062-
self.research_state["sources"].extend([s['url'] for s in sources if 'url' in s])
1063-
1064-
# PHASE 3: FINALIZATION - Polish the final draft
1065-
print("TTD-DR: Finalizing research report...")
1066-
final_report = self.finalize_research_report(system_prompt, initial_query, self.current_draft)
1067-
1068-
return final_report, self.total_tokens
1014+
try:
1015+
# PHASE 1: INITIALIZATION - Generate preliminary draft (updatable skeleton)
1016+
print("TTD-DR: Generating preliminary draft...")
1017+
self.current_draft = self.generate_preliminary_draft(system_prompt, initial_query)
1018+
self.draft_history.append(self.current_draft)
1019+
1020+
# PHASE 2: ITERATIVE DENOISING LOOP
1021+
for iteration in range(self.max_iterations):
1022+
self.research_state["iteration"] = iteration + 1
1023+
print(f"TTD-DR: Denoising iteration {iteration + 1}/{self.max_iterations}")
1024+
1025+
# STEP 1: Analyze current draft for gaps (draft-guided search)
1026+
print(" - Analyzing draft gaps...")
1027+
gaps = self.analyze_draft_gaps(self.current_draft, initial_query)
1028+
self.gap_analysis_history.append(gaps)
1029+
1030+
if not gaps:
1031+
print(" - No significant gaps found, research complete")
1032+
break
1033+
1034+
# STEP 2: Perform gap-targeted retrieval
1035+
print(f" - Performing targeted search for {len(gaps)} gaps...")
1036+
retrieval_content = self.perform_gap_targeted_search(gaps)
1037+
1038+
# STEP 3: Extract and fetch URLs from search results
1039+
print(" - Extracting and fetching content...")
1040+
content_with_urls, sources = self.extract_and_fetch_urls(retrieval_content)
1041+
1042+
# Register sources for citations
1043+
for source in sources:
1044+
if 'url' in source:
1045+
self.citation_counter += 1
1046+
self.citations[self.citation_counter] = source
1047+
1048+
# STEP 4: DENOISING - Integrate retrieved info with current draft
1049+
print(" - Performing denoising step...")
1050+
previous_draft = self.current_draft
1051+
self.current_draft = self.denoise_draft_with_retrieval(
1052+
self.current_draft, content_with_urls, initial_query
1053+
)
1054+
self.draft_history.append(self.current_draft)
1055+
1056+
# STEP 5: Evaluate quality improvement
1057+
print(" - Evaluating draft quality...")
1058+
quality_scores = self.evaluate_draft_quality(
1059+
self.current_draft, previous_draft, initial_query
1060+
)
1061+
1062+
# STEP 6: Component self-evolution based on feedback
1063+
self.update_component_fitness(quality_scores)
1064+
1065+
# STEP 7: Check termination conditions
1066+
completeness = quality_scores.get('completeness', 0.0)
1067+
improvement = quality_scores.get('improvement', 0.0)
1068+
1069+
print(f" - Quality scores: Completeness={completeness:.2f}, Improvement={improvement:.2f}")
1070+
1071+
# Terminate if high quality achieved or minimal improvement
1072+
# More lenient termination to ensure complete research
1073+
if completeness > 0.9 or (improvement < 0.03 and completeness > 0.7):
1074+
print(" - Quality threshold reached, research complete")
1075+
break
1076+
1077+
# Store current state for tracking
1078+
self.research_state["content"].append(content_with_urls)
1079+
self.research_state["sources"].extend([s['url'] for s in sources if 'url' in s])
1080+
1081+
# PHASE 3: FINALIZATION - Polish the final draft
1082+
print("TTD-DR: Finalizing research report...")
1083+
final_report = self.finalize_research_report(system_prompt, initial_query, self.current_draft)
1084+
1085+
return final_report, self.total_tokens
1086+
1087+
finally:
1088+
# Clean up session manager reference
1089+
self.session_manager = None
10691090

10701091
def finalize_research_report(self, system_prompt: str, original_query: str, final_draft: str) -> str:
10711092
"""

optillm/plugins/web_search_plugin.py

Lines changed: 80 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,67 @@
1717

1818
SLUG = "web_search"
1919

20+
21+
class BrowserSessionManager:
    """
    Manages a single browser session across multiple searches.

    The manager lazily creates one ``GoogleSearcher`` and reuses it for every
    call to :meth:`search`, so a batch of searches shares one browser instead
    of launching a new one each time. It implements the context manager
    protocol for automatic cleanup, but it can also be used without ``with``
    as long as the caller invokes :meth:`close` when done.

    Args:
        headless: Passed through to ``GoogleSearcher`` when the browser is
            created.
        timeout: Passed through to ``GoogleSearcher`` when the browser is
            created.
    """

    def __init__(self, headless: bool = False, timeout: int = 30):
        self.headless = headless
        self.timeout = timeout
        self._searcher = None  # Created on first use, dropped by close()
        self._search_count = 0  # Searches performed in the current session
        self._session_start_time = None  # time.time() at session start

    def __enter__(self):
        """Context manager entry - ensures browser is ready"""
        self.get_or_create_searcher()
        # Entering the context marks the start of the timed session, even if
        # a searcher already existed beforehand.
        self._session_start_time = time.time()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit - ensures browser cleanup"""
        self.close()
        return False  # Don't suppress exceptions

    def get_or_create_searcher(self) -> 'GoogleSearcher':
        """Get existing searcher or create a new one"""
        if self._searcher is None:
            print("🌐 Creating new browser session for research...")
            self._searcher = GoogleSearcher(
                headless=self.headless,
                timeout=self.timeout
            )
        if self._session_start_time is None:
            # Start the session clock here as well, so that a manager used
            # without a ``with`` block still reports its duration on close().
            self._session_start_time = time.time()
        return self._searcher

    def search(self, query: str, num_results: int = 10, delay_seconds: Optional[int] = None) -> List[Dict[str, str]]:
        """
        Perform a search using the managed browser session.

        Args:
            query: Search query text.
            num_results: Forwarded to the searcher's ``search`` method.
            delay_seconds: Forwarded to the searcher's ``search`` method.

        Returns:
            Whatever the underlying searcher's ``search`` method returns.
        """
        searcher = self.get_or_create_searcher()
        self._search_count += 1
        print(f"🔍 Search #{self._search_count} in current session: {query[:50]}...")
        return searcher.search(query, num_results, delay_seconds)

    def close(self):
        """
        Close the browser session and reset the session bookkeeping.

        Does nothing when no session is open. Errors raised while closing are
        reported on stdout instead of propagating (best-effort cleanup), and
        the manager is reset either way so it can open a fresh session later.
        """
        if self._searcher is None:
            return
        try:
            self._searcher.close()
            if self._session_start_time is not None:
                duration = time.time() - self._session_start_time
                print(f"🏁 Browser session closed after {self._search_count} searches ({duration:.1f}s)")
        except Exception as e:
            print(f"⚠️ Error closing browser session: {e}")
        finally:
            self._searcher = None
            self._search_count = 0
            self._session_start_time = None

    def is_active(self) -> bool:
        """Check if browser session is active"""
        return self._searcher is not None and self._searcher.driver is not None
79+
80+
2081
class GoogleSearcher:
2182
def __init__(self, headless: bool = False, timeout: int = 30):
2283
self.timeout = timeout
@@ -459,31 +520,44 @@ def run(system_prompt: str, initial_query: str, client=None, model: str = None,
459520
Set to 0 to disable delays, or specify exact seconds
460521
- headless: Run browser in headless mode (default: False)
461522
- timeout: Browser timeout in seconds (default: 30)
523+
- session_manager: BrowserSessionManager instance for session reuse
462524
463525
Returns:
464526
Tuple of (enhanced_query_with_search_results, completion_tokens)
465527
"""
466528
# Parse configuration
467529
config = request_config or {}
468530
num_results = config.get("num_results", 10)
469-
delay_seconds = config.get("delay_seconds", None) # None means random 32-128
531+
delay_seconds = config.get("delay_seconds", None) # None means random 4-32
470532
headless = config.get("headless", False) # Default to non-headless
471533
timeout = config.get("timeout", 30) # Standard timeout
534+
session_manager = config.get("session_manager", None) # For session reuse
472535

473536
# Extract search queries from the input
474537
search_queries = extract_search_queries(initial_query)
475538

476539
if not search_queries:
477540
return initial_query, 0
478541

479-
searcher = None
542+
# Determine if we should manage the browser lifecycle
543+
own_session = session_manager is None
544+
480545
try:
481-
searcher = GoogleSearcher(headless=headless, timeout=timeout)
546+
# Use provided session manager or create temporary one
547+
if own_session:
548+
# Create temporary searcher for standalone use
549+
searcher = GoogleSearcher(headless=headless, timeout=timeout)
550+
482551
enhanced_query = initial_query
483552

484553
for query in search_queries:
485554
# Perform the search
486-
results = searcher.search(query, num_results=num_results, delay_seconds=delay_seconds)
555+
if session_manager:
556+
# Use session manager's search method
557+
results = session_manager.search(query, num_results=num_results, delay_seconds=delay_seconds)
558+
else:
559+
# Use temporary searcher
560+
results = searcher.search(query, num_results=num_results, delay_seconds=delay_seconds)
487561

488562
# Format results
489563
if results:
@@ -502,5 +576,6 @@ def run(system_prompt: str, initial_query: str, client=None, model: str = None,
502576
return enhanced_query, 0
503577

504578
finally:
505-
if searcher:
579+
# Only close if we created our own searcher
580+
if own_session and 'searcher' in locals():
506581
searcher.close()

0 commit comments

Comments
 (0)