Skip to content

Commit 85c205a

Browse files
committed
Add session state management for deep research
Introduces session_state.py to manage browser sessions for concurrent deep research requests, ensuring thread safety and proper cleanup. Updates DeepResearcher to use unique session IDs and centralized session management, and improves search query extraction logic in web_search_plugin.py for more robust handling of search commands.
1 parent 77d5162 commit 85c205a

File tree

3 files changed

+253
-96
lines changed

3 files changed

+253
-96
lines changed

optillm/plugins/deep_research/research_engine.py

Lines changed: 92 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
from optillm.plugins.web_search_plugin import run as web_search_run, BrowserSessionManager
1818
from optillm.plugins.readurls_plugin import run as readurls_run
1919
from optillm.plugins.memory_plugin import run as memory_run
20+
from optillm.plugins.deep_research.session_state import get_session_manager, close_session
21+
import uuid
2022

2123

2224
def clean_reasoning_tags(text: str) -> str:
@@ -228,6 +230,8 @@ def __init__(self, client, model: str, max_iterations: int = 8, max_sources: int
228230
self.model = model
229231
self.max_iterations = max_iterations
230232
self.max_sources = max_sources
233+
self.session_id = str(uuid.uuid4()) # Unique session ID for this research
234+
self.session_manager = None # Will be set when research starts
231235
self.research_state = {
232236
"queries": [],
233237
"sources": [],
@@ -388,8 +392,12 @@ def perform_web_search(self, queries: List[str]) -> str:
388392
all_results = []
389393

390394
# Check if session manager is available
391-
if not hasattr(self, 'session_manager'):
395+
if not hasattr(self, 'session_manager') or self.session_manager is None:
396+
# Log warning - this shouldn't happen in normal flow
397+
print(f"⚠️ Warning: session_manager not available in perform_web_search (session_id: {getattr(self, 'session_id', 'N/A')})")
392398
self.session_manager = None
399+
else:
400+
print(f"📊 Using existing session manager for web search (session_id: {self.session_id}, manager: {id(self.session_manager)})")
393401

394402
# Perform individual searches for each query to avoid truncation issues
395403
for i, query in enumerate(queries):
@@ -717,7 +725,9 @@ def perform_gap_targeted_search(self, gaps: List[Dict[str, str]]) -> str:
717725
all_results = []
718726

719727
# Check if session manager is available
720-
if not hasattr(self, 'session_manager'):
728+
if not hasattr(self, 'session_manager') or self.session_manager is None:
729+
# Log warning - this shouldn't happen in normal flow
730+
print("⚠️ Warning: session_manager not available in perform_web_search")
721731
self.session_manager = None
722732

723733
# Sort gaps by priority - HIGH priority first (placeholder tags)
@@ -1006,86 +1016,91 @@ def research(self, system_prompt: str, initial_query: str) -> Tuple[str, int]:
10061016
4. Quality-guided termination
10071017
"""
10081018

1009-
# Use a single browser session for all searches in this research
1010-
with BrowserSessionManager(headless=False, timeout=30) as session_manager:
1011-
print("🔬 Starting deep research with single browser session")
1012-
self.session_manager = session_manager # Store for use in search methods
1019+
# Get or create a browser session for this research session
1020+
self.session_manager = get_session_manager(self.session_id, headless=False, timeout=30)
1021+
if self.session_manager:
1022+
print(f"🔬 Starting deep research with session ID: {self.session_id} (DeepResearcher instance: {id(self)})")
1023+
else:
1024+
print("⚠️ Failed to create browser session, proceeding without web search")
10131025

1014-
try:
1015-
# PHASE 1: INITIALIZATION - Generate preliminary draft (updatable skeleton)
1016-
print("TTD-DR: Generating preliminary draft...")
1017-
self.current_draft = self.generate_preliminary_draft(system_prompt, initial_query)
1026+
try:
1027+
# PHASE 1: INITIALIZATION - Generate preliminary draft (updatable skeleton)
1028+
print("TTD-DR: Generating preliminary draft...")
1029+
self.current_draft = self.generate_preliminary_draft(system_prompt, initial_query)
1030+
self.draft_history.append(self.current_draft)
1031+
1032+
# PHASE 2: ITERATIVE DENOISING LOOP
1033+
for iteration in range(self.max_iterations):
1034+
self.research_state["iteration"] = iteration + 1
1035+
print(f"TTD-DR: Denoising iteration {iteration + 1}/{self.max_iterations}")
1036+
1037+
# STEP 1: Analyze current draft for gaps (draft-guided search)
1038+
print(" - Analyzing draft gaps...")
1039+
gaps = self.analyze_draft_gaps(self.current_draft, initial_query)
1040+
self.gap_analysis_history.append(gaps)
1041+
1042+
if not gaps:
1043+
print(" - No significant gaps found, research complete")
1044+
break
1045+
1046+
# STEP 2: Perform gap-targeted retrieval
1047+
print(f" - Performing targeted search for {len(gaps)} gaps...")
1048+
retrieval_content = self.perform_gap_targeted_search(gaps)
1049+
1050+
# STEP 3: Extract and fetch URLs from search results
1051+
print(" - Extracting and fetching content...")
1052+
content_with_urls, sources = self.extract_and_fetch_urls(retrieval_content)
1053+
1054+
# Register sources for citations
1055+
for source in sources:
1056+
if 'url' in source:
1057+
self.citation_counter += 1
1058+
self.citations[self.citation_counter] = source
1059+
1060+
# STEP 4: DENOISING - Integrate retrieved info with current draft
1061+
print(" - Performing denoising step...")
1062+
previous_draft = self.current_draft
1063+
self.current_draft = self.denoise_draft_with_retrieval(
1064+
self.current_draft, content_with_urls, initial_query
1065+
)
10181066
self.draft_history.append(self.current_draft)
1019-
1020-
# PHASE 2: ITERATIVE DENOISING LOOP
1021-
for iteration in range(self.max_iterations):
1022-
self.research_state["iteration"] = iteration + 1
1023-
print(f"TTD-DR: Denoising iteration {iteration + 1}/{self.max_iterations}")
1024-
1025-
# STEP 1: Analyze current draft for gaps (draft-guided search)
1026-
print(" - Analyzing draft gaps...")
1027-
gaps = self.analyze_draft_gaps(self.current_draft, initial_query)
1028-
self.gap_analysis_history.append(gaps)
1029-
1030-
if not gaps:
1031-
print(" - No significant gaps found, research complete")
1032-
break
1033-
1034-
# STEP 2: Perform gap-targeted retrieval
1035-
print(f" - Performing targeted search for {len(gaps)} gaps...")
1036-
retrieval_content = self.perform_gap_targeted_search(gaps)
1037-
1038-
# STEP 3: Extract and fetch URLs from search results
1039-
print(" - Extracting and fetching content...")
1040-
content_with_urls, sources = self.extract_and_fetch_urls(retrieval_content)
1041-
1042-
# Register sources for citations
1043-
for source in sources:
1044-
if 'url' in source:
1045-
self.citation_counter += 1
1046-
self.citations[self.citation_counter] = source
1047-
1048-
# STEP 4: DENOISING - Integrate retrieved info with current draft
1049-
print(" - Performing denoising step...")
1050-
previous_draft = self.current_draft
1051-
self.current_draft = self.denoise_draft_with_retrieval(
1052-
self.current_draft, content_with_urls, initial_query
1053-
)
1054-
self.draft_history.append(self.current_draft)
1055-
1056-
# STEP 5: Evaluate quality improvement
1057-
print(" - Evaluating draft quality...")
1058-
quality_scores = self.evaluate_draft_quality(
1059-
self.current_draft, previous_draft, initial_query
1060-
)
1061-
1062-
# STEP 6: Component self-evolution based on feedback
1063-
self.update_component_fitness(quality_scores)
1064-
1065-
# STEP 7: Check termination conditions
1066-
completeness = quality_scores.get('completeness', 0.0)
1067-
improvement = quality_scores.get('improvement', 0.0)
1068-
1069-
print(f" - Quality scores: Completeness={completeness:.2f}, Improvement={improvement:.2f}")
1070-
1071-
# Terminate if high quality achieved or minimal improvement
1072-
# More lenient termination to ensure complete research
1073-
if completeness > 0.9 or (improvement < 0.03 and completeness > 0.7):
1074-
print(" - Quality threshold reached, research complete")
1075-
break
1076-
1077-
# Store current state for tracking
1078-
self.research_state["content"].append(content_with_urls)
1079-
self.research_state["sources"].extend([s['url'] for s in sources if 'url' in s])
10801067

1081-
# PHASE 3: FINALIZATION - Polish the final draft
1082-
print("TTD-DR: Finalizing research report...")
1083-
final_report = self.finalize_research_report(system_prompt, initial_query, self.current_draft)
1068+
# STEP 5: Evaluate quality improvement
1069+
print(" - Evaluating draft quality...")
1070+
quality_scores = self.evaluate_draft_quality(
1071+
self.current_draft, previous_draft, initial_query
1072+
)
1073+
1074+
# STEP 6: Component self-evolution based on feedback
1075+
self.update_component_fitness(quality_scores)
1076+
1077+
# STEP 7: Check termination conditions
1078+
completeness = quality_scores.get('completeness', 0.0)
1079+
improvement = quality_scores.get('improvement', 0.0)
1080+
1081+
print(f" - Quality scores: Completeness={completeness:.2f}, Improvement={improvement:.2f}")
10841082

1085-
return final_report, self.total_tokens
1083+
# Terminate if high quality achieved or minimal improvement
1084+
# More lenient termination to ensure complete research
1085+
if completeness > 0.9 or (improvement < 0.03 and completeness > 0.7):
1086+
print(" - Quality threshold reached, research complete")
1087+
break
1088+
1089+
# Store current state for tracking
1090+
self.research_state["content"].append(content_with_urls)
1091+
self.research_state["sources"].extend([s['url'] for s in sources if 'url' in s])
1092+
1093+
# PHASE 3: FINALIZATION - Polish the final draft
1094+
print("TTD-DR: Finalizing research report...")
1095+
final_report = self.finalize_research_report(system_prompt, initial_query, self.current_draft)
1096+
1097+
return final_report, self.total_tokens
10861098

1087-
finally:
1088-
# Clean up session manager reference
1099+
finally:
1100+
# Clean up browser session
1101+
if self.session_manager:
1102+
print(f"🏁 Closing research session: {self.session_id}")
1103+
close_session(self.session_id)
10891104
self.session_manager = None
10901105

10911106
def finalize_research_report(self, system_prompt: str, original_query: str, final_draft: str) -> str:
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
"""
2+
Session state management for deep research to handle concurrent requests
3+
"""
4+
5+
import threading
6+
import time
7+
from typing import Dict, Optional
8+
from optillm.plugins.web_search_plugin import BrowserSessionManager
9+
10+
class ResearchSessionState:
11+
"""
12+
Thread-safe session state manager for deep research.
13+
Ensures only one browser session is active per research query.
14+
"""
15+
def __init__(self):
16+
self._sessions: Dict[str, BrowserSessionManager] = {}
17+
self._lock = threading.Lock()
18+
self._session_timestamps: Dict[str, float] = {}
19+
self._max_session_age = 300 # 5 minutes
20+
21+
def get_or_create_session(self, session_id: str, headless: bool = False, timeout: int = 30) -> Optional[BrowserSessionManager]:
22+
"""
23+
Get an existing session or create a new one for the given session ID.
24+
"""
25+
with self._lock:
26+
print(f"🔍 Session state: {len(self._sessions)} active sessions, checking for ID: {session_id}")
27+
28+
# Clean up old sessions
29+
self._cleanup_old_sessions()
30+
31+
# Check if session exists and is active
32+
if session_id in self._sessions:
33+
session = self._sessions[session_id]
34+
print(f"📋 Found existing session for ID: {session_id}, active: {session.is_active()}, instance: {id(session)}")
35+
if session.is_active():
36+
print(f"♻️ Reusing existing browser session for research ID: {session_id}")
37+
return session
38+
else:
39+
# Session exists but is not active, remove it
40+
print(f"🔄 Removing inactive session for research ID: {session_id}")
41+
del self._sessions[session_id]
42+
if session_id in self._session_timestamps:
43+
del self._session_timestamps[session_id]
44+
45+
# Create new session
46+
print(f"🌐 Creating new browser session for research ID: {session_id}")
47+
session = BrowserSessionManager(headless=headless, timeout=timeout)
48+
session.get_or_create_searcher() # Initialize the browser
49+
50+
self._sessions[session_id] = session
51+
self._session_timestamps[session_id] = time.time()
52+
53+
print(f"✅ Created new session instance: {id(session)} for ID: {session_id}")
54+
print(f"📊 Total active sessions: {len(self._sessions)}")
55+
56+
return session
57+
58+
def remove_session(self, session_id: str):
59+
"""
60+
Remove and close a session.
61+
"""
62+
with self._lock:
63+
if session_id in self._sessions:
64+
session = self._sessions[session_id]
65+
try:
66+
session.close()
67+
except Exception as e:
68+
print(f"⚠️ Error closing session {session_id}: {e}")
69+
70+
del self._sessions[session_id]
71+
if session_id in self._session_timestamps:
72+
del self._session_timestamps[session_id]
73+
74+
print(f"🏁 Removed session for research ID: {session_id}")
75+
76+
def _cleanup_old_sessions(self):
77+
"""
78+
Clean up sessions older than max_session_age.
79+
"""
80+
current_time = time.time()
81+
sessions_to_remove = []
82+
83+
for session_id, timestamp in self._session_timestamps.items():
84+
if current_time - timestamp > self._max_session_age:
85+
sessions_to_remove.append(session_id)
86+
87+
for session_id in sessions_to_remove:
88+
print(f"🧹 Cleaning up old session: {session_id}")
89+
if session_id in self._sessions:
90+
try:
91+
self._sessions[session_id].close()
92+
except:
93+
pass
94+
del self._sessions[session_id]
95+
del self._session_timestamps[session_id]
96+
97+
98+
# Global session state instance
99+
_session_state = ResearchSessionState()
100+
101+
102+
def get_session_manager(session_id: str, headless: bool = False, timeout: int = 30) -> Optional[BrowserSessionManager]:
103+
"""
104+
Get or create a browser session for the given session ID.
105+
"""
106+
return _session_state.get_or_create_session(session_id, headless, timeout)
107+
108+
109+
def close_session(session_id: str):
110+
"""
111+
Close and remove a session.
112+
"""
113+
_session_state.remove_session(session_id)

0 commit comments

Comments
 (0)