|
17 | 17 | from optillm.plugins.web_search_plugin import run as web_search_run, BrowserSessionManager |
18 | 18 | from optillm.plugins.readurls_plugin import run as readurls_run |
19 | 19 | from optillm.plugins.memory_plugin import run as memory_run |
| 20 | +from optillm.plugins.deep_research.session_state import get_session_manager, close_session |
| 21 | +import uuid |
20 | 22 |
|
21 | 23 |
|
22 | 24 | def clean_reasoning_tags(text: str) -> str: |
@@ -228,6 +230,8 @@ def __init__(self, client, model: str, max_iterations: int = 8, max_sources: int |
228 | 230 | self.model = model |
229 | 231 | self.max_iterations = max_iterations |
230 | 232 | self.max_sources = max_sources |
| 233 | + self.session_id = str(uuid.uuid4()) # Unique session ID for this research |
| 234 | + self.session_manager = None # Will be set when research starts |
231 | 235 | self.research_state = { |
232 | 236 | "queries": [], |
233 | 237 | "sources": [], |
@@ -388,8 +392,12 @@ def perform_web_search(self, queries: List[str]) -> str: |
388 | 392 | all_results = [] |
389 | 393 |
|
390 | 394 | # Check if session manager is available |
391 | | - if not hasattr(self, 'session_manager'): |
| 395 | + if not hasattr(self, 'session_manager') or self.session_manager is None: |
| 396 | + # Log warning - this shouldn't happen in normal flow |
| 397 | + print(f"⚠️ Warning: session_manager not available in perform_web_search (session_id: {getattr(self, 'session_id', 'N/A')})") |
392 | 398 | self.session_manager = None |
| 399 | + else: |
| 400 | + print(f"📊 Using existing session manager for web search (session_id: {self.session_id}, manager: {id(self.session_manager)})") |
393 | 401 |
|
394 | 402 | # Perform individual searches for each query to avoid truncation issues |
395 | 403 | for i, query in enumerate(queries): |
@@ -717,7 +725,9 @@ def perform_gap_targeted_search(self, gaps: List[Dict[str, str]]) -> str: |
717 | 725 | all_results = [] |
718 | 726 |
|
719 | 727 | # Check if session manager is available |
720 | | - if not hasattr(self, 'session_manager'): |
| 728 | + if not hasattr(self, 'session_manager') or self.session_manager is None: |
| 729 | + # Log warning - this shouldn't happen in normal flow |
| 730 | +            print("⚠️ Warning: session_manager not available in perform_gap_targeted_search")
721 | 731 | self.session_manager = None |
722 | 732 |
|
723 | 733 | # Sort gaps by priority - HIGH priority first (placeholder tags) |
@@ -1006,86 +1016,91 @@ def research(self, system_prompt: str, initial_query: str) -> Tuple[str, int]: |
1006 | 1016 | 4. Quality-guided termination |
1007 | 1017 | """ |
1008 | 1018 |
|
1009 | | - # Use a single browser session for all searches in this research |
1010 | | - with BrowserSessionManager(headless=False, timeout=30) as session_manager: |
1011 | | - print("🔬 Starting deep research with single browser session") |
1012 | | - self.session_manager = session_manager # Store for use in search methods |
| 1019 | + # Get or create a browser session for this research session |
| 1020 | + self.session_manager = get_session_manager(self.session_id, headless=False, timeout=30) |
| 1021 | + if self.session_manager: |
| 1022 | + print(f"🔬 Starting deep research with session ID: {self.session_id} (DeepResearcher instance: {id(self)})") |
| 1023 | + else: |
| 1024 | + print("⚠️ Failed to create browser session, proceeding without web search") |
1013 | 1025 |
|
1014 | | - try: |
1015 | | - # PHASE 1: INITIALIZATION - Generate preliminary draft (updatable skeleton) |
1016 | | - print("TTD-DR: Generating preliminary draft...") |
1017 | | - self.current_draft = self.generate_preliminary_draft(system_prompt, initial_query) |
| 1026 | + try: |
| 1027 | + # PHASE 1: INITIALIZATION - Generate preliminary draft (updatable skeleton) |
| 1028 | + print("TTD-DR: Generating preliminary draft...") |
| 1029 | + self.current_draft = self.generate_preliminary_draft(system_prompt, initial_query) |
| 1030 | + self.draft_history.append(self.current_draft) |
| 1031 | + |
| 1032 | + # PHASE 2: ITERATIVE DENOISING LOOP |
| 1033 | + for iteration in range(self.max_iterations): |
| 1034 | + self.research_state["iteration"] = iteration + 1 |
| 1035 | + print(f"TTD-DR: Denoising iteration {iteration + 1}/{self.max_iterations}") |
| 1036 | + |
| 1037 | + # STEP 1: Analyze current draft for gaps (draft-guided search) |
| 1038 | + print(" - Analyzing draft gaps...") |
| 1039 | + gaps = self.analyze_draft_gaps(self.current_draft, initial_query) |
| 1040 | + self.gap_analysis_history.append(gaps) |
| 1041 | + |
| 1042 | + if not gaps: |
| 1043 | + print(" - No significant gaps found, research complete") |
| 1044 | + break |
| 1045 | + |
| 1046 | + # STEP 2: Perform gap-targeted retrieval |
| 1047 | + print(f" - Performing targeted search for {len(gaps)} gaps...") |
| 1048 | + retrieval_content = self.perform_gap_targeted_search(gaps) |
| 1049 | + |
| 1050 | + # STEP 3: Extract and fetch URLs from search results |
| 1051 | + print(" - Extracting and fetching content...") |
| 1052 | + content_with_urls, sources = self.extract_and_fetch_urls(retrieval_content) |
| 1053 | + |
| 1054 | + # Register sources for citations |
| 1055 | + for source in sources: |
| 1056 | + if 'url' in source: |
| 1057 | + self.citation_counter += 1 |
| 1058 | + self.citations[self.citation_counter] = source |
| 1059 | + |
| 1060 | + # STEP 4: DENOISING - Integrate retrieved info with current draft |
| 1061 | + print(" - Performing denoising step...") |
| 1062 | + previous_draft = self.current_draft |
| 1063 | + self.current_draft = self.denoise_draft_with_retrieval( |
| 1064 | + self.current_draft, content_with_urls, initial_query |
| 1065 | + ) |
1018 | 1066 | self.draft_history.append(self.current_draft) |
1019 | | - |
1020 | | - # PHASE 2: ITERATIVE DENOISING LOOP |
1021 | | - for iteration in range(self.max_iterations): |
1022 | | - self.research_state["iteration"] = iteration + 1 |
1023 | | - print(f"TTD-DR: Denoising iteration {iteration + 1}/{self.max_iterations}") |
1024 | | - |
1025 | | - # STEP 1: Analyze current draft for gaps (draft-guided search) |
1026 | | - print(" - Analyzing draft gaps...") |
1027 | | - gaps = self.analyze_draft_gaps(self.current_draft, initial_query) |
1028 | | - self.gap_analysis_history.append(gaps) |
1029 | | - |
1030 | | - if not gaps: |
1031 | | - print(" - No significant gaps found, research complete") |
1032 | | - break |
1033 | | - |
1034 | | - # STEP 2: Perform gap-targeted retrieval |
1035 | | - print(f" - Performing targeted search for {len(gaps)} gaps...") |
1036 | | - retrieval_content = self.perform_gap_targeted_search(gaps) |
1037 | | - |
1038 | | - # STEP 3: Extract and fetch URLs from search results |
1039 | | - print(" - Extracting and fetching content...") |
1040 | | - content_with_urls, sources = self.extract_and_fetch_urls(retrieval_content) |
1041 | | - |
1042 | | - # Register sources for citations |
1043 | | - for source in sources: |
1044 | | - if 'url' in source: |
1045 | | - self.citation_counter += 1 |
1046 | | - self.citations[self.citation_counter] = source |
1047 | | - |
1048 | | - # STEP 4: DENOISING - Integrate retrieved info with current draft |
1049 | | - print(" - Performing denoising step...") |
1050 | | - previous_draft = self.current_draft |
1051 | | - self.current_draft = self.denoise_draft_with_retrieval( |
1052 | | - self.current_draft, content_with_urls, initial_query |
1053 | | - ) |
1054 | | - self.draft_history.append(self.current_draft) |
1055 | | - |
1056 | | - # STEP 5: Evaluate quality improvement |
1057 | | - print(" - Evaluating draft quality...") |
1058 | | - quality_scores = self.evaluate_draft_quality( |
1059 | | - self.current_draft, previous_draft, initial_query |
1060 | | - ) |
1061 | | - |
1062 | | - # STEP 6: Component self-evolution based on feedback |
1063 | | - self.update_component_fitness(quality_scores) |
1064 | | - |
1065 | | - # STEP 7: Check termination conditions |
1066 | | - completeness = quality_scores.get('completeness', 0.0) |
1067 | | - improvement = quality_scores.get('improvement', 0.0) |
1068 | | - |
1069 | | - print(f" - Quality scores: Completeness={completeness:.2f}, Improvement={improvement:.2f}") |
1070 | | - |
1071 | | - # Terminate if high quality achieved or minimal improvement |
1072 | | - # More lenient termination to ensure complete research |
1073 | | - if completeness > 0.9 or (improvement < 0.03 and completeness > 0.7): |
1074 | | - print(" - Quality threshold reached, research complete") |
1075 | | - break |
1076 | | - |
1077 | | - # Store current state for tracking |
1078 | | - self.research_state["content"].append(content_with_urls) |
1079 | | - self.research_state["sources"].extend([s['url'] for s in sources if 'url' in s]) |
1080 | 1067 |
|
1081 | | - # PHASE 3: FINALIZATION - Polish the final draft |
1082 | | - print("TTD-DR: Finalizing research report...") |
1083 | | - final_report = self.finalize_research_report(system_prompt, initial_query, self.current_draft) |
| 1068 | + # STEP 5: Evaluate quality improvement |
| 1069 | + print(" - Evaluating draft quality...") |
| 1070 | + quality_scores = self.evaluate_draft_quality( |
| 1071 | + self.current_draft, previous_draft, initial_query |
| 1072 | + ) |
| 1073 | + |
| 1074 | + # STEP 6: Component self-evolution based on feedback |
| 1075 | + self.update_component_fitness(quality_scores) |
| 1076 | + |
| 1077 | + # STEP 7: Check termination conditions |
| 1078 | + completeness = quality_scores.get('completeness', 0.0) |
| 1079 | + improvement = quality_scores.get('improvement', 0.0) |
| 1080 | + |
| 1081 | + print(f" - Quality scores: Completeness={completeness:.2f}, Improvement={improvement:.2f}") |
1084 | 1082 |
|
1085 | | - return final_report, self.total_tokens |
| 1083 | + # Terminate if high quality achieved or minimal improvement |
| 1084 | + # More lenient termination to ensure complete research |
| 1085 | + if completeness > 0.9 or (improvement < 0.03 and completeness > 0.7): |
| 1086 | + print(" - Quality threshold reached, research complete") |
| 1087 | + break |
| 1088 | + |
| 1089 | + # Store current state for tracking |
| 1090 | + self.research_state["content"].append(content_with_urls) |
| 1091 | + self.research_state["sources"].extend([s['url'] for s in sources if 'url' in s]) |
| 1092 | + |
| 1093 | + # PHASE 3: FINALIZATION - Polish the final draft |
| 1094 | + print("TTD-DR: Finalizing research report...") |
| 1095 | + final_report = self.finalize_research_report(system_prompt, initial_query, self.current_draft) |
| 1096 | + |
| 1097 | + return final_report, self.total_tokens |
1086 | 1098 |
|
1087 | | - finally: |
1088 | | - # Clean up session manager reference |
| 1099 | + finally: |
| 1100 | + # Clean up browser session |
| 1101 | + if self.session_manager: |
| 1102 | + print(f"🏁 Closing research session: {self.session_id}") |
| 1103 | + close_session(self.session_id) |
1089 | 1104 | self.session_manager = None |
1090 | 1105 |
|
1091 | 1106 | def finalize_research_report(self, system_prompt: str, original_query: str, final_draft: str) -> str: |
|
0 commit comments