
Commit c95f643

Improve timeout handling and robustness in plugins
Extended timeout and retry logic for Gradio chat and deep research plugins to support long-running operations. Enhanced DeepResearcher prompts for more explicit gap analysis and research needs. Improved browser session recovery in web search plugin to handle invalidated sessions and prevent crashes. Updated default iteration and source limits for deep research to balance speed and coverage.
1 parent 85c205a commit c95f643

4 files changed: +227 -53 lines changed

optillm.py

Lines changed: 43 additions & 6 deletions
@@ -773,7 +773,11 @@ def parse_args():
         if extra and extra[0]: # Check if there are choices for this argument
             parser.add_argument(arg, type=type_, default=default, help=help_text, choices=extra[0])
         else:
-            parser.add_argument(arg, type=type_, default=default, help=help_text)
+            if type_ == bool:
+                # For boolean flags, use store_true action
+                parser.add_argument(arg, action='store_true', default=default, help=help_text)
+            else:
+                parser.add_argument(arg, type=type_, default=default, help=help_text)
 
     # Special handling for best_of_n to support both formats
     best_of_n_default = int(os.environ.get("OPTILLM_BEST_OF_N", 3))
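
For context, the type=bool pitfall this hunk works around is easy to reproduce. The sketch below is standalone and not part of the commit:

import argparse

# argparse calls bool() on the raw string, and any non-empty string
# (including "False") is truthy, so type=bool silently misparses flags.
p = argparse.ArgumentParser()
p.add_argument("--broken", type=bool, default=False)
p.add_argument("--fixed", action="store_true", default=False)

print(p.parse_args(["--broken", "False"]).broken)  # True (surprising)
print(p.parse_args(["--fixed"]).fixed)             # True
print(p.parse_args([]).fixed)                      # False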
@@ -855,12 +859,45 @@ def main():
             base_url = f"http://localhost:{port}/v1"
             logger.info(f"Launching Gradio interface connected to {base_url}")
 
-            # Launch Gradio interface
-            demo = gr.load_chat(
-                base_url,
-                model=server_config['model'],
-                token=None
+            # Create custom chat function with extended timeout
+            def chat_with_optillm(message, history):
+                import httpx
+                from openai import OpenAI
+
+                # Create client with extended timeout and no retries
+                custom_client = OpenAI(
+                    api_key="optillm",
+                    base_url=base_url,
+                    timeout=httpx.Timeout(1800.0, connect=5.0), # 30 min timeout
+                    max_retries=0 # No retries - prevents duplicate requests
+                )
+
+                # Convert history to messages format
+                messages = []
+                for h in history:
+                    if h[0]: # User message
+                        messages.append({"role": "user", "content": h[0]})
+                    if h[1]: # Assistant message
+                        messages.append({"role": "assistant", "content": h[1]})
+                messages.append({"role": "user", "content": message})
+
+                # Make request
+                try:
+                    response = custom_client.chat.completions.create(
+                        model=server_config['model'],
+                        messages=messages
+                    )
+                    return response.choices[0].message.content
+                except Exception as e:
+                    return f"Error: {str(e)}"
+
+            # Create Gradio interface with queue for long operations
+            demo = gr.ChatInterface(
+                chat_with_optillm,
+                title="OptILLM Chat Interface",
+                description=f"Connected to OptILLM proxy at {base_url}"
             )
+            demo.queue() # Enable queue to handle long operations properly
             demo.launch(server_name="0.0.0.0", share=False)
         except ImportError:
             logger.error("Gradio is required for GUI. Install it with: pip install gradio")

optillm/plugins/deep_research/research_engine.py

Lines changed: 41 additions & 13 deletions
@@ -225,7 +225,7 @@ class DeepResearcher:
     Based on: https://arxiv.org/abs/2507.16075v1
     """
 
-    def __init__(self, client, model: str, max_iterations: int = 8, max_sources: int = 15):
+    def __init__(self, client, model: str, max_iterations: int = 5, max_sources: int = 30):
         self.client = client
         self.model = model
         self.max_iterations = max_iterations
@@ -606,10 +606,17 @@ def generate_preliminary_draft(self, system_prompt: str, initial_query: str) ->
 5. Research Questions for Investigation
 6. Conclusion (preliminary thoughts)
 
-Mark sections that need external research with [NEEDS RESEARCH] tags.
-Use placeholder citations like [SOURCE NEEDED] where external evidence is required.
+IMPORTANT: You MUST mark multiple areas that need external research with [NEEDS RESEARCH] tags.
+Every claim that would benefit from external evidence should have [SOURCE NEEDED].
+This is a preliminary draft - it should have many gaps for iterative improvement.
 
-This is an initial draft - it should be substantive but acknowledge limitations.
+Example of proper marking:
+- "Recent studies show [SOURCE NEEDED] that quantum computing..."
+- "The economic impact [NEEDS RESEARCH: current market data] is significant..."
+- "Historical context [NEEDS RESEARCH: specific timeline and events] shows..."
+
+Include AT LEAST 5-10 [NEEDS RESEARCH] or [SOURCE NEEDED] tags throughout the draft.
+Be explicit about what you don't know and what needs external validation.
 """
 
         try:
@@ -639,23 +646,27 @@ def analyze_draft_gaps(self, current_draft: str, original_query: str) -> List[Di
         """
         gap_analysis_prompt = f"""
 Analyze the following research draft to identify specific gaps and areas that need external research.
-Pay special attention to any placeholder tags like [NEEDS RESEARCH], [SOURCE NEEDED], etc.
+Be thorough and aggressive in finding areas for improvement - even good drafts can be enhanced.
 
 Original Query: {original_query}
 
 Current Draft:
 {current_draft}
 
-PRIORITY ANALYSIS:
-1. First, identify any [NEEDS RESEARCH], [SOURCE NEEDED], [CITATION NEEDED] or similar placeholder tags
-2. Then identify other substantial gaps in content, evidence, or depth
+CRITICAL ANALYSIS REQUIRED:
+1. MANDATORY: Find ALL [NEEDS RESEARCH], [SOURCE NEEDED], [CITATION NEEDED] tags
+2. Identify claims lacking evidence (even if not explicitly marked)
+3. Find areas that could benefit from recent data or statistics
+4. Spot generalizations that need specific examples
+5. Locate outdated information or areas needing current updates
+6. Identify missing perspectives or counterarguments
 
 For each gap you identify, provide:
 1. SECTION: Which section has the gap
-2. GAP_TYPE: [PLACEHOLDER_TAG, MISSING_INFO, OUTDATED_INFO, NEEDS_EVIDENCE, LACKS_DEPTH, NEEDS_EXAMPLES]
+2. GAP_TYPE: [PLACEHOLDER_TAG, MISSING_INFO, OUTDATED_INFO, NEEDS_EVIDENCE, LACKS_DEPTH, NEEDS_EXAMPLES, MISSING_PERSPECTIVE]
 3. SPECIFIC_NEED: Exactly what information is needed
-4. SEARCH_QUERY: A specific search query to address this gap
-5. PRIORITY: [HIGH, MEDIUM, LOW] - HIGH for placeholder tags that need immediate resolution
+4. SEARCH_QUERY: A specific, targeted search query to address this gap
+5. PRIORITY: [HIGH, MEDIUM, LOW] - HIGH for placeholder tags and critical missing info
 
 Format each gap as:
 GAP_ID: [number]
@@ -665,7 +676,9 @@ def analyze_draft_gaps(self, current_draft: str, original_query: str) -> List[Di
 SEARCH_QUERY: [search query to find this info]
 PRIORITY: [priority level]
 
-Identify 3-6 most critical gaps, prioritizing any placeholder tags that need resolution.
+IMPORTANT: Identify AT LEAST 3-8 gaps. Be critical and thorough.
+Even well-written sections can benefit from additional evidence, examples, or perspectives.
+Push for depth, accuracy, and comprehensiveness in the research.
 """
 
         try:
@@ -701,6 +714,8 @@ def analyze_draft_gaps(self, current_draft: str, original_query: str) -> List[Di
                     current_gap['specific_need'] = line.split(':', 1)[1].strip()
                 elif line.startswith('SEARCH_QUERY:'):
                     current_gap['search_query'] = line.split(':', 1)[1].strip()
+                elif line.startswith('PRIORITY:'):
+                    current_gap['priority'] = line.split(':', 1)[1].strip()
 
             if current_gap:
                 gaps.append(current_gap)
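
As a quick illustration of what this parser produces (a standalone sketch; the real loop in analyze_draft_gaps also handles the GAP_ID, SECTION, and GAP_TYPE lines that fall outside this hunk):

sample = """GAP_ID: 1
SECTION: Background
GAP_TYPE: NEEDS_EVIDENCE
SPECIFIC_NEED: Current market-size figures
SEARCH_QUERY: quantum computing market size 2024
PRIORITY: HIGH"""

gap = {}
for line in sample.splitlines():
    line = line.strip()
    if line.startswith('SPECIFIC_NEED:'):
        gap['specific_need'] = line.split(':', 1)[1].strip()
    elif line.startswith('SEARCH_QUERY:'):
        gap['search_query'] = line.split(':', 1)[1].strip()
    elif line.startswith('PRIORITY:'):  # the field this commit starts capturing
        gap['priority'] = line.split(':', 1)[1].strip()

print(gap)
# {'specific_need': 'Current market-size figures',
#  'search_query': 'quantum computing market size 2024',
#  'priority': 'HIGH'}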
@@ -960,6 +975,7 @@ def generate_structured_report(self, system_prompt: str, original_query: str, sy
 8. Group related citations together when appropriate [1,2,3]
 9. Ensure the Executive Summary captures the essence of the entire report
 10. Make recommendations specific and actionable
+11. DO NOT create a References section - it will be added automatically
 """
 
         try:
@@ -978,6 +994,12 @@ def generate_structured_report(self, system_prompt: str, original_query: str, sy
             report_content = clean_reasoning_tags(report_content)
             self.total_tokens += response.usage.completion_tokens
 
+            # Remove any References section the LLM might have created
+            # This prevents duplicate reference sections
+            report_content = re.sub(r'##\s*References.*?(?=##|\Z)', '', report_content, flags=re.DOTALL)
+            report_content = re.sub(r'(?m)^References\s*\n\s*(?:\[\d+\]\s*\n)+', '', report_content)
+            report_content = re.sub(r'\n\s*\n\s*\n+', '\n\n', report_content) # Clean up extra newlines
+
             # Add references section with proper formatting
             references = "\n\n## References\n\n"
             for num, source in sorted(self.citations.items()):
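
A quick standalone check of the first pattern: it strips a model-written "## References" section, matching non-greedily up to the next "##" heading or the end of the text, so only the programmatically built reference list survives:

import re

report = (
    "## Findings\n\nAdoption grew sharply [1].\n\n"
    "## References\n\n[1] A source the model invented\n"
)
cleaned = re.sub(r'##\s*References.*?(?=##|\Z)', '', report, flags=re.DOTALL)
print(cleaned)  # only the "## Findings" section remains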
@@ -1132,6 +1154,7 @@ def finalize_research_report(self, system_prompt: str, original_query: str, fina
 - If sections are incomplete, either complete them with available information or remove them entirely
 - Ensure all statements are backed by available evidence or are clearly marked as preliminary findings
 - The report must be publication-ready with no incomplete elements
+- DO NOT create a References section - it will be added automatically
 
 Return the final polished research report.
 """
@@ -1168,6 +1191,11 @@ def finalize_research_report(self, system_prompt: str, original_query: str, fina
 
         self.total_tokens += response.usage.completion_tokens
 
+        # Remove any References section the LLM might have created
+        polished_report = re.sub(r'##\s*References.*?(?=##|\Z)', '', polished_report, flags=re.DOTALL)
+        polished_report = re.sub(r'(?m)^References\s*\n\s*(?:\[\d+\]\s*\n)+', '', polished_report)
+        polished_report = re.sub(r'\n\s*\n\s*\n+', '\n\n', polished_report) # Clean up extra newlines
+
         # Add references section
         references = "\n\n## References\n\n"
         for num, source in sorted(self.citations.items()):
@@ -1179,7 +1207,7 @@ def finalize_research_report(self, system_prompt: str, original_query: str, fina
         # Add TTD-DR metadata
         metadata = "\n---\n\n**TTD-DR Research Metadata:**\n"
         metadata += f"- Algorithm: Test-Time Diffusion Deep Researcher\n"
-        metadata += f"- Denoising iterations: {len(self.draft_history)}\n"
+        metadata += f"- Denoising iterations: {len(self.draft_history) - 1}\n"
         metadata += f"- Total gaps addressed: {sum(len(gaps) for gaps in self.gap_analysis_history)}\n"
         metadata += f"- Component fitness: {self.component_fitness}\n"
         metadata += f"- Total sources consulted: {len(self.citations)}\n"
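
The "- 1" here presumably accounts for the preliminary draft stored in draft_history before any denoising pass runs, so the metadata counts only actual refinement iterations rather than every stored draft.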

optillm/plugins/deep_research_plugin.py

Lines changed: 99 additions & 25 deletions
@@ -16,6 +16,93 @@
 SLUG = "deep_research"
 
 
+class DeepResearchClientWrapper:
+    """Wrapper that adds extended timeout support for different client types"""
+
+    def __init__(self, client, timeout=1800.0, max_retries=0):
+        self.client = client
+        self.timeout = timeout
+        self.max_retries = max_retries
+        self.client_type = self._detect_client_type()
+        self.chat = self.Chat(self)
+
+    def _detect_client_type(self):
+        """Detect the type of client based on class name"""
+        class_name = self.client.__class__.__name__
+        module_name = self.client.__class__.__module__
+
+        # Check for OpenAI-compatible clients (OpenAI, Cerebras, AzureOpenAI)
+        if 'OpenAI' in class_name or 'Cerebras' in class_name:
+            return 'openai_compatible'
+        # Check for LiteLLM wrapper
+        elif 'LiteLLMWrapper' in class_name:
+            return 'litellm'
+        # All other clients (OptILLM inference, etc.)
+        else:
+            return 'other'
+
+    class Chat:
+        def __init__(self, parent):
+            self.parent = parent
+            self.completions = self.Completions(parent)
+
+        class Completions:
+            def __init__(self, parent):
+                self.parent = parent
+
+            def create(self, **kwargs):
+                """Create completion with appropriate timeout handling"""
+                if self.parent.client_type == 'openai_compatible':
+                    # For OpenAI-compatible clients, recreate with timeout
+                    try:
+                        # Import here to avoid circular dependencies
+                        if 'Cerebras' in self.parent.client.__class__.__name__:
+                            from cerebras.cloud.sdk import Cerebras
+                            custom_client = Cerebras(
+                                api_key=self.parent.client.api_key,
+                                base_url=getattr(self.parent.client, 'base_url', None),
+                                timeout=self.parent.timeout,
+                                max_retries=self.parent.max_retries
+                            )
+                        else:
+                            # OpenAI or AzureOpenAI
+                            if 'Azure' in self.parent.client.__class__.__name__:
+                                from openai import AzureOpenAI
+                                # AzureOpenAI has different parameters
+                                custom_client = AzureOpenAI(
+                                    api_key=self.parent.client.api_key,
+                                    api_version=getattr(self.parent.client, 'api_version', None),
+                                    azure_endpoint=getattr(self.parent.client, 'azure_endpoint', None),
+                                    azure_ad_token_provider=getattr(self.parent.client, 'azure_ad_token_provider', None),
+                                    timeout=self.parent.timeout,
+                                    max_retries=self.parent.max_retries
+                                )
+                            else:
+                                from openai import OpenAI
+                                custom_client = OpenAI(
+                                    api_key=self.parent.client.api_key,
+                                    base_url=getattr(self.parent.client, 'base_url', None),
+                                    timeout=self.parent.timeout,
+                                    max_retries=self.parent.max_retries
+                                )
+                        return custom_client.chat.completions.create(**kwargs)
+                    except Exception as e:
+                        # If recreation fails, use original client
+                        print(f"⚠️ Warning: Could not create custom client with timeout: {str(e)}")
+                        return self.parent.client.chat.completions.create(**kwargs)
+
+                elif self.parent.client_type == 'litellm':
+                    # For LiteLLM, add timeout to the call
+                    kwargs['timeout'] = self.parent.timeout
+                    return self.parent.client.chat.completions.create(**kwargs)
+
+                else:
+                    # For other clients (like OptILLM), just pass through
+                    # They handle timeouts internally
+                    print(f"ℹ️ Using original client (type: {self.parent.client.__class__.__name__}) without timeout modification")
+                    return self.parent.client.chat.completions.create(**kwargs)
+
+
 def run(system_prompt: str, initial_query: str, client, model: str, request_config: Optional[Dict] = None) -> Tuple[str, int]:
     """
     Deep Research plugin implementing TTD-DR (Test-Time Diffusion Deep Researcher)
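
A sketch of how the wrapper is meant to be used: it mirrors just enough of the OpenAI client surface (client.chat.completions.create) that DeepResearcher can call it without knowing a wrapper is involved. Client construction and the model name below are placeholders:

from openai import OpenAI

base = OpenAI(api_key="sk-placeholder")  # any supported client type
wrapped = DeepResearchClientWrapper(base, timeout=1800.0, max_retries=0)

response = wrapped.chat.completions.create(
    model="gpt-4o-mini",  # hypothetical model name
    messages=[{"role": "user", "content": "ping"}],
)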
@@ -32,16 +119,16 @@ def run(system_prompt: str, initial_query: str, client, model: str, request_conf
         client: OpenAI client for LLM calls
         model: Model name to use for synthesis
         request_config: Optional configuration dict with keys:
-            - max_iterations: Maximum research iterations (default: 8)
-            - max_sources: Maximum web sources per search (default: 15)
+            - max_iterations: Maximum research iterations (default: 5)
+            - max_sources: Maximum web sources per search (default: 30)
 
     Returns:
         Tuple of (comprehensive_research_response, total_completion_tokens)
     """
     # Parse configuration
     config = request_config or {}
-    max_iterations = config.get("max_iterations", 8) # Increased to 8 for thorough research
-    max_sources = config.get("max_sources", 15) # Increased to 15 for comprehensive coverage
+    max_iterations = config.get("max_iterations", 5) # Default to 5 iterations for faster results
+    max_sources = config.get("max_sources", 30) # Balanced for comprehensive coverage
 
     # Validate inputs
     if not initial_query.strip():
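
Callers who prefer the old, more exhaustive behaviour can still restore it per request. A hypothetical invocation (prompts and model name are placeholders):

result, tokens = run(
    system_prompt="You are a meticulous research assistant.",
    initial_query="Survey the state of post-quantum cryptography adoption",
    client=client,
    model="gpt-4o-mini",
    request_config={"max_iterations": 8, "max_sources": 15},  # the pre-commit defaults
)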
@@ -50,9 +137,13 @@ def run(system_prompt: str, initial_query: str, client, model: str, request_conf
     if not client:
         return "Error: No LLM client provided for research synthesis", 0
 
-    # Initialize researcher
+    # Create a wrapped client with extended timeout for deep research
+    # Deep research can take a long time, so we need 30 minutes timeout and no retries
+    wrapped_client = DeepResearchClientWrapper(client, timeout=1800.0, max_retries=0)
+
+    # Initialize researcher with wrapped client
     researcher = DeepResearcher(
-        client=client,
+        client=wrapped_client,
         model=model,
         max_iterations=max_iterations,
         max_sources=max_sources
@@ -64,22 +155,5 @@ def run(system_prompt: str, initial_query: str, client, model: str, request_conf
         return result, total_tokens
 
     except Exception as e:
-        error_response = f"Deep research failed: {str(e)}\n\nFalling back to basic response..."
-
-        # Fallback: provide basic response using just the model
-        try:
-            fallback_response = client.chat.completions.create(
-                model=model,
-                messages=[
-                    {"role": "system", "content": system_prompt},
-                    {"role": "user", "content": initial_query}
-                ]
-            )
-
-            result = fallback_response.choices[0].message.content.strip()
-            tokens = fallback_response.usage.completion_tokens
-
-            return f"{error_response}\n\n{result}", tokens
-
-        except Exception as fallback_error:
-            return f"Deep research and fallback both failed: {str(e)} | {str(fallback_error)}", 0
+        error_message = f"Deep research failed: {str(e)}"
+        return error_message, 0
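
With the internal fallback removed, a failure now surfaces as a plain error string with zero tokens. A caller that still wants the old fall-back-to-base-model behaviour could reproduce it at the call site, roughly like this (a sketch based on the deleted code):

result, tokens = run(system_prompt, initial_query, client, model)
if tokens == 0 and result.startswith("Deep research failed:"):
    # Reproduce the removed fallback outside the plugin instead.
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": initial_query},
        ],
    )
    result = response.choices[0].message.content.strip()
    tokens = response.usage.completion_tokens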
