1
-
2
1
import pdb
3
2
4
3
from dotenv import load_dotenv
21
20
from src .agent .custom_prompts import CustomSystemPrompt , CustomAgentMessagePrompt
22
21
from src .controller .custom_controller import CustomController
23
22
from src .browser .custom_browser import CustomBrowser
23
+ from src .browser .custom_context import BrowserContextConfig
24
+ from browser_use .browser .context import (
25
+ BrowserContextConfig ,
26
+ BrowserContextWindowSize ,
27
+ )
24
28
25
29
logger = logging .getLogger (__name__ )
26
30
27
- async def deep_research (task , llm , agent_state , ** kwargs ):
31
+
32
+ async def deep_research (task , llm , agent_state = None , ** kwargs ):
28
33
task_id = str (uuid4 ())
29
34
save_dir = kwargs .get ("save_dir" , os .path .join (f"./tmp/deep_research/{ task_id } " ))
30
35
logger .info (f"Save Deep Research at: { save_dir } " )
31
36
os .makedirs (save_dir , exist_ok = True )
32
-
37
+
33
38
# max query num per iteration
34
39
max_query_num = kwargs .get ("max_query_num" , 3 )
40
+
41
+ use_own_browser = kwargs .get ("use_own_browser" , False )
42
+ extra_chromium_args = []
43
+ if use_own_browser :
44
+ # TODO: if use own browser, max query num must be 1 per iter, how to solve it?
45
+ max_query_num = 1
46
+ chrome_path = os .getenv ("CHROME_PATH" , None )
47
+ if chrome_path == "" :
48
+ chrome_path = None
49
+ chrome_user_data = os .getenv ("CHROME_USER_DATA" , None )
50
+ if chrome_user_data :
51
+ extra_chromium_args += [f"--user-data-dir={ chrome_user_data } " ]
52
+
53
+ browser = CustomBrowser (
54
+ config = BrowserConfig (
55
+ headless = kwargs .get ("headless" , False ),
56
+ disable_security = kwargs .get ("disable_security" , True ),
57
+ chrome_instance_path = chrome_path ,
58
+ extra_chromium_args = extra_chromium_args ,
59
+ )
60
+ )
61
+ browser_context = await browser .new_context ()
62
+ else :
63
+ browser = None
64
+ browser_context = None
65
+
66
+ controller = CustomController ()
67
+
35
68
search_system_prompt = f"""
36
69
You are a **Deep Researcher**, an AI agent specializing in in-depth information gathering and research using a web browser with **automated execution capabilities**. Your expertise lies in formulating comprehensive research plans and executing them meticulously to fulfill complex user requests. You will analyze user instructions, devise a detailed research plan, and determine the necessary search queries to gather the required information.
37
70
@@ -111,26 +144,12 @@ async def deep_research(task, llm, agent_state, **kwargs):
111
144
112
145
1. **User Instruction:** The original instruction given by the user. This helps you determine what kind of information will be useful and how to structure your thinking.
113
146
2. **Previous Recorded Information:** Textual data gathered and recorded from previous searches and processing, represented as a single text string.
114
- 3. **Current Search Results:** Textual data gathered from the most recent search query.
147
+ 3. **Current Search Plan:** Research plan for current search.
148
+ 4. **Current Search Query:** The current search query.
149
+ 5. **Current Search Results:** Textual data gathered from the most recent search query.
115
150
"""
116
151
record_messages = [SystemMessage (content = record_system_prompt )]
117
152
118
- use_own_browser = kwargs .get ("use_own_browser" , False )
119
- extra_chromium_args = []
120
- if use_own_browser :
121
- # if use own browser, max query num should be 1 per iter
122
- max_query_num = 1
123
- chrome_path = os .getenv ("CHROME_PATH" , None )
124
- if chrome_path == "" :
125
- chrome_path = None
126
- chrome_user_data = os .getenv ("CHROME_USER_DATA" , None )
127
- if chrome_user_data :
128
- extra_chromium_args += [f"--user-data-dir={ chrome_user_data } " ]
129
- else :
130
- chrome_path = None
131
- browser = None
132
- controller = CustomController ()
133
-
134
153
search_iteration = 0
135
154
max_search_iterations = kwargs .get ("max_search_iterations" , 10 ) # Limit search iterations to prevent infinite loop
136
155
use_vision = kwargs .get ("use_vision" , False )
@@ -167,35 +186,42 @@ async def deep_research(task, llm, agent_state, **kwargs):
167
186
logger .info (query_tasks )
168
187
169
188
# 2. Perform Web Search and Auto exec
170
- # Paralle BU agents
189
+ # Parallel BU agents
171
190
add_infos = "1. Please click on the most relevant link to get information and go deeper, instead of just staying on the search page. \n " \
172
- "2. When opening a PDF file, please remember to extract the content using extract_content instead of simply opening it for the user to view."
191
+ "2. When opening a PDF file, please remember to extract the content using extract_content instead of simply opening it for the user to view.\n "
173
192
if use_own_browser :
174
- browser = CustomBrowser (
175
- config = BrowserConfig (
176
- headless = kwargs .get ("headless" , False ),
177
- disable_security = kwargs .get ("disable_security" , True ),
178
- chrome_instance_path = chrome_path ,
179
- extra_chromium_args = extra_chromium_args ,
180
- )
193
+ agent = CustomAgent (
194
+ task = query_tasks [0 ],
195
+ llm = llm ,
196
+ add_infos = add_infos ,
197
+ browser = browser ,
198
+ browser_context = browser_context ,
199
+ use_vision = use_vision ,
200
+ system_prompt_class = CustomSystemPrompt ,
201
+ agent_prompt_class = CustomAgentMessagePrompt ,
202
+ max_actions_per_step = 5 ,
203
+ controller = controller ,
204
+ agent_state = agent_state
181
205
)
182
- agents = [CustomAgent (
183
- task = task ,
184
- llm = llm ,
185
- add_infos = add_infos ,
186
- browser = browser ,
187
- use_vision = use_vision ,
188
- system_prompt_class = CustomSystemPrompt ,
189
- agent_prompt_class = CustomAgentMessagePrompt ,
190
- max_actions_per_step = 5 ,
191
- controller = controller ,
192
- agent_state = agent_state
193
- ) for task in query_tasks ]
194
- query_results = await asyncio .gather (* [agent .run (max_steps = kwargs .get ("max_steps" , 10 )) for agent in agents ])
195
- if browser :
196
- await browser .close ()
197
- browser = None
198
- logger .info ("Browser closed." )
206
+ agent_result = await agent .run (max_steps = kwargs .get ("max_steps" , 10 ))
207
+ query_results = [agent_result ]
208
+ else :
209
+ agents = [CustomAgent (
210
+ task = query_tasks [0 ],
211
+ llm = llm ,
212
+ add_infos = add_infos ,
213
+ browser = browser ,
214
+ browser_context = browser_context ,
215
+ use_vision = use_vision ,
216
+ system_prompt_class = CustomSystemPrompt ,
217
+ agent_prompt_class = CustomAgentMessagePrompt ,
218
+ max_actions_per_step = 5 ,
219
+ controller = controller ,
220
+ agent_state = agent_state
221
+ ) for task in query_tasks ]
222
+ query_results = await asyncio .gather (
223
+ * [agent .run (max_steps = kwargs .get ("max_steps" , 10 )) for agent in agents ])
224
+
199
225
if agent_state and agent_state .is_stop_requested ():
200
226
# Stop
201
227
break
@@ -211,19 +237,27 @@ async def deep_research(task, llm, agent_state, **kwargs):
211
237
with open (querr_save_path , "w" , encoding = "utf-8" ) as fw :
212
238
fw .write (f"Query: { query_tasks [i ]} \n " )
213
239
fw .write (query_result )
214
- history_infos_ = json .dumps (history_infos , indent = 4 )
215
- record_prompt = f"User Instruction:{ task } . \n Previous Recorded Information:\n { json .dumps (history_infos_ )} \n Current Search Results: { query_result } \n "
216
- record_messages .append (HumanMessage (content = record_prompt ))
217
- ai_record_msg = llm .invoke (record_messages [:1 ] + record_messages [- 1 :])
218
- record_messages .append (ai_record_msg )
219
- if hasattr (ai_record_msg , "reasoning_content" ):
220
- logger .info ("🤯 Start Record Deep Thinking: " )
221
- logger .info (ai_record_msg .reasoning_content )
222
- logger .info ("🤯 End Record Deep Thinking" )
223
- record_content = ai_record_msg .content
224
- record_content = repair_json (record_content )
225
- new_record_infos = json .loads (record_content )
226
- history_infos .extend (new_record_infos )
240
+ # split query result in case the content is too long
241
+ query_results_split = query_result .split ("Extracted page content:" )
242
+ for qi , query_result_ in enumerate (query_results_split ):
243
+ if not query_result_ :
244
+ continue
245
+ else :
246
+ # TODO: limit content length: 128k tokens, ~3 chars per token
247
+ query_result_ = query_result_ [:128000 * 3 ]
248
+ history_infos_ = json .dumps (history_infos , indent = 4 )
249
+ record_prompt = f"User Instruction:{ task } . \n Previous Recorded Information:\n { history_infos_ } \n Current Search Iteration: { search_iteration } \n Current Search Plan:\n { query_plan } \n Current Search Query:\n { query_tasks [i ]} \n Current Search Results: { query_result_ } \n "
250
+ record_messages .append (HumanMessage (content = record_prompt ))
251
+ ai_record_msg = llm .invoke (record_messages [:1 ] + record_messages [- 1 :])
252
+ record_messages .append (ai_record_msg )
253
+ if hasattr (ai_record_msg , "reasoning_content" ):
254
+ logger .info ("🤯 Start Record Deep Thinking: " )
255
+ logger .info (ai_record_msg .reasoning_content )
256
+ logger .info ("🤯 End Record Deep Thinking" )
257
+ record_content = ai_record_msg .content
258
+ record_content = repair_json (record_content )
259
+ new_record_infos = json .loads (record_content )
260
+ history_infos .extend (new_record_infos )
227
261
228
262
logger .info ("\n Finish Searching, Start Generating Report..." )
229
263
@@ -258,7 +292,7 @@ async def deep_research(task, llm, agent_state, **kwargs):
258
292
1. **User Instruction:** The original instruction given by the user. This helps you determine what kind of information will be useful and how to structure your thinking.
259
293
2. **Search Information:** Information gathered from the search queries.
260
294
"""
261
-
295
+
262
296
history_infos_ = json .dumps (history_infos , indent = 4 )
263
297
record_json_path = os .path .join (save_dir , "record_infos.json" )
264
298
logger .info (f"save All recorded information at { record_json_path } " )
@@ -288,5 +322,6 @@ async def deep_research(task, llm, agent_state, **kwargs):
288
322
finally :
289
323
if browser :
290
324
await browser .close ()
291
- browser = None
292
- logger .info ("Browser closed." )
325
+ if browser_context :
326
+ await browser_context .close ()
327
+ logger .info ("Browser closed." )
0 commit comments