Commit ecfc1d5

Merge pull request #249 from vvincent1234/fix/dr-bugs
Fix/dr bugs
2 parents 3c69544 + b7ee26a commit ecfc1d5

File tree: 5 files changed, +130 -70 lines changed

README.md

Lines changed: 0 additions & 5 deletions
@@ -58,11 +58,6 @@ Activate the virtual environment:
 ```bash
 source .venv/bin/activate
 ```
-alternative activation for Windows:
-
-```bash
-.\.venv\Scripts\Activate
-```
 
 #### Step 3: Install Dependencies
 Install Python packages:

src/controller/custom_controller.py

Lines changed: 1 addition & 1 deletion
@@ -66,6 +66,6 @@ async def extract_content(params: ExtractPageContentAction, browser: BrowserCont
     )
     # go back to org url
     await page.go_back()
-    msg = f'📄 Extracted page content as {output_format}\n: {content}\n'
+    msg = f'Extracted page content:\n {content}\n'
     logger.info(msg)
     return ActionResult(extracted_content=msg)
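
Note on this change: the simplified prefix is not purely cosmetic. The deep_research.py hunks below split each agent transcript on the exact string "Extracted page content:" before recording it, so the two files have to agree on the marker. A minimal sketch of that interplay (hypothetical helper name; the chunk cap mirrors the TODO in the deep_research.py diff):

```python
# Hypothetical helper mirroring the split/truncate logic added in
# src/utils/deep_research.py: break a long agent transcript on the marker
# emitted by extract_content above, then cap each chunk at roughly
# 128k tokens (~3 chars per token).
def split_extracted_content(query_result: str, max_chars: int = 128000 * 3) -> list[str]:
    chunks = query_result.split("Extracted page content:")
    return [chunk[:max_chars] for chunk in chunks if chunk]
```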

src/utils/deep_research.py

Lines changed: 97 additions & 62 deletions
@@ -1,4 +1,3 @@
-
 import pdb
 
 from dotenv import load_dotenv
@@ -21,17 +20,51 @@
 from src.agent.custom_prompts import CustomSystemPrompt, CustomAgentMessagePrompt
 from src.controller.custom_controller import CustomController
 from src.browser.custom_browser import CustomBrowser
+from src.browser.custom_context import BrowserContextConfig
+from browser_use.browser.context import (
+    BrowserContextConfig,
+    BrowserContextWindowSize,
+)
 
 logger = logging.getLogger(__name__)
 
-async def deep_research(task, llm, agent_state, **kwargs):
+
+async def deep_research(task, llm, agent_state=None, **kwargs):
     task_id = str(uuid4())
     save_dir = kwargs.get("save_dir", os.path.join(f"./tmp/deep_research/{task_id}"))
     logger.info(f"Save Deep Research at: {save_dir}")
     os.makedirs(save_dir, exist_ok=True)
-
+
     # max qyery num per iteration
     max_query_num = kwargs.get("max_query_num", 3)
+
+    use_own_browser = kwargs.get("use_own_browser", False)
+    extra_chromium_args = []
+    if use_own_browser:
+        # TODO: if use own browser, max query num must be 1 per iter, how to solve it?
+        max_query_num = 1
+        chrome_path = os.getenv("CHROME_PATH", None)
+        if chrome_path == "":
+            chrome_path = None
+        chrome_user_data = os.getenv("CHROME_USER_DATA", None)
+        if chrome_user_data:
+            extra_chromium_args += [f"--user-data-dir={chrome_user_data}"]
+
+        browser = CustomBrowser(
+            config=BrowserConfig(
+                headless=kwargs.get("headless", False),
+                disable_security=kwargs.get("disable_security", True),
+                chrome_instance_path=chrome_path,
+                extra_chromium_args=extra_chromium_args,
+            )
+        )
+        browser_context = await browser.new_context()
+    else:
+        browser = None
+        browser_context = None
+
+    controller = CustomController()
+
     search_system_prompt = f"""
 You are a **Deep Researcher**, an AI agent specializing in in-depth information gathering and research using a web browser with **automated execution capabilities**. Your expertise lies in formulating comprehensive research plans and executing them meticulously to fulfill complex user requests. You will analyze user instructions, devise a detailed research plan, and determine the necessary search queries to gather the required information.
 
@@ -111,26 +144,12 @@ async def deep_research(task, llm, agent_state, **kwargs):
 
 1. **User Instruction:** The original instruction given by the user. This helps you determine what kind of information will be useful and how to structure your thinking.
 2. **Previous Recorded Information:** Textual data gathered and recorded from previous searches and processing, represented as a single text string.
-3. **Current Search Results:** Textual data gathered from the most recent search query.
+3. **Current Search Plan:** Research plan for current search.
+4. **Current Search Query:** The current search query.
+5. **Current Search Results:** Textual data gathered from the most recent search query.
 """
     record_messages = [SystemMessage(content=record_system_prompt)]
 
-    use_own_browser = kwargs.get("use_own_browser", False)
-    extra_chromium_args = []
-    if use_own_browser:
-        # if use own browser, max query num should be 1 per iter
-        max_query_num = 1
-        chrome_path = os.getenv("CHROME_PATH", None)
-        if chrome_path == "":
-            chrome_path = None
-        chrome_user_data = os.getenv("CHROME_USER_DATA", None)
-        if chrome_user_data:
-            extra_chromium_args += [f"--user-data-dir={chrome_user_data}"]
-    else:
-        chrome_path = None
-    browser = None
-    controller = CustomController()
-
     search_iteration = 0
     max_search_iterations = kwargs.get("max_search_iterations", 10)  # Limit search iterations to prevent infinite loop
     use_vision = kwargs.get("use_vision", False)
@@ -167,35 +186,42 @@ async def deep_research(task, llm, agent_state, **kwargs):
         logger.info(query_tasks)
 
         # 2. Perform Web Search and Auto exec
-        # Paralle BU agents
+        # Parallel BU agents
        add_infos = "1. Please click on the most relevant link to get information and go deeper, instead of just staying on the search page. \n" \
-                    "2. When opening a PDF file, please remember to extract the content using extract_content instead of simply opening it for the user to view."
+                    "2. When opening a PDF file, please remember to extract the content using extract_content instead of simply opening it for the user to view.\n"
        if use_own_browser:
-            browser = CustomBrowser(
-                config=BrowserConfig(
-                    headless=kwargs.get("headless", False),
-                    disable_security=kwargs.get("disable_security", True),
-                    chrome_instance_path=chrome_path,
-                    extra_chromium_args=extra_chromium_args,
-                )
+            agent = CustomAgent(
+                task=query_tasks[0],
+                llm=llm,
+                add_infos=add_infos,
+                browser=browser,
+                browser_context=browser_context,
+                use_vision=use_vision,
+                system_prompt_class=CustomSystemPrompt,
+                agent_prompt_class=CustomAgentMessagePrompt,
+                max_actions_per_step=5,
+                controller=controller,
+                agent_state=agent_state
            )
-            agents = [CustomAgent(
-                task=task,
-                llm=llm,
-                add_infos=add_infos,
-                browser=browser,
-                use_vision=use_vision,
-                system_prompt_class=CustomSystemPrompt,
-                agent_prompt_class=CustomAgentMessagePrompt,
-                max_actions_per_step=5,
-                controller=controller,
-                agent_state=agent_state
-            ) for task in query_tasks]
-            query_results = await asyncio.gather(*[agent.run(max_steps=kwargs.get("max_steps", 10)) for agent in agents])
-            if browser:
-                await browser.close()
-                browser = None
-                logger.info("Browser closed.")
+            agent_result = await agent.run(max_steps=kwargs.get("max_steps", 10))
+            query_results = [agent_result]
+        else:
+            agents = [CustomAgent(
+                task=query_tasks[0],
+                llm=llm,
+                add_infos=add_infos,
+                browser=browser,
+                browser_context=browser_context,
+                use_vision=use_vision,
+                system_prompt_class=CustomSystemPrompt,
+                agent_prompt_class=CustomAgentMessagePrompt,
+                max_actions_per_step=5,
+                controller=controller,
+                agent_state=agent_state
+            ) for task in query_tasks]
+            query_results = await asyncio.gather(
+                *[agent.run(max_steps=kwargs.get("max_steps", 10)) for agent in agents])
+
        if agent_state and agent_state.is_stop_requested():
            # Stop
            break
@@ -211,19 +237,27 @@ async def deep_research(task, llm, agent_state, **kwargs):
            with open(querr_save_path, "w", encoding="utf-8") as fw:
                fw.write(f"Query: {query_tasks[i]}\n")
                fw.write(query_result)
-            history_infos_ = json.dumps(history_infos, indent=4)
-            record_prompt = f"User Instruction:{task}. \nPrevious Recorded Information:\n {json.dumps(history_infos_)} \n Current Search Results: {query_result}\n "
-            record_messages.append(HumanMessage(content=record_prompt))
-            ai_record_msg = llm.invoke(record_messages[:1] + record_messages[-1:])
-            record_messages.append(ai_record_msg)
-            if hasattr(ai_record_msg, "reasoning_content"):
-                logger.info("🤯 Start Record Deep Thinking: ")
-                logger.info(ai_record_msg.reasoning_content)
-                logger.info("🤯 End Record Deep Thinking")
-            record_content = ai_record_msg.content
-            record_content = repair_json(record_content)
-            new_record_infos = json.loads(record_content)
-            history_infos.extend(new_record_infos)
+            # split query result in case the content is too long
+            query_results_split = query_result.split("Extracted page content:")
+            for qi, query_result_ in enumerate(query_results_split):
+                if not query_result_:
+                    continue
+                else:
+                    # TODO: limit content lenght: 128k tokens, ~3 chars per token
+                    query_result_ = query_result_[:128000*3]
+                    history_infos_ = json.dumps(history_infos, indent=4)
+                    record_prompt = f"User Instruction:{task}. \nPrevious Recorded Information:\n {history_infos_}\n Current Search Iteration: {search_iteration}\n Current Search Plan:\n{query_plan}\n Current Search Query:\n {query_tasks[i]}\n Current Search Results: {query_result_}\n "
+                    record_messages.append(HumanMessage(content=record_prompt))
+                    ai_record_msg = llm.invoke(record_messages[:1] + record_messages[-1:])
+                    record_messages.append(ai_record_msg)
+                    if hasattr(ai_record_msg, "reasoning_content"):
+                        logger.info("🤯 Start Record Deep Thinking: ")
+                        logger.info(ai_record_msg.reasoning_content)
+                        logger.info("🤯 End Record Deep Thinking")
+                    record_content = ai_record_msg.content
+                    record_content = repair_json(record_content)
+                    new_record_infos = json.loads(record_content)
+                    history_infos.extend(new_record_infos)
 
        logger.info("\nFinish Searching, Start Generating Report...")
 
@@ -258,7 +292,7 @@ async def deep_research(task, llm, agent_state, **kwargs):
 1. **User Instruction:** The original instruction given by the user. This helps you determine what kind of information will be useful and how to structure your thinking.
 2. **Search Information:** Information gathered from the search queries.
 """
-
+
        history_infos_ = json.dumps(history_infos, indent=4)
        record_json_path = os.path.join(save_dir, "record_infos.json")
        logger.info(f"save All recorded information at {record_json_path}")
@@ -288,5 +322,6 @@ async def deep_research(task, llm, agent_state, **kwargs):
    finally:
        if browser:
            await browser.close()
-            browser = None
-            logger.info("Browser closed.")
+        if browser_context:
+            await browser_context.close()
+        logger.info("Browser closed.")
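
Taken together, these hunks move browser ownership out of the search loop: when use_own_browser is set, the browser and context are created once before the loop (with CHROME_PATH and CHROME_USER_DATA picked up from the environment) and both handles are closed in the finally block, instead of a fresh browser being built and torn down on every iteration. A condensed sketch of that lifecycle, a paraphrase of the diff rather than the literal code (helper names are illustrative; the BrowserConfig import path is an assumption based on how the module already uses it):

```python
import os

from browser_use.browser.browser import BrowserConfig  # assumed import path
from src.browser.custom_browser import CustomBrowser


async def open_shared_browser(use_own_browser: bool, headless: bool = False):
    """Create one browser/context up front, as deep_research now does."""
    if not use_own_browser:
        return None, None
    chrome_path = os.getenv("CHROME_PATH") or None
    extra_args = []
    if os.getenv("CHROME_USER_DATA"):
        extra_args.append(f"--user-data-dir={os.getenv('CHROME_USER_DATA')}")
    browser = CustomBrowser(config=BrowserConfig(
        headless=headless,
        chrome_instance_path=chrome_path,
        extra_chromium_args=extra_args,
    ))
    return browser, await browser.new_context()


async def close_shared_browser(browser, browser_context):
    """Mirror of the new finally block: close whichever handles exist."""
    if browser:
        await browser.close()
    if browser_context:
        await browser_context.close()
```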

tests/test_browser_use.py

Lines changed: 2 additions & 2 deletions
@@ -357,5 +357,5 @@ async def test_browser_use_parallel():
 
 if __name__ == "__main__":
     # asyncio.run(test_browser_use_org())
-    asyncio.run(test_browser_use_parallel())
-    # asyncio.run(test_browser_use_custom())
+    # asyncio.run(test_browser_use_parallel())
+    asyncio.run(test_browser_use_custom())

tests/test_deep_research.py

Lines changed: 30 additions & 0 deletions
@@ -0,0 +1,30 @@
+import asyncio
+import os
+from dotenv import load_dotenv
+
+load_dotenv()
+import sys
+
+sys.path.append(".")
+
+async def test_deep_research():
+    from src.utils.deep_research import deep_research
+    from src.utils import utils
+
+    task = "write a report about DeepSeek-R1, get its pdf"
+    llm = utils.get_llm_model(
+        provider="gemini",
+        model_name="gemini-2.0-flash-thinking-exp-01-21",
+        temperature=1.0,
+        api_key=os.getenv("GOOGLE_API_KEY", "")
+    )
+
+    report_content, report_file_path = await deep_research(task=task, llm=llm, agent_state=None,
+                                                           max_search_iterations=1,
+                                                           max_query_num=3,
+                                                           use_own_browser=False)
+
+
+
+if __name__ == "__main__":
+    asyncio.run(test_deep_research())
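
A sibling test could drive the new use_own_browser path from the deep_research.py diff. The following is a hypothetical addition, not part of the commit; it assumes CHROME_PATH (and optionally CHROME_USER_DATA) is set in .env alongside GOOGLE_API_KEY and would live in the same file, reusing the module-level imports above:

```python
# Hypothetical companion test for the use_own_browser path added in this commit.
async def test_deep_research_own_browser():
    from src.utils.deep_research import deep_research
    from src.utils import utils

    llm = utils.get_llm_model(
        provider="gemini",
        model_name="gemini-2.0-flash-thinking-exp-01-21",
        temperature=1.0,
        api_key=os.getenv("GOOGLE_API_KEY", ""),
    )
    # max_query_num is forced down to 1 internally when use_own_browser=True
    report_content, report_file_path = await deep_research(
        task="write a report about DeepSeek-R1, get its pdf",
        llm=llm,
        max_search_iterations=1,
        use_own_browser=True,
    )
```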
