Skip to content

Commit 037f8e5

Browse files
authored
Merge pull request #240 from vvincent1234/feat/deep-research
Feat/deep research
2 parents b65d43c + 8640bcb commit 037f8e5

File tree

8 files changed

+482
-10
lines changed

8 files changed

+482
-10
lines changed

src/agent/custom_agent.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,8 @@ def __init__(
111111

112112
# record last actions
113113
self._last_actions = None
114+
# record extract content
115+
self.extracted_content = ""
114116
# custom new info
115117
self.add_infos = add_infos
116118
# agent_state for Stop
@@ -261,9 +263,16 @@ async def step(self, step_info: Optional[CustomAgentStepInfo] = None) -> None:
261263
if len(actions) == 0:
262264
# TODO: fix no action case
263265
result = [ActionResult(is_done=True, extracted_content=step_info.memory, include_in_memory=True)]
266+
# Accumulate the text of every page-extraction result produced in this step.
for ret_ in result:
    # NOTE(review): extracted_content appears to be optional on ActionResult
    # (e.g. results that only carry an error), so check it is non-empty before
    # the substring test; `"..." in None` would raise TypeError.
    if ret_.extracted_content and "Extracted page" in ret_.extracted_content:
        # record every extracted page
        self.extracted_content += ret_.extracted_content
264270
self._last_result = result
265271
self._last_actions = actions
266272
if len(result) > 0 and result[-1].is_done:
273+
if not self.extracted_content:
274+
self.extracted_content = step_info.memory
275+
result[-1].extracted_content = self.extracted_content
267276
logger.info(f"📄 Result: {result[-1].extracted_content}")
268277

269278
self.consecutive_failures = 0
@@ -338,6 +347,10 @@ async def run(self, max_steps: int = 100) -> AgentHistoryList:
338347
break
339348
else:
340349
logger.info("❌ Failed to complete task in maximum steps")
350+
if not self.extracted_content:
351+
self.history.history[-1].result[-1].extracted_content = step_info.memory
352+
else:
353+
self.history.history[-1].result[-1].extracted_content = self.extracted_content
341354

342355
return self.history
343356

src/agent/custom_prompts.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from browser_use.agent.views import ActionResult, ActionModel
66
from browser_use.browser.views import BrowserState
77
from langchain_core.messages import HumanMessage, SystemMessage
8+
from datetime import datetime
89

910
from .custom_views import CustomAgentStepInfo
1011

@@ -116,15 +117,11 @@ def get_system_message(self) -> SystemMessage:
116117
Returns:
117118
str: Formatted system prompt
118119
"""
119-
time_str = self.current_date.strftime("%Y-%m-%d %H:%M")
120-
121120
AGENT_PROMPT = f"""You are a precise browser automation agent that interacts with websites through structured commands. Your role is to:
122121
1. Analyze the provided webpage elements and structure
123122
2. Plan a sequence of actions to accomplish the given task
124123
3. Your final result MUST be a valid JSON as the **RESPONSE FORMAT** described, containing your action sequence and state assessment, No need extra content to expalin.
125124
126-
Current date and time: {time_str}
127-
128125
{self.input_format()}
129126
130127
{self.important_rules()}
@@ -159,6 +156,9 @@ def get_user_message(self) -> HumanMessage:
159156
step_info_description = f'Current step: {self.step_info.step_number}/{self.step_info.max_steps}\n'
160157
else:
161158
step_info_description = ''
159+
160+
# Stamp the per-step user message with the current local time.
time_str = datetime.now().strftime("%Y-%m-%d %H:%M")
# The f-prefix is required: without it the literal text "{time_str}" is sent
# to the model instead of the formatted timestamp.
step_info_description += f"Current date and time: {time_str}"
162162

163163
elements_text = self.state.element_tree.clickable_elements_to_string(include_attributes=self.include_attributes)
164164

src/controller/custom_controller.py

Lines changed: 42 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,33 @@
1+
import pdb
2+
13
import pyperclip
24
from typing import Optional, Type
35
from pydantic import BaseModel
46
from browser_use.agent.views import ActionResult
57
from browser_use.browser.context import BrowserContext
68
from browser_use.controller.service import Controller, DoneAction
9+
from main_content_extractor import MainContentExtractor
10+
from browser_use.controller.views import (
11+
ClickElementAction,
12+
DoneAction,
13+
ExtractPageContentAction,
14+
GoToUrlAction,
15+
InputTextAction,
16+
OpenTabAction,
17+
ScrollAction,
18+
SearchGoogleAction,
19+
SendKeysAction,
20+
SwitchTabAction,
21+
)
22+
import logging
23+
24+
logger = logging.getLogger(__name__)
725

826

927
class CustomController(Controller):
1028
def __init__(self, exclude_actions: Optional[list[str]] = None,
             output_model: Optional[Type[BaseModel]] = None
             ):
    """Initialise the base controller, then register the custom actions.

    Args:
        exclude_actions: Forwarded to the base ``Controller`` as its
            ``exclude_actions`` argument. ``None`` (the default) is
            treated as an empty list.
        output_model: Optional pydantic model class forwarded unchanged to
            the base ``Controller``.
    """
    # Build a fresh list per call instead of using a mutable default
    # argument, which would be one shared object across all instances.
    if exclude_actions is None:
        exclude_actions = []
    super().__init__(exclude_actions=exclude_actions, output_model=output_model)
    self._register_custom_actions()
1533

@@ -29,3 +47,25 @@ async def paste_from_clipboard(browser: BrowserContext):
2947
await page.keyboard.type(text)
3048

3149
return ActionResult(extracted_content=text)
50+
51+
@self.registry.action(
    'Extract page content to get the pure text or markdown with links if include_links is set to true',
    param_model=ExtractPageContentAction,
    requires_browser=True,
)
async def extract_content(params: ExtractPageContentAction, browser: BrowserContext):
    """Extract the current page's main content through Jina Reader.

    The current page is navigated to ``https://r.jina.ai/<current url>``,
    the resulting HTML is passed to ``MainContentExtractor``, and the page
    is then navigated back.

    Args:
        params: Action parameters; ``include_links`` selects ``'markdown'``
            output when truthy and ``'text'`` otherwise.
        browser: Browser context whose current page is read.

    Returns:
        An ``ActionResult`` whose ``extracted_content`` is the logged
        message containing the output format and the extracted content.
    """
    page = await browser.get_current_page()
    output_format = 'markdown' if params.include_links else 'text'

    # use jina reader
    url = page.url
    jina_url = f"https://r.jina.ai/{url}"
    await page.goto(jina_url)
    try:
        content = MainContentExtractor.extract(  # type: ignore
            html=await page.content(),
            output_format=output_format,
        )
    finally:
        # go back to org url, even when extraction fails, so the agent is
        # not left stranded on the reader page
        await page.go_back()

    msg = f'📄 Extracted page content as {output_format}\n: {content}\n'
    logger.info(msg)
    return ActionResult(extracted_content=msg)

0 commit comments

Comments
 (0)