66from asgiref .sync import async_to_sync
77from django .conf import settings
88from langrocks .client import WebBrowser
9- from langrocks .common .models .web_browser import WebBrowserCommand , WebBrowserCommandType
9+ from langrocks .common .models .web_browser import (
10+ WebBrowserCommand ,
11+ WebBrowserCommandType ,
12+ WebBrowserContent ,
13+ )
1014from pydantic import BaseModel , Field , field_validator
1115
1216from llmstack .apps .schemas import OutputTemplate
1519 ApiProcessorInterface ,
1620 ApiProcessorSchema ,
1721)
18- from llmstack .processors .providers .promptly .web_browser import (
19- BrowserRemoteSessionData ,
20- WebBrowserOutput ,
21- )
22+ from llmstack .processors .providers .promptly .web_browser import BrowserRemoteSessionData
2223
2324logger = logging .getLogger (__name__ )
2425
@@ -86,8 +87,35 @@ class StaticWebBrowserInput(ApiProcessorSchema):
8687 )
8788
8889
# Content payload used by the static web browser processor. It extends
# WebBrowserContent and declares `screenshot` as an optional string.
# NOTE(review): presumably this overrides a `screenshot` field on the parent
# so that an asset reference can be stored instead of raw image data - confirm
# against WebBrowserContent.
# (Kept as `#` comments on purpose: a class docstring would be picked up by
# pydantic as the model's schema description.)
class StaticWebBrowserContent(WebBrowserContent):
    screenshot: Optional[str] = Field(description="Screenshot of the result", default=None)
95+
96+
# Output schema of the static web browser processor. Every field has a
# default, so an instance can be built from any subset of them (e.g. only
# `session`, or only `text` + `content`).
# (Kept as `#` comments on purpose: a class docstring would be picked up by
# pydantic as the model's schema description.)
class StaticWebBrowserOutput(ApiProcessorSchema):
    # Plain-text result; empty string when nothing was produced.
    text: str = Field(description="Text of the result", default="")

    # Optional video of the run.
    video: Optional[str] = Field(description="Video of the result", default=None)

    # Structured page content, including the screenshot field declared on
    # StaticWebBrowserContent.
    content: Optional[StaticWebBrowserContent] = Field(
        description="Content of the result including text, buttons, links, inputs, textareas and selects",
        default=None,
    )

    # Remote browser session details.
    session: Optional[BrowserRemoteSessionData] = Field(
        description="Session data from the browser",
        default=None,
    )

    # Steps performed during the run. The literal list default is left as
    # written; pydantic copies field defaults per instance.
    steps: List[str] = Field(description="Steps taken to complete the task", default=[])
115+
116+
89117class StaticWebBrowser (
90- ApiProcessorInterface [StaticWebBrowserInput , WebBrowserOutput , StaticWebBrowserConfiguration ],
118+ ApiProcessorInterface [StaticWebBrowserInput , StaticWebBrowserOutput , StaticWebBrowserConfiguration ],
91119):
92120 """
93121 Browse a given URL
@@ -165,7 +193,7 @@ def process(self) -> dict:
165193 async_to_sync (
166194 output_stream .write ,
167195 )(
168- WebBrowserOutput (
196+ StaticWebBrowserOutput (
169197 session = BrowserRemoteSessionData (
170198 ws_url = web_browser .get_wss_url (),
171199 ),
@@ -181,22 +209,20 @@ def process(self) -> dict:
181209 ]
182210 + list (map (self ._web_browser_instruction_to_command , self ._input .instructions ))
183211 )
184-
185- screenshot_asset = None
186- if browser_response and browser_response .screenshot :
187- screenshot_asset = self ._upload_asset_from_url (
188- f"data:image/png;name={ str (uuid .uuid4 ())} ;base64,{ base64 .b64encode (browser_response .screenshot ).decode ('utf-8' )} " ,
189- mime_type = "image/png" ,
212+ if browser_response :
213+ output_text = browser_response .text or "\n " .join (
214+ list (map (lambda entry : entry .output , browser_response .command_outputs ))
190215 )
216+ browser_content = browser_response .model_dump (exclude = ("screenshot" ,))
217+ screenshot_asset = None
218+ if browser_response .screenshot :
219+ screenshot_asset = self ._upload_asset_from_url (
220+ f"data:image/png;name={ str (uuid .uuid4 ())} ;base64,{ base64 .b64encode (browser_response .screenshot ).decode ('utf-8' )} " ,
221+ mime_type = "image/png" ,
222+ )
223+ browser_content ["screenshot" ] = screenshot_asset .objref if screenshot_asset else None
224+ async_to_sync (self ._output_stream .write )(StaticWebBrowserOutput (text = output_text , content = browser_content ))
191225
192- browser_response .screenshot = screenshot_asset .objref if screenshot_asset else None
193-
194- async_to_sync (output_stream .write )(
195- WebBrowserOutput (
196- text = browser_response .text or "" .join (list (map (lambda x : x .output , browser_response .command_outputs ))),
197- content = browser_response ,
198- ),
199- )
200226 output = output_stream .finalize ()
201227
202228 return output
0 commit comments