1111from langchain_core .documents import Document
1212from ..utils .cleanup_html import cleanup_html
1313from ..docloaders import ChromiumLoader
14+ from ..docloaders .browser_base import browser_base_fetch
1415from ..utils .convert_to_md import convert_to_md
1516from ..utils .logging import get_logger
1617from .base_node import BaseNode
@@ -74,6 +75,8 @@ def __init__(
7475 False if node_config is None else node_config .get ("cut" , True )
7576 )
7677
78+ self .browser_base = node_config .get ("browser_base" )
79+
7780 def execute (self , state ):
7881 """
7982 Executes the node's logic to fetch HTML content from a specified URL and
@@ -164,7 +167,7 @@ def execute(self, state):
164167
165168 parsed_content = source
166169
167- if isinstance (self .llm_model , ChatOpenAI ) and not self .script_creator or self .force and not self .script_creator :
170+ if isinstance (self .llm_model , ChatOpenAI ) and not self .script_creator or self .force and not self .script_creator :
168171 parsed_content = convert_to_md (source )
169172
170173 compressed_document = [
@@ -177,7 +180,7 @@ def execute(self, state):
177180 if response .status_code == 200 :
178181 if not response .text .strip ():
179182 raise ValueError ("No HTML body content found in the response." )
180-
183+
181184 parsed_content = response
182185
183186 if not self .cut :
@@ -198,8 +201,14 @@ def execute(self, state):
198201 if self .node_config is not None :
199202 loader_kwargs = self .node_config .get ("loader_kwargs" , {})
200203
201- loader = ChromiumLoader ([source ], headless = self .headless , ** loader_kwargs )
202- document = loader .load ()
204+ if self .browser_base is not None :
205+ data = browser_base_fetch (self .browser_base .get ("api_key" ),
206+ self .browser_base .get ("project_id" ), [source ])
207+
208+ document = [Document (page_content = content , metadata = {"source" : source }) for content in data ]
209+ else :
210+ loader = ChromiumLoader ([source ], headless = self .headless , ** loader_kwargs )
211+ document = loader .load ()
203212
204213 if not document or not document [0 ].page_content .strip ():
205214 raise ValueError ("No HTML body content found in the document fetched by ChromiumLoader." )
0 commit comments