@@ -133,7 +133,7 @@ def execute(self, state):
133133 state .update ({self .output [0 ]: compressed_document })
134134 return state
135135 elif input_keys [0 ] == "json" :
136- f = open (source )
136+ f = open (source , encoding = "utf-8" )
137137 compressed_document = [
138138 Document (page_content = str (json .load (f )), metadata = {"source" : "json" })
139139 ]
@@ -181,12 +181,11 @@ def execute(self, state):
181181 if not response .text .strip ():
182182 raise ValueError ("No HTML body content found in the response." )
183183
184- parsed_content = response
185-
186184 if not self .cut :
187185 parsed_content = cleanup_html (response , source )
188186
189- if (isinstance (self .llm_model , ChatOpenAI ) and not self .script_creator ) or (self .force and not self .script_creator ):
187+ if (isinstance (self .llm_model , ChatOpenAI )
188+ and not self .script_creator ) or (self .force and not self .script_creator ):
190189 parsed_content = convert_to_md (source , input_data [0 ])
191190 compressed_document = [Document (page_content = parsed_content )]
192191 else :
@@ -205,7 +204,8 @@ def execute(self, state):
205204 data = browser_base_fetch (self .browser_base .get ("api_key" ),
206205 self .browser_base .get ("project_id" ), [source ])
207206
208- document = [Document (page_content = content , metadata = {"source" : source }) for content in data ]
207+ document = [Document (page_content = content ,
208+ metadata = {"source" : source }) for content in data ]
209209 else :
210210 loader = ChromiumLoader ([source ], headless = self .headless , ** loader_kwargs )
211211 document = loader .load ()
@@ -215,10 +215,8 @@ def execute(self, state):
215215 parsed_content = document [0 ].page_content
216216
217217 if isinstance (self .llm_model , ChatOpenAI ) and not self .script_creator or self .force and not self .script_creator and not self .openai_md_enabled :
218-
219218 parsed_content = convert_to_md (document [0 ].page_content , input_data [0 ])
220219
221-
222220 compressed_document = [
223221 Document (page_content = parsed_content , metadata = {"source" : "html file" })
224222 ]
0 commit comments