@@ -121,7 +121,7 @@ def execute(self, state):
121121 "xml" : self .handle_file ,
122122 "md" : self .handle_file ,
123123 }
124-
124+
125125 if input_type in handlers :
126126 return handlers [input_type ](state , input_type , source )
127127 elif self .input == "pdf_dir" :
@@ -130,7 +130,7 @@ def execute(self, state):
130130 return self .handle_local_source (state , source )
131131 else :
132132 return self .handle_web_source (state , source )
133-
133+
134134 def handle_directory (self , state , input_type , source ):
135135 """
136136 Handles the directory by compressing the source document and updating the state.
@@ -143,7 +143,7 @@ def handle_directory(self, state, input_type, source):
143143 Returns:
144144 dict: The updated state with the compressed document.
145145 """
146-
146+
147147 compressed_document = [
148148 source
149149 ]
@@ -169,11 +169,11 @@ def handle_file(self, state, input_type, source):
169169 - "xml": Reads the content of an XML file as a string.
170170 - "md": Reads the content of a Markdown file as a string.
171171 """
172-
172+
173173 compressed_document = self .load_file_content (source , input_type )
174-
174+
175175 return self .update_state (state , compressed_document )
176-
176+
177177 def load_file_content (self , source , input_type ):
178178 """
179179 Loads the content of a file based on its input type.
@@ -185,7 +185,7 @@ def load_file_content(self, source, input_type):
185185 Returns:
186186 list: A list containing a Document object with the loaded content and metadata.
187187 """
188-
188+
189189 if input_type == "pdf" :
190190 loader = PyPDFLoader (source )
191191 return loader .load ()
@@ -198,7 +198,7 @@ def load_file_content(self, source, input_type):
198198 with open (source , "r" , encoding = "utf-8" ) as f :
199199 data = f .read ()
200200 return [Document (page_content = data , metadata = {"source" : input_type })]
201-
201+
202202 def handle_local_source (self , state , source ):
203203 """
204204 Handles the local source by fetching HTML content, optionally converting it to Markdown,
@@ -214,11 +214,11 @@ def handle_local_source(self, state, source):
214214 Raises:
215215 ValueError: If the source is empty or contains only whitespace.
216216 """
217-
217+
218218 self .logger .info (f"--- (Fetching HTML from: { source } ) ---" )
219219 if not source .strip ():
220220 raise ValueError ("No HTML body content found in the local source." )
221-
221+
222222 parsed_content = source
223223
224224 if isinstance (self .llm_model , ChatOpenAI ) and not self .script_creator or self .force and not self .script_creator :
@@ -229,13 +229,13 @@ def handle_local_source(self, state, source):
229229 compressed_document = [
230230 Document (page_content = parsed_content , metadata = {"source" : "local_dir" })
231231 ]
232-
232+
233233 return self .update_state (state , compressed_document )
234-
234+
235235 def handle_web_source (self , state , source ):
236236 """
237- Handles the web source by fetching HTML content from a URL, optionally converting it to Markdown,
238- and updating the state.
237+ Handles the web source by fetching HTML content from a URL,
238+ optionally converting it to Markdown, and updating the state.
239239
240240 Parameters:
241241 state (dict): The current state of the graph.
@@ -247,7 +247,7 @@ def handle_web_source(self, state, source):
247247 Raises:
248248 ValueError: If the fetched HTML content is empty or contains only whitespace.
249249 """
250-
250+
251251 self .logger .info (f"--- (Fetching HTML from: { source } ) ---" )
252252 if self .use_soup :
253253 response = requests .get (source )
0 commit comments