11"""
22FetchNode Module
33"""
4-
4+ import pandas as pd
55from typing import List , Optional
66from langchain_community .document_loaders import AsyncChromiumLoader
77from langchain_core .documents import Document
@@ -22,19 +22,21 @@ class FetchNode(BaseNode):
2222 Attributes:
2323 headless (bool): A flag indicating whether the browser should run in headless mode.
2424 verbose (bool): A flag indicating whether to print verbose output during execution.
25-
25+
2626 Args:
2727 input (str): Boolean expression defining the input keys needed from the state.
2828 output (List[str]): List of output keys to be updated in the state.
2929 node_config (Optional[dict]): Additional configuration for the node.
3030 node_name (str): The unique identifier name for the node, defaulting to "Fetch".
3131 """
3232
33- def __init__ (self , input : str , output : List [str ], node_config : Optional [dict ]= None , node_name : str = "Fetch" ):
33+ def __init__ (self , input : str , output : List [str ], node_config : Optional [dict ] = None , node_name : str = "Fetch" ):
3434 super ().__init__ (node_name , "node" , input , output , 1 )  # NOTE(review): node_config is not forwarded to the base initializer, yet execute reads self.node_config -- confirm the base class sets it
3535
36- self .headless = True if node_config is None else node_config .get ("headless" , True )
37- self .verbose = False if node_config is None else node_config .get ("verbose" , False )
36+ self .headless = True if node_config is None else node_config .get (  # falls back to True when node_config is None or has no "headless" key
37+ "headless" , True )
38+ self .verbose = False if node_config is None else node_config .get (  # falls back to False when node_config is None or has no "verbose" key
39+ "verbose" , False )
3840
3941 def execute (self , state ):
4042 """
@@ -72,6 +74,16 @@ def execute(self, state):
7274 loader = PyPDFLoader (source )
7375 compressed_document = loader .load ()
7476
77+ elif self .input == "csv" :
78+ compressed_document = [Document (page_content = pd .read_csv (source ), metadata = {
79+ "source" : "xml"
80+ })]
81+ elif self .input == "xml" :
82+ with open (source , 'r' , encoding = 'utf-8' ) as f :
83+ data = f .read ()
84+ compressed_document = [Document (page_content = data , metadata = {
85+ "source" : "xml"
86+ })]
7587 elif self .input == "pdf_dir" :
7688 pass
7789
@@ -82,7 +94,7 @@ def execute(self, state):
8294
8395 else :
8496 if self .node_config is not None and self .node_config .get ("endpoint" ) is not None :
85-
97+
8698 loader = AsyncChromiumLoader (
8799 [source ],
88100 proxies = {"http" : self .node_config ["endpoint" ]},
0 commit comments