33from scrapegraphai .nodes import FetchNode
44
55
6- def test_fetch_html (mocker ):
6+ def test_fetch_html (monkeypatch ):
77 title = "ScrapeGraph AI"
88 link_url = "https://github.com/VinciGit00/Scrapegraph-ai"
99 img_url = "https://raw.githubusercontent.com/VinciGit00/Scrapegraph-ai/main/docs/assets/scrapegraphai_logo.png"
1010 content = f"""
1111 <html>
12- <head>
12+ <head>
1313 <title>{ title } </title>
14- </head>
15- <body>
14+ </head>
15+ <body>
1616 <a href="{ link_url } ">ScrapeGraphAI: You Only Scrape Once</a>
1717 <img src="{ img_url } " alt="Scrapegraph-ai Logo">
18- </body>
18+ </body>
1919 </html>
2020 """
21- mock_loader_cls = mocker .patch ("scrapegraphai.nodes.fetch_node.ChromiumLoader" )
22- mock_loader = mock_loader_cls .return_value
23- mock_loader .load .return_value = [Document (page_content = content )]
21+ # Define a fake ChromiumLoader that returns our fixed content
22+ class FakeChromiumLoader :
23+ def __init__ (self , sources , headless , storage_state , ** loader_kwargs ):
24+ self .sources = sources
25+ self .headless = headless
26+ self .storage_state = storage_state
27+ self .loader_kwargs = loader_kwargs
28+
29+ def load (self ):
30+ return [Document (page_content = content )]
31+
32+ # Use monkeypatch to replace ChromiumLoader with FakeChromiumLoader
33+ monkeypatch .setattr ("scrapegraphai.nodes.fetch_node.ChromiumLoader" , FakeChromiumLoader )
2434 node = FetchNode (
2535 input = "url | local_dir" ,
2636 output = ["doc" , "links" , "images" ],
2737 node_config = {"headless" : False },
2838 )
2939 result = node .execute ({"url" : "https://scrapegraph-ai.com/example" })
3040
31- mock_loader .load .assert_called_once ()
3241 doc = result ["doc" ][0 ]
3342 assert result is not None
3443 assert "ScrapeGraph AI" in doc .page_content
@@ -40,6 +49,11 @@ def test_fetch_html(mocker):
4049
4150
4251def test_fetch_json ():
52+ """Test fetching content from a JSON file by creating a dummy JSON file"""
53+ import os
54+ os .makedirs ("inputs" , exist_ok = True )
55+ with open ("inputs/example.json" , "w" , encoding = "utf-8" ) as f :
56+ f .write ('{"test": "json content"}' )
4357 node = FetchNode (
4458 input = "json" ,
4559 output = ["doc" ],
@@ -49,6 +63,11 @@ def test_fetch_json():
4963
5064
5165def test_fetch_xml ():
66+ """Test fetching content from an XML file by creating a dummy XML file"""
67+ import os
68+ os .makedirs ("inputs" , exist_ok = True )
69+ with open ("inputs/books.xml" , "w" , encoding = "utf-8" ) as f :
70+ f .write ("<books><book>Test Book</book></books>" )
5271 node = FetchNode (
5372 input = "xml" ,
5473 output = ["doc" ],
@@ -58,6 +77,16 @@ def test_fetch_xml():
5877
5978
6079def test_fetch_csv ():
80+ """Test fetching content from a CSV file by creating a dummy CSV file and mocking pandas if necessary"""
81+ import os
82+ os .makedirs ("inputs" , exist_ok = True )
83+ with open ("inputs/username.csv" , "w" , encoding = "utf-8" ) as f :
84+ f .write ("col1,col2\n value1,value2" )
85+ import sys , types
86+ if "pandas" not in sys .modules :
87+ dummy_pandas = types .ModuleType ("pandas" )
88+ dummy_pandas .read_csv = lambda path : {"col1" : ["value1" ], "col2" : ["value2" ]}
89+ sys .modules ["pandas" ] = dummy_pandas
6190 node = FetchNode (
6291 input = "csv" ,
6392 output = ["doc" ],
@@ -67,10 +96,15 @@ def test_fetch_csv():
6796
6897
def test_fetch_txt():
    """Check that FetchNode returns a result for inline HTML text.

    A small HTML fixture is written to ``inputs/plain_html_example.txt`` so
    the test does not depend on a pre-existing file. The file's content is
    then read back and handed to the node under the ``local_dir`` input key.
    """
    import os

    fixture_path = "inputs/plain_html_example.txt"

    # Create the fixture on the fly; the directory may not exist on a clean
    # checkout or in CI.
    os.makedirs("inputs", exist_ok=True)
    with open(fixture_path, "w", encoding="utf-8") as f:
        f.write("<html><body>Test plain HTML content</body></html>")

    node = FetchNode(
        input="local_dir",
        output=["doc", "links", "images"],
    )

    # Read with the same encoding the fixture was written with, rather than
    # relying on the platform default.
    with open(fixture_path, encoding="utf-8") as f:
        result = node.execute({"local_dir": f.read()})

    assert result is not None
0 commit comments