Skip to content

Commit 40747c3

Browse files
authored
Merge branch 'main' into main
2 parents c927145 + 58086ee commit 40747c3

File tree

4 files changed

+95
-80
lines changed

4 files changed

+95
-80
lines changed

CHANGELOG.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,15 @@
1+
## [1.6.0](https://github.com/VinciGit00/Scrapegraph-ai/compare/v1.5.7...v1.6.0) (2024-06-09)
2+
3+
4+
### Features
5+
6+
* Add tests for RobotsNode and update test setup ([dedfa2e](https://github.com/VinciGit00/Scrapegraph-ai/commit/dedfa2eaf02b7e9b68a116515053c1daae6e4a31))
7+
8+
9+
### Test
10+
11+
* Enhance JSON scraping pipeline test ([d845a1b](https://github.com/VinciGit00/Scrapegraph-ai/commit/d845a1ba7d6e7f7574b92b51b6d5326bbfb3d1c6))
12+
113
## [1.5.7](https://github.com/VinciGit00/Scrapegraph-ai/compare/v1.5.6...v1.5.7) (2024-06-06)
214

315

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
name = "scrapegraphai"
33

44

5-
version = "1.5.7"
5+
version = "1.6.0"
66

77

88
description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines."
Lines changed: 35 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,47 +1,45 @@
1-
"""
1+
"""
22
Module for making the tests for ScriptCreatorGraph
33
"""
44
import pytest
55
from scrapegraphai.graphs import ScriptCreatorGraph
66
from scrapegraphai.utils import prettify_exec_info
77

8-
98
@pytest.fixture
109
def graph_config():
11-
"""
12-
Configuration of the graph
13-
"""
14-
return {
15-
"llm": {
16-
"model": "ollama/mistral",
17-
"temperature": 0,
18-
"format": "json",
19-
"base_url": "http://localhost:11434",
20-
"library": "beautifoulsoup",
21-
},
22-
"embeddings": {
23-
"model": "ollama/nomic-embed-text",
24-
"temperature": 0,
25-
"base_url": "http://localhost:11434",
26-
},
27-
"library": "beautifoulsoup"
28-
}
29-
10+
"""
11+
Configuration of the graph
12+
"""
13+
return {
14+
"llm": {
15+
"model": "ollama/mistral",
16+
"temperature": 0,
17+
"format": "json",
18+
"base_url": "http://localhost:11434",
19+
"library": "beautifulsoup",
20+
},
21+
"embeddings": {
22+
"model": "ollama/nomic-embed-text",
23+
"temperature": 0,
24+
"base_url": "http://localhost:11434",
25+
},
26+
"library": "beautifulsoup"
27+
}
3028

3129
def test_script_creator_graph(graph_config: dict):
32-
"""
33-
Start of the scraping pipeline
34-
"""
35-
smart_scraper_graph = ScriptCreatorGraph(
36-
prompt="List me all the news with their description.",
37-
source="https://perinim.github.io/projects",
38-
config=graph_config
39-
)
40-
41-
result = smart_scraper_graph.run()
42-
43-
assert result is not None
44-
45-
graph_exec_info = smart_scraper_graph.get_execution_info()
46-
47-
assert graph_exec_info is not None
30+
"""
31+
Test the ScriptCreatorGraph
32+
"""
33+
smart_scraper_graph = ScriptCreatorGraph(
34+
prompt="List me all the news with their description.",
35+
source="https://perinim.github.io/projects",
36+
config=graph_config
37+
)
38+
result = smart_scraper_graph.run()
39+
assert result is not None, "ScriptCreatorGraph execution failed to produce a result."
40+
graph_exec_info = smart_scraper_graph.get_execution_info()
41+
assert graph_exec_info is not None, "ScriptCreatorGraph execution info is None."
42+
prettified_exec_info = prettify_exec_info(graph_exec_info)
43+
print(prettified_exec_info)
44+
45+
# Perform additional assertions on the result or execution info as needed

tests/nodes/robot_node_test.py

Lines changed: 47 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,56 +1,61 @@
1+
"""
2+
Module for testing the SmartScraperGraph scraping pipeline
3+
"""
4+
import os
15
import pytest
2-
from scrapegraphai.models import Ollama
3-
from scrapegraphai.nodes import RobotsNode
4-
from unittest.mock import patch, MagicMock
6+
from scrapegraphai.graphs import SmartScraperGraph
57

68
@pytest.fixture
7-
def setup():
9+
def sample_text():
810
"""
9-
Setup the RobotsNode and initial state for testing.
11+
Example of text fixture.
1012
"""
11-
# Define the configuration for the graph
12-
graph_config = {
13+
file_name = "inputs/plain_html_example.txt"
14+
curr_dir = os.path.dirname(os.path.realpath(__file__))
15+
file_path = os.path.join(curr_dir, file_name)
16+
17+
with open(file_path, 'r', encoding="utf-8") as file:
18+
text = file.read()
19+
20+
return text
21+
22+
@pytest.fixture
23+
def graph_config():
24+
"""
25+
Configuration of the graph fixture.
26+
"""
27+
return {
1328
"llm": {
14-
"model_name": "ollama/llama3",
29+
"model": "ollama/mistral",
1530
"temperature": 0,
16-
"streaming": True
31+
"format": "json",
32+
"base_url": "http://localhost:11434",
1733
},
18-
}
19-
20-
# Instantiate the LLM model with the configuration
21-
llm_model = Ollama(graph_config["llm"])
22-
23-
# Define the RobotsNode with necessary configurations
24-
robots_node = RobotsNode(
25-
input="url",
26-
output=["is_scrapable"],
27-
node_config={
28-
"llm_model": llm_model,
29-
"headless": False
34+
"embeddings": {
35+
"model": "ollama/nomic-embed-text",
36+
"temperature": 0,
37+
"base_url": "http://localhost:11434",
3038
}
31-
)
32-
33-
# Define the initial state for the node
34-
initial_state = {
35-
"url": "https://twitter.com/home"
3639
}
3740

38-
return robots_node, initial_state
39-
40-
def test_robots_node(setup):
41+
def test_scraping_pipeline(sample_text, graph_config):
4142
"""
42-
Test the RobotsNode execution.
43+
Test the SmartScraperGraph scraping pipeline.
4344
"""
44-
robots_node, initial_state = setup
45-
46-
# Patch the execute method to avoid actual network calls and return a mock response
47-
with patch.object(RobotsNode, 'execute', return_value={"is_scrapable": True}) as mock_execute:
48-
result = robots_node.execute(initial_state)
45+
smart_scraper_graph = SmartScraperGraph(
46+
prompt="List me all the news with their description.",
47+
source=sample_text,
48+
config=graph_config
49+
)
4950

50-
# Check if the result is not None
51-
assert result is not None
52-
# Additional assertion to check the returned value
53-
assert "is_scrapable" in result
54-
assert isinstance(result["is_scrapable"], bool)
55-
# Ensure the execute method was called once
56-
mock_execute.assert_called_once_with(initial_state)
51+
result = smart_scraper_graph.run()
52+
53+
assert result is not None
54+
# Additional assertions to check the structure of the result
55+
assert isinstance(result, dict) # Assuming the result is a dictionary
56+
assert "news" in result # Assuming the result should contain a key "news"
57+
assert "is_scrapable" in result
58+
assert isinstance(result["is_scrapable"], bool)
59+
assert result["is_scrapable"] is True
60+
# Ensure the execute method was called once
61+
mock_execute.assert_called_once_with(initial_state)

0 commit comments

Comments
 (0)