|
| 1 | +""" |
| 2 | +Tests for the SmartScraperGraph scraping pipeline. |
| 3 | +""" |
| 4 | +import os |
1 | 5 | import pytest |
2 | | -from scrapegraphai.models import Ollama |
3 | | -from scrapegraphai.nodes import RobotsNode |
4 | | -from unittest.mock import patch, MagicMock |
| 6 | +from scrapegraphai.graphs import SmartScraperGraph |
5 | 7 |
|
6 | 8 | @pytest.fixture |
7 | | -def setup(): |
| 9 | +def sample_text(): |
8 | 10 | """ |
9 | | - Setup the RobotsNode and initial state for testing. |
| 11 | + Fixture that loads the sample HTML text used as the scraping source. |
10 | 12 | """ |
11 | | - # Define the configuration for the graph |
12 | | - graph_config = { |
| 13 | + file_name = "inputs/plain_html_example.txt" |
| 14 | + curr_dir = os.path.dirname(os.path.realpath(__file__)) |
| 15 | + file_path = os.path.join(curr_dir, file_name) |
| 16 | + |
| 17 | + with open(file_path, 'r', encoding="utf-8") as file: |
| 18 | + text = file.read() |
| 19 | + |
| 20 | + return text |
| 21 | + |
| 22 | +@pytest.fixture |
| 23 | +def graph_config(): |
| 24 | + """ |
| 25 | + Fixture providing the graph configuration for a local Ollama setup. |
| 26 | + """ |
| 27 | + return { |
13 | 28 | "llm": { |
14 | | - "model_name": "ollama/llama3", |
| 29 | + "model": "ollama/mistral", |
15 | 30 | "temperature": 0, |
16 | | - "streaming": True |
| 31 | + "format": "json", |
| 32 | + "base_url": "http://localhost:11434", |
17 | 33 | }, |
18 | | - } |
19 | | - |
20 | | - # Instantiate the LLM model with the configuration |
21 | | - llm_model = Ollama(graph_config["llm"]) |
22 | | - |
23 | | - # Define the RobotsNode with necessary configurations |
24 | | - robots_node = RobotsNode( |
25 | | - input="url", |
26 | | - output=["is_scrapable"], |
27 | | - node_config={ |
28 | | - "llm_model": llm_model, |
29 | | - "headless": False |
| 34 | + "embeddings": { |
| 35 | + "model": "ollama/nomic-embed-text", |
| 36 | + "temperature": 0, |
| 37 | + "base_url": "http://localhost:11434", |
30 | 38 | } |
31 | | - ) |
32 | | - |
33 | | - # Define the initial state for the node |
34 | | - initial_state = { |
35 | | - "url": "https://twitter.com/home" |
36 | 39 | } |
37 | 40 |
|
38 | | - return robots_node, initial_state |
39 | | - |
40 | | -def test_robots_node(setup): |
| 41 | +def test_scraping_pipeline(sample_text, graph_config): |
41 | 42 | """ |
42 | | - Test the RobotsNode execution. |
| 43 | + Test the SmartScraperGraph scraping pipeline. |
43 | 44 | """ |
44 | | - robots_node, initial_state = setup |
45 | | - |
46 | | - # Patch the execute method to avoid actual network calls and return a mock response |
47 | | - with patch.object(RobotsNode, 'execute', return_value={"is_scrapable": True}) as mock_execute: |
48 | | - result = robots_node.execute(initial_state) |
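| | + # The source is the raw HTML text from the fixture, so no live page is fetched |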
| 45 | + smart_scraper_graph = SmartScraperGraph( |
| 46 | + prompt="List me all the news with their description.", |
| 47 | + source=sample_text, |
| 48 | + config=graph_config |
| 49 | + ) |
49 | 50 |
|
50 | | - # Check if the result is not None |
51 | | - assert result is not None |
52 | | - # Additional assertion to check the returned value |
53 | | - assert "is_scrapable" in result |
54 | | - assert isinstance(result["is_scrapable"], bool) |
55 | | - # Ensure the execute method was called once |
56 | | - mock_execute.assert_called_once_with(initial_state) |
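| | + # run() performs real LLM calls against the local Ollama server configured above |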
| 51 | + result = smart_scraper_graph.run() |
| 52 | + |
| 53 | + assert result is not None |
| 54 | + # Additional assertions to check the structure of the result |
| 55 | + assert isinstance(result, dict) # Assuming the result is a dictionary |
| 56 | + assert "news" in result # Assuming the result should contain a key "news" |
| 57 | + assert "is_scrapable" in result |
| 58 | + assert isinstance(result["is_scrapable"], bool) |
| 59 | + assert result["is_scrapable"] is True |
| 60 | + # Ensure the execute method was called once |
| 61 | + mock_execute.assert_called_once_with(initial_state) |