"""
Module for testing the JSON scraper graph
"""
import json
import os

import pytest

from scrapegraphai.graphs import JSONScraperGraph

# Directory that contains this test module; used to locate test resources
# independently of the directory pytest is launched from.
_TEST_DIR = os.path.dirname(os.path.abspath(__file__))

# Name of the JSON file that holds the graph configurations (a list stored
# under the "graph_configs" key).
CONFIG_FILE = "config.json"

# Placeholder expectations.
# TODO: replace with the titles actually contained in inputs/example.json.
EXPECTED_TITLES = ["Title 1", "Title 2", "Title 3"]


def _load_config(file_name: str) -> dict:
    """
    Load the test configuration from a JSON file.

    The path is first tried as given (i.e. relative to the current working
    directory, as before). If no such file exists, the directory of this
    test module is used instead, mirroring how ``sample_json`` locates its
    input file.

    Raises:
        FileNotFoundError: if the file is found in neither location.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    path = file_name
    if not os.path.isfile(path):
        path = os.path.join(_TEST_DIR, file_name)
    with open(path, "r", encoding="utf-8") as config_file:
        return json.load(config_file)


# Loaded at import time because the parametrized fixture below needs the
# list of configurations while pytest is collecting tests.
CONFIG = _load_config(CONFIG_FILE)


@pytest.fixture
def sample_json() -> str:
    """
    Read the sample JSON file and return its content as text
    """
    file_path = os.path.join(_TEST_DIR, "inputs", "example.json")
    with open(file_path, "r", encoding="utf-8") as file:
        return file.read()


@pytest.fixture(params=CONFIG["graph_configs"])
def graph_config(request):
    """
    Provide one graph configuration per entry of CONFIG["graph_configs"]

    Every test that uses this fixture runs once for each configuration.
    """
    return request.param


def test_scraping_pipeline(sample_json: str, graph_config: dict):
    """
    Test the scraping pipeline

    Builds a JSONScraperGraph over the sample document, runs it and compares
    the returned titles (order-insensitively) with EXPECTED_TITLES.
    """
    smart_scraper_graph = JSONScraperGraph(
        prompt="List me all the titles",
        source=sample_json,
        config=graph_config,
    )
    result = smart_scraper_graph.run()

    assert result is not None
    # NOTE(review): assumes run() returns a flat list of title strings --
    # confirm against the JSONScraperGraph return type.
    assert isinstance(result, list)
    assert sorted(result) == sorted(EXPECTED_TITLES)