Merge branch 'main' into main

tejhande · web-flow · commit 40747c3e01cd · 2024-06-10T13:36:44.000+05:30
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,15 @@
+## [1.6.0](https://github.com/VinciGit00/Scrapegraph-ai/compare/v1.5.7...v1.6.0) (2024-06-09)
+
+
+### Features
+
+* Add tests for RobotsNode and update test setup ([dedfa2e](https://github.com/VinciGit00/Scrapegraph-ai/commit/dedfa2eaf02b7e9b68a116515053c1daae6e4a31))
+
+
+### Test
+
+* Enhance JSON scraping pipeline test ([d845a1b](https://github.com/VinciGit00/Scrapegraph-ai/commit/d845a1ba7d6e7f7574b92b51b6d5326bbfb3d1c6))
+
 ## [1.5.7](https://github.com/VinciGit00/Scrapegraph-ai/compare/v1.5.6...v1.5.7) (2024-06-06)
 
 
diff --git a/pyproject.toml b/pyproject.toml
@@ -2,7 +2,7 @@
 name = "scrapegraphai"
 
 
-version = "1.5.7"
+version = "1.6.0"
 
 
 description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines."
diff --git a/tests/graphs/script_generator_test.py b/tests/graphs/script_generator_test.py
@@ -1,47 +1,45 @@
-""" 
+"""
 Module for making the tests for ScriptGeneratorGraph
 """
 import pytest
 from scrapegraphai.graphs import ScriptCreatorGraph
 from scrapegraphai.utils import prettify_exec_info
 
-
 @pytest.fixture
 def graph_config():
-    """
-    Configuration of the graph
-    """
-    return {
-        "llm": {
-            "model": "ollama/mistral",
-            "temperature": 0,
-            "format": "json",
-            "base_url": "http://localhost:11434",
-            "library": "beautifoulsoup",
-        },
-        "embeddings": {
-            "model": "ollama/nomic-embed-text",
-            "temperature": 0,
-            "base_url": "http://localhost:11434",
-        },
-        "library": "beautifoulsoup"
-    }
-
+   """
+   Configuration of the graph
+   """
+   return {
+       "llm": {
+           "model": "ollama/mistral",
+           "temperature": 0,
+           "format": "json",
+           "base_url": "http://localhost:11434",
+           "library": "beautifulsoup",
+       },
+       "embeddings": {
+           "model": "ollama/nomic-embed-text",
+           "temperature": 0,
+           "base_url": "http://localhost:11434",
+       },
+       "library": "beautifulsoup"
+   }
 
 def test_script_creator_graph(graph_config: dict):
-    """
-    Start of the scraping pipeline
-    """
-    smart_scraper_graph = ScriptCreatorGraph(
-        prompt="List me all the news with their description.",
-        source="https://perinim.github.io/projects",
-        config=graph_config
-    )
-
-    result = smart_scraper_graph.run()
-
-    assert result is not None
-
-    graph_exec_info = smart_scraper_graph.get_execution_info()
-
-    assert graph_exec_info is not None
+   """
+   Test the ScriptCreatorGraph
+   """
+   smart_scraper_graph = ScriptCreatorGraph(
+       prompt="List me all the news with their description.",
+       source="https://perinim.github.io/projects",
+       config=graph_config
+   )
+   result = smart_scraper_graph.run()
+   assert result is not None, "ScriptCreatorGraph execution failed to produce a result."
+   graph_exec_info = smart_scraper_graph.get_execution_info()
+   assert graph_exec_info is not None, "ScriptCreatorGraph execution info is None."
+   prettified_exec_info = prettify_exec_info(graph_exec_info)
+   print(prettified_exec_info)
+
+   # Perform additional assertions on the result or execution info as needed
diff --git a/tests/nodes/robot_node_test.py b/tests/nodes/robot_node_test.py
@@ -1,56 +1,61 @@
+"""
+Module for the tests
+"""
+import os
 import pytest
-from scrapegraphai.models import Ollama
-from scrapegraphai.nodes import RobotsNode
-from unittest.mock import patch, MagicMock
+from scrapegraphai.graphs import SmartScraperGraph
 
 @pytest.fixture
-def setup():
+def sample_text():
     """
-    Setup the RobotsNode and initial state for testing.
+    Example of text fixture.
     """
-    # Define the configuration for the graph
-    graph_config = {
+    file_name = "inputs/plain_html_example.txt"
+    curr_dir = os.path.dirname(os.path.realpath(__file__))
+    file_path = os.path.join(curr_dir, file_name)
+
+    with open(file_path, 'r', encoding="utf-8") as file:
+        text = file.read()
+
+    return text
+
+@pytest.fixture
+def graph_config():
+    """
+    Configuration of the graph fixture.
+    """
+    return {
         "llm": {
-            "model_name": "ollama/llama3",
+            "model": "ollama/mistral",
             "temperature": 0,
-            "streaming": True
+            "format": "json",
+            "base_url": "http://localhost:11434",
         },
-    }
-
-    # Instantiate the LLM model with the configuration
-    llm_model = Ollama(graph_config["llm"])
-
-    # Define the RobotsNode with necessary configurations
-    robots_node = RobotsNode(
-        input="url",
-        output=["is_scrapable"],
-        node_config={
-            "llm_model": llm_model,
-            "headless": False
+        "embeddings": {
+            "model": "ollama/nomic-embed-text",
+            "temperature": 0,
+            "base_url": "http://localhost:11434",
         }
-    )
-
-    # Define the initial state for the node
-    initial_state = {
-        "url": "https://twitter.com/home"
     }
 
-    return robots_node, initial_state
-
-def test_robots_node(setup):
+def test_scraping_pipeline(sample_text, graph_config):
     """
-    Test the RobotsNode execution.
+    Test the SmartScraperGraph scraping pipeline.
     """
-    robots_node, initial_state = setup
-
-    # Patch the execute method to avoid actual network calls and return a mock response
-    with patch.object(RobotsNode, 'execute', return_value={"is_scrapable": True}) as mock_execute:
-        result = robots_node.execute(initial_state)
+    smart_scraper_graph = SmartScraperGraph(
+        prompt="List me all the news with their description.",
+        source=sample_text,
+        config=graph_config
+    )
 
-        # Check if the result is not None
-        assert result is not None
-        # Additional assertion to check the returned value
-        assert "is_scrapable" in result
-        assert isinstance(result["is_scrapable"], bool)
-        # Ensure the execute method was called once
-        mock_execute.assert_called_once_with(initial_state)
+    result = smart_scraper_graph.run()
+
+    assert result is not None
+    # Additional assertions to check the structure of the result
+    assert isinstance(result, dict)  # Assuming the result is a dictionary
+    assert "news" in result  # Assuming the result should contain a key "news"
+    assert "is_scrapable" in result
+    assert isinstance(result["is_scrapable"], bool)
+    assert result["is_scrapable"] is True
+    # Ensure the execute method was called once
+    mock_execute.assert_called_once_with(initial_state)