Skip to content

Commit d34df2f

Browse files
authored
Merge pull request #70 from VinciGit00/refactoring_nodes_openai
Refactoring nodes openai
2 parents 50ce67d + 8449e1d commit d34df2f

File tree

9 files changed

+51
-37
lines changed

9 files changed

+51
-37
lines changed

.github/workflows/pylint.yml

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
name: Pylint
2-
31
on: [push]
42

53
jobs:
@@ -20,4 +18,13 @@ jobs:
2018
pip install pylint
2119
pip install -r requirements.txt
2220
- name: Analysing the code with pylint
23-
run: pylint --disable=C0114,C0115,C0116 --exit-zero scrapegraphai/**/*.py scrapegraphai/*.py
21+
run: pylint --disable=C0114,C0115,C0116 --exit-zero scrapegraphai/**/*.py scrapegraphai/*.py
22+
- name: Check Pylint score
23+
run: |
24+
pylint_score=$(pylint --disable=all --enable=metrics --output-format=text scrapegraphai/**/*.py scrapegraphai/*.py | grep 'Raw metrics' | awk '{print $4}')
25+
if (( $(echo "$pylint_score < 8" | bc -l) )); then
26+
echo "Pylint score is below 8. Blocking commit."
27+
exit 1
28+
else
29+
echo "Pylint score is acceptable."
30+
fi

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "scrapegraphai"
3-
version = "0.2.1"
3+
version = "0.2.2"
44
description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines."
55
authors = [
66
"Marco Vinciguerra <[email protected]>",

scrapegraphai/nodes/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from .generate_answer_node import GenerateAnswerNode
99
from .parse_node import ParseNode
1010
from .rag_node import RAGNode
11-
from .text_to_speech_node import TextToSpeechNode
12-
from .image_to_text_node import ImageToTextNode
11+
from .text_to_speech_node_openai import TextToSpeechNode
12+
from .image_to_text_node_openai import ImageToTextNode
1313
from .search_internet_node import SearchInternetNode
1414
from .generate_scraper_node import GenerateScraperNode

tests/scrape_plain_text_ollama_test.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@
88

99
@pytest.fixture
1010
def sample_text():
11-
# Read the sample text file
11+
"""
12+
Example of text
13+
"""
1214
file_name = "inputs/plain_html_example.txt"
1315
curr_dir = os.path.dirname(os.path.realpath(__file__))
1416
file_path = os.path.join(curr_dir, file_name)
@@ -21,6 +23,9 @@ def sample_text():
2123

2224
@pytest.fixture
2325
def graph_config():
26+
"""
27+
Configuration of the graph
28+
"""
2429
return {
2530
"llm": {
2631
"model": "ollama/mistral",
@@ -36,16 +41,16 @@ def graph_config():
3641
}
3742

3843

39-
def test_scraping_pipeline(sample_text, graph_config):
40-
# Create the SmartScraperGraph instance
44+
def test_scraping_pipeline(sample_text: str, graph_config: dict):
45+
"""
46+
Start of the scraping pipeline
47+
"""
4148
smart_scraper_graph = SmartScraperGraph(
4249
prompt="List me all the news with their description.",
4350
source=sample_text,
4451
config=graph_config
4552
)
4653

47-
# Run the graph
4854
result = smart_scraper_graph.run()
4955

50-
# Check that the result is not empty
5156
assert result is not None

tests/scrape_xml_ollama_test.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@
88

99
@pytest.fixture
1010
def sample_xml():
11-
# Leggi il file XML di esempio
11+
"""
12+
Example of text
13+
"""
1214
file_name = "inputs/books.xml"
1315
curr_dir = os.path.dirname(os.path.realpath(__file__))
1416
file_path = os.path.join(curr_dir, file_name)
@@ -21,6 +23,9 @@ def sample_xml():
2123

2224
@pytest.fixture
2325
def graph_config():
26+
"""
27+
Configuration of the graph
28+
"""
2429
return {
2530
"llm": {
2631
"model": "ollama/mistral",
@@ -36,16 +41,16 @@ def graph_config():
3641
}
3742

3843

39-
def test_scraping_pipeline(sample_xml, graph_config):
40-
# Crea un'istanza di SmartScraperGraph
44+
def test_scraping_pipeline(sample_xml: str, graph_config: dict):
45+
"""
46+
Start of the scraping pipeline
47+
"""
4148
smart_scraper_graph = SmartScraperGraph(
4249
prompt="List me all the authors, title and genres of the books",
4350
source=sample_xml,
4451
config=graph_config
4552
)
4653

47-
# Esegui il grafico
4854
result = smart_scraper_graph.run()
4955

50-
# Verifica che il risultato non sia vuoto
5156
assert result is not None

tests/script_generator_test.py

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,5 @@
11
"""
2-
<<<<<<< Updated upstream
32
Module for making the tests for ScriptGeneratorGraph
4-
=======
5-
Test for script generator
6-
>>>>>>> Stashed changes
73
"""
84
import pytest
95
from scrapegraphai.graphs import ScriptCreatorGraph
@@ -12,6 +8,9 @@
128

139
@pytest.fixture
1410
def graph_config():
11+
"""
12+
Configuration of the graph
13+
"""
1514
return {
1615
"llm": {
1716
"model": "ollama/mistral",
@@ -29,28 +28,24 @@ def graph_config():
2928
}
3029

3130

32-
def test_script_creator_graph(graph_config):
33-
# Create the ScriptCreatorGraph instance
31+
def test_script_creator_graph(graph_config: dict):
32+
"""
33+
Start of the scraping pipeline
34+
"""
3435
smart_scraper_graph = ScriptCreatorGraph(
3536
prompt="List me all the news with their description.",
3637
source="https://perinim.github.io/projects",
3738
config=graph_config
3839
)
3940

40-
# Run the graph
4141
result = smart_scraper_graph.run()
4242

43-
# Check that the result is not empty
4443
assert result is not None
4544

46-
# Get graph execution info
4745
graph_exec_info = smart_scraper_graph.get_execution_info()
4846

49-
# Check that execution info is not empty
5047
assert graph_exec_info is not None
5148

52-
# Check that execution info is a dictionary
5349
assert isinstance(graph_exec_info, dict)
5450

55-
# Print execution info
5651
print(prettify_exec_info(graph_exec_info))

tests/smart_scraper_ollama_test.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@
77

88
@pytest.fixture
99
def graph_config():
10+
"""
11+
Configuration of the graph
12+
"""
1013
return {
1114
"llm": {
1215
"model": "ollama/mistral",
@@ -22,34 +25,33 @@ def graph_config():
2225
}
2326

2427

25-
def test_scraping_pipeline(graph_config):
26-
# Crea un'istanza di SmartScraperGraph
28+
def test_scraping_pipeline(graph_config: dict):
29+
"""
30+
Start of the scraping pipeline
31+
"""
2732
smart_scraper_graph = SmartScraperGraph(
2833
prompt="List me all the news with their description.",
2934
source="https://perinim.github.io/projects",
3035
config=graph_config
3136
)
3237

33-
# Esegui il grafico
3438
result = smart_scraper_graph.run()
3539

36-
# Verifica che il risultato non sia vuoto
3740
assert result is not None
3841

3942

40-
def test_get_execution_info(graph_config):
41-
# Crea un'istanza di SmartScraperGraph
43+
def test_get_execution_info(graph_config: dict):
44+
"""
45+
Get the execution info
46+
"""
4247
smart_scraper_graph = SmartScraperGraph(
4348
prompt="List me all the news with their description.",
4449
source="https://perinim.github.io/projects",
4550
config=graph_config
4651
)
4752

48-
# Esegui il grafico
4953
smart_scraper_graph.run()
5054

51-
# Ottieni le informazioni sull'esecuzione del grafico
5255
graph_exec_info = smart_scraper_graph.get_execution_info()
5356

54-
# Verifica che le informazioni sull'esecuzione non siano vuote
5557
assert graph_exec_info is not None

0 commit comments

Comments (0)