Skip to content

Commit d34df2f

Browse files
authored
Merge pull request #70 from VinciGit00/refactoring_nodes_openai
Refactoring nodes openai
2 parents 50ce67d + 8449e1d commit d34df2f

File tree

9 files changed

+51
-37
lines changed

9 files changed

+51
-37
lines changed

.github/workflows/pylint.yml

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
name: Pylint
2-
31
on: [push]
42

53
jobs:
@@ -20,4 +18,13 @@ jobs:
2018
pip install pylint
2119
pip install -r requirements.txt
2220
- name: Analysing the code with pylint
23-
run: pylint --disable=C0114,C0115,C0116 --exit-zero scrapegraphai/**/*.py scrapegraphai/*.py
21+
run: pylint --disable=C0114,C0115,C0116 --exit-zero scrapegraphai/**/*.py scrapegraphai/*.py
22+
- name: Check Pylint score
23+
run: |
24+
pylint_score=$(pylint --disable=all --enable=metrics --output-format=text scrapegraphai/**/*.py scrapegraphai/*.py | grep 'Raw metrics' | awk '{print $4}')
25+
if (( $(echo "$pylint_score < 8" | bc -l) )); then
26+
echo "Pylint score is below 8. Blocking commit."
27+
exit 1
28+
else
29+
echo "Pylint score is acceptable."
30+
fi

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "scrapegraphai"
3-
version = "0.2.1"
3+
version = "0.2.2"
44
description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines."
55
authors = [
66
"Marco Vinciguerra <[email protected]>",

scrapegraphai/nodes/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from .generate_answer_node import GenerateAnswerNode
99
from .parse_node import ParseNode
1010
from .rag_node import RAGNode
11-
from .text_to_speech_node import TextToSpeechNode
12-
from .image_to_text_node import ImageToTextNode
11+
from .text_to_speech_node_openai import TextToSpeechNode
12+
from .image_to_text_node_openai import ImageToTextNode
1313
from .search_internet_node import SearchInternetNode
1414
from .generate_scraper_node import GenerateScraperNode

tests/scrape_plain_text_ollama_test.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@
88

99
@pytest.fixture
1010
def sample_text():
11-
# Read the sample text file
11+
"""
12+
Example of text
13+
"""
1214
file_name = "inputs/plain_html_example.txt"
1315
curr_dir = os.path.dirname(os.path.realpath(__file__))
1416
file_path = os.path.join(curr_dir, file_name)
@@ -21,6 +23,9 @@ def sample_text():
2123

2224
@pytest.fixture
2325
def graph_config():
26+
"""
27+
Configuration of the graph
28+
"""
2429
return {
2530
"llm": {
2631
"model": "ollama/mistral",
@@ -36,16 +41,16 @@ def graph_config():
3641
}
3742

3843

39-
def test_scraping_pipeline(sample_text, graph_config):
40-
# Create the SmartScraperGraph instance
44+
def test_scraping_pipeline(sample_text: str, graph_config: dict):
45+
"""
46+
Start of the scraping pipeline
47+
"""
4148
smart_scraper_graph = SmartScraperGraph(
4249
prompt="List me all the news with their description.",
4350
source=sample_text,
4451
config=graph_config
4552
)
4653

47-
# Run the graph
4854
result = smart_scraper_graph.run()
4955

50-
# Check that the result is not empty
5156
assert result is not None

tests/scrape_xml_ollama_test.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@
88

99
@pytest.fixture
1010
def sample_xml():
11-
# Leggi il file XML di esempio
11+
"""
12+
Example of text
13+
"""
1214
file_name = "inputs/books.xml"
1315
curr_dir = os.path.dirname(os.path.realpath(__file__))
1416
file_path = os.path.join(curr_dir, file_name)
@@ -21,6 +23,9 @@ def sample_xml():
2123

2224
@pytest.fixture
2325
def graph_config():
26+
"""
27+
Configuration of the graph
28+
"""
2429
return {
2530
"llm": {
2631
"model": "ollama/mistral",
@@ -36,16 +41,16 @@ def graph_config():
3641
}
3742

3843

39-
def test_scraping_pipeline(sample_xml, graph_config):
40-
# Crea un'istanza di SmartScraperGraph
44+
def test_scraping_pipeline(sample_xml: str, graph_config: dict):
45+
"""
46+
Start of the scraping pipeline
47+
"""
4148
smart_scraper_graph = SmartScraperGraph(
4249
prompt="List me all the authors, title and genres of the books",
4350
source=sample_xml,
4451
config=graph_config
4552
)
4653

47-
# Esegui il grafico
4854
result = smart_scraper_graph.run()
4955

50-
# Verifica che il risultato non sia vuoto
5156
assert result is not None

tests/script_generator_test.py

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,5 @@
11
"""
2-
<<<<<<< Updated upstream
32
Module for making the tests for ScriptGeneratorGraph
4-
=======
5-
Test for script generator
6-
>>>>>>> Stashed changes
73
"""
84
import pytest
95
from scrapegraphai.graphs import ScriptCreatorGraph
@@ -12,6 +8,9 @@
128

139
@pytest.fixture
1410
def graph_config():
11+
"""
12+
Configuration of the graph
13+
"""
1514
return {
1615
"llm": {
1716
"model": "ollama/mistral",
@@ -29,28 +28,24 @@ def graph_config():
2928
}
3029

3130

32-
def test_script_creator_graph(graph_config):
33-
# Create the ScriptCreatorGraph instance
31+
def test_script_creator_graph(graph_config: dict):
32+
"""
33+
Start of the scraping pipeline
34+
"""
3435
smart_scraper_graph = ScriptCreatorGraph(
3536
prompt="List me all the news with their description.",
3637
source="https://perinim.github.io/projects",
3738
config=graph_config
3839
)
3940

40-
# Run the graph
4141
result = smart_scraper_graph.run()
4242

43-
# Check that the result is not empty
4443
assert result is not None
4544

46-
# Get graph execution info
4745
graph_exec_info = smart_scraper_graph.get_execution_info()
4846

49-
# Check that execution info is not empty
5047
assert graph_exec_info is not None
5148

52-
# Check that execution info is a dictionary
5349
assert isinstance(graph_exec_info, dict)
5450

55-
# Print execution info
5651
print(prettify_exec_info(graph_exec_info))

tests/smart_scraper_ollama_test.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@
77

88
@pytest.fixture
99
def graph_config():
10+
"""
11+
Configuration of the graph
12+
"""
1013
return {
1114
"llm": {
1215
"model": "ollama/mistral",
@@ -22,34 +25,33 @@ def graph_config():
2225
}
2326

2427

25-
def test_scraping_pipeline(graph_config):
26-
# Crea un'istanza di SmartScraperGraph
28+
def test_scraping_pipeline(graph_config: dict):
29+
"""
30+
Start of the scraping pipeline
31+
"""
2732
smart_scraper_graph = SmartScraperGraph(
2833
prompt="List me all the news with their description.",
2934
source="https://perinim.github.io/projects",
3035
config=graph_config
3136
)
3237

33-
# Esegui il grafico
3438
result = smart_scraper_graph.run()
3539

36-
# Verifica che il risultato non sia vuoto
3740
assert result is not None
3841

3942

40-
def test_get_execution_info(graph_config):
41-
# Crea un'istanza di SmartScraperGraph
43+
def test_get_execution_info(graph_config: dict):
44+
"""
45+
Get the execution info
46+
"""
4247
smart_scraper_graph = SmartScraperGraph(
4348
prompt="List me all the news with their description.",
4449
source="https://perinim.github.io/projects",
4550
config=graph_config
4651
)
4752

48-
# Esegui il grafico
4953
smart_scraper_graph.run()
5054

51-
# Ottieni le informazioni sull'esecuzione del grafico
5255
graph_exec_info = smart_scraper_graph.get_execution_info()
5356

54-
# Verifica che le informazioni sull'esecuzione non siano vuote
5557
assert graph_exec_info is not None

0 commit comments

Comments (0)