Skip to content

Commit 8cece1d

Browse files
authored
Merge branch 'pre/beta' into refactoring-of-abstract-graph
2 parents cef2fdb + de1ec25 commit 8cece1d

File tree

71 files changed

+268
-241
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

71 files changed

+268
-241
lines changed

CHANGELOG.md

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,22 @@
1+
## [1.14.0-beta.1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.13.3...v1.14.0-beta.1) (2024-08-11)
2+
3+
4+
### Features
5+
6+
* add refactoring of default temperature ([6c3b37a](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/6c3b37ab001b80c09ea9ffb56d4c3df338e33a7a))
7+
8+
9+
### Bug Fixes
10+
11+
* broken node ([1272273](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/127227349915deeb0dede34aa575ad269ed7cbe3))
12+
* merge_answer prompt import ([f17cef9](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/f17cef94bb39349d40cc520d93b51ac4e629db32))
13+
14+
15+
### CI
16+
17+
* **release:** 1.13.0-beta.8 [skip ci] ([b470d97](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/b470d974cf3fdb3a75ead46fceb8c21525e2e616))
18+
* **release:** 1.13.0-beta.9 [skip ci] ([d4c1a1c](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/d4c1a1c58a54740ff50aa87b1d1d3500b61ea088))
19+
120
## [1.13.3](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.13.2...v1.13.3) (2024-08-10)
221

322

@@ -25,6 +44,8 @@
2544
* conditional node ([ce00345](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/ce003454953e5785d4746223c252de38cd5d07ea))
2645

2746
## [1.13.0](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.12.2...v1.13.0) (2024-08-09)
47+
## [1.13.0-beta.9](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.13.0-beta.8...v1.13.0-beta.9) (2024-08-10)
48+
2849

2950

3051
### Features
@@ -65,6 +86,15 @@
6586
* **release:** 1.13.0-beta.5 [skip ci] ([2eba73b](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/2eba73b784ee443260117e98ab7c943934b3018d)), closes [#513](https://github.com/ScrapeGraphAI/Scrapegraph-ai/issues/513)
6687
* **release:** 1.13.0-beta.6 [skip ci] ([e75b574](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/e75b574b67040e127599da9ee1b0eee13d234cb9))
6788
* **release:** 1.13.0-beta.7 [skip ci] ([6e56925](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/6e56925355c424edae290c70fd98646ab5f420ee))
89+
* add refactoring of default temperature ([6c3b37a](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/6c3b37ab001b80c09ea9ffb56d4c3df338e33a7a))
90+
91+
## [1.13.0-beta.8](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.13.0-beta.7...v1.13.0-beta.8) (2024-08-09)
92+
93+
94+
### Bug Fixes
95+
96+
* broken node ([1272273](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/127227349915deeb0dede34aa575ad269ed7cbe3))
97+
6898

6999
## [1.13.0-beta.7](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.13.0-beta.6...v1.13.0-beta.7) (2024-08-09)
70100

examples/local_models/script_generator_ollama.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,11 @@
99

1010
graph_config = {
1111
"llm": {
12-
"model": "ollama/mistral",
13-
"temperature": 0,
12+
"model": "ollama/llama3.1",
13+
"temperature": 0.5,
1414
# "model_tokens": 2000, # set context length arbitrarily,
1515
"base_url": "http://localhost:11434", # set ollama URL arbitrarily
1616
},
17-
"embeddings": {
18-
"model": "ollama/nomic-embed-text",
19-
"temperature": 0,
20-
"base_url": "http://localhost:11434", # set ollama URL arbitrarily
21-
},
2217
"library": "beautifoulsoup",
2318
"verbose": True,
2419
}

examples/local_models/smart_scraper_ollama.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
"format": "json", # Ollama needs the format to be specified explicitly
1515
# "base_url": "http://localhost:11434", # set ollama URL arbitrarily
1616
},
17-
1817
"verbose": True,
1918
"headless": False
2019
}

pyproject.toml

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
name = "scrapegraphai"
33

44

5-
version = "1.13.3"
5+
version = "1.14.0b1"
66

77

88
description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines."
@@ -23,6 +23,8 @@ dependencies = [
2323
"langchain-groq>=0.1.3",
2424
"langchain-aws>=0.1.3",
2525
"langchain-anthropic>=0.1.11",
26+
"langchain-mistralai>=0.1.12",
27+
"langchain-huggingface>=0.0.3",
2628
"langchain-nvidia-ai-endpoints>=0.1.6",
2729
"html2text>=2024.2.26",
2830
"faiss-cpu>=1.8.0",
@@ -38,11 +40,7 @@ dependencies = [
3840
"google>=3.0.0",
3941
"undetected-playwright>=0.3.0",
4042
"semchunk>=1.0.1",
41-
"langchain-fireworks>=0.1.3",
42-
"langchain-community>=0.2.9",
43-
"langchain-huggingface>=0.0.3",
4443
"browserbase>=0.3.0",
45-
"langchain-mistralai>=0.1.12",
4644
]
4745

4846
license = "MIT"

requirements-dev.lock

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
# features: []
77
# all-features: false
88
# with-sources: false
9+
# generate-hashes: false
10+
# universal: false
911

1012
-e file:.
1113
aiofiles==24.1.0
@@ -110,6 +112,7 @@ filelock==3.15.4
110112
# via huggingface-hub
111113
# via torch
112114
# via transformers
115+
# via triton
113116
fireworks-ai==0.14.0
114117
# via langchain-fireworks
115118
fonttools==4.53.1
@@ -185,6 +188,7 @@ graphviz==0.20.3
185188
# via scrapegraphai
186189
greenlet==3.0.3
187190
# via playwright
191+
# via sqlalchemy
188192
groq==0.9.0
189193
# via langchain-groq
190194
grpc-google-iam-v1==0.13.1
@@ -358,6 +362,34 @@ numpy==1.26.4
358362
# via shapely
359363
# via streamlit
360364
# via transformers
365+
nvidia-cublas-cu12==12.1.3.1
366+
# via nvidia-cudnn-cu12
367+
# via nvidia-cusolver-cu12
368+
# via torch
369+
nvidia-cuda-cupti-cu12==12.1.105
370+
# via torch
371+
nvidia-cuda-nvrtc-cu12==12.1.105
372+
# via torch
373+
nvidia-cuda-runtime-cu12==12.1.105
374+
# via torch
375+
nvidia-cudnn-cu12==8.9.2.26
376+
# via torch
377+
nvidia-cufft-cu12==11.0.2.54
378+
# via torch
379+
nvidia-curand-cu12==10.3.2.106
380+
# via torch
381+
nvidia-cusolver-cu12==11.4.5.107
382+
# via torch
383+
nvidia-cusparse-cu12==12.1.0.106
384+
# via nvidia-cusolver-cu12
385+
# via torch
386+
nvidia-nccl-cu12==2.19.3
387+
# via torch
388+
nvidia-nvjitlink-cu12==12.6.20
389+
# via nvidia-cusolver-cu12
390+
# via nvidia-cusparse-cu12
391+
nvidia-nvtx-cu12==12.1.105
392+
# via torch
361393
openai==1.37.0
362394
# via burr
363395
# via langchain-fireworks
@@ -599,6 +631,8 @@ tqdm==4.66.4
599631
transformers==4.43.3
600632
# via langchain-huggingface
601633
# via sentence-transformers
634+
triton==2.2.0
635+
# via torch
602636
typer==0.12.3
603637
# via fastapi-cli
604638
typing-extensions==4.12.2
@@ -642,6 +676,8 @@ uvicorn==0.30.3
642676
# via fastapi
643677
uvloop==0.19.0
644678
# via uvicorn
679+
watchdog==4.0.2
680+
# via streamlit
645681
watchfiles==0.22.0
646682
# via uvicorn
647683
websockets==12.0

requirements.lock

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
# features: []
77
# all-features: false
88
# with-sources: false
9+
# generate-hashes: false
10+
# universal: false
911

1012
-e file:.
1113
aiohttp==3.9.5
@@ -67,6 +69,7 @@ filelock==3.15.4
6769
# via huggingface-hub
6870
# via torch
6971
# via transformers
72+
# via triton
7073
fireworks-ai==0.14.0
7174
# via langchain-fireworks
7275
free-proxy==1.1.1
@@ -133,6 +136,7 @@ graphviz==0.20.3
133136
# via scrapegraphai
134137
greenlet==3.0.3
135138
# via playwright
139+
# via sqlalchemy
136140
groq==0.9.0
137141
# via langchain-groq
138142
grpc-google-iam-v1==0.13.1
@@ -263,6 +267,34 @@ numpy==1.26.4
263267
# via sentence-transformers
264268
# via shapely
265269
# via transformers
270+
nvidia-cublas-cu12==12.1.3.1
271+
# via nvidia-cudnn-cu12
272+
# via nvidia-cusolver-cu12
273+
# via torch
274+
nvidia-cuda-cupti-cu12==12.1.105
275+
# via torch
276+
nvidia-cuda-nvrtc-cu12==12.1.105
277+
# via torch
278+
nvidia-cuda-runtime-cu12==12.1.105
279+
# via torch
280+
nvidia-cudnn-cu12==8.9.2.26
281+
# via torch
282+
nvidia-cufft-cu12==11.0.2.54
283+
# via torch
284+
nvidia-curand-cu12==10.3.2.106
285+
# via torch
286+
nvidia-cusolver-cu12==11.4.5.107
287+
# via torch
288+
nvidia-cusparse-cu12==12.1.0.106
289+
# via nvidia-cusolver-cu12
290+
# via torch
291+
nvidia-nccl-cu12==2.19.3
292+
# via torch
293+
nvidia-nvjitlink-cu12==12.6.20
294+
# via nvidia-cusolver-cu12
295+
# via nvidia-cusparse-cu12
296+
nvidia-nvtx-cu12==12.1.105
297+
# via torch
266298
openai==1.37.0
267299
# via langchain-fireworks
268300
# via langchain-openai
@@ -414,6 +446,8 @@ tqdm==4.66.4
414446
transformers==4.43.3
415447
# via langchain-huggingface
416448
# via sentence-transformers
449+
triton==2.2.0
450+
# via torch
417451
typing-extensions==4.12.2
418452
# via anthropic
419453
# via anyio

scrapegraphai/graphs/abstract_graph.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,20 +7,16 @@
77
import uuid
88
import warnings
99
from pydantic import BaseModel
10-
1110
from langchain_community.chat_models import ErnieBotChat
1211
from langchain_nvidia_ai_endpoints import ChatNVIDIA
1312
from langchain.chat_models import init_chat_model
14-
1513
from ..helpers import models_tokens
1614
from ..models import (
1715
OneApi,
1816
DeepSeek
1917
)
2018
from ..utils.logging import set_verbosity_warning, set_verbosity_info
2119

22-
23-
2420
class AbstractGraph(ABC):
2521
"""
2622
Scaffolding class for creating a graph representation and executing it.
@@ -53,6 +49,9 @@ class AbstractGraph(ABC):
5349
def __init__(self, prompt: str, config: dict,
5450
source: Optional[str] = None, schema: Optional[BaseModel] = None):
5551

52+
if config.get("llm").get("temperature") is None:
53+
config["llm"]["temperature"] = 0
54+
5655
self.prompt = prompt
5756
self.source = source
5857
self.config = config
@@ -163,7 +162,6 @@ def handle_model(model_name, provider, token_key, default_token=8192):
163162

164163
elif llm_params["model"].startswith("vertexai"):
165164
return handle_model(llm_params["model"], "google_vertexai", llm_params["model"])
166-
167165
elif "gpt-" in llm_params["model"]:
168166
return handle_model(llm_params["model"], "openai", llm_params["model"])
169167

@@ -197,6 +195,7 @@ def handle_model(model_name, provider, token_key, default_token=8192):
197195
return ErnieBotChat(llm_params)
198196

199197
elif "oneapi" in llm_params["model"]:
198+
200199
# take the model after the last slash
201200
llm_params["model"] = llm_params["model"].split("/")[-1]
202201
try:
@@ -206,6 +205,7 @@ def handle_model(model_name, provider, token_key, default_token=8192):
206205
return OneApi(llm_params)
207206

208207
elif "nvidia" in llm_params["model"]:
208+
209209
try:
210210
self.model_token = models_tokens["nvidia"][llm_params["model"].split("/")[-1]]
211211
llm_params["model"] = "/".join(llm_params["model"].split("/")[1:])

scrapegraphai/graphs/base_graph.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,7 @@
66
from typing import Tuple
77
from langchain_community.callbacks import get_openai_callback
88
from ..integrations import BurrBridge
9-
10-
# Import telemetry functions
11-
from ..telemetry import log_graph_execution, log_event
9+
from ..telemetry import log_graph_execution
1210

1311
class BaseGraph:
1412
"""

scrapegraphai/graphs/csv_scraper_graph.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,13 @@
44

55
from typing import Optional
66
from pydantic import BaseModel
7-
87
from .base_graph import BaseGraph
98
from .abstract_graph import AbstractGraph
10-
119
from ..nodes import (
1210
FetchNode,
1311
GenerateAnswerCSVNode
1412
)
1513

16-
1714
class CSVScraperGraph(AbstractGraph):
1815
"""
1916
SmartScraper is a comprehensive web scraping tool that automates the process of extracting

scrapegraphai/graphs/csv_scraper_multi_graph.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,22 +4,19 @@
44

55
from copy import copy, deepcopy
66
from typing import List, Optional
7-
87
from pydantic import BaseModel
9-
108
from .base_graph import BaseGraph
119
from .abstract_graph import AbstractGraph
1210
from .csv_scraper_graph import CSVScraperGraph
13-
1411
from ..nodes import (
1512
GraphIteratorNode,
1613
MergeAnswersNode
1714
)
1815

19-
2016
class CSVScraperMultiGraph(AbstractGraph):
2117
"""
22-
CSVScraperMultiGraph is a scraping pipeline that scrapes a list of URLs and generates answers to a given prompt.
18+
CSVScraperMultiGraph is a scraping pipeline that
19+
scrapes a list of URLs and generates answers to a given prompt.
2320
It only requires a user prompt and a list of URLs.
2421
2522
Attributes:
@@ -44,7 +41,8 @@ class CSVScraperMultiGraph(AbstractGraph):
4441
>>> result = search_graph.run()
4542
"""
4643

47-
def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[BaseModel] = None):
44+
def __init__(self, prompt: str, source: List[str],
45+
config: dict, schema: Optional[BaseModel] = None):
4846

4947
self.max_results = config.get("max_results", 3)
5048

0 commit comments

Comments
 (0)