
Commit 8cb9646: Merge branch 'main' into pre/beta
Parents: 9266a36 + 58b1133

24 files changed: +75 -19 lines

.github/FUNDING.yml

Lines changed: 15 additions & 0 deletions

```diff
@@ -0,0 +1,15 @@
+# These are supported funding model platforms
+
+github: ScrapeGraphAI
+patreon: # Replace with a single Patreon username
+open_collective:
+ko_fi: # Replace with a single Ko-fi username
+tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
+community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
+liberapay: # Replace with a single Liberapay username
+issuehunt: # Replace with a single IssueHunt username
+lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
+polar: # Replace with a single Polar username
+buy_me_a_coffee: # Replace with a single Buy Me a Coffee username
+thanks_dev: # Replace with a single thanks.dev username
+custom:
```

CHANGELOG.md

Lines changed: 6 additions & 1 deletion

```diff
@@ -5,13 +5,18 @@
 
 * add conditional node structure to the smart_scraper_graph and implemented a structured way to check condition ([cacd9cd](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/cacd9cde004dace1a7dcc27981245632a78b95f3))
 
-## [1.26.6-beta.1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.26.5...v1.26.6-beta.1) (2024-10-14)
 
+## [1.26.6](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.26.5...v1.26.6) (2024-10-18)
+
+## [1.26.6-beta.1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.26.5...v1.26.6-beta.1) (2024-10-14)
 
 ### Bug Fixes
 
 * remove variable "max_result" not being used in the code ([e76a68a](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/e76a68a782e5bce48d421cb620d0b7bffa412918))
 
+* refactoring of gpt2 tokenizer ([44c3f9c](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/44c3f9c98939c44caa86dc582242819a7c6a0f80))
+>>>>>>> main
+
 ## [1.26.5](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.26.4...v1.26.5) (2024-10-13)
 
 
```

examples/extras/.env.example

Lines changed: 4 additions & 3 deletions

```diff
@@ -1,3 +1,4 @@
-OPENAI_API_KEY="OPENAI_API_KEY"
-BROWSER_BASE_PROJECT_ID="BROWSER_BASE_PROJECT_ID"
-BROWSER_BASE_API_KEY="BROWSERBASE_API_KEY"
+OPENAI_API_KEY="YOUR_OPENAI_API_KEY"
+BROWSER_BASE_PROJECT_ID="YOUR_BROWSER_BASE_PROJECT_ID"
+BROWSER_BASE_API_KEY="YOUR_BROWSERBASE_API_KEY"
+SCRAPE_DO_API_KEY="YOUR_SCRAPE_DO_API_KEY"
```
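
For orientation, a minimal sketch of how an example script might consume these placeholders at runtime, assuming python-dotenv is installed; the graph_config keys and model name below are illustrative and not taken from this commit:

```python
import os

from dotenv import load_dotenv  # assumes the python-dotenv package is installed

load_dotenv()  # read .env from the current working directory

# Illustrative config; only the variable names come from the example file above.
graph_config = {
    "llm": {
        "api_key": os.getenv("OPENAI_API_KEY"),
        "model": "openai/gpt-4o-mini",  # placeholder model name
    },
}

browserbase_project = os.getenv("BROWSER_BASE_PROJECT_ID")
browserbase_key = os.getenv("BROWSER_BASE_API_KEY")
scrape_do_key = os.getenv("SCRAPE_DO_API_KEY")
```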

examples/openai/smart_scraper_openai.py

Lines changed: 2 additions & 2 deletions

```diff
@@ -28,8 +28,8 @@
 # ************************************************
 
 smart_scraper_graph = SmartScraperGraph(
-    prompt="List me what does the company do, the name and a contact email.",
-    source="https://scrapegraphai.com/",
+    prompt="Extract me all the articles",
+    source="https://www.wired.com",
     config=graph_config
 )
 
```
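
The change above only swaps the prompt and source; for context, a minimal end-to-end sketch of the example, assuming OPENAI_API_KEY is set in the environment (the config keys and model name are illustrative, not copied from this file):

```python
import os

from scrapegraphai.graphs import SmartScraperGraph

graph_config = {
    "llm": {
        "api_key": os.getenv("OPENAI_API_KEY"),
        "model": "openai/gpt-4o-mini",  # placeholder model name
    },
    "verbose": True,
}

smart_scraper_graph = SmartScraperGraph(
    prompt="Extract me all the articles",
    source="https://www.wired.com",
    config=graph_config,
)

result = smart_scraper_graph.run()
print(result)
```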

pyproject.toml

Lines changed: 1 addition & 0 deletions

```diff
@@ -3,6 +3,7 @@ name = "scrapegraphai"
 
 version = "1.27.0b1"
 
+
 description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines."
 authors = [
     { name = "Marco Vinciguerra", email = "[email protected]" },
```

scrapegraphai/graphs/abstract_graph.py

Lines changed: 2 additions & 1 deletion

```diff
@@ -128,7 +128,8 @@ def _create_llm(self, llm_config: dict) -> object:
         if requests_per_second is not None:
             with warnings.catch_warnings():
                 warnings.simplefilter("ignore")
-                llm_params["rate_limiter"] = InMemoryRateLimiter(requests_per_second=requests_per_second)
+                llm_params["rate_limiter"] = InMemoryRateLimiter(
+                    requests_per_second=requests_per_second)
         if max_retries is not None:
             llm_params["max_retries"] = max_retries
 
```
134135

scrapegraphai/graphs/base_graph.py

Lines changed: 1 addition & 3 deletions

```diff
@@ -59,7 +59,7 @@ def __init__(self, nodes: list, edges: list, entry_point: str,
             # raise a warning if the entry point is not the first node in the list
             warnings.warn(
                 "Careful! The entry point node is different from the first node in the graph.")
-
+
         self._set_conditional_node_edges()
 
         # Burr configuration
@@ -89,11 +89,9 @@ def _set_conditional_node_edges(self):
         """
         for node in self.nodes:
             if node.node_type == 'conditional_node':
-                # Find outgoing edges from this ConditionalNode
                 outgoing_edges = [(from_node, to_node) for from_node, to_node in self.raw_edges if from_node.node_name == node.node_name]
                 if len(outgoing_edges) != 2:
                     raise ValueError(f"ConditionalNode '{node.node_name}' must have exactly two outgoing edges.")
-                # Assign true_node_name and false_node_name
                 node.true_node_name = outgoing_edges[0][1].node_name
                 try:
                     node.false_node_name = outgoing_edges[1][1].node_name
```
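
The rule enforced above, restated as a self-contained toy example (the classes and node names here are stand-ins, not ScrapeGraphAI types): a conditional node must have exactly two outgoing edges, the first becoming the true branch and the second the false branch.

```python
from dataclasses import dataclass
from typing import Optional

@dataclass
class ToyNode:
    node_name: str
    node_type: str
    true_node_name: Optional[str] = None
    false_node_name: Optional[str] = None

def wire_conditional(node, raw_edges):
    """Mirror of the edge-wiring rule shown in the diff, using toy types."""
    outgoing = [(src, dst) for src, dst in raw_edges if src.node_name == node.node_name]
    if len(outgoing) != 2:
        raise ValueError(f"ConditionalNode '{node.node_name}' must have exactly two outgoing edges.")
    node.true_node_name = outgoing[0][1].node_name
    node.false_node_name = outgoing[1][1].node_name

cond = ToyNode("check_answer", "conditional_node")
ok = ToyNode("generate_answer", "node")
retry = ToyNode("retry_fetch", "node")
wire_conditional(cond, [(cond, ok), (cond, retry)])
assert (cond.true_node_name, cond.false_node_name) == ("generate_answer", "retry_fetch")
```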

scrapegraphai/graphs/code_generator_graph.py

Lines changed: 3 additions & 0 deletions

```diff
@@ -99,6 +99,7 @@ def _create_graph(self) -> BaseGraph:
                 "schema": self.schema,
             }
         )
+
         prompt_refier_node = PromptRefinerNode(
             input="user_prompt",
             output=["refined_prompt"],
@@ -108,6 +109,7 @@ def _create_graph(self) -> BaseGraph:
                 "schema": self.schema
             }
         )
+
         html_analyzer_node = HtmlAnalyzerNode(
             input="refined_prompt & original_html",
             output=["html_info", "reduced_html"],
@@ -118,6 +120,7 @@ def _create_graph(self) -> BaseGraph:
                 "reduction": self.config.get("reduction", 0)
             }
         )
+
         generate_code_node = GenerateCodeNode(
             input="user_prompt & refined_prompt & html_info & reduced_html & answer",
             output=["generated_code"],
```

scrapegraphai/graphs/csv_scraper_graph.py

Lines changed: 2 additions & 0 deletions

```diff
@@ -59,6 +59,7 @@ def _create_graph(self):
         """
         Creates the graph of nodes representing the workflow for web scraping.
         """
+
         fetch_node = FetchNode(
             input="csv | csv_dir",
             output=["doc"],
@@ -90,6 +91,7 @@ def run(self) -> str:
         """
         Executes the web scraping process and returns the answer to the prompt.
         """
+
         inputs = {"user_prompt": self.prompt, self.input_key: self.source}
         self.final_state, self.execution_info = self.graph.execute(inputs)
```
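
A possible usage sketch for the graph touched above, assuming CSVScraperGraph is exposed from scrapegraphai.graphs as in the repository's CSV examples; the file path, prompt, and config values are placeholders:

```python
import os

import pandas as pd
from scrapegraphai.graphs import CSVScraperGraph

graph_config = {
    "llm": {
        "api_key": os.getenv("OPENAI_API_KEY"),
        "model": "openai/gpt-4o-mini",  # placeholder model name
    },
}

# The CSV examples pass the file content as text; the path is a placeholder.
text = pd.read_csv("inputs/projects.csv")

csv_scraper_graph = CSVScraperGraph(
    prompt="List all the projects with their descriptions",
    source=str(text),
    config=graph_config,
)

print(csv_scraper_graph.run())
```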

scrapegraphai/graphs/csv_scraper_multi_graph.py

Lines changed: 1 addition & 0 deletions

```diff
@@ -94,6 +94,7 @@ def run(self) -> str:
         Returns:
             str: The answer to the prompt.
         """
+
         inputs = {"user_prompt": self.prompt, "jsons": self.source}
         self.final_state, self.execution_info = self.graph.execute(inputs)
```
