Skip to content

Commit 4f53b09

Browse files
committed
add examples for schema
1 parent 450fde6 commit 4f53b09

18 files changed

+579
-145
lines changed
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
"""
2+
Example of Search Graph
3+
"""
4+
5+
import os
6+
from dotenv import load_dotenv
7+
load_dotenv()
8+
9+
from scrapegraphai.graphs import SearchGraph
10+
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
11+
12+
from pydantic import BaseModel, Field
13+
from typing import List
14+
15+
# ************************************************
16+
# Define the output schema for the graph
17+
# ************************************************
18+
19+
class Dish(BaseModel):
20+
name: str = Field(description="The name of the dish")
21+
description: str = Field(description="The description of the dish")
22+
23+
class Dishes(BaseModel):
24+
dishes: List[Dish]
25+
26+
# ************************************************
27+
# Define the configuration for the graph
28+
# ************************************************
29+
graph_config = {
30+
"llm": {
31+
"api_key": os.getenv("ANTHROPIC_API_KEY"),
32+
"model": "claude-3-haiku-20240307",
33+
"max_tokens": 4000},
34+
}
35+
36+
# ************************************************
37+
# Create the SearchGraph instance and run it
38+
# ************************************************
39+
40+
search_graph = SearchGraph(
41+
prompt="List me Chioggia's famous dishes",
42+
config=graph_config,
43+
schema=Dishes
44+
)
45+
46+
result = search_graph.run()
47+
print(result)
48+
49+
# ************************************************
50+
# Get graph execution info
51+
# ************************************************
52+
53+
graph_exec_info = search_graph.get_execution_info()
54+
print(prettify_exec_info(graph_exec_info))
55+
56+
# Save to json and csv
57+
convert_to_csv(result, "result")
58+
convert_to_json(result, "result")

examples/anthropic/smart_scraper_schema_haiku.py

Lines changed: 9 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
"""
44

55
import os
6+
from typing import List
7+
from pydantic import BaseModel, Field
68
from dotenv import load_dotenv
79
from scrapegraphai.graphs import SmartScraperGraph
810
from scrapegraphai.utils import prettify_exec_info
@@ -17,22 +19,12 @@
1719
# Define the output schema for the graph
1820
# ************************************************
1921

20-
schema= """
21-
{
22-
"Projects": [
23-
"Project #":
24-
{
25-
"title": "...",
26-
"description": "...",
27-
},
28-
"Project #":
29-
{
30-
"title": "...",
31-
"description": "...",
32-
}
33-
]
34-
}
35-
"""
22+
class Project(BaseModel):
23+
title: str = Field(description="The title of the project")
24+
description: str = Field(description="The description of the project")
25+
26+
class Projects(BaseModel):
27+
projects: List[Project]
3628

3729
# ************************************************
3830
# Create the SmartScraperGraph instance and run it
@@ -48,7 +40,7 @@
4840
smart_scraper_graph = SmartScraperGraph(
4941
prompt="List me all the projects with their description",
5042
# also accepts a string with the already downloaded HTML code
51-
schema=schema,
43+
schema=Projects,
5244
source="https://perinim.github.io/projects/",
5345
config=graph_config
5446
)
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
"""
2+
Example of Search Graph
3+
"""
4+
5+
import os
6+
from dotenv import load_dotenv
7+
load_dotenv()
8+
9+
from scrapegraphai.graphs import SearchGraph
10+
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
11+
12+
from pydantic import BaseModel, Field
13+
from typing import List
14+
from langchain_openai import AzureChatOpenAI
15+
from langchain_openai import AzureOpenAIEmbeddings
16+
17+
# ************************************************
18+
# Define the output schema for the graph
19+
# ************************************************
20+
21+
class Dish(BaseModel):
22+
name: str = Field(description="The name of the dish")
23+
description: str = Field(description="The description of the dish")
24+
25+
class Dishes(BaseModel):
26+
dishes: List[Dish]
27+
28+
# ************************************************
29+
# Initialize the model instances
30+
# ************************************************
31+
32+
33+
llm_model_instance = AzureChatOpenAI(
34+
openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
35+
azure_deployment=os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT_NAME"]
36+
)
37+
38+
embedder_model_instance = AzureOpenAIEmbeddings(
39+
azure_deployment=os.environ["AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT_NAME"],
40+
openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
41+
)
42+
43+
# ************************************************
44+
# Build the graph configuration from the model instances
45+
# ************************************************
46+
47+
graph_config = {
48+
"llm": {"model_instance": llm_model_instance},
49+
"embeddings": {"model_instance": embedder_model_instance}
50+
}
51+
52+
# ************************************************
53+
# Create the SearchGraph instance and run it
54+
# ************************************************
55+
56+
search_graph = SearchGraph(
57+
prompt="List me Chioggia's famous dishes",
58+
config=graph_config,
59+
schema=Dishes
60+
)
61+
62+
result = search_graph.run()
63+
print(result)
64+
65+
# ************************************************
66+
# Get graph execution info
67+
# ************************************************
68+
69+
graph_exec_info = search_graph.get_execution_info()
70+
print(prettify_exec_info(graph_exec_info))
71+
72+
# Save to json and csv
73+
convert_to_csv(result, "result")
74+
convert_to_json(result, "result")

examples/azure/smart_scraper_schema_azure.py

Lines changed: 9 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
"""
44

55
import os, json
6+
from typing import List
7+
from pydantic import BaseModel, Field
68
from dotenv import load_dotenv
79
from langchain_openai import AzureChatOpenAI
810
from langchain_openai import AzureOpenAIEmbeddings
@@ -14,22 +16,12 @@
1416
# Define the output schema for the graph
1517
# ************************************************
1618

17-
schema= """
18-
{
19-
"Projects": [
20-
"Project #":
21-
{
22-
"title": "...",
23-
"description": "...",
24-
},
25-
"Project #":
26-
{
27-
"title": "...",
28-
"description": "...",
29-
}
30-
]
31-
}
32-
"""
19+
class Project(BaseModel):
20+
title: str = Field(description="The title of the project")
21+
description: str = Field(description="The description of the project")
22+
23+
class Projects(BaseModel):
24+
projects: List[Project]
3325

3426
# ************************************************
3527
# Initialize the model instances
@@ -60,7 +52,7 @@
6052
smart_scraper_graph = SmartScraperGraph(
6153
prompt="List me all the projects with their description",
6254
source="https://perinim.github.io/projects/",
63-
schema=schema,
55+
schema=Projects,
6456
config=graph_config
6557
)
6658

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
"""
2+
Example of Search Graph
3+
"""
4+
from scrapegraphai.graphs import SearchGraph
5+
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
6+
7+
from pydantic import BaseModel, Field
8+
from typing import List
9+
10+
# ************************************************
11+
# Define the output schema for the graph
12+
# ************************************************
13+
14+
class Dish(BaseModel):
15+
name: str = Field(description="The name of the dish")
16+
description: str = Field(description="The description of the dish")
17+
18+
class Dishes(BaseModel):
19+
dishes: List[Dish]
20+
21+
# ************************************************
22+
# Define the configuration for the graph
23+
# ************************************************
24+
25+
graph_config = {
26+
"llm": {
27+
"client": "client_name",
28+
"model": "bedrock/anthropic.claude-3-sonnet-20240229-v1:0",
29+
"temperature": 0.0
30+
},
31+
"embeddings": {
32+
"model": "bedrock/cohere.embed-multilingual-v3"
33+
}
34+
}
35+
36+
# ************************************************
37+
# Create the SearchGraph instance and run it
38+
# ************************************************
39+
40+
search_graph = SearchGraph(
41+
prompt="List me Chioggia's famous dishes",
42+
config=graph_config,
43+
schema=Dishes
44+
)
45+
46+
result = search_graph.run()
47+
print(result)
48+
49+
# ************************************************
50+
# Get graph execution info
51+
# ************************************************
52+
53+
graph_exec_info = search_graph.get_execution_info()
54+
print(prettify_exec_info(graph_exec_info))
55+
56+
# Save to json and csv
57+
convert_to_csv(result, "result")
58+
convert_to_json(result, "result")

examples/bedrock/smart_scraper_schema_bedrock.py

Lines changed: 9 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,21 @@
11
"""
22
Basic example of scraping pipeline using SmartScraper
33
"""
4-
5-
import os
6-
from dotenv import load_dotenv
4+
from typing import List
5+
from pydantic import BaseModel, Field
76
from scrapegraphai.graphs import SmartScraperGraph
87
from scrapegraphai.utils import prettify_exec_info
98

10-
load_dotenv()
119
# ************************************************
1210
# Define the output schema for the graph
1311
# ************************************************
1412

15-
schema= """
16-
{
17-
"Projects": [
18-
"Project #":
19-
{
20-
"title": "...",
21-
"description": "...",
22-
},
23-
"Project #":
24-
{
25-
"title": "...",
26-
"description": "...",
27-
}
28-
]
29-
}
30-
"""
13+
class Project(BaseModel):
14+
title: str = Field(description="The title of the project")
15+
description: str = Field(description="The description of the project")
16+
17+
class Projects(BaseModel):
18+
projects: List[Project]
3119

3220
# ************************************************
3321
# Define the configuration for the graph
@@ -52,7 +40,7 @@
5240
prompt="List me all the projects with their description",
5341
# also accepts a string with the already downloaded HTML code
5442
source="https://perinim.github.io/projects/",
55-
schema=schema,
43+
schema=Projects,
5644
config=graph_config
5745
)
5846

0 commit comments

Comments
 (0)