Skip to content

Commit e6f18bc

Browse files
committed
add examples and tests for sdk
1 parent ecbc955 commit e6f18bc

File tree

7 files changed

+140
-8
lines changed

7 files changed

+140
-8
lines changed

examples/.env.examples

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
SCRAPEGRAPH_API_KEY="YOUR_SCRAPEGRAPH_API_KEY_HERE"

examples/scrape_example.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
import os
2+
from dotenv import load_dotenv
3+
from scrapegraphaiapisdk.scrape import scrape
4+
from pydantic import BaseModel
5+
from typing import List
6+
7+
# Load environment variables from .env file
8+
load_dotenv()
9+
10+
# Schema for a single product extracted from the page.
# NOTE: documented with comments rather than a docstring so the JSON schema
# generated from this model stays unchanged.
class Product(BaseModel):
    name: str  # product name
    price: float  # product price as a number
    description: str  # product description text
14+
15+
# Top-level schema passed to scrape(): a wrapper holding every Product found.
class ProductList(BaseModel):
    products: List[Product]  # one entry per extracted product
17+
18+
def main():
    """Run a sample scrape request and print the extracted data.

    Reads the API key from the ``SCRAPEGRAPH_API_KEY`` environment variable,
    then calls ``scrape`` with a sample URL, a natural language prompt and
    the ``ProductList`` schema. Any error raised by the request is printed
    rather than propagated, since this is a top-level example script.
    """
    # Get API key from environment variables
    api_key = os.getenv("SCRAPEGRAPH_API_KEY")
    if not api_key:
        # os.getenv returns None when the variable is unset; stop early with
        # a clear hint instead of sending a request without credentials.
        print("Error occurred: SCRAPEGRAPH_API_KEY is not set")
        return

    # URL to scrape
    url = "https://example.com/products"

    # Natural language prompt
    prompt = "Extract all products from this page including their names, prices, and descriptions"

    # The model class itself (not an instance) is passed as the schema
    schema = ProductList

    # Make the request
    try:
        result = scrape(api_key, url, prompt, schema)
    except Exception as e:
        print(f"Error occurred: {e}")
    else:
        print(f"Scraped data: {result}")
37+
38+
if __name__ == "__main__":
39+
main()

examples/status_example.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
import os
2+
from dotenv import load_dotenv
3+
from scrapegraphaiapisdk.status import status
4+
5+
# Load environment variables from .env file
6+
load_dotenv()
7+
8+
def main():
    """Query the API status endpoint and print the result.

    Reads the API key from the ``SCRAPEGRAPH_API_KEY`` environment variable
    and passes it to ``status``. Any error raised by the call is printed
    rather than propagated, since this is a top-level example script.
    """
    # Get API key from environment variables
    api_key = os.getenv("SCRAPEGRAPH_API_KEY")
    if not api_key:
        # os.getenv returns None when the variable is unset; stop early with
        # a clear hint instead of sending a request without credentials.
        print("Error occurred: SCRAPEGRAPH_API_KEY is not set")
        return

    # Check API status
    try:
        result = status(api_key)
    except Exception as e:
        print(f"Error occurred: {e}")
    else:
        print(f"API Status: {result}")
18+
19+
if __name__ == "__main__":
20+
main()

scrapegraphaiapisdk/scrape.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,24 @@
11
"""
22
This module provides a function to scrape and extract structured data from a webpage
33
using the ScrapeGraph AI API. It allows specifying a schema for the output structure
4-
using either a dictionary or a Pydantic model.
4+
using a Pydantic model.
55
"""
66

7-
from typing import Union
87
from pydantic import BaseModel
98
import requests
109

11-
def scrape(api_key: str, url: str, prompt: str, schema: Union[dict, BaseModel, None] = None) -> str:
10+
def scrape(api_key: str, url: str, prompt: str, schema: BaseModel) -> str:
1211
"""Scrape and extract structured data from a webpage using ScrapeGraph AI.
1312
1413
Args:
1514
api_key (str): Your ScrapeGraph AI API key
1615
url (str): The URL of the webpage to scrape
1716
prompt (str): Natural language prompt describing what data to extract
18-
schema (Union[dict, BaseModel, None], optional): Schema definition for the output structure.
19-
Can be either a dictionary or a Pydantic model. If None, returns raw JSON.
17+
schema (BaseModel): Pydantic model defining the output structure.
18+
The model will be converted to JSON schema before making the request.
2019
2120
Returns:
22-
str: Extracted data in JSON format matching the provided schema (if specified)
21+
str: Extracted data in JSON format matching the provided schema
2322
"""
2423
endpoint = "https://api.scrapegraph.ai/v1/scrape"
2524
headers = {
@@ -30,10 +29,10 @@ def scrape(api_key: str, url: str, prompt: str, schema: Union[dict, BaseModel, N
3029
payload = {
3130
"url": url,
3231
"prompt": prompt,
33-
"schema": schema.dict() if isinstance(schema, BaseModel) else schema if schema is not None else None
32+
"schema": schema.model_json_schema()
3433
}
3534

3635
response = requests.post(endpoint, headers=headers, json=payload)
37-
response.raise_for_status() # Raise an exception for bad status codes
36+
response.raise_for_status()
3837

3938
return response.text

tests/.env.examples

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
SCRAPEGRAPH_API_KEY="YOUR_SCRAPEGRAPH_API_KEY_HERE"

tests/test_scrape.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
import pytest
2+
import os
3+
from dotenv import load_dotenv
4+
from pydantic import BaseModel
5+
from typing import List
6+
from scrapegraphaiapisdk.scrape import scrape
7+
8+
# Load environment variables from .env file
9+
load_dotenv()
10+
11+
# Schema for a single product, used as the element type of ProductList.
# NOTE: documented with comments rather than a docstring so the JSON schema
# generated from this model stays unchanged.
class Product(BaseModel):
    name: str  # product name
    price: float  # product price as a number
    description: str  # product description text
15+
16+
# Top-level schema handed to scrape() by the tests in this module.
class ProductList(BaseModel):
    products: List[Product]  # one entry per extracted product
18+
19+
def test_scrape_successful():
    """scrape() returns the response text as a string when the POST succeeds."""
    # pytest has no ``mock`` attribute; patching is provided by the standard
    # library's unittest.mock, imported locally to keep this test standalone.
    from unittest import mock

    api_key = os.getenv("SCRAPEGRAPH_API_KEY")
    url = "https://example.com/products"
    prompt = "Extract all products"
    schema = ProductList

    # Mock the response so no real HTTP request is made
    with mock.patch('requests.post') as mock_post:
        mock_post.return_value.text = '{"products": [{"name": "Test Product", "price": 99.99, "description": "Test Description"}]}'
        mock_post.return_value.raise_for_status.return_value = None

        result = scrape(api_key, url, prompt, schema)

        # The request must go through the patched requests.post exactly once
        mock_post.assert_called_once()
        assert isinstance(result, str)
        assert "Test Product" in result
33+
34+
def test_scrape_invalid_api_key():
    """scrape() propagates the error raised by the underlying POST call."""
    # pytest has no ``mock`` attribute; patching is provided by the standard
    # library's unittest.mock, imported locally to keep this test standalone.
    from unittest import mock

    api_key = "invalid_key"
    url = "https://example.com/products"
    prompt = "Extract all products"
    schema = ProductList

    # Simulate the request failing instead of contacting the real API
    with mock.patch('requests.post') as mock_post:
        mock_post.side_effect = Exception("Invalid API key")

        with pytest.raises(Exception):
            scrape(api_key, url, prompt, schema)

tests/test_status.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
import pytest
2+
import os
3+
from dotenv import load_dotenv
4+
from scrapegraphaiapisdk.status import status
5+
6+
# Load environment variables from .env file
7+
load_dotenv()
8+
9+
def test_status_successful():
    """status() yields a string containing "ok" when the GET is mocked to succeed."""
    # pytest has no ``mock`` attribute; patching is provided by the standard
    # library's unittest.mock, imported locally to keep this test standalone.
    from unittest import mock

    api_key = os.getenv("SCRAPEGRAPH_API_KEY")

    # Mock the response so no real HTTP request is made
    with mock.patch('requests.get') as mock_get:
        mock_get.return_value.text = '{"status": "ok"}'
        mock_get.return_value.raise_for_status.return_value = None

        result = status(api_key)

        assert isinstance(result, str)
        assert "ok" in result
20+
21+
def test_status_invalid_api_key():
    """status() raises when the underlying GET call fails."""
    # pytest has no ``mock`` attribute; patching is provided by the standard
    # library's unittest.mock, imported locally to keep this test standalone.
    from unittest import mock

    api_key = "invalid_key"

    # Simulate the request failing instead of contacting the real API
    with mock.patch('requests.get') as mock_get:
        mock_get.side_effect = Exception("Invalid API key")

        with pytest.raises(Exception):
            status(api_key)

0 commit comments

Comments
 (0)