Skip to content

Commit ecbc955

Browse files
committed
add req + basic integration
1 parent 51563a1 commit ecbc955

File tree

3 files changed

+41
-3
lines changed

3 files changed

+41
-3
lines changed

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ authors = [
99
]
1010

1111
dependencies = [
12+
"requests>=2.32.3",
13+
"pydantic>=2.9.2"
1214
]
1315

1416
license = "MIT"

scrapegraphaiapisdk/scrape.py

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,39 @@
1+
"""
2+
This module provides a function to scrape and extract structured data from a webpage
3+
using the ScrapeGraph AI API. It allows specifying a schema for the output structure
4+
using either a dictionary or a Pydantic model.
5+
"""
16

2-
def scrape(url: str) -> str:
3-
pass
7+
from typing import Union
8+
from pydantic import BaseModel
9+
import requests
10+
11+
def scrape(
    api_key: str,
    url: str,
    prompt: str,
    schema: Union[dict, BaseModel, None] = None,
    *,
    timeout: float = 60.0,
) -> str:
    """Scrape and extract structured data from a webpage using ScrapeGraph AI.

    Sends a single POST request to the ScrapeGraph AI ``/v1/scrape`` endpoint
    and returns the response body as text, without parsing it.

    Args:
        api_key (str): Your ScrapeGraph AI API key, sent as a Bearer token.
        url (str): The URL of the webpage to scrape.
        prompt (str): Natural language prompt describing what data to extract.
        schema (Union[dict, BaseModel, None], optional): Schema definition for
            the output structure. A dictionary is sent unchanged. A Pydantic
            model instance is sent as its ``model_dump(mode="json")``
            dictionary. A Pydantic model class is sent as its
            ``model_json_schema()`` dictionary. If None, ``"schema": null``
            is sent.
        timeout (float, optional): Seconds to wait for the server before
            giving up. Keyword-only. Defaults to 60.

    Returns:
        str: The raw response body (``response.text``). It is expected to be
            JSON, but it is not parsed or validated here.

    Raises:
        requests.HTTPError: If the API responds with a 4xx/5xx status code.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    endpoint = "https://api.scrapegraph.ai/v1/scrape"
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }

    if isinstance(schema, type) and issubclass(schema, BaseModel):
        # A model *class* is not an instance of BaseModel, so it used to reach
        # the JSON encoder untouched and fail with TypeError.
        # NOTE(review): assumes the API accepts a JSON Schema document here --
        # confirm against the ScrapeGraph AI API reference.
        schema_payload = schema.model_json_schema()
    elif isinstance(schema, BaseModel):
        # pydantic v2 deprecates .dict(); mode="json" additionally converts
        # values such as datetimes that the JSON encoder would reject.
        schema_payload = schema.model_dump(mode="json")
    else:
        # A plain dict, or None, is forwarded as-is.
        schema_payload = schema

    payload = {
        "url": url,
        "prompt": prompt,
        "schema": schema_payload,
    }

    # Without an explicit timeout, requests waits forever on a stalled server.
    response = requests.post(
        endpoint, headers=headers, json=payload, timeout=timeout
    )
    response.raise_for_status()  # Raise an exception for bad status codes

    return response.text

scrapegraphaiapisdk/status.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11

2-
def status() -> str:
2+
def status(api_key: str) -> str:
33
pass

0 commit comments

Comments
 (0)