
Commit d486a7a

feat: add docstring
1 parent cad027f commit d486a7a

6 files changed: +125 -41 lines changed

scrapegraph-py/scrapegraph_py/client.py

Lines changed: 36 additions & 14 deletions
@@ -1,43 +1,65 @@
+"""
+Module for ScrapeGraph Client
+
+This module contains the ScrapeGraphClient class, which provides methods to interact
+with the ScrapeGraph AI API. It allows users to initialize the client with an API key,
+retrieve necessary headers for API requests, and construct full endpoint URLs for
+making requests to the ScrapeGraph API. This facilitates seamless integration with
+ScrapeGraph AI services.
+"""
 
 class ScrapeGraphClient:
-    """Client for interacting with the ScrapeGraph AI API."""
-
+    """Client for interacting with the ScrapeGraph AI API.
+
+    This class provides methods to initialize the client with an API key and base URL,
+    retrieve headers for API requests, and construct full endpoint URLs for making
+    requests to the ScrapeGraph API. It is designed to facilitate seamless interaction
+    with the ScrapeGraph AI services.
+
+    Attributes:
+        api_key (str): Your ScrapeGraph AI API key.
+        base_url (str): Base URL for the API, defaulting to "https://api.scrapegraphai.com/v1".
+    """
+
     def __init__(self, api_key: str, base_url: str = "https://api.scrapegraphai.com/v1"):
         """Initialize the ScrapeGraph client.
 
         Args:
-            api_key (str): Your ScrapeGraph AI API key
-            base_url (str): Base URL for the API (optional)
+            api_key (str): Your ScrapeGraph AI API key.
+            base_url (str): Base URL for the API (optional, defaults
+                to "https://api.scrapegraphai.com/v1").
         """
         self.api_key = api_key
         self.base_url = base_url.rstrip('/')
-
+
     def get_headers(self, include_content_type: bool = True) -> dict:
         """Get the headers for API requests.
 
         Args:
-            include_content_type (bool): Whether to include Content-Type header
+            include_content_type (bool): Whether to include the Content-Type header
+                (default is True).
 
         Returns:
-            dict: Headers for the API request
+            dict: A dictionary containing the headers for the API request, including
+                the API key and optionally the Content-Type.
         """
         headers = {
             "accept": "application/json",
-            "SGAI-API-KEY": self.api_key
+            "SGAI-APIKEY": self.api_key
        }
-
+
         if include_content_type:
             headers["Content-Type"] = "application/json"
-
+
         return headers
-
+
     def get_endpoint(self, path: str) -> str:
         """Get the full endpoint URL.
 
         Args:
-            path (str): API endpoint path
+            path (str): The API endpoint path to be appended to the base URL.
 
         Returns:
-            str: Full endpoint URL
+            str: The full endpoint URL constructed from the base URL and the provided path.
         """
-        return f"{self.base_url}/api/v1/{path}"
+        return f"{self.base_url}/{path}"
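
For orientation, a minimal usage sketch of the client after this commit (the API key below is a placeholder, not a real credential):

from scrapegraph_py.client import ScrapeGraphClient

# Placeholder key for illustration only.
client = ScrapeGraphClient(api_key="sgai-xxxxxxxx")

# base_url already carries /v1, so paths are now joined directly;
# the previous code appended an extra /api/v1 segment.
print(client.get_endpoint("smartscraper"))
# https://api.scrapegraphai.com/v1/smartscraper

# The auth header is now spelled SGAI-APIKEY (previously SGAI-API-KEY).
print(client.get_headers(include_content_type=False))
# {'accept': 'application/json', 'SGAI-APIKEY': 'sgai-xxxxxxxx'}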

scrapegraph-py/scrapegraph_py/credits.py

Lines changed: 3 additions & 3 deletions
@@ -1,13 +1,13 @@
 """
 This module provides functionality to interact with the ScrapeGraph AI API.
 
-It includes functions to retrieve credits and send feedback, handling responses and errors appropriately.
+It includes functions to retrieve credits and send feedback,
+handling responses and errors appropriately.
 """
 
 import requests
-import json
 from .client import ScrapeGraphClient
-from .exceptions import raise_for_status_code
+from .exceptions import raise_for_status_code, APIError
 
 def credits(client: ScrapeGraphClient) -> str:
     """Retrieve credits from the API.

scrapegraph-py/scrapegraph_py/exceptions.py

Lines changed: 43 additions & 0 deletions
@@ -1,3 +1,13 @@
+"""
+Module for ScrapeGraph Exceptions
+
+This module defines custom exception classes for handling errors that may occur
+when interacting with the ScrapeGraph API. These exceptions provide a structured
+way to manage different types of errors, such as authentication issues, rate limits,
+bad requests, and server errors. Each exception class inherits from a base exception
+class, allowing for more granular error handling in client applications.
+"""
+
 class APIError(Exception):
     """Base class for API exceptions."""
     def __init__(self, message=None, response=None):
@@ -19,3 +29,36 @@ class BadRequestError(APIError):
 
 class InternalServerError(APIError):
     """Raised when a 500 Internal Server Error occurs."""
+
+class ScrapeGraphException(Exception):
+    """Base exception for ScrapeGraph errors"""
+    pass
+
+def raise_for_status_code(status_code: int, response_text: str = None):
+    """
+    Raise appropriate exception based on HTTP status code.
+
+    Args:
+        status_code (int): The HTTP status code returned from the API response.
+        response_text (str): Optional text providing additional context for the error.
+
+    Raises:
+        ScrapeGraphException: For various HTTP error statuses, including 401, 403, 404, and 500.
+    """
+    if 200 <= status_code < 300:
+        return
+
+    error_message = f"HTTP {status_code}"
+    if response_text:
+        error_message += f": {response_text}"
+
+    if status_code == 401:
+        raise ScrapeGraphException("Unauthorized - Invalid API key")
+    elif status_code == 403:
+        raise ScrapeGraphException("Forbidden - You don't have access to this resource")
+    elif status_code == 404:
+        raise ScrapeGraphException("Not Found - The requested resource doesn't exist")
+    elif status_code >= 500:
+        raise ScrapeGraphException("Server Error - Something went wrong on our end")
+    else:
+        raise ScrapeGraphException(error_message)
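
A short sketch of how the new helper behaves as written; note that the mapped 401/403/404/5xx branches raise a fixed message, so response_text only surfaces for unmapped error codes:

from scrapegraph_py.exceptions import ScrapeGraphException, raise_for_status_code

raise_for_status_code(204)  # any 2xx code returns without raising

try:
    raise_for_status_code(404, "request_id not found")
except ScrapeGraphException as exc:
    print(exc)  # Not Found - The requested resource doesn't exist

try:
    raise_for_status_code(429, "rate limited")
except ScrapeGraphException as exc:
    print(exc)  # HTTP 429: rate limited  (unmapped codes keep the response text)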

scrapegraph-py/scrapegraph_py/feedback.py

Lines changed: 2 additions & 3 deletions
@@ -36,8 +36,7 @@ def feedback(client: ScrapeGraphClient, request_id: str, rating: int, feedback_t
     }
 
     try:
-        response = requests.post(endpoint, headers=headers, json=feedback_data)
-        raise_for_status_code(response.status_code, response)
+        response = requests.post(endpoint, headers=headers, json=feedback_data, timeout=10)
         return response.text
     except requests.exceptions.RequestException as e:
-        raise APIError(f"Request failed: {str(e)}", response=None)
+        raise APIError(f"Request failed: {str(e)}", response=None) from e
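
The raise ... from e change preserves the original network error as __cause__. A self-contained sketch of the pattern, with an APIError stub mirroring the shape used in this repo (the URL is deliberately unreachable):

import requests

class APIError(Exception):
    """Stub mirroring the repo's APIError: optional message and response."""
    def __init__(self, message=None, response=None):
        super().__init__(message)
        self.response = response

def send_feedback_stub():
    try:
        # .invalid is a reserved TLD, so this request always fails.
        requests.post("https://api.example.invalid/feedback", json={}, timeout=10)
    except requests.exceptions.RequestException as e:
        raise APIError(f"Request failed: {e}", response=None) from e

try:
    send_feedback_stub()
except APIError as err:
    # "from e" recorded the underlying requests failure for debugging.
    print(repr(err.__cause__))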

scrapegraph-py/scrapegraph_py/local_scraper.py

Lines changed: 18 additions & 7 deletions
@@ -1,11 +1,22 @@
+"""
+Module for Local Scraper
+
+This module contains functions for scraping and extracting structured data from
+website text using the ScrapeGraph AI API. It defines the `scrape_text` function,
+which takes a ScrapeGraph client, website text, a user prompt, and an optional
+Pydantic schema to extract relevant data. The extracted data is returned in JSON
+format, adhering to the specified schema if provided. This module is designed to
+facilitate the integration of web scraping capabilities into applications using
+ScrapeGraph AI services.
+"""
+from typing import Optional
 from pydantic import BaseModel
 import requests
-from typing import Optional
-import json
 from .client import ScrapeGraphClient
 from .exceptions import raise_for_status_code, APIError
 
-def scrape_text(client: ScrapeGraphClient, website_text: str, prompt: str, schema: Optional[BaseModel] = None) -> str:
+def scrape_text(client: ScrapeGraphClient, website_text: str,
+                prompt: str, schema: Optional[BaseModel] = None) -> str:
     """Scrape and extract structured data from website text using ScrapeGraph AI.
 
     Args:
@@ -21,12 +32,12 @@ def scrape_text(client: ScrapeGraphClient, website_text: str, prompt: str, schem
     """
     endpoint = client.get_endpoint("smartscraper")
     headers = client.get_headers()
-
+
     payload = {
         "website_text": website_text,
         "user_prompt": prompt
     }
-
+
     if schema:
         schema_json = schema.model_json_schema()
         payload["output_schema"] = {
@@ -35,9 +46,9 @@ def scrape_text(client: ScrapeGraphClient, website_text: str, prompt: str, schem
             "properties": schema_json.get("properties", {}),
             "required": schema_json.get("required", [])
         }
-
+
     try:
-        response = requests.post(endpoint, headers=headers, json=payload)
+        response = requests.post(endpoint, headers=headers, json=payload, timeout=10)
         raise_for_status_code(response.status_code, response)
         return response.text
     except requests.exceptions.RequestException as e:
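
A usage sketch for scrape_text with an optional Pydantic schema; the Product model and key are hypothetical, and a valid API key is needed for the call to succeed:

from pydantic import BaseModel
from scrapegraph_py.client import ScrapeGraphClient
from scrapegraph_py.local_scraper import scrape_text

class Product(BaseModel):  # hypothetical schema for illustration
    name: str
    price: str

client = ScrapeGraphClient(api_key="sgai-xxxxxxxx")  # placeholder key
result = scrape_text(
    client,
    website_text="<html><body>Widget - $9.99</body></html>",
    prompt="Extract the product name and price",
    schema=Product,  # mapped into payload["output_schema"] as shown above
)
print(result)  # JSON text constrained to Product's properties
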
Lines changed: 23 additions & 14 deletions
@@ -1,15 +1,23 @@
+"""
+Module for Scraping Web Data with ScrapeGraph AI
+
+This module provides functionality to scrape and extract structured data from
+webpages using the ScrapeGraph AI API. It defines the `scrape` function, which
+takes a ScrapeGraph client, a URL, a user prompt, and an optional Pydantic schema
+to specify the desired output structure. The extracted data is returned in JSON
+format, adhering to the specified schema if provided. This module is designed to
+facilitate the integration of web scraping capabilities into applications utilizing
+ScrapeGraph AI services.
+"""
+from typing import Optional
+
 from pydantic import BaseModel
 import requests
-from typing import Optional
-import json
 from .client import ScrapeGraphClient
-from .exceptions import APIError
+from .exceptions import APIError, raise_for_status_code
 
-def raise_for_status_code(status_code: int, response: requests.Response):
-    if status_code >= 400:
-        raise APIError(f"API request failed with status {status_code}", response=response)
-
-def scrape(client: ScrapeGraphClient, url: str, prompt: str, schema: Optional[BaseModel] = None) -> str:
+def scrape(client: ScrapeGraphClient, url: str, prompt: str,
+           schema: Optional[BaseModel] = None) -> str:
     """Scrape and extract structured data from a webpage using ScrapeGraph AI.
 
     Args:
@@ -25,12 +33,13 @@ def scrape(client: ScrapeGraphClient, url: str, prompt: str, schema: Optional[Ba
     """
     endpoint = client.get_endpoint("smartscraper")
     headers = client.get_headers()
-
+
     payload = {
         "website_url": url,
-        "user_prompt": prompt
+        "user_prompt": prompt,
+        "output_schema": {}
     }
-
+
     if schema:
         schema_json = schema.model_json_schema()
         payload["output_schema"] = {
@@ -39,10 +48,10 @@ def scrape(client: ScrapeGraphClient, url: str, prompt: str, schema: Optional[Ba
             "properties": schema_json.get("properties", {}),
             "required": schema_json.get("required", [])
        }
-
+
     try:
-        response = requests.post(endpoint, headers=headers, json=payload)
+        response = requests.post(endpoint, headers=headers, json=payload, timeout=10)
         raise_for_status_code(response.status_code, response)
         return response.text
     except requests.exceptions.RequestException as e:
-        raise APIError(f"Request failed: {str(e)}", response=None)
+        raise APIError(f"Request failed: {str(e)}", response=None)
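
And the equivalent sketch for the URL-based scrape function; the module path is assumed, since this file's name is truncated in the diff view. Without a schema, the payload now carries "output_schema": {} by default:

from scrapegraph_py.client import ScrapeGraphClient
from scrapegraph_py.scraper import scrape  # assumed module path

client = ScrapeGraphClient(api_key="sgai-xxxxxxxx")  # placeholder key
result = scrape(
    client,
    url="https://example.com/products/widget",
    prompt="Extract the product name and price",
)  # no schema: the request sends "output_schema": {} by default
print(result)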
