feat: add docstring

VinciGit00 · VinciGit00 · commit d486a7abac41 · 2024-11-20T08:56:26.000+01:00
diff --git a/scrapegraph-py/scrapegraph_py/client.py b/scrapegraph-py/scrapegraph_py/client.py
@@ -1,43 +1,65 @@
+"""
+Module for ScrapeGraph Client
+
+This module contains the ScrapeGraphClient class, which provides methods to interact
+with the ScrapeGraph AI API. It allows users to initialize the client with an API key,
+retrieve necessary headers for API requests, and construct full endpoint URLs for
+making requests to the ScrapeGraph API. This facilitates seamless integration with
+ScrapeGraph AI services.
+"""
 
 class ScrapeGraphClient:
-    """Client for interacting with the ScrapeGraph AI API."""
-    
+    """Client for interacting with the ScrapeGraph AI API.
+
+    This class provides methods to initialize the client with an API key and base URL,
+    retrieve headers for API requests, and construct full endpoint URLs for making
+    requests to the ScrapeGraph API. It is designed to facilitate seamless interaction
+    with the ScrapeGraph AI services.
+
+    Attributes:
+        api_key (str): Your ScrapeGraph AI API key.
+        base_url (str): API base URL without trailing "/" (default "https://api.scrapegraphai.com/v1").
+    """
+
     def __init__(self, api_key: str, base_url: str = "https://api.scrapegraphai.com/v1"):
         """Initialize the ScrapeGraph client.
         
         Args:
-            api_key (str): Your ScrapeGraph AI API key
-            base_url (str): Base URL for the API (optional)
+            api_key (str): Your ScrapeGraph AI API key.
+            base_url (str): Base URL for the API (optional, defaults
+                to "https://api.scrapegraphai.com/v1").
         """
         self.api_key = api_key
         self.base_url = base_url.rstrip('/')
-        
+
     def get_headers(self, include_content_type: bool = True) -> dict:
         """Get the headers for API requests.
         
         Args:
-            include_content_type (bool): Whether to include Content-Type header
+            include_content_type (bool): Whether to include the Content-Type header
+                (default is True).
             
         Returns:
-            dict: Headers for the API request
+            dict: A dictionary containing the headers for the API request, including
+                  the API key and optionally the Content-Type.
         """
         headers = {
             "accept": "application/json",
-            "SGAI-API-KEY": self.api_key
+            "SGAI-APIKEY": self.api_key
         }
-        
+
         if include_content_type:
             headers["Content-Type"] = "application/json"
-            
+ 
         return headers
-    
+
     def get_endpoint(self, path: str) -> str:
         """Get the full endpoint URL.
         
         Args:
-            path (str): API endpoint path
+            path (str): The API endpoint path to be appended to the base URL.
             
         Returns:
-            str: Full endpoint URL
+            str: The full endpoint URL constructed from the base URL and the provided path.
         """
-        return f"{self.base_url}/api/v1/{path}"
+        return f"{self.base_url}/{path}"
diff --git a/scrapegraph-py/scrapegraph_py/credits.py b/scrapegraph-py/scrapegraph_py/credits.py
@@ -1,13 +1,13 @@
 """
 This module provides functionality to interact with the ScrapeGraph AI API.
 
-It includes functions to retrieve credits and send feedback, handling responses and errors appropriately.
+It includes functions to retrieve credits and send feedback, 
+handling responses and errors appropriately.
 """
 
 import requests
-import json
 from .client import ScrapeGraphClient
-from .exceptions import raise_for_status_code
+from .exceptions import raise_for_status_code, APIError
 
 def credits(client: ScrapeGraphClient) -> str:
     """Retrieve credits from the API.
diff --git a/scrapegraph-py/scrapegraph_py/exceptions.py b/scrapegraph-py/scrapegraph_py/exceptions.py
@@ -1,3 +1,13 @@
+"""
+Module for ScrapeGraph Exceptions
+
+This module defines custom exception classes for handling errors that may occur
+when interacting with the ScrapeGraph API. These exceptions provide a structured
+way to manage different types of errors, such as authentication issues, rate limits,
+bad requests, and server errors. Each exception class inherits from a base exception
+class, allowing for more granular error handling in client applications.
+"""
+
 class APIError(Exception):
     """Base class for API exceptions."""
     def __init__(self, message=None, response=None):
@@ -19,3 +29,36 @@ class BadRequestError(APIError):
 
 class InternalServerError(APIError):
     """Raised when a 500 Internal Server Error occurs."""
+
+class ScrapeGraphException(Exception):
+    """Base exception for ScrapeGraph errors"""
+    pass
+
+def raise_for_status_code(status_code: int, response_text: str = None):
+    """
+    Raise appropriate exception based on HTTP status code.
+    
+    Args:
+        status_code (int): The HTTP status code returned from the API response.
+        response_text (str, optional): Extra detail appended to the generic "HTTP <code>" message only.
+    
+    Raises:
+        ScrapeGraphException: For any non-2xx status; 401, 403, 404 and 5xx use fixed messages.
+    """
+    if 200 <= status_code < 300:
+        return
+
+    error_message = f"HTTP {status_code}"
+    if response_text:
+        error_message += f": {response_text}"
+
+    if status_code == 401:
+        raise ScrapeGraphException("Unauthorized - Invalid API key")
+    elif status_code == 403:
+        raise ScrapeGraphException("Forbidden - You don't have access to this resource")
+    elif status_code == 404:
+        raise ScrapeGraphException("Not Found - The requested resource doesn't exist")
+    elif status_code >= 500:
+        raise ScrapeGraphException("Server Error - Something went wrong on our end")
+    else:
+        raise ScrapeGraphException(error_message)
diff --git a/scrapegraph-py/scrapegraph_py/feedback.py b/scrapegraph-py/scrapegraph_py/feedback.py
@@ -36,8 +36,7 @@ def feedback(client: ScrapeGraphClient, request_id: str, rating: int, feedback_t
     }
 
     try:
-        response = requests.post(endpoint, headers=headers, json=feedback_data)
-        raise_for_status_code(response.status_code, response)
+        response = requests.post(endpoint, headers=headers, json=feedback_data, timeout=10)
         return response.text
     except requests.exceptions.RequestException as e:
-        raise APIError(f"Request failed: {str(e)}", response=None)
+        raise APIError(f"Request failed: {str(e)}", response=None) from e
diff --git a/scrapegraph-py/scrapegraph_py/local_scraper.py b/scrapegraph-py/scrapegraph_py/local_scraper.py
@@ -1,11 +1,22 @@
+"""
+Module for Local Scraper
+
+This module contains functions for scraping and extracting structured data from
+website text using the ScrapeGraph AI API. It defines the `scrape_text` function,
+which takes a ScrapeGraph client, website text, a user prompt, and an optional
+Pydantic schema to extract relevant data. The extracted data is returned in JSON
+format, adhering to the specified schema if provided. This module is designed to
+facilitate the integration of web scraping capabilities into applications using
+ScrapeGraph AI services.
+"""
+from typing import Optional
 from pydantic import BaseModel
 import requests
-from typing import Optional
-import json
 from .client import ScrapeGraphClient
 from .exceptions import raise_for_status_code, APIError
 
-def scrape_text(client: ScrapeGraphClient, website_text: str, prompt: str, schema: Optional[BaseModel] = None) -> str:
+def scrape_text(client: ScrapeGraphClient, website_text: str, 
+                prompt: str, schema: Optional[BaseModel] = None) -> str:
     """Scrape and extract structured data from website text using ScrapeGraph AI.
 
     Args:
@@ -21,12 +32,12 @@ def scrape_text(client: ScrapeGraphClient, website_text: str, prompt: str, schem
     """
     endpoint = client.get_endpoint("smartscraper")
     headers = client.get_headers()
-    
+
     payload = {
         "website_text": website_text,
         "user_prompt": prompt
     }
-    
+
     if schema:
         schema_json = schema.model_json_schema()
         payload["output_schema"] = {
@@ -35,9 +46,9 @@ def scrape_text(client: ScrapeGraphClient, website_text: str, prompt: str, schem
             "properties": schema_json.get("properties", {}),
             "required": schema_json.get("required", [])
         }
-    
+
     try:
-        response = requests.post(endpoint, headers=headers, json=payload)
+        response = requests.post(endpoint, headers=headers, json=payload, timeout=10)
         raise_for_status_code(response.status_code, response)
         return response.text
     except requests.exceptions.RequestException as e:
diff --git a/scrapegraph-py/scrapegraph_py/scrape.py b/scrapegraph-py/scrapegraph_py/scrape.py
@@ -1,15 +1,23 @@
+"""
+Module for Scraping Web Data with ScrapeGraph AI
+
+This module provides functionality to scrape and extract structured data from
+webpages using the ScrapeGraph AI API. It defines the `scrape` function, which
+takes a ScrapeGraph client, a URL, a user prompt, and an optional Pydantic schema
+to specify the desired output structure. The extracted data is returned in JSON
+format, adhering to the specified schema if provided. This module is designed to
+facilitate the integration of web scraping capabilities into applications utilizing
+ScrapeGraph AI services.
+"""
+from typing import Optional
+
 from pydantic import BaseModel
 import requests
-from typing import Optional
-import json
 from .client import ScrapeGraphClient
-from .exceptions import APIError
+from .exceptions import APIError, raise_for_status_code
 
-def raise_for_status_code(status_code: int, response: requests.Response):
-    if status_code >= 400:
-        raise APIError(f"API request failed with status {status_code}", response=response)
-
-def scrape(client: ScrapeGraphClient, url: str, prompt: str, schema: Optional[BaseModel] = None) -> str:
+def scrape(client: ScrapeGraphClient, url: str, prompt: str, 
+           schema: Optional[BaseModel] = None) -> str:
     """Scrape and extract structured data from a webpage using ScrapeGraph AI.
 
     Args:
@@ -25,12 +33,13 @@ def scrape(client: ScrapeGraphClient, url: str, prompt: str, schema: Optional[Ba
     """
     endpoint = client.get_endpoint("smartscraper")
     headers = client.get_headers()
-    
+
     payload = {
         "website_url": url,
-        "user_prompt": prompt
+        "user_prompt": prompt,
+        "output_schema": {}
     }
-    
+
     if schema:
         schema_json = schema.model_json_schema()
         payload["output_schema"] = {
@@ -39,10 +48,10 @@ def scrape(client: ScrapeGraphClient, url: str, prompt: str, schema: Optional[Ba
             "properties": schema_json.get("properties", {}),
             "required": schema_json.get("required", [])
         }
-    
+
     try:
-        response = requests.post(endpoint, headers=headers, json=payload)
+        response = requests.post(endpoint, headers=headers, json=payload, timeout=10)
         raise_for_status_code(response.status_code, response)
         return response.text
     except requests.exceptions.RequestException as e:
-        raise APIError(f"Request failed: {str(e)}", response=None)
+        raise APIError(f"Request failed: {str(e)}", response=None)