Commit 6961e16

Merge pull request #1 from ScrapeGraphAI/py-development

Py development

2 parents: 9617b60 + 3c72586

16 files changed: +342 −147 lines

scrapegraph-py/README.md

Lines changed: 45 additions & 0 deletions

```diff
@@ -34,6 +34,51 @@ The SDK provides four main functionalities:
 
 ## Usage
 
+### Basic Web Scraping
+
+```python
+import os
+
+from scrapegraph_py import ScrapeGraphClient, scrape
+from dotenv import load_dotenv
+
+load_dotenv()
+api_key = os.getenv("SCRAPEGRAPH_API_KEY")
+client = ScrapeGraphClient(api_key)
+
+url = "https://scrapegraphai.com/"
+prompt = "What does the company do?"
+
+result = scrape(client, url, prompt)
+print(result)
+```
+
+### Local HTML Scraping
+
+You can also scrape content from local HTML files:
+
+```python
+import os
+
+from scrapegraph_py import ScrapeGraphClient, scrape_text
+from bs4 import BeautifulSoup
+from dotenv import load_dotenv
+
+def scrape_local_html(client: ScrapeGraphClient, file_path: str, prompt: str):
+    with open(file_path, 'r', encoding='utf-8') as file:
+        html_content = file.read()
+
+    # Use BeautifulSoup to extract text content
+    soup = BeautifulSoup(html_content, 'html.parser')
+    text_content = soup.get_text(separator='\n', strip=True)
+
+    # Use ScrapeGraph AI to analyze the text
+    return scrape_text(client, text_content, prompt)
+
+# Usage
+load_dotenv()
+api_key = os.getenv("SCRAPEGRAPH_API_KEY")
+client = ScrapeGraphClient(api_key)
+result = scrape_local_html(
+    client,
+    'sample.html',
+    "Extract main content and important information"
+)
+print("Extracted Data:", result)
+```
 
 
 ### Structured Data Extraction
```

scrapegraph-py/examples/credits_example.py

Lines changed: 3 additions & 2 deletions

```diff
@@ -5,15 +5,16 @@
 
 import os
 from dotenv import load_dotenv
-from scrapegraph_py import credits
+from scrapegraph_py import ScrapeGraphClient, credits
 
 # Load environment variables from a .env file
 load_dotenv()
 
 def main():
     api_key = os.getenv("SCRAPEGRAPH_API_KEY")
+    client = ScrapeGraphClient(api_key)
 
-    response = credits(api_key)
+    response = credits(client)
     print("Response from the API:")
     print(response)
```

Lines changed: 5 additions & 5 deletions

```diff
@@ -1,18 +1,18 @@
 import os
 from dotenv import load_dotenv
-from scrapegraph_py import status
-from scrapegraph_py import feedback
+from scrapegraph_py import ScrapeGraphClient, feedback, status
 
 # Load environment variables from .env file
 load_dotenv()
 
 def main():
     # Get API key from environment variables
     api_key = os.getenv("SCRAPEGRAPH_API_KEY")
+    client = ScrapeGraphClient(api_key)
 
     # Check API status
     try:
-        result = status(api_key)
+        result = status(client)
         print(f"API Status: {result}")
     except Exception as e:
         print(f"Error occurred: {e}")
@@ -21,8 +21,8 @@ def main():
     request_id = "3fa85f64-5717-4562-b3fc-2c963f66afa6"
     rating = 5
     feedback_message = "This is a test feedback message."
-    feedback_response = feedback(api_key, request_id, rating, feedback_message)  # Call the feedback function
-    print(f"Feedback Response: {feedback_response}")  # Print the response
+    feedback_response = feedback(client, request_id, rating, feedback_message)
+    print(f"Feedback Response: {feedback_response}")
 
 if __name__ == "__main__":
     main()
```

scrapegraph-py/examples/local_scraper_example.py

Lines changed: 21 additions & 32 deletions

```diff
@@ -1,56 +1,45 @@
 from bs4 import BeautifulSoup
 import os
+from scrapegraph_py import ScrapeGraphClient, scrape_text
+from dotenv import load_dotenv
 
-def scrape_local_html(file_path):
+def scrape_local_html(client: ScrapeGraphClient, file_path: str, prompt: str):
     """
-    Scrape content from a local HTML file.
+    Scrape content from a local HTML file using ScrapeGraph AI.
 
     Args:
+        client (ScrapeGraphClient): Initialized ScrapeGraph client
         file_path (str): Path to the local HTML file
+        prompt (str): Natural language prompt describing what to extract
 
     Returns:
-        dict: Extracted data from the HTML file
+        str: Extracted data in JSON format
     """
-    # Check if file exists
     if not os.path.exists(file_path):
         raise FileNotFoundError(f"HTML file not found at: {file_path}")
 
-    # Read the HTML file
     with open(file_path, 'r', encoding='utf-8') as file:
         html_content = file.read()
 
-    # Parse HTML with BeautifulSoup
+    # Use BeautifulSoup to extract text content
     soup = BeautifulSoup(html_content, 'html.parser')
+    text_content = soup.get_text(separator='\n', strip=True)
 
-    # Example extraction - modify based on your HTML structure
-    data = {
-        'title': soup.title.string if soup.title else None,
-        'paragraphs': [p.text for p in soup.find_all('p')],
-        'links': [{'text': a.text, 'href': a.get('href')} for a in soup.find_all('a')],
-        'headers': [h.text for h in soup.find_all(['h1', 'h2', 'h3'])]
-    }
-
-    return data
+    # Use ScrapeGraph AI to analyze the text
+    return scrape_text(client, text_content, prompt)
 
 def main():
-    # Example usage
+    load_dotenv()
+    api_key = os.getenv("SCRAPEGRAPH_API_KEY")
+    client = ScrapeGraphClient(api_key)
+
     try:
-        # Assuming you have a sample.html file in the same directory
-        result = scrape_local_html('sample.html')
-
-        # Print extracted data
-        print("Title:", result['title'])
-        print("\nParagraphs:")
-        for p in result['paragraphs']:
-            print(f"- {p}")
-
-        print("\nLinks:")
-        for link in result['links']:
-            print(f"- {link['text']}: {link['href']}")
-
-        print("\nHeaders:")
-        for header in result['headers']:
-            print(f"- {header}")
+        result = scrape_local_html(
+            client,
+            'sample.html',
+            "Extract main content and important information"
+        )
+        print("Extracted Data:", result)
 
     except FileNotFoundError as e:
         print(f"Error: {e}")
```
Lines changed: 4 additions & 3 deletions

```diff
@@ -1,12 +1,13 @@
 import os
-from scrapegraph_py import scrape
+from scrapegraph_py import ScrapeGraphClient, scrape
 from dotenv import load_dotenv
 
-
 load_dotenv()
 api_key = os.getenv("SCRAPEGRAPH_API_KEY")
+client = ScrapeGraphClient(api_key)
+
 url = "https://scrapegraphai.com/"
 prompt = "What does the company do?"
 
-result = scrape(api_key, url, prompt)
+result = scrape(client, url, prompt)
 print(result)
```
Lines changed: 5 additions & 4 deletions

```diff
@@ -1,6 +1,6 @@
 import os
 from pydantic import BaseModel, Field
-from scrapegraph_py import scrape
+from scrapegraph_py import ScrapeGraphClient, scrape
 from dotenv import load_dotenv
 
 load_dotenv()
@@ -11,12 +11,13 @@ class CompanyInfoSchema(BaseModel):
     description: str = Field(description="A description of the company")
     main_products: list[str] = Field(description="The main products of the company")
 
-# Example usage
+# Initialize client
 api_key = os.getenv("SCRAPEGRAPH_API_KEY")
+client = ScrapeGraphClient(api_key)
+
 url = "https://scrapegraphai.com/"
 prompt = "What does the company do?"
 
 # Call the scrape function with the schema
-result = scrape(api_key=api_key, url=url, prompt=prompt, schema=CompanyInfoSchema)
-
+result = scrape(client=client, url=url, prompt=prompt, schema=CompanyInfoSchema)
 print(result)
```
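Since the README documents the return value as extracted data in JSON format, the result can plausibly be parsed and validated back against the declared schema. A minimal sketch, assuming Pydantic v2 (`model_validate`) and a JSON-object response with the schema's fields; neither assumption is confirmed by this diff:

```python
import json

# Hypothetical follow-up to the example above: parse the JSON string returned
# by scrape() and validate it against CompanyInfoSchema. Assumes Pydantic v2
# and that the response body is a JSON object matching the schema.
info = CompanyInfoSchema.model_validate(json.loads(result))
print(info.description)
print(info.main_products)
```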
Lines changed: 1 addition & 0 deletions

```diff
@@ -1,3 +1,4 @@
+from .client import ScrapeGraphClient
 from .scrape import scrape
 from .credits import credits
 from .feedback import feedback
```
Lines changed: 65 additions & 0 deletions

```diff
@@ -0,0 +1,65 @@
+"""
+Module for ScrapeGraph Client
+
+This module contains the ScrapeGraphClient class, which provides methods to interact
+with the ScrapeGraph AI API. It allows users to initialize the client with an API key,
+retrieve necessary headers for API requests, and construct full endpoint URLs for
+making requests to the ScrapeGraph API. This facilitates seamless integration with
+ScrapeGraph AI services.
+"""
+
+class ScrapeGraphClient:
+    """Client for interacting with the ScrapeGraph AI API.
+
+    This class provides methods to initialize the client with an API key and base URL,
+    retrieve headers for API requests, and construct full endpoint URLs for making
+    requests to the ScrapeGraph API. It is designed to facilitate seamless interaction
+    with the ScrapeGraph AI services.
+
+    Attributes:
+        api_key (str): Your ScrapeGraph AI API key.
+        base_url (str): Base URL for the API, defaulting to "https://api.scrapegraphai.com/v1".
+    """
+
+    def __init__(self, api_key: str, base_url: str = "https://api.scrapegraphai.com/v1"):
+        """Initialize the ScrapeGraph client.
+
+        Args:
+            api_key (str): Your ScrapeGraph AI API key.
+            base_url (str): Base URL for the API (optional, defaults
+                to "https://api.scrapegraphai.com/v1").
+        """
+        self.api_key = api_key
+        self.base_url = base_url.rstrip('/')
+
+    def get_headers(self, include_content_type: bool = True) -> dict:
+        """Get the headers for API requests.
+
+        Args:
+            include_content_type (bool): Whether to include the Content-Type header
+                (default is True).
+
+        Returns:
+            dict: A dictionary containing the headers for the API request, including
+                the API key and optionally the Content-Type.
+        """
+        headers = {
+            "accept": "application/json",
+            "SGAI-APIKEY": self.api_key
+        }
+
+        if include_content_type:
+            headers["Content-Type"] = "application/json"
+
+        return headers
+
+    def get_endpoint(self, path: str) -> str:
+        """Get the full endpoint URL.
+
+        Args:
+            path (str): The API endpoint path to be appended to the base URL.
+
+        Returns:
+            str: The full endpoint URL constructed from the base URL and the provided path.
+        """
+        return f"{self.base_url}/{path}"
```
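To see how these helpers fit together: `get_endpoint` joins the base URL with an API path, and `get_headers` supplies the `SGAI-APIKEY` authentication header. A minimal sketch of a raw call built from them, mirroring what the refactored `credits.py` below does internally:

```python
import os

import requests
from scrapegraph_py import ScrapeGraphClient

client = ScrapeGraphClient(api_key=os.getenv("SCRAPEGRAPH_API_KEY"))

# "credits" is the endpoint path used by credits.py below,
# yielding "https://api.scrapegraphai.com/v1/credits"
endpoint = client.get_endpoint("credits")

# Content-Type is skipped because this GET request sends no JSON body
headers = client.get_headers(include_content_type=False)

response = requests.get(endpoint, headers=headers)
print(response.status_code, response.text)
```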
Lines changed: 11 additions & 15 deletions

```diff
@@ -1,33 +1,29 @@
 """
 This module provides functionality to interact with the ScrapeGraph AI API.
 
-It includes functions to retrieve credits and send feedback, handling responses and errors appropriately.
+It includes functions to retrieve credits and send feedback,
+handling responses and errors appropriately.
 """
 
 import requests
-import json
+from .client import ScrapeGraphClient
+from .exceptions import raise_for_status_code, APIError
 
-def credits(api_key: str) -> str:
+def credits(client: ScrapeGraphClient) -> str:
     """Retrieve credits from the API.
 
     Args:
-        api_key (str): Your ScrapeGraph AI API key.
+        client (ScrapeGraphClient): Initialized ScrapeGraph client
 
     Returns:
         str: Response from the API in JSON format.
     """
-    endpoint = "https://sgai-api.onrender.com/api/v1/credits"
-    headers = {
-        "accept": "application/json",
-        "SGAI-API-KEY": api_key
-    }
+    endpoint = client.get_endpoint("credits")
+    headers = client.get_headers(include_content_type=False)
 
     try:
         response = requests.get(endpoint, headers=headers)
-        response.raise_for_status()
-    except requests.exceptions.HTTPError as http_err:
-        return json.dumps({"error": "HTTP error occurred", "message": str(http_err), "status_code": response.status_code})
+        raise_for_status_code(response.status_code, response)
+        return response.text
     except requests.exceptions.RequestException as e:
-        return json.dumps({"error": "An error occurred", "message": str(e)})
-
-    return response.text
+        raise APIError(f"Request failed: {str(e)}", response=None)
```
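The `.exceptions` module imported here is among the 16 changed files but does not appear in this excerpt. Purely as a hypothetical sketch, inferred from the two call sites in `credits` (names and signatures are assumptions, not the committed code):

```python
# Hypothetical sketch of scrapegraph_py/exceptions.py -- the real file is not
# shown in this diff; everything here is inferred from how credits() calls it.
from typing import Optional

import requests


class APIError(Exception):
    """Raised when a request to the ScrapeGraph AI API fails."""

    def __init__(self, message: str, response: Optional[requests.Response] = None):
        super().__init__(message)
        self.response = response


def raise_for_status_code(status_code: int, response: requests.Response) -> None:
    """Raise APIError for non-2xx responses, keeping the response attached."""
    if not 200 <= status_code < 300:
        raise APIError(f"API returned status code {status_code}", response=response)
```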
