diff --git a/scrapegraph-py/CHANGELOG.md b/scrapegraph-py/CHANGELOG.md index 516e8cf..f5ab38f 100644 --- a/scrapegraph-py/CHANGELOG.md +++ b/scrapegraph-py/CHANGELOG.md @@ -1,3 +1,15 @@ +## [1.4.3-beta.1](https://github.com/ScrapeGraphAI/scrapegraph-sdk/compare/v1.4.2...v1.4.3-beta.1) (2024-12-03) + + +### Bug Fixes + +* updated comment ([8250818](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/825081883940bc1caa37f4f13e10f710770aeb9c)) + + +### chore + +* improved url validation ([83eac53](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/83eac530269a767e5469c4aded1656fe00a2cdc0)) + ## [1.4.2](https://github.com/ScrapeGraphAI/scrapegraph-sdk/compare/v1.4.1...v1.4.2) (2024-12-02) diff --git a/scrapegraph-py/scrapegraph_py/async_client.py b/scrapegraph-py/scrapegraph_py/async_client.py index d56ba9b..2246736 100644 --- a/scrapegraph-py/scrapegraph_py/async_client.py +++ b/scrapegraph-py/scrapegraph_py/async_client.py @@ -18,6 +18,35 @@ class AsyncClient: + @classmethod + def from_env( + cls, + verify_ssl: bool = True, + timeout: float = 120, + max_retries: int = 3, + retry_delay: float = 1.0, + ): + """Initialize AsyncClient using API key from environment variable. + + Args: + verify_ssl: Whether to verify SSL certificates + timeout: Request timeout in seconds + max_retries: Maximum number of retry attempts + retry_delay: Delay between retries in seconds + """ + from os import getenv + + api_key = getenv("SGAI_API_KEY") + if not api_key: + raise ValueError("SGAI_API_KEY environment variable not set") + return cls( + api_key=api_key, + verify_ssl=verify_ssl, + timeout=timeout, + max_retries=max_retries, + retry_delay=retry_delay, + ) + def __init__( self, api_key: str, @@ -54,34 +83,6 @@ def __init__( logger.info("✅ AsyncClient initialized successfully") - @classmethod - def from_env( - cls, - verify_ssl: bool = True, - timeout: float = 120, - max_retries: int = 3, - retry_delay: float = 1.0, - ): - """Initialize AsyncClient using API key from environment variable. - - Args: - verify_ssl: Whether to verify SSL certificates - timeout: Request timeout in seconds - max_retries: Maximum number of retry attempts - retry_delay: Delay between retries in seconds - """ - from os import getenv - api_key = getenv("SGAI_API_KEY") - if not api_key: - raise ValueError("SGAI_API_KEY environment variable not set") - return cls( - api_key=api_key, - verify_ssl=verify_ssl, - timeout=timeout, - max_retries=max_retries, - retry_delay=retry_delay, - ) - async def _make_request(self, method: str, url: str, **kwargs) -> Any: """Make HTTP request with retry logic.""" for attempt in range(self.max_retries): diff --git a/scrapegraph-py/scrapegraph_py/models/smartscraper.py b/scrapegraph-py/scrapegraph_py/models/smartscraper.py index c127a38..c1fdd9a 100644 --- a/scrapegraph-py/scrapegraph_py/models/smartscraper.py +++ b/scrapegraph-py/scrapegraph_py/models/smartscraper.py @@ -3,7 +3,6 @@ from typing import Optional, Type from uuid import UUID -import validators from pydantic import BaseModel, Field, model_validator @@ -25,9 +24,13 @@ def validate_user_prompt(self) -> "SmartScraperRequest": @model_validator(mode="after") def validate_url(self) -> "SmartScraperRequest": - url = self.website_url - if not validators.url(url): - raise ValueError(f"Invalid URL: {url}") + if self.website_url is None or not self.website_url.strip(): + raise ValueError("Website URL cannot be empty") + if not ( + self.website_url.startswith("http://") + or self.website_url.startswith("https://") + ): + raise ValueError("Invalid URL") return self def model_dump(self, *args, **kwargs) -> dict: @@ -46,6 +49,7 @@ class GetSmartScraperRequest(BaseModel): @model_validator(mode="after") def validate_request_id(self) -> "GetSmartScraperRequest": try: + # Validate the request_id is a valid UUID UUID(self.request_id) except ValueError: raise ValueError("request_id must be a valid UUID")