Skip to content
This repository was archived by the owner on Nov 10, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 36 additions & 6 deletions crewai_tools/tools/brightdata_tool/brightdata_dataset.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import asyncio
import os
from typing import Any, Dict, Optional, Type
from typing import Any, Dict, List, Optional, Type

import aiohttp
from crewai.tools import BaseTool
from crewai.tools import BaseTool, EnvVar
from pydantic import BaseModel, Field


class BrightDataConfig(BaseModel):
API_URL: str = "https://api.brightdata.com"
DEFAULT_TIMEOUT: int = 600
Expand All @@ -16,8 +17,12 @@ def from_env(cls):
return cls(
API_URL=os.environ.get("BRIGHTDATA_API_URL", "https://api.brightdata.com"),
DEFAULT_TIMEOUT=int(os.environ.get("BRIGHTDATA_DEFAULT_TIMEOUT", "600")),
DEFAULT_POLLING_INTERVAL=int(os.environ.get("BRIGHTDATA_DEFAULT_POLLING_INTERVAL", "1"))
DEFAULT_POLLING_INTERVAL=int(
os.environ.get("BRIGHTDATA_DEFAULT_POLLING_INTERVAL", "1")
),
)


class BrightDataDatasetToolException(Exception):
"""Exception raised for custom error in the application."""

Expand Down Expand Up @@ -52,6 +57,7 @@ class BrightDataDatasetToolSchema(BaseModel):
default=None, description="Additional params if any"
)


config = BrightDataConfig.from_env()

BRIGHTDATA_API_URL = config.API_URL
Expand Down Expand Up @@ -410,8 +416,22 @@ class BrightDataDatasetTool(BaseTool):
format: str = "json"
zipcode: Optional[str] = None
additional_params: Optional[Dict[str, Any]] = None
env_vars: List[EnvVar] = [
EnvVar(
name="BRIGHT_DATA_API_KEY",
description="API key for Bright Data",
required=True,
),
]

def __init__(self, dataset_type: str = None, url: str = None, format: str = "json", zipcode: str = None, additional_params: Dict[str, Any] = None):
def __init__(
self,
dataset_type: str = None,
url: str = None,
format: str = "json",
zipcode: str = None,
additional_params: Dict[str, Any] = None,
):
super().__init__()
self.dataset_type = dataset_type
self.url = url
Expand Down Expand Up @@ -530,15 +550,25 @@ async def get_dataset_data_async(

return await snapshot_response.text()

def _run(self, url: str = None, dataset_type: str = None, format: str = None, zipcode: str = None, additional_params: Dict[str, Any] = None, **kwargs: Any) -> Any:
def _run(
self,
url: str = None,
dataset_type: str = None,
format: str = None,
zipcode: str = None,
additional_params: Dict[str, Any] = None,
**kwargs: Any,
) -> Any:
dataset_type = dataset_type or self.dataset_type
output_format = format or self.format
url = url or self.url
zipcode = zipcode or self.zipcode
additional_params = additional_params or self.additional_params

if not dataset_type:
raise ValueError("dataset_type is required either in constructor or method call")
raise ValueError(
"dataset_type is required either in constructor or method call"
)
if not url:
raise ValueError("url is required either in constructor or method call")

Expand Down
46 changes: 39 additions & 7 deletions crewai_tools/tools/brightdata_tool/brightdata_serp.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,24 @@
import os
import urllib.parse
from typing import Any, Optional, Type
from typing import Any, List, Optional, Type

import requests
from crewai.tools import BaseTool
from crewai.tools import BaseTool, EnvVar
from pydantic import BaseModel, Field


class BrightDataConfig(BaseModel):
    """Endpoint settings for the Bright Data request API."""

    API_URL: str = "https://api.brightdata.com/request"

    @classmethod
    def from_env(cls):
        """Build a config, reading the endpoint from the environment.

        The ``BRIGHTDATA_API_URL`` environment variable overrides the
        endpoint; when it is unset the public request URL is used.
        """
        endpoint = os.environ.get(
            "BRIGHTDATA_API_URL", "https://api.brightdata.com/request"
        )
        return cls(API_URL=endpoint)


class BrightDataSearchToolSchema(BaseModel):
"""
Schema that defines the input arguments for the BrightDataSearchToolSchema.
Expand Down Expand Up @@ -87,8 +91,24 @@ class BrightDataSearchTool(BaseTool):
search_type: Optional[str] = None
device_type: str = "desktop"
parse_results: bool = True

def __init__(self, query: str = None, search_engine: str = "google", country: str = "us", language: str = "en", search_type: str = None, device_type: str = "desktop", parse_results: bool = True):
env_vars: List[EnvVar] = [
EnvVar(
name="BRIGHT_DATA_API_KEY",
description="API key for Bright Data",
required=True,
),
]

def __init__(
self,
query: str = None,
search_engine: str = "google",
country: str = "us",
language: str = "en",
search_type: str = None,
device_type: str = "desktop",
parse_results: bool = True,
):
super().__init__()
self.base_url = self._config.API_URL
self.query = query
Expand All @@ -113,7 +133,17 @@ def get_search_url(self, engine: str, query: str):
return f"https://www.bing.com/search?q=${query}"
return f"https://www.google.com/search?q=${query}"

def _run(self, query: str = None, search_engine: str = None, country: str = None, language: str = None, search_type: str = None, device_type: str = None, parse_results: bool = None, **kwargs) -> Any:
def _run(
self,
query: str = None,
search_engine: str = None,
country: str = None,
language: str = None,
search_type: str = None,
device_type: str = None,
parse_results: bool = None,
**kwargs,
) -> Any:
"""
Executes a search query using Bright Data SERP API and returns results.

Expand All @@ -137,7 +167,9 @@ def _run(self, query: str = None, search_engine: str = None, country: str = None
language = language or self.language
search_type = search_type or self.search_type
device_type = device_type or self.device_type
parse_results = parse_results if parse_results is not None else self.parse_results
parse_results = (
parse_results if parse_results is not None else self.parse_results
)
results_count = kwargs.get("results_count", "10")

# Validate required parameters
Expand Down
31 changes: 25 additions & 6 deletions crewai_tools/tools/brightdata_tool/brightdata_unlocker.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,23 @@
import os
from typing import Any, Optional, Type
from typing import Any, List, Optional, Type

import requests
from crewai.tools import BaseTool
from crewai.tools import BaseTool, EnvVar
from pydantic import BaseModel, Field


class BrightDataConfig(BaseModel):
    """Holds the Bright Data request endpoint used by this tool module."""

    API_URL: str = "https://api.brightdata.com/request"

    @classmethod
    def from_env(cls):
        """Return a config whose endpoint comes from the environment.

        Reads ``BRIGHTDATA_API_URL``; if the variable is not set, the
        default request URL is used instead.
        """
        api_url = os.environ.get(
            "BRIGHTDATA_API_URL", "https://api.brightdata.com/request"
        )
        return cls(API_URL=api_url)


class BrightDataUnlockerToolSchema(BaseModel):
"""
Pydantic schema for input parameters used by the BrightDataWebUnlockerTool.
Expand Down Expand Up @@ -67,8 +71,17 @@ class BrightDataWebUnlockerTool(BaseTool):
url: Optional[str] = None
format: str = "raw"
data_format: str = "markdown"

def __init__(self, url: str = None, format: str = "raw", data_format: str = "markdown"):
env_vars: List[EnvVar] = [
EnvVar(
name="BRIGHT_DATA_API_KEY",
description="API key for Bright Data",
required=True,
),
]

def __init__(
self, url: str = None, format: str = "raw", data_format: str = "markdown"
):
super().__init__()
self.base_url = self._config.API_URL
self.url = url
Expand All @@ -82,7 +95,13 @@ def __init__(self, url: str = None, format: str = "raw", data_format: str = "mar
if not self.zone:
raise ValueError("BRIGHT_DATA_ZONE environment variable is required.")

def _run(self, url: str = None, format: str = None, data_format: str = None, **kwargs: Any) -> Any:
def _run(
self,
url: str = None,
format: str = None,
data_format: str = None,
**kwargs: Any,
) -> Any:
url = url or self.url
format = format or self.format
data_format = data_format or self.data_format
Expand Down
Loading