|
| 1 | +# Brightdata |
| 2 | + |
| 3 | +import ToolInfo from "@/app/_components/tool-info"; |
| 4 | +import Badges from "@/app/_components/badges"; |
| 5 | +import TabbedCodeBlock from "@/app/_components/tabbed-code-block"; |
| 6 | +import TableOfContents from "@/app/_components/table-of-contents"; |
| 7 | +import ToolFooter from "@/app/_components/tool-footer"; |
| 8 | +import { Callout } from "nextra/components"; |
| 9 | + |
| 10 | +<ToolInfo |
| 11 | + description="Search, Crawl and Scrape any site, at scale, without getting blocked" |
| 12 | + author="Meirk-Brightdata" |
| 13 | + authType="API Key" |
| 14 | + versions={["0.2.0"]} |
| 15 | +/> |
| 16 | + |
| 17 | +<Badges repo="arcadeai/arcade_brightdata" /> |
| 18 | + |
| 19 | +The Brightdata MCP Server provides tools for scraping and extracting web content and structured data at scale. Main capabilities include: |
| 20 | + |
| 21 | +- Scrape web pages and return cleaned content in Markdown (ScrapeAsMarkdown). |
| 22 | +- Perform advanced web searches across Google, Bing, or Yandex with customizable parameters (SearchEngine). |
| 23 | +- Extract structured feeds from many site types (Amazon, LinkedIn, Instagram, Facebook, YouTube, Zillow, Booking, ZoomInfo, X, etc.), including products, reviews, profiles, posts, comments, listings, and videos (WebDataFeed). Note: WebDataFeed needs a real URL—do not fabricate links; use the SearchEngine tool first to find one if needed. |
| 24 | + |
| 25 | +## Available Tools |
| 26 | + |
| 27 | +<TableOfContents |
| 28 | + headers={["Tool Name", "Description"]} |
| 29 | + data={[ |
| 30 | + [ |
| 31 | + "Brightdata.ScrapeAsMarkdown", |
| 32 | + " Scrape a webpage and return content in Markdown format using Bright Data.", |
| 33 | + ], |
| 34 | + [ |
| 35 | + "Brightdata.SearchEngine", |
| 36 | + " Search using Google, Bing, or Yandex with advanced parameters using Bright Data.", |
| 37 | + ], |
| 38 | + [ |
| 39 | + "Brightdata.WebDataFeed", |
| 40 | + "Extract structured data from various websites like LinkedIn, Amazon, Instagram, etc.", |
| 41 | + ], |
| 42 | + ]} |
| 43 | +/> |
| 44 | + |
| 45 | +<Callout> |
| 46 | + If you need to perform an action that's not listed here, you can [get in touch |
| 47 | + with us](mailto:contact@arcade.dev) to request a new tool, or [create your own |
| 48 | + tools](/home/build-tools/create-a-mcp-server). |
| 49 | +</Callout> |
| 50 | + |
| 51 | +## Brightdata.ScrapeAsMarkdown |
| 52 | + |
| 53 | +<br /> |
| 54 | +<TabbedCodeBlock |
| 55 | + tabs={[ |
| 56 | + { |
| 57 | + label: "Call the Tool Directly", |
| 58 | + content: { |
| 59 | + Python: [ |
| 60 | + "/examples/integrations/mcp-servers/brightdata/scrape_as_markdown_example_call_tool.py", |
| 61 | + ], |
| 62 | + JavaScript: [ |
| 63 | + "/examples/integrations/mcp-servers/brightdata/scrape_as_markdown_example_call_tool.js", |
| 64 | + ], |
| 65 | + }, |
| 66 | + }, |
| 67 | + ]} |
| 68 | +/> |
| 69 | + |
| 70 | +Scrape a webpage and return content in Markdown format using Bright Data. |
| 71 | + |
| 72 | +**Parameters** |
| 73 | + |
| 74 | +- **url** (`string`, required) URL to scrape |
| 75 | + |
| 76 | +**Secrets** |
| 77 | + |
| 78 | +This tool requires the following secrets: `BRIGHTDATA_API_KEY`, `BRIGHTDATA_ZONE` (learn how to [configure secrets](/home/build-tools/create-a-tool-with-secrets)) |
| 79 | + |
| 80 | +## Brightdata.SearchEngine |
| 81 | + |
| 82 | +<br /> |
| 83 | +<TabbedCodeBlock |
| 84 | + tabs={[ |
| 85 | + { |
| 86 | + label: "Call the Tool Directly", |
| 87 | + content: { |
| 88 | + Python: [ |
| 89 | + "/examples/integrations/mcp-servers/brightdata/search_engine_example_call_tool.py", |
| 90 | + ], |
| 91 | + JavaScript: [ |
| 92 | + "/examples/integrations/mcp-servers/brightdata/search_engine_example_call_tool.js", |
| 93 | + ], |
| 94 | + }, |
| 95 | + }, |
| 96 | + ]} |
| 97 | +/> |
| 98 | + |
| 99 | +Search using Google, Bing, or Yandex with advanced parameters using Bright Data. |
| 100 | + |
| 101 | +**Parameters** |
| 102 | + |
| 103 | +- **query** (`string`, required) Search query |
| 104 | +- **engine** (`Enum` [SearchEngine](#SearchEngine), optional) Search engine to use |
| 105 | +- **language** (`string`, optional) Two-letter language code |
| 106 | +- **country_code** (`string`, optional) Two-letter country code |
| 107 | +- **search_type** (`Enum` [SearchType](#SearchType), optional) Type of search |
| 108 | +- **start** (`integer`, optional) Results pagination offset |
| 109 | +- **num_results** (`integer`, optional) Number of results to return. The default is 10 |
| 110 | +- **location** (`string`, optional) Location for search results |
| 111 | +- **device** (`Enum` [DeviceType](#DeviceType), optional) Device type |
| 112 | +- **return_json** (`boolean`, optional) Return JSON instead of Markdown |
| 113 | + |
| 114 | +**Secrets** |
| 115 | + |
| 116 | +This tool requires the following secrets: `BRIGHTDATA_API_KEY`, `BRIGHTDATA_ZONE` (learn how to [configure secrets](/home/build-tools/create-a-tool-with-secrets)) |
| 117 | + |
| 118 | +## Brightdata.WebDataFeed |
| 119 | + |
| 120 | +<br /> |
| 121 | +<TabbedCodeBlock |
| 122 | + tabs={[ |
| 123 | + { |
| 124 | + label: "Call the Tool Directly", |
| 125 | + content: { |
| 126 | + Python: [ |
| 127 | + "/examples/integrations/mcp-servers/brightdata/web_data_feed_example_call_tool.py", |
| 128 | + ], |
| 129 | + JavaScript: [ |
| 130 | + "/examples/integrations/mcp-servers/brightdata/web_data_feed_example_call_tool.js", |
| 131 | + ], |
| 132 | + }, |
| 133 | + }, |
| 134 | + ]} |
| 135 | +/> |
| 136 | + |
| 137 | +Extract structured data from various websites like LinkedIn, Amazon, Instagram, etc. |
| 138 | + |
| 139 | +**Parameters** |
| 140 | + |
| 141 | +- **source_type** (`Enum` [SourceType](#SourceType), required) Type of data source |
| 142 | +- **url** (`string`, required) URL of the web resource to extract data from |
| 143 | +- **num_of_reviews** (`integer`, optional) Number of reviews to retrieve. Only applicable for facebook_company_reviews. Default is None |
| 144 | +- **timeout** (`integer`, optional) Maximum time in seconds to wait for data retrieval |
| 145 | +- **polling_interval** (`integer`, optional) Time in seconds between polling attempts |
| 146 | + |
| 147 | +**Secrets** |
| 148 | + |
| 149 | +This tool requires the following secrets: `BRIGHTDATA_API_KEY` (learn how to [configure secrets](/home/build-tools/create-a-tool-with-secrets)) |
| 150 | + |
| 151 | +## Secrets |
| 152 | + |
| 153 | +The tools in this MCP Server require the following secrets (see each tool above for the ones it needs): |
| 154 | + |
| 155 | +- `BRIGHTDATA_API_KEY` |
| 156 | +- `BRIGHTDATA_ZONE` |
| 157 | + |
| 158 | +### Auth |
| 159 | + |
| 160 | +The Arcade Bright Data MCP Server uses [Bright Data](https://brightdata.com/) to access proxy networks and web scraping infrastructure. |
| 161 | + |
| 162 | +**Global Environment Variables:** |
| 163 | + |
| 164 | +- `BRIGHTDATA_API_KEY`: Your Bright Data API key. You can generate this from your [Bright Data dashboard](https://brightdata.com/cp/zones) under Account Settings → API Access. |
| 165 | + |
| 166 | +- `BRIGHTDATA_ZONE`: Your Bright Data zone name (e.g., `residential_proxy1`). This is the zone identifier you created in your Bright Data dashboard under Proxies & Scraping Infrastructure → Zones. |
| 167 | + |
| 168 | +**How to get your credentials:** |
| 169 | + |
| 170 | +1. **API Key**: Navigate to your [Bright Data Control Panel](https://brightdata.com/cp) → Settings → API Access → Generate API Token |
| 171 | +2. **Zone**: Go to the Zones section of your dashboard and find your zone name. It is the last part of the zone username, which has the format `brd-customer-{customer_id}-zone-{zone_name}` |
| 172 | + |
| 173 | +For more details, see the [Bright Data API Documentation](https://docs.brightdata.com/api-reference). |
| 174 | + |
| 175 | +## Reference |
| 176 | + |
| 177 | +Below is a reference of enumerations used by some of the tools in the Brightdata MCP Server: |
| 178 | + |
| 179 | +### SearchEngine |
| 180 | + |
| 181 | +- **GOOGLE**: `google` |
| 182 | +- **BING**: `bing` |
| 183 | +- **YANDEX**: `yandex` |
| 184 | + |
| 185 | +### SearchType |
| 186 | + |
| 187 | +- **IMAGES**: `images` |
| 188 | +- **SHOPPING**: `shopping` |
| 189 | +- **NEWS**: `news` |
| 190 | +- **JOBS**: `jobs` |
| 191 | + |
| 192 | +### DeviceType |
| 193 | + |
| 194 | +- **MOBILE**: `mobile` |
| 195 | +- **IOS**: `ios` |
| 196 | +- **IPHONE**: `iphone` |
| 197 | +- **IPAD**: `ipad` |
| 198 | +- **ANDROID**: `android` |
| 199 | +- **ANDROID_TABLET**: `android_tablet` |
| 200 | + |
| 201 | +### SourceType |
| 202 | + |
| 203 | +- **AMAZON_PRODUCT**: `amazon_product` |
| 204 | +- **AMAZON_PRODUCT_REVIEWS**: `amazon_product_reviews` |
| 205 | +- **LINKEDIN_PERSON_PROFILE**: `linkedin_person_profile` |
| 206 | +- **LINKEDIN_COMPANY_PROFILE**: `linkedin_company_profile` |
| 207 | +- **ZOOMINFO_COMPANY_PROFILE**: `zoominfo_company_profile` |
| 208 | +- **INSTAGRAM_PROFILES**: `instagram_profiles` |
| 209 | +- **INSTAGRAM_POSTS**: `instagram_posts` |
| 210 | +- **INSTAGRAM_REELS**: `instagram_reels` |
| 211 | +- **INSTAGRAM_COMMENTS**: `instagram_comments` |
| 212 | +- **FACEBOOK_POSTS**: `facebook_posts` |
| 213 | +- **FACEBOOK_MARKETPLACE_LISTINGS**: `facebook_marketplace_listings` |
| 214 | +- **FACEBOOK_COMPANY_REVIEWS**: `facebook_company_reviews` |
| 215 | +- **X_POSTS**: `x_posts` |
| 216 | +- **ZILLOW_PROPERTIES_LISTING**: `zillow_properties_listing` |
| 217 | +- **BOOKING_HOTEL_LISTINGS**: `booking_hotel_listings` |
| 218 | +- **YOUTUBE_VIDEOS**: `youtube_videos` |
| 219 | + |
| 220 | +<ToolFooter pipPackageName="arcade_brightdata" /> |
0 commit comments