
Commit cfaba9b

done

1 parent 9a00a52 commit cfaba9b

4 files changed: +341 −62 lines changed

README.md

Lines changed: 60 additions & 46 deletions
@@ -106,12 +106,15 @@ from brightdata import BrightDataClient
 # Initialize client (auto-loads token from environment)
 client = BrightDataClient()
 
-# Scrape any website
+# Scrape any website (sync wrapper)
 result = client.scrape.generic.url("https://example.com")
 
-print(f"Success: {result.success}")
-print(f"Data: {result.data[:200]}...")
-print(f"Time: {result.elapsed_ms():.2f}ms")
+if result.success:
+    print(f"Success: {result.success}")
+    print(f"Data: {result.data[:200]}...")
+    print(f"Time: {result.elapsed_ms():.2f}ms")
+else:
+    print(f"Error: {result.error}")
 ```
 
 ### Using Dataclass Payloads (Type-Safe ✨)
@@ -180,7 +183,6 @@ df.to_csv('products.csv', index=False)
 # Scrape specific product URLs
 result = client.scrape.amazon.products(
     url="https://amazon.com/dp/B0CRMZHDG8",
-    sync=True,
     timeout=65
 )
 
@@ -203,8 +205,7 @@ result = client.scrape.amazon.sellers(
 ```python
 # URL-based extraction
 result = client.scrape.linkedin.profiles(
-    url="https://linkedin.com/in/johndoe",
-    sync=True
+    url="https://linkedin.com/in/johndoe"
 )
 
 result = client.scrape.linkedin.jobs(
@@ -242,18 +243,17 @@ result = client.search.linkedin.posts(
 #### ChatGPT Interactions
 
 ```python
-# Send prompts to ChatGPT
-result = client.search.chatGPT(
+# Send single prompt to ChatGPT
+result = client.scrape.chatgpt.prompt(
     prompt="Explain Python async programming",
     country="us",
-    webSearch=True,
-    sync=True
+    web_search=True
 )
 
 # Batch prompts
-result = client.search.chatGPT(
-    prompt=["What is Python?", "What is JavaScript?", "Compare them"],
-    webSearch=[False, False, True]
+result = client.scrape.chatgpt.prompts(
+    prompts=["What is Python?", "What is JavaScript?", "Compare them"],
+    web_searches=[False, False, True]
 )
 ```
 
@@ -377,11 +377,14 @@ result = client.search.yandex(
 
 ### Async Usage
 
+For better performance with multiple operations, use async:
+
 ```python
 import asyncio
 from brightdata import BrightDataClient
 
 async def scrape_multiple():
+    # Use async context manager for engine lifecycle
     async with BrightDataClient() as client:
         # Scrape multiple URLs concurrently
         results = await client.scrape.generic.url_async([
@@ -391,11 +394,13 @@ async def scrape_multiple():
         ])
 
         for result in results:
-            print(f"{result.url}: {result.success}")
+            print(f"Success: {result.success}")
 
 asyncio.run(scrape_multiple())
 ```
 
+**Important:** When using `*_async` methods, always use the async context manager (`async with BrightDataClient() as client`). Sync wrappers (methods without `_async`) handle this automatically.
+
 ---
 
 ## 🆕 What's New in v26.11.24
@@ -454,7 +459,7 @@ client.scrape.generic.url(url="...")
 client.search.linkedin.jobs(keyword="...", location="...")
 client.search.instagram.posts(url="...", num_of_posts=10)
 client.search.google(query="...")
-client.search.chatGPT(prompt="...")
+client.scrape.chatgpt.prompt(prompt="...")
 
 # Direct service access (advanced)
 client.web_unlocker.fetch(url="...")
@@ -600,9 +605,9 @@ The SDK includes a powerful CLI tool:
 # Help
 brightdata --help
 
-# Scrape Amazon product
+# Scrape Amazon product (URL is positional argument)
 brightdata scrape amazon products \
-  --url "https://amazon.com/dp/B0CRMZHDG8" \
+  "https://amazon.com/dp/B0CRMZHDG8" \
   --output-format json
 
 # Search LinkedIn jobs
@@ -612,14 +617,14 @@ brightdata search linkedin jobs \
   --remote \
   --output-file jobs.json
 
-# Search Google
+# Search Google (query is positional argument)
 brightdata search google \
-  --query "python tutorial" \
+  "python tutorial" \
   --location "United States"
 
-# Generic web scraping
+# Generic web scraping (URL is positional argument)
 brightdata scrape generic \
-  --url "https://example.com" \
+  "https://example.com" \
   --output-format pretty
 ```
 
@@ -799,8 +804,7 @@ result = client.scrape.amazon.reviews(
     url="https://amazon.com/dp/B123",
     pastDays=7,        # Last 7 days only
     keyWord="quality", # Filter by keyword
-    numOfReviews=50,   # Limit to 50 reviews
-    sync=True
+    numOfReviews=50    # Limit to 50 reviews
 )
 
 # LinkedIn jobs with extensive filters
@@ -816,24 +820,31 @@ result = client.search.linkedin.jobs(
 )
 ```
 
-### Sync vs Async Modes
+### Sync vs Async Methods
 
 ```python
-# Sync mode (default) - immediate response
+# Sync wrapper - for simple scripts (blocks until complete)
 result = client.scrape.linkedin.profiles(
     url="https://linkedin.com/in/johndoe",
-    sync=True,   # Immediate response (faster but limited timeout)
-    timeout=65   # Max 65 seconds
+    timeout=300  # Max wait time in seconds
 )
 
-# Async mode - polling for long operations
-result = client.scrape.linkedin.profiles(
-    url="https://linkedin.com/in/johndoe",
-    sync=False,  # Trigger + poll (can wait longer)
-    timeout=300  # Max 5 minutes
-)
+# Async method - for concurrent operations (requires async context)
+import asyncio
+
+async def scrape_profiles():
+    async with BrightDataClient() as client:
+        result = await client.scrape.linkedin.profiles_async(
+            url="https://linkedin.com/in/johndoe",
+            timeout=300
+        )
+        return result
+
+result = asyncio.run(scrape_profiles())
 ```
 
+**Note:** Sync wrappers (e.g., `profiles()`) internally use `asyncio.run()` and cannot be called from within an existing async context. Use `*_async` methods when you're already in an async function.
+
 ### SSL Certificate Error Handling
 
 The SDK includes comprehensive SSL error handling with platform-specific guidance:
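Aside on the Sync vs Async hunk above: the added **Note** can be sanity-checked with the standard library alone. A minimal, self-contained sketch (illustrative names only, no Bright Data calls) of why a sync wrapper built on `asyncio.run()` fails inside a running event loop:

```python
import asyncio

async def do_work():
    # Stands in for any *_async SDK method
    await asyncio.sleep(0)
    return "ok"

def sync_wrapper():
    # Mirrors the README's description of sync wrappers: they own the loop
    return asyncio.run(do_work())

async def main():
    try:
        sync_wrapper()  # asyncio.run() refuses to nest inside a running loop
    except RuntimeError as exc:
        print(f"RuntimeError: {exc}")
    print(await do_work())  # awaiting the async variant works fine

asyncio.run(main())
```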
@@ -1100,11 +1111,10 @@ if client.test_connection_sync():
     )
 
     if product.success:
-        print(f"Product: {product.data['title']}")
-        print(f"Price: {product.data['price']}")
-        print(f"Rating: {product.data['rating']}")
+        print(f"Product: {product.data[0]['title']}")
+        print(f"Price: {product.data[0]['final_price']}")
+        print(f"Rating: {product.data[0]['rating']}")
         print(f"Cost: ${product.cost:.4f}")
-        print(f"Method: {product.method}")  # "web_scraper", "web_unlocker", etc.
 
     # Search LinkedIn jobs
     jobs = client.search.linkedin.jobs(
@@ -1113,25 +1123,28 @@ if client.test_connection_sync():
         remote=True
     )
 
-    print(f"Found {jobs.row_count} jobs")
+    if jobs.success:
+        print(f"Found {len(jobs.data)} jobs")
 
     # Scrape Facebook posts
     fb_posts = client.scrape.facebook.posts_by_profile(
-        url="https://facebook.com/profile",
+        url="https://facebook.com/zuck",
         num_of_posts=10,
         timeout=240
     )
 
-    print(f"Scraped {len(fb_posts.data)} Facebook posts")
+    if fb_posts.success:
+        print(f"Scraped {len(fb_posts.data)} Facebook posts")
 
     # Scrape Instagram profile
     ig_profile = client.scrape.instagram.profiles(
-        url="https://instagram.com/username",
+        url="https://instagram.com/instagram",
         timeout=240
     )
 
-    print(f"Profile: {ig_profile.data['username']}")
-    print(f"Followers: {ig_profile.data['followers']}")
+    if ig_profile.success:
+        print(f"Profile: {ig_profile.data[0]['username']}")
+        print(f"Followers: {ig_profile.data[0]['followers_count']}")
 
     # Search Google
     search_results = client.search.google(
@@ -1140,8 +1153,9 @@ if client.test_connection_sync():
         num_results=10
     )
 
-    for i, item in enumerate(search_results.data, 1):
-        print(f"{i}. {item['title']}")
+    if search_results.success:
+        for i, item in enumerate(search_results.data[:5], 1):
+            print(f"{i}. {item.get('title', 'N/A')}")
 ```
 
 ### Interactive CLI Demo
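Stepping back from the README hunks: the `data['title']` → `data[0]['title']` edits imply `result.data` is now a list of record dicts rather than a single dict. Under that assumption (field names taken from the diff, `.get()` fallbacks added here for safety), a defensive access pattern would be:

```python
# Assumes result.data is a list of record dicts, as the README hunks imply
if product.success and product.data:
    record = product.data[0]
    print(f"Product: {record.get('title', 'N/A')}")
    print(f"Price: {record.get('final_price', 'N/A')}")
    print(f"Rating: {record.get('rating', 'N/A')}")
```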

src/brightdata/scrapers/chatgpt/scraper.py

Lines changed: 6 additions & 10 deletions
@@ -157,13 +157,11 @@ async def prompt_trigger_async(
             dataset_id=self.DATASET_ID
         )
 
-        sdk_function = get_caller_function_name()
-
         return ScrapeJob(
             snapshot_id=snapshot_id,
-            scraper=self,
-            dataset_id=self.DATASET_ID,
-            sdk_function=sdk_function or "prompt_trigger"
+            api_client=self.api_client,
+            platform_name=self.PLATFORM_NAME,
+            cost_per_record=self.COST_PER_RECORD,
         )
 
     def prompt_trigger(
@@ -307,13 +305,11 @@ async def prompts_trigger_async(
             dataset_id=self.DATASET_ID
         )
 
-        sdk_function = get_caller_function_name()
-
         return ScrapeJob(
             snapshot_id=snapshot_id,
-            scraper=self,
-            dataset_id=self.DATASET_ID,
-            sdk_function=sdk_function or "prompts_trigger"
+            api_client=self.api_client,
+            platform_name=self.PLATFORM_NAME,
+            cost_per_record=self.COST_PER_RECORD,
        )
 
     def prompts_trigger(
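Judging from the updated call sites, `ScrapeJob` now receives the API client and platform metadata directly instead of a back-reference to the scraper and an `sdk_function` label. A rough sketch of the implied constructor (field types are assumptions; the real class is defined elsewhere in the SDK):

```python
from dataclasses import dataclass
from typing import Any

@dataclass
class ScrapeJob:
    # Fields inferred from the call sites in this commit; types are guesses
    snapshot_id: str
    api_client: Any         # client used to poll the snapshot for results
    platform_name: str      # e.g. "chatgpt", "facebook"
    cost_per_record: float  # unit price used to estimate total job cost
```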

src/brightdata/scrapers/facebook/scraper.py

Lines changed: 21 additions & 6 deletions
@@ -165,9 +165,9 @@ async def posts_by_profile_trigger_async(
 
         return ScrapeJob(
             snapshot_id=snapshot_id,
-            scraper=self,
-            dataset_id=self.DATASET_ID_POSTS_PROFILE,
-            sdk_function=sdk_function or "posts_by_profile_trigger"
+            api_client=self.api_client,
+            platform_name=self.PLATFORM_NAME,
+            cost_per_record=self.COST_PER_RECORD,
         )
 
     def posts_by_profile_trigger(self, url: Union[str, List[str]], **kwargs) -> "ScrapeJob":
@@ -266,7 +266,12 @@ async def posts_by_group_trigger_async(self, url: Union[str, List[str]], **kwarg
         url_list = [url] if isinstance(url, str) else url
         payload = [{"url": u, **{k: v for k, v in kwargs.items() if v is not None}} for u in url_list]
         snapshot_id = await self.api_client.trigger(payload=payload, dataset_id=self.DATASET_ID_POSTS_GROUP)
-        return ScrapeJob(snapshot_id=snapshot_id, scraper=self, dataset_id=self.DATASET_ID_POSTS_GROUP, sdk_function=sdk_function or "posts_by_group_trigger")
+        return ScrapeJob(
+            snapshot_id=snapshot_id,
+            api_client=self.api_client,
+            platform_name=self.PLATFORM_NAME,
+            cost_per_record=self.COST_PER_RECORD,
+        )
 
     def posts_by_group_trigger(self, url: Union[str, List[str]], **kwargs) -> "ScrapeJob":
         """Trigger Facebook posts by group scrape (sync wrapper)."""
@@ -442,7 +447,12 @@ async def comments_trigger_async(self, url: Union[str, List[str]], **kwargs) ->
         url_list = [url] if isinstance(url, str) else url
         payload = [{"url": u, **{k: v for k, v in kwargs.items() if v is not None}} for u in url_list]
         snapshot_id = await self.api_client.trigger(payload=payload, dataset_id=self.DATASET_ID_COMMENTS)
-        return ScrapeJob(snapshot_id=snapshot_id, scraper=self, dataset_id=self.DATASET_ID_COMMENTS, sdk_function=sdk_function or "comments_trigger")
+        return ScrapeJob(
+            snapshot_id=snapshot_id,
+            api_client=self.api_client,
+            platform_name=self.PLATFORM_NAME,
+            cost_per_record=self.COST_PER_RECORD,
+        )
 
     def comments_trigger(self, url: Union[str, List[str]], **kwargs) -> "ScrapeJob":
         """Trigger Facebook comments scrape (sync wrapper)."""
@@ -540,7 +550,12 @@ async def reels_trigger_async(self, url: Union[str, List[str]], **kwargs) -> "Sc
         url_list = [url] if isinstance(url, str) else url
         payload = [{"url": u, **{k: v for k, v in kwargs.items() if v is not None}} for u in url_list]
         snapshot_id = await self.api_client.trigger(payload=payload, dataset_id=self.DATASET_ID_REELS)
-        return ScrapeJob(snapshot_id=snapshot_id, scraper=self, dataset_id=self.DATASET_ID_REELS, sdk_function=sdk_function or "reels_trigger")
+        return ScrapeJob(
+            snapshot_id=snapshot_id,
+            api_client=self.api_client,
+            platform_name=self.PLATFORM_NAME,
+            cost_per_record=self.COST_PER_RECORD,
+        )
 
     def reels_trigger(self, url: Union[str, List[str]], **kwargs) -> "ScrapeJob":
         """Trigger Facebook reels scrape (sync wrapper)."""
