
Commit 8a206c4

Fixed SERP: pass brd_json=1 param in the URL
1 parent 7c6a549 commit 8a206c4

7 files changed: +683 -24 lines changed


src/brightdata/api/serp/base.py

Lines changed: 10 additions & 3 deletions

@@ -2,6 +2,7 @@

 import asyncio
 import aiohttp
+import json
 from typing import Union, List, Optional, Dict, Any
 from datetime import datetime, timezone

@@ -134,7 +135,7 @@ async def _search_single_async(
        payload = {
            "zone": zone,
            "url": search_url,
-            "format": "json",
+            "format": "raw",
            "method": "GET",
        }

@@ -149,9 +150,15 @@ async def _make_request():
                timeout=aiohttp.ClientTimeout(total=self.timeout)
            ) as response:
                data_fetched_at = datetime.now(timezone.utc)
-
+
                if response.status == 200:
-                    data = await response.json()
+                    # With brd_json=1, response is JSON text (not wrapped in status_code/body)
+                    text = await response.text()
+                    try:
+                        data = json.loads(text)
+                    except json.JSONDecodeError:
+                        # Fallback to regular JSON response
+                        data = await response.json()
                    normalized_data = self.data_normalizer.normalize(data)

                    return SearchResult(
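
Taken on its own, the new parse-with-fallback step is easy to demonstrate. A minimal sketch, assuming a bare response-body string; parse_serp_body is a hypothetical helper written for illustration, not part of the library:

import json

def parse_serp_body(text: str):
    # With brd_json=1 the body arrives as JSON text, so json.loads
    # succeeds; a raw-HTML body falls through to the fallback branch.
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        return {"body": text}  # mimic the raw-HTML shape the normalizer handles

parse_serp_body('{"organic": []}')    # -> {'organic': []}
parse_serp_body('<html>...</html>')   # -> {'body': '<html>...</html>'}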

src/brightdata/api/serp/data_normalizer.py

Lines changed: 22 additions & 14 deletions

@@ -16,51 +16,59 @@ def normalize(self, data: Any) -> NormalizedSERPData:

class GoogleDataNormalizer(BaseDataNormalizer):
    """Data normalizer for Google SERP responses."""
-
+
    def normalize(self, data: Any) -> NormalizedSERPData:
        """Normalize Google SERP data."""
        if not isinstance(data, (dict, str)):
            return {"results": []}
-
+
        if isinstance(data, str):
            return {
                "results": [],
                "raw_html": data,
            }
-
+
+        # Handle raw HTML response (body field)
+        if "body" in data and isinstance(data.get("body"), str):
+            return {
+                "results": [],
+                "raw_html": data["body"],
+                "status_code": data.get("status_code"),
+            }
+
        results = []
        organic = data.get("organic", [])
-
+
        for i, item in enumerate(organic, 1):
            results.append({
-                "position": i,
+                "position": item.get("rank", i),
                "title": item.get("title", ""),
-                "url": item.get("url", ""),
+                "url": item.get("link", item.get("url", "")),
                "description": item.get("description", ""),
-                "displayed_url": item.get("displayed_url", ""),
+                "displayed_url": item.get("display_link", item.get("displayed_url", "")),
            })
-
+
        normalized: NormalizedSERPData = {
            "results": results,
            "total_results": data.get("total_results"),
            "search_info": data.get("search_information", {}),
        }
-
+
        if "featured_snippet" in data:
            normalized["featured_snippet"] = data["featured_snippet"]
-
+
        if "knowledge_panel" in data:
            normalized["knowledge_panel"] = data["knowledge_panel"]
-
+
        if "people_also_ask" in data:
            normalized["people_also_ask"] = data["people_also_ask"]
-
+
        if "related_searches" in data:
            normalized["related_searches"] = data["related_searches"]
-
+
        if "ads" in data:
            normalized["ads"] = data["ads"]
-
+
        return normalized

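
Note the key mapping in the organic loop: parsed brd_json=1 responses use rank, link, and display_link, with the older url / displayed_url names kept as fallbacks. A quick sketch with an invented sample payload (the import path is inferred from the file location, and the normalizer is assumed to take no constructor arguments):

from brightdata.api.serp.data_normalizer import GoogleDataNormalizer

sample = {
    "organic": [{
        "rank": 1,
        "title": "Example Domain",
        "link": "https://example.com",
        "display_link": "example.com",
        "description": "Illustrative result",
    }],
    "total_results": 1,
}

normalized = GoogleDataNormalizer().normalize(sample)
# normalized["results"][0] ->
#   {"position": 1, "title": "Example Domain", "url": "https://example.com",
#    "description": "Illustrative result", "displayed_url": "example.com"}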

src/brightdata/api/serp/url_builder.py

Lines changed: 10 additions & 7 deletions

@@ -35,30 +35,33 @@ def build(
        num_results: int = 10,
        **kwargs
    ) -> str:
-        """Build Google search URL."""
+        """Build Google search URL with Bright Data parsing enabled."""
        encoded_query = quote_plus(query)
        url = f"https://www.google.com/search?q={encoded_query}"
        url += f"&num={num_results}"
-
+
+        # Enable Bright Data SERP parsing
+        url += "&brd_json=1"
+
        if language:
            url += f"&hl={language}"
-
+
        if location:
            location_code = LocationService.parse_location(
                location, LocationFormat.GOOGLE
            )
            if location_code:
                url += f"&gl={location_code}"
-
+
        if device == "mobile":
            url += "&mobileaction=1"
-
+
        if "safe_search" in kwargs:
            url += f"&safe={'active' if kwargs['safe_search'] else 'off'}"
-
+
        if "time_range" in kwargs:
            url += f"&tbs=qdr:{kwargs['time_range']}"
-
+
        return url

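
The effect on the final URL is easiest to see standalone. A minimal sketch mirroring the body of build(), using only the pieces visible in the hunk (the query and parameter values are invented for the example):

from urllib.parse import quote_plus

query, num_results, language = "coffee grinder", 10, "en"
url = f"https://www.google.com/search?q={quote_plus(query)}"
url += f"&num={num_results}"
url += "&brd_json=1"  # Bright Data returns parsed JSON instead of raw HTML
url += f"&hl={language}"
print(url)
# https://www.google.com/search?q=coffee+grinder&num=10&brd_json=1&hl=en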

tests/enes/amazon.py

Lines changed: 119 additions & 0 deletions

@@ -0,0 +1,119 @@
+#!/usr/bin/env python3
+"""Test Amazon scraper to verify API fetches data correctly.
+
+How to run manually:
+    python tests/enes/amazon.py
+"""
+
+import sys
+import asyncio
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).parent.parent.parent / "src"))
+
+from brightdata import BrightDataClient
+
+
+async def test_amazon_products():
+    """Test Amazon product scraping."""
+
+    print("=" * 60)
+    print("AMAZON SCRAPER TEST - Products")
+    print("=" * 60)
+
+    client = BrightDataClient()
+
+    async with client.engine:
+        print("\n🛒 Testing Amazon product scraping...")
+        print("📍 Product URL: https://www.amazon.com/dp/B0CRMZHDG8")
+
+        try:
+            result = await client.scrape.amazon.products_async(
+                url="https://www.amazon.com/dp/B0CRMZHDG8",
+                timeout=240
+            )
+
+            print(f"\n✅ API call succeeded")
+            print(f"⏱️ Elapsed: {result.elapsed_ms():.2f}ms" if result.elapsed_ms() else "")
+
+            print(f"\n📊 Result analysis:")
+            print(f"  - result.success: {result.success}")
+            print(f"  - result.data type: {type(result.data)}")
+
+            if result.data:
+                print(f"\n✅ Got product data:")
+                if isinstance(result.data, dict):
+                    print(f"  - Title: {result.data.get('title', 'N/A')}")
+                    print(f"  - Price: {result.data.get('price', 'N/A')}")
+                    print(f"  - ASIN: {result.data.get('asin', 'N/A')}")
+                    print(f"  - Rating: {result.data.get('rating', 'N/A')}")
+                    print(f"  - Review Count: {result.data.get('reviews_count', 'N/A')}")
+                else:
+                    print(f"  Data: {result.data}")
+            else:
+                print(f"\n❌ No product data returned")
+
+        except Exception as e:
+            print(f"\n❌ Error: {e}")
+            import traceback
+            traceback.print_exc()
+
+
+async def test_amazon_reviews():
+    """Test Amazon reviews scraping."""
+
+    print("\n\n" + "=" * 60)
+    print("AMAZON SCRAPER TEST - Reviews")
+    print("=" * 60)
+
+    client = BrightDataClient()
+
+    async with client.engine:
+        print("\n📝 Testing Amazon reviews scraping...")
+        print("📍 Product URL: https://www.amazon.com/dp/B0CRMZHDG8")
+        print("📋 Parameters: pastDays=30, numOfReviews=10")
+
+        try:
+            result = await client.scrape.amazon.reviews_async(
+                url="https://www.amazon.com/dp/B0CRMZHDG8",
+                pastDays=30,
+                numOfReviews=10,
+                timeout=240
+            )
+
+            print(f"\n✅ API call succeeded")
+            print(f"⏱️ Elapsed: {result.elapsed_ms():.2f}ms" if result.elapsed_ms() else "")
+
+            print(f"\n📊 Result analysis:")
+            print(f"  - result.success: {result.success}")
+            print(f"  - result.data type: {type(result.data)}")
+
+            if result.data:
+                if isinstance(result.data, list):
+                    print(f"\n✅ Got {len(result.data)} reviews:")
+                    for i, review in enumerate(result.data[:3], 1):
+                        print(f"\n  Review {i}:")
+                        print(f"  - Rating: {review.get('rating', 'N/A')}")
+                        print(f"  - Title: {review.get('title', 'N/A')[:60]}...")
+                        print(f"  - Author: {review.get('author', 'N/A')}")
+                elif isinstance(result.data, dict):
+                    reviews = result.data.get('reviews', [])
+                    print(f"\n✅ Got {len(reviews)} reviews")
+                else:
+                    print(f"  Data: {result.data}")
+            else:
+                print(f"\n❌ No reviews data returned")
+
+        except Exception as e:
+            print(f"\n❌ Error: {e}")
+            import traceback
+            traceback.print_exc()
+
+
+if __name__ == "__main__":
+    print("\n🚀 Starting Amazon Scraper Tests\n")
+    asyncio.run(test_amazon_products())
+    asyncio.run(test_amazon_reviews())
+    print("\n" + "=" * 60)
+    print("✅ Amazon tests completed")
+    print("=" * 60)
