Skip to content

Commit 129fef4

Browse files
committed
LinkedIn search improvements
1 parent b048c2e commit 129fef4

File tree

1 file changed

+133
-24
lines changed

1 file changed

+133
-24
lines changed

src/brightdata/scrapers/linkedin/search.py

Lines changed: 133 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -268,36 +268,32 @@ async def jobs_async(
268268
companies = self._normalize_param(company, batch_size)
269269
location_radii = self._normalize_param(locationRadius, batch_size)
270270

271-
# Build payload
271+
# Build payload - LinkedIn API requires URLs, not search parameters
272+
# If keyword/location provided, build LinkedIn job search URL internally
272273
payload = []
273274
for i in range(batch_size):
274-
item: Dict[str, Any] = {}
275-
275+
# If URL provided directly, use it
276276
if urls and i < len(urls):
277-
item["url"] = urls[i]
278-
if locations and i < len(locations):
279-
item["location"] = locations[i]
280-
if keywords and i < len(keywords):
281-
item["keyword"] = keywords[i]
282-
if countries and i < len(countries):
283-
item["country"] = countries[i]
284-
if time_ranges and i < len(time_ranges):
285-
item["timeRange"] = time_ranges[i]
286-
if job_types and i < len(job_types):
287-
item["jobType"] = job_types[i]
288-
if experience_levels and i < len(experience_levels):
289-
item["experienceLevel"] = experience_levels[i]
290-
if remote is not None:
291-
item["remote"] = remote
292-
if companies and i < len(companies):
293-
item["company"] = companies[i]
294-
if location_radii and i < len(location_radii):
295-
item["locationRadius"] = location_radii[i]
277+
item = {"url": urls[i]}
278+
else:
279+
# Build LinkedIn job search URL from parameters
280+
search_url = self._build_linkedin_jobs_search_url(
281+
keyword=keywords[i] if keywords and i < len(keywords) else None,
282+
location=locations[i] if locations and i < len(locations) else None,
283+
country=countries[i] if countries and i < len(countries) else None,
284+
time_range=time_ranges[i] if time_ranges and i < len(time_ranges) else None,
285+
job_type=job_types[i] if job_types and i < len(job_types) else None,
286+
experience_level=experience_levels[i] if experience_levels and i < len(experience_levels) else None,
287+
remote=remote,
288+
company=companies[i] if companies and i < len(companies) else None,
289+
location_radius=location_radii[i] if location_radii and i < len(location_radii) else None,
290+
)
291+
item = {"url": search_url}
296292

297293
payload.append(item)
298294

299-
# Use discovery dataset if searching by keyword/location, otherwise URL-based
300-
dataset_id = self.DATASET_ID_JOBS_DISCOVERY if (keyword or location) else self.DATASET_ID_JOBS
295+
# Always use URL-based dataset (discovery dataset doesn't support parameters)
296+
dataset_id = self.DATASET_ID_JOBS
301297

302298
return await self._execute_search(
303299
payload=payload,
@@ -376,6 +372,119 @@ def _normalize_param(
376372

377373
return param
378374

375+
def _build_linkedin_jobs_search_url(
376+
self,
377+
keyword: Optional[str] = None,
378+
location: Optional[str] = None,
379+
country: Optional[str] = None,
380+
time_range: Optional[str] = None,
381+
job_type: Optional[str] = None,
382+
experience_level: Optional[str] = None,
383+
remote: Optional[bool] = None,
384+
company: Optional[str] = None,
385+
location_radius: Optional[str] = None,
386+
) -> str:
387+
"""
388+
Build LinkedIn job search URL from parameters.
389+
390+
LinkedIn API requires URLs, not raw search parameters.
391+
This method constructs a valid LinkedIn job search URL from the provided filters.
392+
393+
Args:
394+
keyword: Job keyword/title
395+
location: Location name
396+
country: Country code
397+
time_range: Time range filter
398+
job_type: Job type filter
399+
experience_level: Experience level filter
400+
remote: Remote jobs only
401+
company: Company name filter
402+
location_radius: Location radius filter
403+
404+
Returns:
405+
LinkedIn job search URL
406+
407+
Example:
408+
>>> _build_linkedin_jobs_search_url(
409+
... keyword="python developer",
410+
... location="New York",
411+
... remote=True
412+
... )
413+
'https://www.linkedin.com/jobs/search/?keywords=python%20developer&location=New%20York&f_WT=2'
414+
"""
415+
from urllib.parse import urlencode, quote_plus
416+
417+
base_url = "https://www.linkedin.com/jobs/search/"
418+
params = {}
419+
420+
# Keywords
421+
if keyword:
422+
params["keywords"] = keyword
423+
424+
# Location
425+
if location:
426+
params["location"] = location
427+
428+
# Remote work type (f_WT: 1=on-site, 2=remote, 3=hybrid)
429+
if remote:
430+
params["f_WT"] = "2"
431+
432+
# Experience level (f_E: 1=internship, 2=entry, 3=associate, 4=mid-senior, 5=director, 6=executive)
433+
if experience_level:
434+
level_map = {
435+
"internship": "1",
436+
"entry": "2",
437+
"associate": "3",
438+
"mid": "4",
439+
"mid-senior": "4",
440+
"senior": "4",
441+
"director": "5",
442+
"executive": "6"
443+
}
444+
if experience_level.lower() in level_map:
445+
params["f_E"] = level_map[experience_level.lower()]
446+
447+
# Job type (f_JT: F=full-time, P=part-time, C=contract, T=temporary, I=internship, V=volunteer, O=other)
448+
if job_type:
449+
type_map = {
450+
"full-time": "F",
451+
"full time": "F",
452+
"part-time": "P",
453+
"part time": "P",
454+
"contract": "C",
455+
"temporary": "T",
456+
"internship": "I",
457+
"volunteer": "V"
458+
}
459+
if job_type.lower() in type_map:
460+
params["f_JT"] = type_map[job_type.lower()]
461+
462+
# Time range (f_TPR: r86400=past 24h, r604800=past week, r2592000=past month)
463+
if time_range:
464+
time_map = {
465+
"day": "r86400",
466+
"past-day": "r86400",
467+
"24h": "r86400",
468+
"week": "r604800",
469+
"past-week": "r604800",
470+
"month": "r2592000",
471+
"past-month": "r2592000"
472+
}
473+
if time_range.lower() in time_map:
474+
params["f_TPR"] = time_map[time_range.lower()]
475+
476+
# Company (f_C)
477+
if company:
478+
params["f_C"] = company
479+
480+
# Build URL
481+
if params:
482+
url = f"{base_url}?{urlencode(params)}"
483+
else:
484+
url = base_url
485+
486+
return url
487+
379488
async def _execute_search(
380489
self,
381490
payload: List[Dict[str, Any]],

0 commit comments

Comments
 (0)