Skip to content

Commit 8711838

Browse files
committed
fix: support parsing websites and instant_messengers
1 parent 4552909 commit 8711838

1 file changed

Lines changed: 35 additions & 8 deletions

File tree

linkedindumper.py

Lines changed: 35 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,12 @@
1616
# You may store your session cookie here persistently
1717
li_at = "YOUR-COOKIE-VALUE"
1818

19+
# Proxies for dev
20+
proxies = {
21+
"http": "http://127.0.0.1:8080",
22+
"https": "http://127.0.0.1:8080",
23+
}
24+
1925
# Converting German umlauts
2026
special_char_map = {ord('ä'):'ae', ord('ü'):'ue', ord('ö'):'oe', ord('ß'):'ss'}
2127

@@ -117,7 +123,7 @@ def parse_employee_results(results):
117123

118124
if args.include_private_profiles or (firstname != "LinkedIn" and lastname != "Member"):
119125

120-
if args.include_contact_infos and profile_link != "N/A":
126+
if args.include_contact_infos and profile_link.startswith("https://www.linkedin.com/in/"):
121127
username = profile_link.rstrip("/").split("/")[-1]
122128
full_details = get_employee_contact_infos(username)
123129

@@ -137,26 +143,26 @@ def parse_employee_results(results):
137143
"position": position,
138144
"gender": gender,
139145
"location": location,
140-
"profile_link": profile_link
146+
"profile_link": "N/A",
147+
"contact_info": {}
141148
})
142149

143-
144150
return employee_dict
145151

146152
def get_company_id(company):
147153
company_encoded = urllib.parse.quote(company)
148154
api1 = f"https://www.linkedin.com/voyager/api/voyagerOrganizationDashCompanies?decorationId=com.linkedin.voyager.dash.deco.organization.MiniCompany-10&q=universalName&universalName={company_encoded}"
149-
r = requests.get(api1, headers=headers, cookies=cookies_dict, timeout=200)
155+
r = requests.get(api1, headers=headers, cookies=cookies_dict, timeout=200)#, proxies=proxies, verify=False)
150156
return r.json()["elements"][0]["entityUrn"].split(":")[-1]
151157

152158
def get_employee_data(company_id, start, count=10):
153159
api2 = f"https://www.linkedin.com/voyager/api/search/dash/clusters?decorationId=com.linkedin.voyager.dash.deco.search.SearchClusterCollection-165&origin=COMPANY_PAGE_CANNED_SEARCH&q=all&query=(flagshipSearchIntent:SEARCH_SRP,queryParameters:(currentCompany:List({company_id}),resultType:List(PEOPLE)),includeFiltersInResponse:false)&count={count}&start={start}"
154-
r = requests.get(api2, headers=headers, cookies=cookies_dict, timeout=200)
160+
r = requests.get(api2, headers=headers, cookies=cookies_dict, timeout=200)#, proxies=proxies, verify=False)
155161
return r.json()
156162

157163
def get_employee_contact_infos(username):
158164
api3 = f"https://www.linkedin.com/voyager/api/graphql?includeWebMetadata=true&variables=(memberIdentity:{username})&queryId=voyagerIdentityDashProfiles.c7452e58fa37646d09dae4920fc5b4b9"
159-
r = requests.get(api3, headers=headers, cookies=cookies_dict, timeout=200)
165+
r = requests.get(api3, headers=headers, cookies=cookies_dict, timeout=200)#, proxies=proxies, verify=False)
160166

161167
try:
162168
data = r.json()
@@ -166,6 +172,8 @@ def get_employee_contact_infos(username):
166172

167173
# Init default values
168174
full_name = email = birthdate = address = phone = None
175+
websites = []
176+
instant_messengers = []
169177

170178
elements = data.get("data", {}) \
171179
.get("identityDashProfilesByMemberIdentity", {}) \
@@ -202,13 +210,29 @@ def get_employee_contact_infos(username):
202210
if isinstance(phone_obj, dict):
203211
phone = phone_obj.get("number")
204212

213+
# Websites
214+
websites_data = profile.get("websites", [])
215+
for site in websites_data:
216+
url = site.get("url")
217+
if url:
218+
websites.append(url)
219+
220+
ims = profile.get("instantMessengers", [])
221+
for im in ims:
222+
provider = im.get("provider")
223+
username = im.get("id")
224+
if provider and username:
225+
instant_messengers.append(f"{provider}:{username}")
226+
205227
return {
206228
"firstname": first,
207229
"lastname": last,
208230
"email": email,
209231
"birthdate": birthdate,
210232
"address": address,
211-
"phone": phone
233+
"phone": phone,
234+
"websites": websites,
235+
"instant_messengers": instant_messengers
212236
}
213237

214238
def month_to_string(month):
@@ -279,7 +303,10 @@ def main():
279303
for person in employee_dict:
280304
firstname_clean = person["firstname"].replace(".", "").lower().translate(special_char_map)
281305
lastname_clean = person["lastname"].replace(".", "").lower().translate(special_char_map)
282-
person["email"] = mailformat.format(firstname_clean, lastname_clean)
306+
if firstname_clean == "linkedin" and lastname_clean == "member":
307+
person["email"] = "N/A"
308+
else:
309+
person["email"] = mailformat.format(firstname_clean, lastname_clean)
283310

284311
if mailformat:
285312
legend = "Firstname;Lastname;Email;Position;Gender;Location;Profile"

0 commit comments

Comments (0)