Skip to content

Commit 9977c1f

Browse files
fix cookie auth
1 parent 910bc4d commit 9977c1f

File tree

9 files changed

+368
-219
lines changed

9 files changed

+368
-219
lines changed

src/linkedin_spider/cli/main.py

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ def search(
3131
Parameter(name=["-o", "--output"], help="Output file path (.json or .csv format)"),
3232
] = None,
3333
headless: bool | None = None,
34+
user_agent: Annotated[
35+
str | None, Parameter(help="Custom user agent string for requests")
36+
] = None,
3437
email: Annotated[str | None, Parameter(help="LinkedIn email for authentication")] = None,
3538
password: Annotated[str | None, Parameter(help="LinkedIn password for authentication")] = None,
3639
cookie: Annotated[
@@ -41,12 +44,14 @@ def search(
4144
try:
4245
config = _create_config(headless)
4346
credentials = _get_credentials(email, password, cookie)
47+
custom_user_agent = _get_user_agent(user_agent)
4448

4549
scraper = LinkedinSpider(
4650
email=credentials.get("email"),
4751
password=credentials.get("password"),
4852
li_at_cookie=credentials.get("cookie"),
4953
config=config,
54+
user_agent=custom_user_agent,
5055
)
5156

5257
results = scraper.search_profiles(query, max_results)
@@ -73,6 +78,9 @@ def profile(
7378
Parameter(name=["-o", "--output"], help="Output file path (.json or .csv format)"),
7479
] = None,
7580
headless: bool | None = None,
81+
user_agent: Annotated[
82+
str | None, Parameter(help="Custom user agent string for requests")
83+
] = None,
7684
email: Annotated[str | None, Parameter(help="LinkedIn email for authentication")] = None,
7785
password: Annotated[str | None, Parameter(help="LinkedIn password for authentication")] = None,
7886
cookie: Annotated[
@@ -83,12 +91,14 @@ def profile(
8391
try:
8492
config = _create_config(headless)
8593
credentials = _get_credentials(email, password, cookie)
94+
custom_user_agent = _get_user_agent(user_agent)
8695

8796
scraper = LinkedinSpider(
8897
email=credentials.get("email"),
8998
password=credentials.get("password"),
9099
li_at_cookie=credentials.get("cookie"),
91100
config=config,
101+
user_agent=custom_user_agent,
92102
)
93103

94104
result = scraper.scrape_profile(url)
@@ -119,6 +129,9 @@ def company(
119129
Parameter(name=["-o", "--output"], help="Output file path (.json or .csv format)"),
120130
] = None,
121131
headless: bool | None = None,
132+
user_agent: Annotated[
133+
str | None, Parameter(help="Custom user agent string for requests")
134+
] = None,
122135
email: Annotated[str | None, Parameter(help="LinkedIn email for authentication")] = None,
123136
password: Annotated[str | None, Parameter(help="LinkedIn password for authentication")] = None,
124137
cookie: Annotated[
@@ -129,12 +142,14 @@ def company(
129142
try:
130143
config = _create_config(headless)
131144
credentials = _get_credentials(email, password, cookie)
145+
custom_user_agent = _get_user_agent(user_agent)
132146

133147
scraper = LinkedinSpider(
134148
email=credentials.get("email"),
135149
password=credentials.get("password"),
136150
li_at_cookie=credentials.get("cookie"),
137151
config=config,
152+
user_agent=custom_user_agent,
138153
)
139154

140155
result = scraper.scrape_company(url)
@@ -167,6 +182,9 @@ def connections(
167182
Parameter(name=["-o", "--output"], help="Output file path (.json or .csv format)"),
168183
] = None,
169184
headless: bool | None = None,
185+
user_agent: Annotated[
186+
str | None, Parameter(help="Custom user agent string for requests")
187+
] = None,
170188
email: Annotated[str | None, Parameter(help="LinkedIn email for authentication")] = None,
171189
password: Annotated[str | None, Parameter(help="LinkedIn password for authentication")] = None,
172190
cookie: Annotated[
@@ -177,12 +195,14 @@ def connections(
177195
try:
178196
config = _create_config(headless)
179197
credentials = _get_credentials(email, password, cookie)
198+
custom_user_agent = _get_user_agent(user_agent)
180199

181200
scraper = LinkedinSpider(
182201
email=credentials.get("email"),
183202
password=credentials.get("password"),
184203
li_at_cookie=credentials.get("cookie"),
185204
config=config,
205+
user_agent=custom_user_agent,
186206
)
187207

188208
results = scraper.scrape_incoming_connections(max_results)
@@ -231,6 +251,9 @@ def search_posts(
231251
Parameter(name=["-o", "--output"], help="Output file path (.json or .csv format)"),
232252
] = None,
233253
headless: bool | None = None,
254+
user_agent: Annotated[
255+
str | None, Parameter(help="Custom user agent string for requests")
256+
] = None,
234257
email: Annotated[str | None, Parameter(help="LinkedIn email for authentication")] = None,
235258
password: Annotated[str | None, Parameter(help="LinkedIn password for authentication")] = None,
236259
cookie: Annotated[
@@ -241,12 +264,14 @@ def search_posts(
241264
try:
242265
config = _create_config(headless)
243266
credentials = _get_credentials(email, password, cookie)
267+
custom_user_agent = _get_user_agent(user_agent)
244268

245269
scraper = LinkedinSpider(
246270
email=credentials.get("email"),
247271
password=credentials.get("password"),
248272
li_at_cookie=credentials.get("cookie"),
249273
config=config,
274+
user_agent=custom_user_agent,
250275
)
251276

252277
print(f"Searching for posts with keywords: '{keywords}'")
@@ -285,20 +310,25 @@ def _create_config(headless: bool | None) -> ScraperConfig:
285310
return ScraperConfig(headless=headless)
286311

287312

313+
def _get_user_agent(user_agent: str | None) -> str | None:
314+
"""Get user agent from argument or environment variable."""
315+
return user_agent or os.getenv("USER_AGENT")
316+
317+
288318
def _get_credentials(email: str | None, password: str | None, cookie: str | None) -> dict:
289319
"""Get authentication credentials from arguments or environment."""
290320
credentials = {
291321
"email": email or os.getenv("LINKEDIN_EMAIL"),
292322
"password": password or os.getenv("LINKEDIN_PASSWORD"),
293-
"cookie": cookie or os.getenv("LINKEDIN_COOKIE"),
323+
"cookie": cookie or os.getenv("cookie"),
294324
}
295325

296326
if not any(credentials.values()):
297327
raise ValueError(
298328
"Authentication required. Provide either:\n"
299329
"1. Email and password (--email, --password)\n"
300330
"2. LinkedIn cookie (--cookie)\n"
301-
"3. Set environment variables: LINKEDIN_EMAIL, LINKEDIN_PASSWORD, or LINKEDIN_COOKIE"
331+
"3. Set environment variables: LINKEDIN_EMAIL, LINKEDIN_PASSWORD, or cookie"
302332
)
303333

304334
return credentials

src/linkedin_spider/core/auth.py

Lines changed: 81 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import logging
12
import os
23

34
from selenium.common.exceptions import NoSuchElementException, TimeoutException
@@ -7,6 +8,8 @@
78
from linkedin_spider.core.driver import DriverManager
89
from linkedin_spider.utils.human_behavior import HumanBehavior
910

11+
logger = logging.getLogger(__name__)
12+
1013

1114
class AuthManager:
1215
"""Handles LinkedIn authentication via cookies or credentials."""
@@ -28,36 +31,59 @@ def __init__(
2831
self.li_at_cookie = li_at_cookie
2932

3033
def authenticate(self) -> bool:
    """
    Authenticate the session, trying each available method in turn.

    Order of attempts:
        1. The li_at cookie held on this manager, when one was supplied.
           A rejected cookie is final: an exception is raised and no
           other method is attempted.
        2. Saved cookies, via ``_try_saved_cookies``.
        3. Email and password held on this manager, when both were
           supplied. A failed login raises.

    After a successful cookie or credential login the cookies are saved
    through the driver manager.

    Returns:
        bool: True once one of the methods has succeeded.

    Raises:
        Exception: If the supplied cookie is rejected, the credential
            login fails, or no usable method is available.
    """
    # Step 1: an explicitly supplied cookie either works or aborts.
    if self.li_at_cookie:
        logger.info("Using LinkedIn cookie from parameter")
        if not self._authenticate_with_cookie(self.li_at_cookie):
            logger.error("Cookie from parameter failed")
            raise Exception("Provided li_at cookie is invalid or expired")
        self.driver_manager.save_cookies()
        return True

    # Step 2: fall back to whatever cookies were saved earlier.
    logger.info("Checking for saved cookies in profile directory...")
    if self._try_saved_cookies():
        logger.info("Successfully authenticated using saved cookies")
        return True
    logger.info("No valid saved cookies found or authentication failed")

    # Nothing left to try unless both email and password are present.
    if not (self.email and self.password):
        logger.error("No valid authentication method found")
        raise Exception(
            "Authentication required. Saved cookies not found or invalid. Please provide either:\n"
            "  - li_at cookie (--cookie parameter)\n"
            "  - Email and password (--email and --password parameters)\n"
            "  - Set environment variables: LINKEDIN_EMAIL, LINKEDIN_PASSWORD, or cookie"
        )

    # Step 3: interactive login with the supplied credentials.
    logger.info("Using LinkedIn credentials from parameter")
    if not self._login_with_credentials(self.email, self.password):
        logger.error("Login with provided credentials failed")
        raise Exception("Login failed with provided credentials")
    self.driver_manager.save_cookies()
    return True
6187

6288
def _is_authenticated(self) -> bool:
6389
"""Check if already authenticated by examining current page."""
@@ -113,44 +139,53 @@ def _try_saved_cookies(self) -> bool:
113139

114140
return self._is_authenticated()
115141

116-
def _authenticate_with_cookie(self, cookie: str) -> bool:
    """
    Log in with an li_at cookie and confirm the session is authenticated.

    The cookie is handed to ``driver_manager.login_with_cookie``; when that
    reports success, the method pauses briefly, runs
    ``_handle_welcome_page`` and returns the result of
    ``_is_authenticated``.

    Args:
        cookie: LinkedIn session cookie value

    Returns:
        bool: True if the session is authenticated afterwards; False if
        the driver manager reports failure or any exception is raised
        (the exception is logged, not propagated).
    """
    try:
        if not self.driver_manager.login_with_cookie(cookie):
            return False

        self.human_behavior.delay(1, 2)
        self._handle_welcome_page()
        return self._is_authenticated()
    except Exception as e:
        logger.error(f"Cookie authentication exception: {e}")
        return False
166+
167+
def _login_with_credentials(self, email: str, password: str) -> bool:
168+
"""
169+
Login using email and password.
170+
171+
Args:
172+
email: LinkedIn email
173+
password: LinkedIn password
174+
175+
Returns:
176+
bool: True if login succeeded
177+
"""
143178
try:
144179
self.driver.get("https://www.linkedin.com/login")
145180
self.human_behavior.delay(2, 3)
146181

147182
email_field = self.wait.until(EC.presence_of_element_located((By.ID, "username")))
148183
self.human_behavior.click(email_field)
149-
self.human_behavior.type_text(email_field, self.email)
184+
self.human_behavior.type_text(email_field, email)
150185

151186
password_field = self.driver.find_element(By.ID, "password")
152187
self.human_behavior.click(password_field)
153-
self.human_behavior.type_text(password_field, self.password)
188+
self.human_behavior.type_text(password_field, password)
154189

155190
self.human_behavior.delay()
156191

@@ -160,14 +195,16 @@ def _login_with_credentials(self) -> bool:
160195
self.human_behavior.delay(3, 5)
161196

162197
if self._check_login_errors():
198+
logger.error("Login errors detected")
163199
return False
164200

165201
if self._is_challenge_present():
166202
return self._handle_challenge()
167203

168204
return self._is_authenticated()
169205

170-
except Exception:
206+
except Exception as e:
207+
logger.error(f"Credential login exception: {e}")
171208
return False
172209

173210
def _is_challenge_present(self) -> bool:

0 commit comments

Comments
 (0)