|
import asyncio
import os
from urllib.parse import quote_plus

import dotenv
import yaml
from playwright.async_api import Locator, Page

from config.logger_config import logger
from src.job_manager.authenticator import LinkedInAuthenticator
from src.pydantic_models.config_models import ConnectionSearcherConfig
from src.utils.browser_utils import create_playwright_browser, pause, save_browser_session
| 10 | + |
| 11 | + |
class ConnectionSearcher:
    """Search LinkedIn people results and invite likely "open networkers".

    The workflow (see ``run``) logs into LinkedIn, iterates over every
    combination of ``main_search_words`` x ``additional_search_words`` from the
    config, walks the paginated people-search results and clicks the invite
    control on every result card whose text contains one of
    ``open_networker_keywords`` (or the upper-case ``LION`` marker).

    Attributes:
        config: Parsed ``ConnectionSearcherConfig``.
        secrets: Key/value pairs read from ``.env``; must hold
            ``linkedin_email`` and ``linkedin_password``.
        open_networker_keywords: Phrases matched case-insensitively against a
            result card's text.
        person_name: Name extracted by the most recent successful
            ``_should_connect`` call (used only for logging).
        found_keywords: Keywords matched by the most recent successful
            ``_should_connect`` call (used only for logging).
    """

    # Upper-case marker matched case-sensitively, in addition to the keyword list.
    _LION_MARKER = "LION"
    # Upper bound on result pages visited per search combination.
    _MAX_PAGES = 100
    # Alternative selectors for a single people-search result card.
    _RESULT_SELECTORS = (
        ".reusable-search__result-container",
        ".entity-result",
        "div[data-view-name='search-result']",
        "div[data-view-name='people-search-result']",
        "li.reusable-search__result-container",
        "[role='listitem']",
    )
    _REQUIRED_SECRETS = ("linkedin_email", "linkedin_password")

    def __init__(self, config_path: str = "config/connection_searcher_config.yaml"):
        """Load the config and credentials and set up the keyword list.

        Args:
            config_path: Path to the YAML config file.

        Raises:
            ValueError: If the LinkedIn credentials are missing from ``.env``.
        """
        self.config = self._load_config(config_path)
        self.secrets = self._load_secrets()
        # Expanded keywords that indicate an open networker
        self.open_networker_keywords = [
            "L.I.O.N",
            "Open Networker",
            "Networking",
            "Accepting Invites",
            "Invites Welcome",
            "I accept all invites",
            "Open to connect",
            "Let's connect",
            "10k+",
            "20k+",
            "30k+",
            "megalion",
            "top lion",
            "fast connect",
            "I never say no",
            "open to networking",
            "send me an invite",
            "no idk",
            "will not idk",
        ]
        self.person_name = ""
        self.found_keywords = []

    def _load_config(self, config_path: str) -> "ConnectionSearcherConfig":
        """Return the config parsed from ``config_path``.

        A missing file yields a default-constructed config (with a warning).
        An empty YAML document is treated the same as an empty mapping.
        """
        if not os.path.exists(config_path):
            logger.warning(f"Config file {config_path} not found. Using defaults.")
            return ConnectionSearcherConfig()
        with open(config_path, "r", encoding="utf-8") as f:
            # safe_load returns None for an empty document; ``**None`` would raise.
            config_data = yaml.safe_load(f) or {}
        return ConnectionSearcherConfig(**config_data)

    def _load_secrets(self) -> dict:
        """Read ``.env`` and return its values as a dict.

        Raises:
            ValueError: If ``linkedin_email`` or ``linkedin_password`` is
                absent or has no (or an empty) value.
        """
        secrets = dict(dotenv.dotenv_values(".env"))
        # Basic validation: a key that is present but empty/None is as
        # unusable as a missing one, so reject both up front.
        if any(not secrets.get(key) for key in self._REQUIRED_SECRETS):
            logger.error("LinkedIn credentials not found in .env")
            raise ValueError("Missing LinkedIn credentials in .env")
        return secrets

    async def run(self):
        """Log in, then search and connect for every configured word pair.

        The browser is always closed on exit, including after a failed login
        or an unhandled error.
        """
        browser, context, page = await create_playwright_browser()
        try:
            # Login
            authenticator = LinkedInAuthenticator(page)
            authenticator.set_parameters(
                self.secrets["linkedin_email"], self.secrets["linkedin_password"]
            )
            login_success = await authenticator.start()
            if not login_success:
                logger.error("Failed to log into LinkedIn")
                return

            await save_browser_session(context)
            logger.info("Successfully logged into LinkedIn!")

            for main_word in self.config.main_search_words:
                for add_word in self.config.additional_search_words:
                    logger.info(f"Starting search for: {main_word} + {add_word}")
                    await self._search_and_connect(page, main_word, add_word)
        finally:
            await browser.close()

    @staticmethod
    def _build_search_url(main_word: str, add_word: str, page_num: int) -> str:
        """Return the people-search URL for the given words and page number.

        The keywords are percent-encoded (spaces become ``+``) so characters
        such as ``&``, ``#`` or ``+`` inside a search word cannot break or
        alter the query string.
        """
        query = quote_plus(f"{main_word} {add_word}")
        return f"https://www.linkedin.com/search/results/people/?keywords={query}&page={page_num}"

    async def _search_and_connect(self, page: "Page", main_word: str, add_word: str):
        """Walk the result pages for one word pair and invite matching people.

        Stops at the first page that shows no results, when the "next" button
        is not visible, or after ``_MAX_PAGES`` pages.
        """
        combined_selector = ", ".join(self._RESULT_SELECTORS)

        for page_num in range(1, self._MAX_PAGES + 1):
            logger.info(f"Processing page {page_num} for {main_word} + {add_word}")
            await page.goto(self._build_search_url(main_word, add_word, page_num))
            # NOTE(review): ``pause`` is a project helper called without ``await``;
            # if it sleeps synchronously it blocks the event loop - confirm.
            pause(1, 2)  # Allow page to settle

            # Wait for results or empty state with multiple possible selectors
            try:
                await page.wait_for_selector(combined_selector, timeout=5000)
            except Exception as e:
                # If wait fails, check if "No results found" is actually visible
                if await page.get_by_text("No results found").is_visible():
                    logger.info(f"No results found on page {page_num}. Moving to next combination.")
                    break

                # Check if we have any results despite the timeout (sometimes visible state is tricky)
                count = await page.locator(combined_selector).count()
                if count == 0:
                    logger.error(f"Error waiting for results on page {page_num}: {e}")
                    break

            # Scroll down to load all results
            await page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
            pause(2, 3)

            people = await page.locator(combined_selector).all()
            if not people:
                logger.info(
                    f"No people found on page {page_num} after scrolling. Moving to next combination."
                )
                break

            for person in people:
                # One broken card must not abort the whole page, so errors are
                # logged per person and the loop carries on.
                try:
                    if await self._should_connect(person):
                        await self._connect_with_person(page, person)
                except Exception as e:
                    logger.error(f"Error processing person: {e}")

            # Check for next page button
            next_button = page.locator(
                "button[data-testid='pagination-controls-next-button-visible']"
            )
            if not await next_button.is_visible() or page_num >= self._MAX_PAGES:
                logger.info("Reached the end of results or 100th page.")
                break

    @staticmethod
    def _strip_mutual_connection_lines(raw_text: str) -> str:
        """Join the card's lines with spaces, dropping mutual-connection lines.

        Those lines name other people, so a keyword appearing only there says
        nothing about the person the card belongs to.
        """
        kept = []
        for line in raw_text.split("\n"):
            stripped = line.strip()
            if "is a mutual connection" in stripped or "are mutual connections" in stripped:
                continue
            kept.append(stripped)
        return " ".join(kept)

    def _match_keywords(self, text: str) -> list:
        """Return the open-networker keywords found in ``text``.

        List keywords are matched case-insensitively. The upper-case ``LION``
        marker is matched case-sensitively and appended when present.
        """
        lowered = text.lower()
        matched = [kw for kw in self.open_networker_keywords if kw.lower() in lowered]
        if self._LION_MARKER in text and self._LION_MARKER not in matched:
            matched.append(self._LION_MARKER)
        return matched

    async def _should_connect(self, person: "Locator") -> bool:
        """Decide whether a result card looks like an open networker.

        On a match, stores the matched keywords in ``self.found_keywords`` and
        the text before the first ``•`` in ``self.person_name`` (empty when the
        card text has no ``•``); both are left untouched otherwise.

        Returns:
            True if any keyword (or the ``LION`` marker) occurs in the card
            text outside of mutual-connection lines, else False.
        """
        raw_text = await person.inner_text() or ""
        full_text = self._strip_mutual_connection_lines(raw_text)

        matched = self._match_keywords(full_text)
        if not matched:
            return False

        self.found_keywords = matched
        self.person_name = full_text.split("•")[0].strip() if "•" in full_text else ""
        return True

    async def _connect_with_person(self, page: "Page", person: "Locator"):
        """Click the invite control for ``person`` and confirm the invitation.

        Does nothing when the card has no invite link. When the link exists
        but is not visible, falls back to a "Connect" entry in the card's
        "More" dropdown.
        """
        # Specific selector for the 'Invite ... to connect' link of a result card
        connect_btn = person.locator("a[aria-label^='Invite'][aria-label$='to connect']")

        # No invite link in this card: nothing to click, skip this person.
        if await connect_btn.count() == 0:
            return

        if await connect_btn.first.is_visible():
            logger.info(
                f"Found open networker keywords {self.found_keywords} in person's {self.person_name} description."
            )
            await connect_btn.first.click()
            logger.info("Clicked Connect button.")
            await self._handle_invitation_modal(page)
            return

        # The link exists but is hidden: check the 'More' menu as a last resort
        more_btn = person.locator("button:has-text('More')")
        if await more_btn.count() > 0 and await more_btn.first.is_visible():
            await more_btn.first.click()
            pause(1, 2)
            dropdown_connect = page.locator(
                "div.artdeco-dropdown__content [aria-label^='Invite'][aria-label$='to connect'], div.artdeco-dropdown__content button:has-text('Connect')"
            )
            if await dropdown_connect.count() > 0:
                await dropdown_connect.first.click()
                logger.info("Clicked Connect button from More menu.")
                await self._handle_invitation_modal(page)

    async def _handle_invitation_modal(self, page: "Page"):
        """Confirm the invitation dialog, if one is shown.

        Prefers "Send without a note"; otherwise tries "Send now". If neither
        button is present and visible, nothing is clicked.
        """
        pause(1, 2)
        # Ordered by preference: the first visible button wins.
        candidates = (
            (
                "button[aria-label='Send without a note'], button:has-text('Send without a note')",
                "Sent invitation without a note.",
            ),
            (
                "button:has-text('Send now'), button[aria-label='Send now']",
                "Sent invitation using 'Send now'.",
            ),
        )
        for selector, message in candidates:
            button = page.locator(selector)
            if await button.count() > 0 and await button.first.is_visible():
                await button.first.click()
                logger.info(message)
                pause(1, 2)
                return
| 218 | + |
| 219 | + |
if __name__ == "__main__":
    # Script entry point: build a searcher with the default config path and
    # drive its async workflow to completion on a fresh event loop.
    asyncio.run(ConnectionSearcher().run())
0 commit comments