Skip to content

Commit 1be53e0

Browse files
authored
Merge pull request #7 from vertexcover-io/feat/open-link
Feat/open link
2 parents 6546393 + 60f597f commit 1be53e0

File tree

3 files changed

+57
-0
lines changed

3 files changed

+57
-0
lines changed

.gitIgnore renamed to .gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@ outputs/*
55
.idea/*
66
venv/*
77
__pycache__/*
8+
**/__pycache__/*
89
.claude/*
10+
*.pyc
911

1012
docs/source
1113
deploy/*
@@ -151,3 +153,5 @@ cython_debug/
151153
# and can be added to the global gitignore or merged into this file. For a more nuclear
152154
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
153155
#.idea/
156+
157+
CLAUDE.md

src/linkedin_spider/core/scraper.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,18 @@ def search_posts(
159159
"""
160160
return self.search_scraper.search_posts(keywords, max_results, scroll_pause)
161161

162+
def open_link(self, url: str) -> dict[str, Any] | None:
163+
"""
164+
Open a LinkedIn post URL and extract its content.
165+
166+
Args:
167+
url: LinkedIn post URL (e.g., https://linkedin.com/feed/update/urn:li:activity:...)
168+
169+
Returns:
170+
Dictionary containing post data (same structure as search_posts), or None if failed
171+
"""
172+
return self.search_scraper.open_link(url)
173+
162174
def scrape_company(self, company_url: str) -> dict[str, Any] | None:
163175
"""Scrape a LinkedIn company page."""
164176
return self.company_scraper.scrape_company(company_url)

src/linkedin_spider/scrapers/search.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1083,6 +1083,47 @@ def _load_more_comments(self, container: WebElement, max_comments: int) -> None:
10831083
except Exception as e:
10841084
self.log_action("DEBUG", f"Error loading more comments: {e!s}")
10851085

1086+
def open_link(self, url: str) -> dict[str, Any] | None:
1087+
"""
1088+
Open a LinkedIn post URL and extract its content.
1089+
1090+
Args:
1091+
url: LinkedIn post URL (e.g., https://linkedin.com/feed/update/urn:li:activity:...)
1092+
1093+
Returns:
1094+
Dictionary containing post data (same structure as search_posts), or None if failed
1095+
"""
1096+
try:
1097+
self.log_action("INFO", f"Opening LinkedIn URL: {url}")
1098+
1099+
if not self.navigate_to_url(url):
1100+
self.log_action("ERROR", f"Failed to navigate to URL: {url}")
1101+
return None
1102+
1103+
# Wait for page load
1104+
self.human_behavior.delay(2.0, 4.0)
1105+
1106+
# Find the first post container
1107+
post_containers = self._find_post_containers()
1108+
1109+
if not post_containers:
1110+
self.log_action("WARNING", "No post container found on page")
1111+
return None
1112+
1113+
# Extract data from the first post
1114+
post_data = self._extract_post_data(post_containers[0])
1115+
1116+
# Set post_url if not already set
1117+
if post_data.get("post_url") == "N/A":
1118+
post_data["post_url"] = url.split("?")[0]
1119+
1120+
self.log_action("SUCCESS", f"Extracted post from URL: {url}")
1121+
return post_data
1122+
1123+
except Exception as e:
1124+
self.log_action("ERROR", f"Failed to open link: {e!s}")
1125+
return None
1126+
10861127
def _extract_post_comments(
10871128
self, container: WebElement, max_comments: int = 10
10881129
) -> list[dict[str, Any]]:

0 commit comments

Comments
 (0)