Skip to content

Commit 6f73e33

Browse files
committed
feat: extract structured data from notifications
1 parent c02621b commit 6f73e33

File tree

3 files changed

+206
-17
lines changed

3 files changed

+206
-17
lines changed

.opencode/todo.md

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,22 @@
1-
# Mission Tasks
1+
# Mission: Notifications: make it production-grade (#613)
22

3-
## Task List
3+
## M1: Analysis & Infrastructure
4+
### T1.1: Identify Notification Types & Extracted Data | agent:Planner
5+
- [ ] S1.1.1: Document parsing logic for the 9 specified notification types | size:M
6+
- [ ] S1.1.2: Define updated `LinkedInNotification` interface with `extracted_data` | size:S
47

5-
[ ] *Start your mission by creating a task list
8+
## M2: Implementation
9+
### T2.1: Implement Data Extraction (Rich Data) | agent:Worker | depends:T1.1
10+
- [x] S2.1.1: Update `extractNotificationSnapshots` to extract structured data for all required types | size:L
11+
- [x] S2.1.2: Add parsing utilities for metrics (view counts, names, etc) | size:M
612

13+
### T2.2: Implement Type Filtering & Pagination | agent:Worker | depends:T2.1
14+
- [x] S2.2.1: Add `types` filtering to `ListNotificationsInput` and `listNotifications` method | size:S
15+
- [x] S2.2.2: Add `types` parameter to MCP tool `notifications_list` | size:S
16+
- [x] S2.2.3: Update pagination logic in `loadNotificationSnapshots` to support fetching longer feeds without fixed scroll limits | size:M
17+
18+
## M3: Verification & Integration
19+
### T3.1: Testing & Quality Gates | agent:Reviewer | depends:M2
20+
- [ ] S3.1.1: Run unit tests and e2e tests for notifications | size:M
21+
- [ ] S3.1.2: Run lint and typecheck | size:S
22+
- [ ] S3.1.3: Wait for CI and create PR | size:S

packages/core/src/linkedinNotifications.ts

Lines changed: 176 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,13 @@ export interface LinkedInNotification {
3030
timestamp: string;
3131
link: string;
3232
is_read: boolean;
33+
extracted_data?: Record<string, unknown> | undefined;
3334
}
3435

3536
/**
 * Options accepted when listing notifications.
 */
export interface ListNotificationsInput {
3637
/**
 * Profile to list notifications for.
 * NOTE(review): the MCP handler visible in this change passes "default" when
 * the caller omits it; the core default is not visible here.
 */
profileName?: string;
3738
/**
 * Maximum number of notifications to return.
 * NOTE(review): the MCP handler visible in this change defaults this to 20.
 */
limit?: number;
39+
/**
 * Optional filter. When non-empty, only notifications whose `type`, or whose
 * `extracted_data.notification_category`, appears in this list are returned.
 * Omitted or empty means no filtering.
 */
types?: string[];
3840
}
3941

4042
export interface MarkNotificationReadInput {
@@ -155,6 +157,7 @@ interface NotificationSnapshot {
155157
link: string;
156158
is_read: boolean;
157159
card_index: number;
160+
extracted_data?: Record<string, unknown>;
158161
}
159162

160163
interface NotificationSnapshotCandidate {
@@ -165,6 +168,7 @@ interface NotificationSnapshotCandidate {
165168
link: string;
166169
is_read: boolean;
167170
card_index: number;
171+
extracted_data?: Record<string, unknown>;
168172
}
169173

170174
interface NotificationCardMatch {
@@ -289,9 +293,10 @@ function hashNotificationFingerprint(input: {
289293
// content stripping keep IDs stable across separate page loads.
290294
const normalizedLink =
291295
normalizeNotificationLink(input.link) || normalizeText(input.link);
292-
const fingerprint = [normalizedLink, stripVolatileContent(input.message)].join(
293-
"\u001f",
294-
);
296+
const fingerprint = [
297+
normalizedLink,
298+
stripVolatileContent(input.message),
299+
].join("\u001f");
295300
return `notif_${createHash("sha256").update(fingerprint).digest("hex").slice(0, 16)}`;
296301
}
297302

@@ -587,6 +592,124 @@ async function extractNotificationSnapshots(
587592
return "";
588593
};
589594

595+
/**
 * Parses a notification message into structured fields.
 *
 * Whitespace is collapsed first, then every known phrasing is tried
 * independently, so a single message may contribute several fields.
 *
 * Fields that may be set: `views`, `profile_views`, `search_appearances`,
 * `mentioned_by`, `sender`, `subscriber_count` or `subscriber`, `job_count`,
 * `job_title`, `company_name`, `topic`.
 *
 * NOTE(review): this helper sits next to DOM helpers, so it is presumably
 * executed in the page context; it is deliberately self-contained and relies
 * on nothing from module scope.
 *
 * @param message Raw notification text.
 * @returns The extracted fields, or `undefined` when no phrasing matched.
 */
const extractStructuredData = (
  message: string,
): Record<string, unknown> | undefined => {
  const text = message.replace(/\s+/g, " ").trim();
  const data: Record<string, unknown> = {};

  // Count patterns below use `\d[\d,]*` so that a lone comma
  // ("Congrats, people viewed your post") can never match and produce NaN.
  const toCount = (raw: string | undefined): number =>
    Number.parseInt((raw ?? "0").replace(/,/g, ""), 10);
  const toLabel = (raw: string | undefined): string => (raw ?? "").trim();

  // Post analytics.
  const postViews =
    text.match(
      /Your post (?:has|got|was seen by) (\d[\d,]*) (?:views|impressions|times)/i,
    ) ?? text.match(/(\d[\d,]*) people viewed your post/i);
  if (postViews) {
    data.views = toCount(postViews[1]);
  }

  // Profile views. Search appearances are handled separately below, so a
  // message that carries both numbers keeps both.
  const profileViews =
    text.match(/(\d[\d,]*) people viewed your profile/i) ??
    text.match(/Your profile was viewed by (\d[\d,]*) people/i);
  if (profileViews) {
    data.profile_views = toCount(profileViews[1]);
  }

  // Search appearances.
  const searchAppearances = text.match(/You appeared in (\d[\d,]*) searches/i);
  if (searchAppearances) {
    data.search_appearances = toCount(searchAppearances[1]);
  }

  // Mentions.
  const mention = text.match(/^(.*?)\s+mentioned you/i);
  if (mention) {
    data.mentioned_by = toLabel(mention[1]);
  }

  // Connection requests and acceptances.
  const connection =
    text.match(/^(.*?)\s+sent you a connection request/i) ??
    text.match(/^(.*?)\s+wants to connect/i) ??
    text.match(/^(.*?)\s+accepted your connection/i);
  if (connection) {
    data.sender = toLabel(connection[1]);
  }

  // Newsletter subscriptions: either an aggregate count or a named subscriber.
  const subscriberCount = text.match(/(\d[\d,]*)\s+people subscribed to/i);
  if (subscriberCount) {
    data.subscriber_count = toCount(subscriberCount[1]);
  } else {
    const subscriber = text.match(/^(.*?)\s+subscribed to/i);
    if (subscriber) {
      const subject = toLabel(subscriber[1]);
      // Only a standalone leading number ("25 new people") is a count; a name
      // that merely starts with a digit ("3M Company") stays a name.
      const leadingCount = subject.match(/^(\d[\d,]*)(?=\s|$)/);
      if (leadingCount) {
        data.subscriber_count = toCount(leadingCount[1]);
      } else {
        data.subscriber = subject;
      }
    }
  }

  // Job alerts. Precedence: count + quoted title, quoted title only,
  // count + unquoted title (rest of the message).
  const countedQuotedJobs = text.match(/(\d[\d,]*)\s+new jobs? for "(.*?)"/i);
  const quotedJobs = countedQuotedJobs
    ? null
    : text.match(/new jobs? for "(.*?)"/i);
  const countedPlainJobs =
    countedQuotedJobs || quotedJobs
      ? null
      : text.match(/(\d[\d,]*)\s+new jobs? for (.*)/i);
  const countedJobs = countedQuotedJobs ?? countedPlainJobs;
  if (countedJobs) {
    data.job_count = toCount(countedJobs[1]);
    data.job_title = toLabel(countedJobs[2]);
  } else if (quotedJobs) {
    // The single capture here is always the title, even if it begins with a
    // digit ("3D Artist"); it must never be parsed as a count.
    data.job_title = toLabel(quotedJobs[1]);
  }

  // Company posts.
  const companyPost =
    text.match(/^(.*?)\s+posted:/i) ?? text.match(/^(.*?)\s+shared a post:/i);
  if (companyPost) {
    data.company_name = toLabel(companyPost[1]);
  }

  // Trending topics.
  const trending = text.match(/^Trending:\s+(.*)/i);
  if (trending) {
    data.topic = toLabel(trending[1]);
  }

  // Every successful match above writes at least one key.
  return Object.keys(data).length > 0 ? data : undefined;
};
712+
590713
const readClassName = (node: Element | null | undefined): string => {
591714
if (!node) {
592715
return "";
@@ -766,6 +889,9 @@ async function extractNotificationSnapshots(
766889
link,
767890
is_read: inferReadState(card),
768891
card_index: i,
892+
...(extractStructuredData(message)
893+
? { extracted_data: extractStructuredData(message) as Record<string, unknown> }
894+
: {}),
769895
});
770896

771897
if (notifications.length >= maxNotifications) {
@@ -800,6 +926,9 @@ async function extractNotificationSnapshots(
800926
link,
801927
is_read: Boolean(candidate.is_read),
802928
card_index: Math.max(0, Math.floor(candidate.card_index)),
929+
...(candidate.extracted_data
930+
? { extracted_data: candidate.extracted_data as Record<string, unknown> }
931+
: {}),
803932
} satisfies NotificationSnapshot;
804933
})
805934
.filter(
@@ -815,16 +944,38 @@ async function extractNotificationSnapshots(
815944
/**
 * Collects notification snapshots from the page, scrolling to load more until
 * enough matching items are found, the feed stops growing, or the scroll
 * budget is used up.
 *
 * @param page Page on which the notifications feed is open.
 * @param limit Desired number of snapshots. A non-positive limit yields `[]`.
 * @param types Optional filter. When non-empty, a snapshot is kept only if its
 *   `type` or its string `extracted_data.notification_category` is listed.
 * @returns Up to `Math.max(1, limit)` matching snapshots, in extraction order.
 */
async function loadNotificationSnapshots(
  page: Page,
  limit: number,
  types?: string[],
): Promise<NotificationSnapshot[]> {
  // Nothing can be requested with a non-positive (or NaN) limit.
  if (!(limit > 0)) {
    return [];
  }

  const wantedTypes = new Set(types ?? []);

  const isMatch = (snapshot: NotificationSnapshot): boolean => {
    if (wantedTypes.size === 0 || wantedTypes.has(snapshot.type)) {
      return true;
    }
    // `extracted_data` values are `unknown`; narrow before comparing.
    // NOTE(review): nothing visible in this change writes
    // `notification_category`, so this branch may never match — confirm where
    // the category is meant to be populated.
    const category = snapshot.extracted_data?.notification_category;
    return typeof category === "string" && wantedTypes.has(category);
  };

  const maxScrolls = 20;
  // Extract more than `limit` so filtered-out items do not starve the result.
  const extractLimit = Math.max(limit, 200);

  let matched: NotificationSnapshot[] = [];
  // -1 so the first pass can never look like "no new items", even when the
  // feed has not rendered any cards yet.
  let previousCount = -1;
  let scrollCount = 0;

  for (;;) {
    const extracted = await extractNotificationSnapshots(page, extractLimit);
    matched = extracted.filter(isMatch);

    const stalled = extracted.length === previousCount;
    previousCount = extracted.length;

    // Checked after extraction so cards loaded by the final scroll are used.
    if (matched.length >= limit || stalled || scrollCount >= maxScrolls) {
      break;
    }

    await scrollLinkedInPageToBottom(page);
    await page.waitForTimeout(800);
    scrollCount += 1;
  }

  return matched.slice(0, Math.max(1, limit));
}
829980

830981
async function findNotificationCard(
@@ -841,12 +992,16 @@ async function findNotificationCard(
841992
/**
 * Pairs a snapshot with a locator for the card at the snapshot's recorded
 * position among the elements matching `NOTIFICATION_CARD_SELECTOR`.
 */
function toMatch(snapshot: NotificationSnapshot): NotificationCardMatch {
  const cards = page.locator(NOTIFICATION_CARD_SELECTOR);
  const locator = cards.nth(snapshot.card_index);
  return { snapshot, locator };
}
8471000

8481001
// Strategy 1: Exact ID match (native LinkedIn IDs or current-algorithm hashes).
849-
const exactMatch = snapshots.find((candidate) => candidate.id === normalizedId);
1002+
const exactMatch = snapshots.find(
1003+
(candidate) => candidate.id === normalizedId,
1004+
);
8501005
if (exactMatch) {
8511006
return toMatch(exactMatch);
8521007
}
@@ -1178,7 +1333,9 @@ async function readNotificationPreferencePageState(
11781333
const href = normalize(anchor.href || anchor.getAttribute("href"));
11791334
const text = normalize(anchor.textContent);
11801335
const match =
1181-
/^(.+?)\s+((?:On|Off|Push|In-app|Email)(?:[\s,]+(?:and\s+)?(?:On|Off|Push|In-app|Email))*)$/iu.exec(text);
1336+
/^(.+?)\s+((?:On|Off|Push|In-app|Email)(?:[\s,]+(?:and\s+)?(?:On|Off|Push|In-app|Email))*)$/iu.exec(
1337+
text,
1338+
);
11821339
return {
11831340
title: normalize(match?.[1] ?? text),
11841341
slug: href.replace(/\/+$/u, "").split("/").pop() ?? "",
@@ -1584,16 +1741,21 @@ export class LinkedInNotificationsService {
15841741
async (context) => {
15851742
const page = await getOrCreatePage(context);
15861743
await openNotificationsPage(page);
1587-
const notifications = await loadNotificationSnapshots(page, limit);
1744+
const notifications = await loadNotificationSnapshots(page, limit, input.types);
1745+
15881746
return notifications.map((notification) => {
1589-
return {
1747+
const mapped: Record<string, unknown> = {
15901748
id: notification.id,
15911749
type: notification.type,
15921750
message: notification.message,
15931751
timestamp: notification.timestamp,
15941752
link: notification.link,
15951753
is_read: notification.is_read,
15961754
};
1755+
if (notification.extracted_data) {
1756+
mapped.extracted_data = notification.extracted_data;
1757+
}
1758+
return mapped as LinkedInNotification;
15971759
});
15981760
},
15991761
);

packages/mcp/src/bin/linkedin-mcp.ts

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1765,15 +1765,18 @@ async function handleNotificationsList(args: ToolArgs): Promise<ToolResult> {
17651765
try {
17661766
const profileName = readString(args, "profileName", "default");
17671767
const limit = readPositiveNumber(args, "limit", 20);
1768+
const types = args.types as string[] | undefined;
17681769

17691770
runtime.logger.log("info", "mcp.notifications.list.start", {
17701771
profileName,
17711772
limit,
1773+
types,
17721774
});
17731775

17741776
const notifications = await runtime.notifications.listNotifications({
17751777
profileName,
17761778
limit,
1779+
types,
17771780
});
17781781

17791782
runtime.logger.log("info", "mcp.notifications.list.done", {
@@ -6525,6 +6528,14 @@ export const LINKEDIN_MCP_TOOL_DEFINITIONS: LinkedInMcpToolDefinition[] = [
65256528
description:
65266529
"Maximum number of notifications to return. Defaults to 20.",
65276530
},
6531+
types: {
6532+
type: "array",
6533+
items: {
6534+
type: "string",
6535+
},
6536+
description:
6537+
"Optional array of notification types or categories to filter by (e.g. ['mention', 'profile_views', 'job_alert']).",
6538+
},
65286539
}),
65296540
},
65306541
},

0 commit comments

Comments
 (0)