6464 }
6565
6666 // 3) Heuristic + sentiment-lite checks
67- const linkCount = (body.match(/https?:\/\/|www\./gi) || []).length;
67+ // Link analysis with domain allowlist (do not penalize common safe docs/code links)
68+ const safeDomains = [
69+ "github.com","docs.github.com","githubusercontent.com","gitlab.com","bitbucket.org",
70+ "readthedocs.io","arxiv.org","pypi.org","npmjs.com","crates.io","stackoverflow.com","stackexchange.com"
71+ ];
72+ const urlMatches = (body.match(/https?:\/\/[^\s)]+/gi) || []);
73+ let safeLinkCount = 0;
74+ let suspiciousLinkCount = 0;
75+ for (const u of urlMatches) {
76+ try {
77+ const h = new URL(u).hostname.replace(/^www\./i, "");
78+ const isShortHost = /^(bit\.ly|t\.co|tinyurl\.com|goo\.gl|ow\.ly)$/i.test(h);
79+ const isSafe = safeDomains.some(d => h === d || h.endsWith(`.${d}`));
80+ if (isSafe && !isShortHost) safeLinkCount += 1;
81+ else suspiciousLinkCount += 1;
82+ } catch {
83+ suspiciousLinkCount += 1;
84+ }
85+ }
86+ const linkCount = urlMatches.length;
6887 const emailCount = (body.match(/[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}/gi) || []).length;
6988 const phoneCount = (body.match(/(\+?\d[\d\s().-]{8,}\d)/g) || []).length;
7089 const mentions = (body.match(/@\w{1,39}/g) || []).length;
@@ -146,20 +165,8 @@ jobs:
146165 let sentiment = 0;
147166 for (const t of tokens) if (afinn[t] != null) sentiment += afinn[t];
148167
149- // Score
168+ // Score: Only use attack/insult signals for blocking (ignore links/emails/phones)
150169 let points = 0;
151- if (linkCount >= 2) points += 2;
152- if (emailCount > 0 || phoneCount > 0) points += 2;
153- if (mentions >= 5) points += 1;
154- if (exclaimBlk) points += 1;
155- if (repeatedChr) points += 1;
156- if (shortened) points += 1;
157- if (lowUnique) points += 1;
158- if (keywordHit) points += 3;
159- if (hype) points += 2;
160- if (sentiment >= 4 && linkCount >= 1) points += 1; // overly positive + links
161- if (sentiment <= -2 && (hype || keywordHit)) points += 1;
162-
163170 // Attack/insult scoring with guardrails for technical context
164171 let attackContribution = 0;
165172 if (insultHit) attackContribution += 2;
@@ -169,9 +176,10 @@ jobs:
169176 if (techCtxHit) attackContribution = Math.min(1, attackContribution); // cap if technical context detected
170177 points += attackContribution;
171178
172- core.info(`Spam score for @${actor} = ${points} (links:${linkCount}, emails:${emailCount}, phones:${phoneCount}, mentions:${mentions}, sentiment:${sentiment}, attackHits:${attackHits}, insult:${insultHit}, techCtx:${techCtxHit})`);
179+ core.info(`Spam score for @${actor} = ${points} (attackOnly; links/emails/phones ignored) (links :${linkCount} safe:${safeLinkCount} suspicious:${suspiciousLinkCount }, emails:${emailCount}, phones:${phoneCount}, mentions:${mentions}, sentiment:${sentiment}, attackHits:${attackHits}, insult:${insultHit}, techCtx:${techCtxHit})`);
173180
174- const isSpam = points >= 3; // adjust threshold to tune sensitivity
181+ // Only block when attack/insult crosses threshold
182+ const isSpam = attackContribution >= 2; // adjust threshold if needed
175183 if (!isSpam) {
176184 core.info("Comment not flagged as spam.");
177185 return;
0 commit comments