Skip to content

Commit a42c183

Browse files
wayneliu0019Rader
authored andcommitted
Create spam-issue-detect.yml
创建检测垃圾issue和comment的action任务: 每天凌晨2点执行,根据关键字检测issue和comment内容,超过阈值,关闭或者删除,默认dry-run模式
1 parent 114cda7 commit a42c183

File tree

1 file changed

+396
-0
lines changed

1 file changed

+396
-0
lines changed
Lines changed: 396 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,396 @@
1+
# Workflow that scans recent issues and issue comments for abusive keywords
# and, unless running in dry-run mode, closes / rewrites / deletes the hits.
name: Daily Spam Cleanup
2+
3+
# Two triggers: a daily cron schedule and a manual dispatch with tunable inputs.
on:
4+
schedule:
5+
# Runs every day at 02:00 Beijing time (18:00 UTC)
6+
- cron: '0 18 * * *'
7+
workflow_dispatch:
8+
# NOTE(review): the defaults declared below are attached to the manual
# dispatch form; confirm what the script receives on scheduled runs.
inputs:
9+
# When 'true' the script only reports what it would have done.
dry_run:
10+
description: 'Dry run (only report, do not delete)'
11+
required: false
12+
default: 'true'
13+
type: choice
14+
options:
15+
- 'true'
16+
- 'false'
17+
# Toggles the scan of issues in the open state.
scan_issues:
18+
description: 'Scan open issues'
19+
required: false
20+
default: 'true'
21+
type: boolean
22+
# Toggles the scan of issues in the closed state (off by default).
scan_closed_issues:
23+
description: 'Scan closed issues (may find previously closed spam)'
24+
required: false
25+
default: 'false'
26+
type: boolean
27+
# Toggles the scan of issue comments.
scan_comments:
28+
description: 'Scan issue comments'
29+
required: false
30+
default: 'true'
31+
type: boolean
32+
# Upper bound per scan; passed as a string and parsed by the script.
max_issues:
33+
description: 'Max number of issues to scan (0 = all)'
34+
required: false
35+
default: '100'
36+
type: string
37+
38+
# Token scopes granted to the job: write access to issues and pull requests,
# read-only access to repository contents.
permissions:
39+
issues: write
40+
pull-requests: write
41+
contents: read
42+
43+
jobs:
44+
# Single job; all logic lives in the inline github-script step below.
cleanup:
45+
runs-on: ubuntu-latest
46+
steps:
47+
- name: Daily spam scan and cleanup
48+
uses: actions/github-script@v7
49+
with:
50+
# Inline JavaScript executed by actions/github-script.
script: |
51+
// ---------------------------------------------------------------------------
// Run configuration.
//
// The workflow inputs are rendered into the string literals below before the
// script runs. On scheduled runs there is no dispatch form, so each literal is
// an empty string; in that case fall back to the defaults declared on the
// workflow_dispatch inputs (dry run ON, open issues ON, closed issues OFF,
// comments ON, limit 100) instead of silently treating everything as `false`.
//
// NOTE(review): the inputs are interpolated straight into the script source.
// Only users who can dispatch the workflow control them, but passing them
// through `env:` and reading `process.env` would remove the injection surface.
// ---------------------------------------------------------------------------

/** Interpret a rendered boolean input; an empty string means "not provided". */
const readFlag = (raw, fallback) => (raw === '' ? fallback : raw === 'true');

const dryRun = readFlag('${{ inputs.dry_run }}', true);
const scanIssues = readFlag('${{ inputs.scan_issues }}', true);
const scanClosedIssues = readFlag('${{ inputs.scan_closed_issues }}', false);
const scanComments = readFlag('${{ inputs.scan_comments }}', true);

// Default is 100; an explicit 0 means "no limit" and must survive parsing
// (the previous `parseInt(...) || 100` turned 0 into 100).
const requestedMax = Number.parseInt('${{ inputs.max_issues }}', 10);
const maxIssues = Number.isNaN(requestedMax) || requestedMax < 0 ? 100 : requestedMax;

// Only content from the last 24 hours is inspected.
const HOURS_BACK = 24;

const owner = context.repo.owner;
const repo = context.repo.repo;

// Start of the scan window, computed as an absolute offset from "now".
const sinceDate = new Date(Date.now() - HOURS_BACK * 60 * 60 * 1000);

// Banner describing the effective configuration of this run.
core.info(`🕒 开始每日垃圾内容扫描`);
core.info(` 扫描时间: ${new Date().toLocaleString('zh-CN', { timeZone: 'Asia/Shanghai' })}`);
core.info(` 扫描范围: 最近${HOURS_BACK}小时内创建的内容`);
core.info(` 最大数量: ${maxIssues === 0 ? '全部' : maxIssues}个`);
core.info(` 干运行模式: ${dryRun}`);
core.info(` 扫描开放Issue: ${scanIssues}`);
core.info(` 扫描已关闭Issue: ${scanClosedIssues}`);
core.info(` 扫描评论: ${scanComments}`);
core.info(``);
74+
75+
/**
 * Build the keyword-based content analyzer.
 *
 * The returned async function scores a piece of text: every distinct keyword
 * found adds its category weight to the score, and a score of 10 or more is
 * reported as spam.
 *
 * Keywords written in ASCII are matched on word boundaries (with an optional
 * plural "s"/"es") so that e.g. "die" no longer fires on "audience" or
 * "studied". Non-ASCII (Chinese) keywords are matched as substrings because
 * that script has no word separators.
 *
 * @returns {(content: string, author: string, association: string) =>
 *   Promise<{isSpam: boolean, score: number, reason: string, detectedPatterns: string[]}>}
 *   `author` is only used for the debug log line; `association` is accepted
 *   for call-site compatibility and currently unused.
 */
const createAnalyzer = () => {
  // Abusive-content keywords, grouped by category.
  const defamationPatterns = {
    personalAttacks: [
      '骗子', '诈骗', '骗钱', '垃圾', '废物', '无耻', '不要脸', '人渣',
      'stupid', 'idiot', 'moron', 'scammer', 'fraud', 'cheater'
    ],
    falseAccusations: [
      '偷窃', '抄袭', '剽窃', '造假', '欺诈',
      'stealing', 'plagiarism', 'fake',
    ],
    extremeEmotions: [
      '天理难容', '不得好死', '断子绝孙', '去死', '滚蛋',
      'die', 'kill yourself', 'go to hell'
    ],
    projectDefamation: [
      '垃圾项目', '骗局', '圈钱', '迟早倒闭', '建议解散',
      'scam project', 'ponzi scheme', 'worthless'
    ]
  };

  // Score added per distinct keyword hit, by category.
  const weights = {
    personalAttacks: 4,
    falseAccusations: 3,
    extremeEmotions: 5,
    projectDefamation: 4
  };

  const SPAM_THRESHOLD = 10;

  const escapeRegExp = (text) => text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const isAsciiKeyword = (keyword) => /^[\x20-\x7e]+$/.test(keyword);

  // Turn one keyword into a predicate over already lower-cased text.
  const toMatcher = (keyword) => {
    const needle = keyword.toLowerCase();
    if (!isAsciiKeyword(needle)) {
      return (text) => text.includes(needle);
    }
    // No `g` flag: `.test` stays stateless across calls.
    const pattern = new RegExp(`\\b${escapeRegExp(needle)}(?:e?s)?\\b`);
    return (text) => pattern.test(text);
  };

  // Compile the tables once per analyzer instead of once per analyzed item.
  const categories = Object.entries(defamationPatterns).map(([category, keywords]) => ({
    category,
    weight: weights[category],
    matchers: keywords.map(toMatcher),
  }));

  return async (content, author, association) => {
    // Tolerate null/undefined bodies instead of throwing on `.toLowerCase()`.
    const text = String(content ?? '');
    const contentLower = text.toLowerCase();

    let score = 0;
    const detectedPatterns = [];

    // Accumulate the score category by category.
    for (const { category, weight, matchers } of categories) {
      const hits = matchers.filter((matches) => matches(contentLower)).length;
      if (hits === 0) continue;
      detectedPatterns.push(category);
      score += hits * weight;
    }

    // Debug trace for every analyzed item.
    core.info(`详细分析: 用户=${author}, 内容长度=${text.length}, 分数=${score}, 模式=${detectedPatterns.join(',')}`);

    // Decision rule.
    const isSpam = score >= SPAM_THRESHOLD;

    return {
      isSpam,
      score,
      reason: isSpam ? `检测到恶意内容 (分数: ${score})` : '正常内容',
      detectedPatterns
    };
  };
};
137+
138+
let totalScanned = 0;
139+
let totalSpam = 0;
140+
let totalClosed = 0;
141+
let totalDeleted = 0;
142+
let totalModerated = 0;
143+
144+
try {
145+
const analyzeContent = createAnalyzer();
146+
core.info("✅ 垃圾检测模块加载成功");
147+
148+
// 扫描开放中的Issue(只扫描最近24小时内的)
149+
if (scanIssues) {
150+
core.info("📝 扫描最近24小时内开放的Issue...");
151+
let page = 1;
152+
let openScanned = 0;
153+
154+
while (maxIssues === 0 || openScanned < maxIssues) {
155+
const issues = await github.rest.issues.listForRepo({
156+
owner, repo,
157+
state: 'open',
158+
since: sinceDate.toISOString(), // 只获取最近24小时内的
159+
per_page: 50,
160+
page,
161+
sort: 'created',
162+
direction: 'desc'
163+
});
164+
165+
if (issues.data.length === 0) {
166+
core.info("📭 没有找到更多符合条件的开放Issue");
167+
break;
168+
}
169+
170+
for (const issue of issues.data) {
171+
if (issue.pull_request) continue;
172+
if (maxIssues > 0 && openScanned >= maxIssues) break;
173+
174+
openScanned++;
175+
totalScanned++;
176+
177+
core.info(`扫描开放Issue #${issue.number}: "${issue.title}"`);
178+
179+
// 结合标题和正文进行检测
180+
const fullContent = (issue.title + ' ' + (issue.body || '')).toLowerCase();
181+
182+
const analysis = await analyzeContent(
183+
fullContent,
184+
issue.user?.login || "unknown",
185+
issue.author_association || "NONE"
186+
);
187+
188+
if (analysis.isSpam) {
189+
totalSpam++;
190+
core.warning(`🚨 [SPAM] 开放Issue #${issue.number} by @${issue.user?.login}`);
191+
core.warning(` 标题: ${issue.title}`);
192+
core.warning(` 分数: ${analysis.score}, 模式: ${analysis.detectedPatterns.join(', ')}`);
193+
194+
if (!dryRun) {
195+
try {
196+
await github.rest.issues.update({
197+
owner, repo, issue_number: issue.number,
198+
state: "closed", state_reason: "not_planned"
199+
});
200+
await github.rest.issues.createComment({
201+
owner, repo, issue_number: issue.number,
202+
body: `## 🚫 自动垃圾检测\n\n此Issue在定时扫描中被识别为违规内容并已自动关闭。\n\n` +
203+
`**检测时间:** ${new Date().toLocaleString('zh-CN', { timeZone: 'Asia/Shanghai' })}\n` +
204+
`**检测分数:** ${analysis.score}\n` +
205+
`**检测模式:** ${analysis.detectedPatterns.join(', ')}\n\n` +
206+
`如果这是误判,请联系维护者。`
207+
});
208+
await github.rest.issues.addLabels({
209+
owner, repo, issue_number: issue.number,
210+
labels: ['spam', 'auto-removed']
211+
});
212+
totalClosed++;
213+
core.notice(`✅ 已关闭垃圾Issue #${issue.number}`);
214+
215+
// API速率限制保护
216+
await new Promise(resolve => setTimeout(resolve, 500));
217+
} catch (err) {
218+
core.error(`❌ 关闭Issue #${issue.number}失败: ${err.message}`);
219+
}
220+
} else {
221+
core.notice(`⚠️ [DRY RUN] 检测到垃圾Issue #${issue.number} (未执行操作)`);
222+
}
223+
} else {
224+
core.info(`✅ 开放Issue #${issue.number} 检测为正常内容`);
225+
}
226+
}
227+
page++;
228+
}
229+
core.info(`开放Issue扫描完成: ${openScanned}个`);
230+
}
231+
232+
// 扫描已关闭的Issue(只扫描最近24小时内的)
233+
if (scanClosedIssues) {
234+
core.info("📚 扫描最近24小时内已关闭的Issue...");
235+
let page = 1;
236+
let closedScanned = 0;
237+
238+
while (maxIssues === 0 || closedScanned < maxIssues) {
239+
const issues = await github.rest.issues.listForRepo({
240+
owner, repo,
241+
state: 'closed',
242+
since: sinceDate.toISOString(), // 只获取最近24小时内的
243+
per_page: 50,
244+
page,
245+
sort: 'created',
246+
direction: 'desc'
247+
});
248+
249+
if (issues.data.length === 0) {
250+
core.info("📭 没有找到更多符合条件的已关闭Issue");
251+
break;
252+
}
253+
254+
for (const issue of issues.data) {
255+
if (issue.pull_request) continue;
256+
if (maxIssues > 0 && closedScanned >= maxIssues) break;
257+
258+
closedScanned++;
259+
totalScanned++;
260+
261+
const fullContent = (issue.title + ' ' + (issue.body || '')).toLowerCase();
262+
const analysis = await analyzeContent(
263+
fullContent,
264+
issue.user?.login || "unknown",
265+
issue.author_association || "NONE"
266+
);
267+
268+
if (analysis.isSpam) {
269+
totalSpam++;
270+
core.warning(`🚨 [SPAM] 已关闭Issue #${issue.number} by @${issue.user?.login}`);
271+
core.warning(` 分数: ${analysis.score}, 模式: ${analysis.detectedPatterns.join(', ')}`);
272+
273+
if (!dryRun) {
274+
try {
275+
await github.rest.issues.update({
276+
owner, repo, issue_number: issue.number,
277+
title: "[已处理] 内容已移除",
278+
body: "**此内容因违反社区准则已被自动处理。**\n\n" +
279+
"原始内容包含不当言论,现已被隐藏。\n\n" +
280+
`**处理时间:** ${new Date().toLocaleString('zh-CN', { timeZone: 'Asia/Shanghai' })}\n` +
281+
"_此操作是在每日内容清理扫描中自动执行的。_"
282+
});
283+
totalModerated++;
284+
core.notice(`✅ 已处理已关闭垃圾Issue #${issue.number}`);
285+
286+
// API速率限制保护
287+
await new Promise(resolve => setTimeout(resolve, 500));
288+
} catch (err) {
289+
core.error(`❌ 处理已关闭Issue #${issue.number}失败: ${err.message}`);
290+
}
291+
} else {
292+
core.notice(`⚠️ [DRY RUN] 检测到已关闭垃圾Issue #${issue.number} (未执行操作)`);
293+
}
294+
}
295+
}
296+
page++;
297+
}
298+
core.info(`已关闭Issue扫描完成: ${closedScanned}个`);
299+
}
300+
301+
// 扫描评论(只扫描最近24小时内的)
302+
if (scanComments) {
303+
core.info("💬 扫描最近24小时内的评论...");
304+
let page = 1;
305+
let commentCount = 0;
306+
307+
while (page <= 10 && (maxIssues === 0 || commentCount < maxIssues)) {
308+
const comments = await github.rest.issues.listCommentsForRepo({
309+
owner, repo,
310+
since: sinceDate.toISOString(), // 只获取最近24小时内的
311+
per_page: 50,
312+
page,
313+
sort: 'created',
314+
direction: 'desc'
315+
});
316+
317+
if (comments.data.length === 0) {
318+
core.info("📭 没有找到更多符合条件的评论");
319+
break;
320+
}
321+
322+
for (const comment of comments.data) {
323+
if (maxIssues > 0 && commentCount >= maxIssues) break;
324+
325+
commentCount++;
326+
totalScanned++;
327+
328+
const analysis = await analyzeContent(
329+
comment.body || "",
330+
comment.user?.login || "unknown",
331+
comment.author_association || "NONE"
332+
);
333+
334+
if (analysis.isSpam) {
335+
totalSpam++;
336+
core.warning(`🚨 [SPAM] 评论 #${comment.id} by @${comment.user?.login}`);
337+
core.warning(` 分数: ${analysis.score}, 模式: ${analysis.detectedPatterns.join(', ')}`);
338+
339+
if (!dryRun) {
340+
try {
341+
await github.rest.issues.deleteComment({
342+
owner, repo, comment_id: comment.id
343+
});
344+
totalDeleted++;
345+
core.notice(`✅ 已删除垃圾评论 #${comment.id}`);
346+
347+
// API速率限制保护
348+
await new Promise(resolve => setTimeout(resolve, 500));
349+
} catch (err) {
350+
core.error(`❌ 删除评论 #${comment.id}失败: ${err.message}`);
351+
}
352+
} else {
353+
core.notice(`⚠️ [DRY RUN] 检测到垃圾评论 #${comment.id} (未执行操作)`);
354+
}
355+
}
356+
}
357+
page++;
358+
}
359+
core.info(`评论扫描完成: ${commentCount}条`);
360+
}
361+
362+
} catch (err) {
363+
core.error(`❌ 扫描过程出错: ${err.message}`);
364+
core.setFailed(`执行失败: ${err.message}`);
365+
return;
366+
}
367+
368+
// Emit the end-of-run summary as a sequence of notice annotations.
const divider = "=".repeat(60);

// Section that depends on the mode: either the actions taken, or a reminder
// that nothing was changed.
const outcomeLines = dryRun
  ? [
      ``,
      `⚠️ 干运行模式 - 未执行任何操作`,
      ` 设置 dry_run: false 来执行实际清理`,
    ]
  : [
      ``,
      `✅ 执行的操作:`,
      ` • 关闭的Issue: ${totalClosed}`,
      ` • 处理的已关闭Issue: ${totalModerated}`,
      ` • 删除的评论: ${totalDeleted}`,
      ` • 总计操作: ${totalClosed + totalModerated + totalDeleted}`,
    ];

const reportLines = [
  divider,
  `📊 每日扫描总结 ${dryRun ? '(干运行模式)' : '(已执行)'}`,
  divider,
  `📝 扫描统计:`,
  ` • 扫描时间范围: 最近${HOURS_BACK}小时`,
  ` • 总计扫描: ${totalScanned}`,
  ``,
  `🚨 检测结果:`,
  ` • 发现的垃圾内容: ${totalSpam}`,
  ...outcomeLines,
  ``,
  `⏰ 下次扫描: 北京时间明天凌晨2点`,
  divider,
];

for (const line of reportLines) {
  core.notice(line);
}

// In dry-run mode, nudge the maintainer to review what was found.
if (dryRun && totalSpam > 0) {
  core.notice(`💡 建议: 发现 ${totalSpam} 个潜在的垃圾内容,请审查后执行清理。`);
}

0 commit comments

Comments
 (0)