Commit 4a50f4f

Add duplicate issue detection using GitHub AI models (home-assistant#146487)
1 parent 9ee4551 commit 4a50f4f

1 file changed

Lines changed: 374 additions & 0 deletions
@@ -0,0 +1,374 @@
name: Auto-detect duplicate issues

# yamllint disable-line rule:truthy
on:
  issues:
    types: [labeled]

permissions:
  issues: write
  models: read

jobs:
  detect-duplicates:
    runs-on: ubuntu-latest

    steps:
      - name: Check if integration label was added and extract details
        id: extract
        uses: actions/[email protected]
        with:
          script: |
            // Debug: Log the event payload
            console.log('Event name:', context.eventName);
            console.log('Event action:', context.payload.action);
            console.log('Event payload keys:', Object.keys(context.payload));

            // Check the specific label that was added
            const addedLabel = context.payload.label;
            if (!addedLabel) {
              console.log('No label found in labeled event payload');
              core.setOutput('should_continue', 'false');
              return;
            }

            console.log(`Label added: ${addedLabel.name}`);

            if (!addedLabel.name.startsWith('integration:')) {
              console.log('Added label is not an integration label, skipping duplicate detection');
              core.setOutput('should_continue', 'false');
              return;
            }

            console.log(`Integration label added: ${addedLabel.name}`);

            let currentIssue;
            let integrationLabels = [];

            try {
              const issue = await github.rest.issues.get({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: context.payload.issue.number
              });

              currentIssue = issue.data;

              // Check if potential-duplicate label already exists
              const hasPotentialDuplicateLabel = currentIssue.labels
                .some(label => label.name === 'potential-duplicate');

              if (hasPotentialDuplicateLabel) {
                console.log('Issue already has potential-duplicate label, skipping duplicate detection');
                core.setOutput('should_continue', 'false');
                return;
              }

              integrationLabels = currentIssue.labels
                .filter(label => label.name.startsWith('integration:'))
                .map(label => label.name);
            } catch (error) {
              core.error(`Failed to fetch issue #${context.payload.issue.number}:`, error.message);
              core.setOutput('should_continue', 'false');
              return;
            }

            // Check if we've already posted a duplicate detection comment recently
            let comments;
            try {
              comments = await github.rest.issues.listComments({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: context.payload.issue.number,
                per_page: 10
              });
            } catch (error) {
              core.error('Failed to fetch comments:', error.message);
              // Continue anyway, worst case we might post a duplicate comment
              comments = { data: [] };
            }

            // Check if we've already posted a duplicate detection comment
            const recentDuplicateComment = comments.data.find(comment =>
              comment.user && comment.user.login === 'github-actions[bot]' &&
              comment.body.includes('<!-- workflow: detect-duplicate-issues -->')
            );

            if (recentDuplicateComment) {
              console.log('Already posted duplicate detection comment, skipping');
              core.setOutput('should_continue', 'false');
              return;
            }

            core.setOutput('should_continue', 'true');
            core.setOutput('current_number', currentIssue.number);
            core.setOutput('current_title', currentIssue.title);
            core.setOutput('current_body', currentIssue.body);
            core.setOutput('current_url', currentIssue.html_url);
            core.setOutput('integration_labels', JSON.stringify(integrationLabels));

            console.log(`Current issue: #${currentIssue.number}`);
            console.log(`Integration labels: ${integrationLabels.join(', ')}`);

      - name: Fetch similar issues
        id: fetch_similar
        if: steps.extract.outputs.should_continue == 'true'
        uses: actions/[email protected]
        env:
          INTEGRATION_LABELS: ${{ steps.extract.outputs.integration_labels }}
          CURRENT_NUMBER: ${{ steps.extract.outputs.current_number }}
        with:
          script: |
            const integrationLabels = JSON.parse(process.env.INTEGRATION_LABELS);
            const currentNumber = parseInt(process.env.CURRENT_NUMBER);

            if (integrationLabels.length === 0) {
              console.log('No integration labels found, skipping duplicate detection');
              core.setOutput('has_similar', 'false');
              return;
            }

            // Use GitHub search API to find issues with matching integration labels
            console.log(`Searching for issues with integration labels: ${integrationLabels.join(', ')}`);

            // Build search query for issues with any of the current integration labels
            const labelQueries = integrationLabels.map(label => `label:"${label}"`);
            let searchQuery;

            if (labelQueries.length === 1) {
              searchQuery = `repo:${context.repo.owner}/${context.repo.repo} is:issue ${labelQueries[0]}`;
            } else {
              searchQuery = `repo:${context.repo.owner}/${context.repo.repo} is:issue (${labelQueries.join(' OR ')})`;
            }

            console.log(`Search query: ${searchQuery}`);

            let result;
            try {
              result = await github.rest.search.issuesAndPullRequests({
                q: searchQuery,
                per_page: 15,
                sort: 'updated',
                order: 'desc'
              });
            } catch (error) {
              core.error('Failed to search for similar issues:', error.message);
              if (error.status === 403 && error.message.includes('rate limit')) {
                core.error('GitHub API rate limit exceeded');
              }
              core.setOutput('has_similar', 'false');
              return;
            }

            // Filter out the current issue, pull requests, and newer issues (higher numbers)
            const similarIssues = result.data.items
              .filter(item =>
                item.number !== currentNumber &&
                !item.pull_request &&
                item.number < currentNumber // Only include older issues (lower numbers)
              )
              .map(item => ({
                number: item.number,
                title: item.title,
                body: item.body,
                url: item.html_url,
                state: item.state,
                createdAt: item.created_at,
                updatedAt: item.updated_at,
                comments: item.comments,
                labels: item.labels.map(l => l.name)
              }));

            console.log(`Found ${similarIssues.length} issues with matching integration labels`);
            console.log('Raw similar issues:', JSON.stringify(similarIssues.slice(0, 3), null, 2));

            if (similarIssues.length === 0) {
              console.log('No similar issues found, setting has_similar to false');
              core.setOutput('has_similar', 'false');
              return;
            }

            console.log('Similar issues found, setting has_similar to true');
            core.setOutput('has_similar', 'true');

            // Clean the issue data to prevent JSON parsing issues
            const cleanedIssues = similarIssues.slice(0, 15).map(item => {
              // Handle body with improved truncation and null handling
              let cleanBody = '';
              if (item.body && typeof item.body === 'string') {
                // Remove control characters
                const cleaned = item.body.replace(/[\u0000-\u001F\u007F-\u009F]/g, '');
                // Truncate to 1000 characters and add ellipsis if needed
                cleanBody = cleaned.length > 1000
                  ? cleaned.substring(0, 1000) + '...'
                  : cleaned;
              }

              return {
                number: item.number,
                title: item.title.replace(/[\u0000-\u001F\u007F-\u009F]/g, ''), // Remove control characters
                body: cleanBody,
                url: item.url,
                state: item.state,
                createdAt: item.createdAt,
                updatedAt: item.updatedAt,
                comments: item.comments,
                labels: item.labels
              };
            });

            console.log(`Cleaned issues count: ${cleanedIssues.length}`);
            console.log('First cleaned issue:', JSON.stringify(cleanedIssues[0], null, 2));

            core.setOutput('similar_issues', JSON.stringify(cleanedIssues));

      - name: Detect duplicates using AI
        id: ai_detection
        if: steps.extract.outputs.should_continue == 'true' && steps.fetch_similar.outputs.has_similar == 'true'
        uses: actions/[email protected]
        with:
          model: openai/gpt-4o-mini
          system-prompt: |
            You are a Home Assistant issue duplicate detector. Your task is to identify potential duplicate issues based on their content.

            Important considerations:
            - Open issues are more relevant than closed ones for duplicate detection
            - Recently updated issues may indicate ongoing work or discussion
            - Issues with more comments are generally more relevant and active
            - Higher comment count often indicates community engagement and importance
            - Older closed issues might be resolved differently than newer approaches
            - Consider the time between issues - very old issues may have different contexts

            Rules:
            1. Compare the current issue with the provided similar issues
            2. Look for issues that report the same problem or request the same functionality
            3. Consider different wording but same underlying issue as duplicates
            4. For CLOSED issues, only mark as duplicate if they describe the EXACT same problem
            5. For OPEN issues, use a lower threshold (70%+ similarity)
            6. Prioritize issues with higher comment counts as they indicate more activity/relevance
            7. Return ONLY a JSON array of issue numbers that are potential duplicates
            8. If no duplicates are found, return an empty array: []
            9. Maximum 5 potential duplicates, prioritize open issues with comments
            10. Consider the age of issues - prefer recent duplicates over very old ones

            Example response format:
            [1234, 5678, 9012]

          prompt: |
            Current issue (just created):
            Title: ${{ steps.extract.outputs.current_title }}
            Body: ${{ steps.extract.outputs.current_body }}

            Similar issues to compare against (each includes state, creation date, last update, and comment count):
            ${{ steps.fetch_similar.outputs.similar_issues }}

            Analyze these issues and identify which ones are potential duplicates of the current issue. Consider their state (open/closed), how recently they were updated, and their comment count (higher = more relevant).

          max-tokens: 100

      - name: Post duplicate detection results
        id: post_results
        if: steps.extract.outputs.should_continue == 'true' && steps.fetch_similar.outputs.has_similar == 'true'
        uses: actions/[email protected]
        env:
          AI_RESPONSE: ${{ steps.ai_detection.outputs.response }}
          SIMILAR_ISSUES: ${{ steps.fetch_similar.outputs.similar_issues }}
        with:
          script: |
            const aiResponse = process.env.AI_RESPONSE;

            console.log('Raw AI response:', JSON.stringify(aiResponse));

            let duplicateNumbers = [];
            try {
              // Clean the response of any potential control characters
              const cleanResponse = aiResponse.trim().replace(/[\u0000-\u001F\u007F-\u009F]/g, '');
              console.log('Cleaned AI response:', cleanResponse);

              duplicateNumbers = JSON.parse(cleanResponse);

              // Ensure it's an array and contains only numbers
              if (!Array.isArray(duplicateNumbers)) {
                console.log('AI response is not an array, trying to extract numbers');
                const numberMatches = cleanResponse.match(/\d+/g);
                duplicateNumbers = numberMatches ? numberMatches.map(n => parseInt(n)) : [];
              }

              // Filter to only valid numbers
              duplicateNumbers = duplicateNumbers.filter(n => typeof n === 'number' && !isNaN(n));

            } catch (error) {
              console.log('Failed to parse AI response as JSON:', error.message);
              console.log('Raw response:', aiResponse);

              // Fallback: try to extract numbers from the response
              const numberMatches = aiResponse.match(/\d+/g);
              duplicateNumbers = numberMatches ? numberMatches.map(n => parseInt(n)) : [];
              console.log('Extracted numbers as fallback:', duplicateNumbers);
            }

            if (!Array.isArray(duplicateNumbers) || duplicateNumbers.length === 0) {
              console.log('No duplicates detected by AI');
              return;
            }

            console.log(`AI detected ${duplicateNumbers.length} potential duplicates: ${duplicateNumbers.join(', ')}`);

            // Get details of detected duplicates
            const similarIssues = JSON.parse(process.env.SIMILAR_ISSUES);
            const duplicates = similarIssues.filter(issue => duplicateNumbers.includes(issue.number));

            if (duplicates.length === 0) {
              console.log('No matching issues found for detected numbers');
              return;
            }

            // Create comment with duplicate detection results
            const duplicateLinks = duplicates.map(issue => `- [#${issue.number}: ${issue.title}](${issue.url})`).join('\n');

            const commentBody = [
              '<!-- workflow: detect-duplicate-issues -->',
              '### 🔍 **Potential duplicate detection**',
              '',
              'I\'ve analyzed similar issues and found the following potential duplicates:',
              '',
              duplicateLinks,
              '',
              '**What to do next:**',
              '1. Please review these issues to see if they match your issue',
              '2. If you find an existing issue that covers your problem:',
              '   - Consider closing this issue',
              '   - Add your findings or 👍 on the existing issue instead',
              '3. If your issue is different or adds new aspects, please clarify how it differs',
              '',
              'This helps keep our issues organized and ensures similar issues are consolidated for better visibility.',
              '',
              '*This message was generated automatically by our duplicate detection system.*'
            ].join('\n');

            try {
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: context.payload.issue.number,
                body: commentBody
              });

              console.log(`Posted duplicate detection comment with ${duplicates.length} potential duplicates`);

              // Add the potential-duplicate label
              await github.rest.issues.addLabels({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: context.payload.issue.number,
                labels: ['potential-duplicate']
              });

              console.log('Added potential-duplicate label to the issue');
            } catch (error) {
              core.error('Failed to post duplicate detection comment or add label:', error.message);
              if (error.status === 403) {
                core.error('Permission denied or rate limit exceeded');
              }
              // Don't throw - we've done the analysis, just couldn't post the result
            }
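The response handling in the final step is the part most likely to need local testing, so here is a minimal standalone sketch of the same parse-then-fallback approach. It is not part of the commit: the file name and the sample model outputs are invented for illustration, and it assumes only Node.js, with no Actions context or github-script helpers.

// parse-duplicates.js - standalone sketch of the AI-response handling above (hypothetical file).
function parseDuplicateNumbers(aiResponse) {
  let duplicateNumbers = [];
  try {
    // Strip control characters, then attempt strict JSON parsing first.
    const cleanResponse = aiResponse.trim().replace(/[\u0000-\u001F\u007F-\u009F]/g, '');
    duplicateNumbers = JSON.parse(cleanResponse);
    if (!Array.isArray(duplicateNumbers)) {
      // Valid JSON but not an array (e.g. a bare number): fall back to digit extraction.
      const numberMatches = cleanResponse.match(/\d+/g);
      duplicateNumbers = numberMatches ? numberMatches.map(n => parseInt(n, 10)) : [];
    }
  } catch (error) {
    // Not valid JSON at all (e.g. prose wrapped around the array): regex fallback.
    const numberMatches = aiResponse.match(/\d+/g);
    duplicateNumbers = numberMatches ? numberMatches.map(n => parseInt(n, 10)) : [];
  }
  // Keep only real numbers, the same filter the workflow step applies.
  return duplicateNumbers.filter(n => typeof n === 'number' && !isNaN(n));
}

// Hypothetical model outputs:
console.log(parseDuplicateNumbers('[1234, 5678, 9012]'));                   // [ 1234, 5678, 9012 ]
console.log(parseDuplicateNumbers('Possible duplicates: #1234 and #5678')); // [ 1234, 5678 ]
console.log(parseDuplicateNumbers('[]'));                                   // []

Running the sketch with node prints the three arrays shown in the trailing comments; this tolerant behaviour is what lets the posting step keep working when the model answers with prose instead of the requested JSON array.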
