|
name: Auto-detect duplicate issues

# yamllint disable-line rule:truthy
on:
  issues:
    types: [labeled]

# Minimum permissions: comment/label issues and call GitHub Models.
permissions:
  issues: write
  models: read

jobs:
  detect-duplicates:
    runs-on: ubuntu-latest

    steps:
| 17 | + - name: Check if integration label was added and extract details |
| 18 | + id: extract |
| 19 | + |
| 20 | + with: |
| 21 | + script: | |
| 22 | + // Debug: Log the event payload |
| 23 | + console.log('Event name:', context.eventName); |
| 24 | + console.log('Event action:', context.payload.action); |
| 25 | + console.log('Event payload keys:', Object.keys(context.payload)); |
| 26 | +
|
| 27 | + // Check the specific label that was added |
| 28 | + const addedLabel = context.payload.label; |
| 29 | + if (!addedLabel) { |
| 30 | + console.log('No label found in labeled event payload'); |
| 31 | + core.setOutput('should_continue', 'false'); |
| 32 | + return; |
| 33 | + } |
| 34 | +
|
| 35 | + console.log(`Label added: ${addedLabel.name}`); |
| 36 | +
|
| 37 | + if (!addedLabel.name.startsWith('integration:')) { |
| 38 | + console.log('Added label is not an integration label, skipping duplicate detection'); |
| 39 | + core.setOutput('should_continue', 'false'); |
| 40 | + return; |
| 41 | + } |
| 42 | +
|
| 43 | + console.log(`Integration label added: ${addedLabel.name}`); |
| 44 | +
|
| 45 | + let currentIssue; |
| 46 | + let integrationLabels = []; |
| 47 | +
|
| 48 | + try { |
| 49 | + const issue = await github.rest.issues.get({ |
| 50 | + owner: context.repo.owner, |
| 51 | + repo: context.repo.repo, |
| 52 | + issue_number: context.payload.issue.number |
| 53 | + }); |
| 54 | +
|
| 55 | + currentIssue = issue.data; |
| 56 | +
|
| 57 | + // Check if potential-duplicate label already exists |
| 58 | + const hasPotentialDuplicateLabel = currentIssue.labels |
| 59 | + .some(label => label.name === 'potential-duplicate'); |
| 60 | +
|
| 61 | + if (hasPotentialDuplicateLabel) { |
| 62 | + console.log('Issue already has potential-duplicate label, skipping duplicate detection'); |
| 63 | + core.setOutput('should_continue', 'false'); |
| 64 | + return; |
| 65 | + } |
| 66 | +
|
| 67 | + integrationLabels = currentIssue.labels |
| 68 | + .filter(label => label.name.startsWith('integration:')) |
| 69 | + .map(label => label.name); |
| 70 | + } catch (error) { |
| 71 | + core.error(`Failed to fetch issue #${context.payload.issue.number}:`, error.message); |
| 72 | + core.setOutput('should_continue', 'false'); |
| 73 | + return; |
| 74 | + } |
| 75 | +
|
| 76 | + // Check if we've already posted a duplicate detection comment recently |
| 77 | + let comments; |
| 78 | + try { |
| 79 | + comments = await github.rest.issues.listComments({ |
| 80 | + owner: context.repo.owner, |
| 81 | + repo: context.repo.repo, |
| 82 | + issue_number: context.payload.issue.number, |
| 83 | + per_page: 10 |
| 84 | + }); |
| 85 | + } catch (error) { |
| 86 | + core.error('Failed to fetch comments:', error.message); |
| 87 | + // Continue anyway, worst case we might post a duplicate comment |
| 88 | + comments = { data: [] }; |
| 89 | + } |
| 90 | +
|
| 91 | + // Check if we've already posted a duplicate detection comment |
| 92 | + const recentDuplicateComment = comments.data.find(comment => |
| 93 | + comment.user && comment.user.login === 'github-actions[bot]' && |
| 94 | + comment.body.includes('<!-- workflow: detect-duplicate-issues -->') |
| 95 | + ); |
| 96 | +
|
| 97 | + if (recentDuplicateComment) { |
| 98 | + console.log('Already posted duplicate detection comment, skipping'); |
| 99 | + core.setOutput('should_continue', 'false'); |
| 100 | + return; |
| 101 | + } |
| 102 | +
|
| 103 | + core.setOutput('should_continue', 'true'); |
| 104 | + core.setOutput('current_number', currentIssue.number); |
| 105 | + core.setOutput('current_title', currentIssue.title); |
| 106 | + core.setOutput('current_body', currentIssue.body); |
| 107 | + core.setOutput('current_url', currentIssue.html_url); |
| 108 | + core.setOutput('integration_labels', JSON.stringify(integrationLabels)); |
| 109 | +
|
| 110 | + console.log(`Current issue: #${currentIssue.number}`); |
| 111 | + console.log(`Integration labels: ${integrationLabels.join(', ')}`); |
| 112 | +
|
| 113 | + - name: Fetch similar issues |
| 114 | + id: fetch_similar |
| 115 | + if: steps.extract.outputs.should_continue == 'true' |
| 116 | + |
| 117 | + env: |
| 118 | + INTEGRATION_LABELS: ${{ steps.extract.outputs.integration_labels }} |
| 119 | + CURRENT_NUMBER: ${{ steps.extract.outputs.current_number }} |
| 120 | + with: |
| 121 | + script: | |
| 122 | + const integrationLabels = JSON.parse(process.env.INTEGRATION_LABELS); |
| 123 | + const currentNumber = parseInt(process.env.CURRENT_NUMBER); |
| 124 | +
|
| 125 | + if (integrationLabels.length === 0) { |
| 126 | + console.log('No integration labels found, skipping duplicate detection'); |
| 127 | + core.setOutput('has_similar', 'false'); |
| 128 | + return; |
| 129 | + } |
| 130 | +
|
| 131 | + // Use GitHub search API to find issues with matching integration labels |
| 132 | + console.log(`Searching for issues with integration labels: ${integrationLabels.join(', ')}`); |
| 133 | +
|
| 134 | + // Build search query for issues with any of the current integration labels |
| 135 | + const labelQueries = integrationLabels.map(label => `label:"${label}"`); |
| 136 | + let searchQuery; |
| 137 | +
|
| 138 | + if (labelQueries.length === 1) { |
| 139 | + searchQuery = `repo:${context.repo.owner}/${context.repo.repo} is:issue ${labelQueries[0]}`; |
| 140 | + } else { |
| 141 | + searchQuery = `repo:${context.repo.owner}/${context.repo.repo} is:issue (${labelQueries.join(' OR ')})`; |
| 142 | + } |
| 143 | +
|
| 144 | + console.log(`Search query: ${searchQuery}`); |
| 145 | +
|
| 146 | + let result; |
| 147 | + try { |
| 148 | + result = await github.rest.search.issuesAndPullRequests({ |
| 149 | + q: searchQuery, |
| 150 | + per_page: 15, |
| 151 | + sort: 'updated', |
| 152 | + order: 'desc' |
| 153 | + }); |
| 154 | + } catch (error) { |
| 155 | + core.error('Failed to search for similar issues:', error.message); |
| 156 | + if (error.status === 403 && error.message.includes('rate limit')) { |
| 157 | + core.error('GitHub API rate limit exceeded'); |
| 158 | + } |
| 159 | + core.setOutput('has_similar', 'false'); |
| 160 | + return; |
| 161 | + } |
| 162 | +
|
| 163 | + // Filter out the current issue, pull requests, and newer issues (higher numbers) |
| 164 | + const similarIssues = result.data.items |
| 165 | + .filter(item => |
| 166 | + item.number !== currentNumber && |
| 167 | + !item.pull_request && |
| 168 | + item.number < currentNumber // Only include older issues (lower numbers) |
| 169 | + ) |
| 170 | + .map(item => ({ |
| 171 | + number: item.number, |
| 172 | + title: item.title, |
| 173 | + body: item.body, |
| 174 | + url: item.html_url, |
| 175 | + state: item.state, |
| 176 | + createdAt: item.created_at, |
| 177 | + updatedAt: item.updated_at, |
| 178 | + comments: item.comments, |
| 179 | + labels: item.labels.map(l => l.name) |
| 180 | + })); |
| 181 | +
|
| 182 | + console.log(`Found ${similarIssues.length} issues with matching integration labels`); |
| 183 | + console.log('Raw similar issues:', JSON.stringify(similarIssues.slice(0, 3), null, 2)); |
| 184 | +
|
| 185 | + if (similarIssues.length === 0) { |
| 186 | + console.log('No similar issues found, setting has_similar to false'); |
| 187 | + core.setOutput('has_similar', 'false'); |
| 188 | + return; |
| 189 | + } |
| 190 | +
|
| 191 | + console.log('Similar issues found, setting has_similar to true'); |
| 192 | + core.setOutput('has_similar', 'true'); |
| 193 | +
|
| 194 | + // Clean the issue data to prevent JSON parsing issues |
| 195 | + const cleanedIssues = similarIssues.slice(0, 15).map(item => { |
| 196 | + // Handle body with improved truncation and null handling |
| 197 | + let cleanBody = ''; |
| 198 | + if (item.body && typeof item.body === 'string') { |
| 199 | + // Remove control characters |
| 200 | + const cleaned = item.body.replace(/[\u0000-\u001F\u007F-\u009F]/g, ''); |
| 201 | + // Truncate to 1000 characters and add ellipsis if needed |
| 202 | + cleanBody = cleaned.length > 1000 |
| 203 | + ? cleaned.substring(0, 1000) + '...' |
| 204 | + : cleaned; |
| 205 | + } |
| 206 | +
|
| 207 | + return { |
| 208 | + number: item.number, |
| 209 | + title: item.title.replace(/[\u0000-\u001F\u007F-\u009F]/g, ''), // Remove control characters |
| 210 | + body: cleanBody, |
| 211 | + url: item.url, |
| 212 | + state: item.state, |
| 213 | + createdAt: item.createdAt, |
| 214 | + updatedAt: item.updatedAt, |
| 215 | + comments: item.comments, |
| 216 | + labels: item.labels |
| 217 | + }; |
| 218 | + }); |
| 219 | +
|
| 220 | + console.log(`Cleaned issues count: ${cleanedIssues.length}`); |
| 221 | + console.log('First cleaned issue:', JSON.stringify(cleanedIssues[0], null, 2)); |
| 222 | +
|
| 223 | + core.setOutput('similar_issues', JSON.stringify(cleanedIssues)); |
| 224 | +
|
| 225 | + - name: Detect duplicates using AI |
| 226 | + id: ai_detection |
| 227 | + if: steps.extract.outputs.should_continue == 'true' && steps.fetch_similar.outputs.has_similar == 'true' |
| 228 | + |
| 229 | + with: |
| 230 | + model: openai/gpt-4o-mini |
| 231 | + system-prompt: | |
| 232 | + You are a Home Assistant issue duplicate detector. Your task is to identify potential duplicate issues based on their content. |
| 233 | +
|
| 234 | + Important considerations: |
| 235 | + - Open issues are more relevant than closed ones for duplicate detection |
| 236 | + - Recently updated issues may indicate ongoing work or discussion |
| 237 | + - Issues with more comments are generally more relevant and active |
| 238 | + - Higher comment count often indicates community engagement and importance |
| 239 | + - Older closed issues might be resolved differently than newer approaches |
| 240 | + - Consider the time between issues - very old issues may have different contexts |
| 241 | +
|
| 242 | + Rules: |
| 243 | + 1. Compare the current issue with the provided similar issues |
| 244 | + 2. Look for issues that report the same problem or request the same functionality |
| 245 | + 3. Consider different wording but same underlying issue as duplicates |
| 246 | + 4. For CLOSED issues, only mark as duplicate if they describe the EXACT same problem |
| 247 | + 5. For OPEN issues, use a lower threshold (70%+ similarity) |
| 248 | + 6. Prioritize issues with higher comment counts as they indicate more activity/relevance |
| 249 | + 7. Return ONLY a JSON array of issue numbers that are potential duplicates |
| 250 | + 8. If no duplicates are found, return an empty array: [] |
| 251 | + 9. Maximum 5 potential duplicates, prioritize open issues with comments |
| 252 | + 10. Consider the age of issues - prefer recent duplicates over very old ones |
| 253 | +
|
| 254 | + Example response format: |
| 255 | + [1234, 5678, 9012] |
| 256 | +
|
| 257 | + prompt: | |
| 258 | + Current issue (just created): |
| 259 | + Title: ${{ steps.extract.outputs.current_title }} |
| 260 | + Body: ${{ steps.extract.outputs.current_body }} |
| 261 | +
|
| 262 | + Similar issues to compare against (each includes state, creation date, last update, and comment count): |
| 263 | + ${{ steps.fetch_similar.outputs.similar_issues }} |
| 264 | +
|
| 265 | + Analyze these issues and identify which ones are potential duplicates of the current issue. Consider their state (open/closed), how recently they were updated, and their comment count (higher = more relevant). |
| 266 | +
|
| 267 | + max-tokens: 100 |
| 268 | + |
| 269 | + - name: Post duplicate detection results |
| 270 | + id: post_results |
| 271 | + if: steps.extract.outputs.should_continue == 'true' && steps.fetch_similar.outputs.has_similar == 'true' |
| 272 | + |
| 273 | + env: |
| 274 | + AI_RESPONSE: ${{ steps.ai_detection.outputs.response }} |
| 275 | + SIMILAR_ISSUES: ${{ steps.fetch_similar.outputs.similar_issues }} |
| 276 | + with: |
| 277 | + script: | |
| 278 | + const aiResponse = process.env.AI_RESPONSE; |
| 279 | +
|
| 280 | + console.log('Raw AI response:', JSON.stringify(aiResponse)); |
| 281 | +
|
| 282 | + let duplicateNumbers = []; |
| 283 | + try { |
| 284 | + // Clean the response of any potential control characters |
| 285 | + const cleanResponse = aiResponse.trim().replace(/[\u0000-\u001F\u007F-\u009F]/g, ''); |
| 286 | + console.log('Cleaned AI response:', cleanResponse); |
| 287 | +
|
| 288 | + duplicateNumbers = JSON.parse(cleanResponse); |
| 289 | +
|
| 290 | + // Ensure it's an array and contains only numbers |
| 291 | + if (!Array.isArray(duplicateNumbers)) { |
| 292 | + console.log('AI response is not an array, trying to extract numbers'); |
| 293 | + const numberMatches = cleanResponse.match(/\d+/g); |
| 294 | + duplicateNumbers = numberMatches ? numberMatches.map(n => parseInt(n)) : []; |
| 295 | + } |
| 296 | +
|
| 297 | + // Filter to only valid numbers |
| 298 | + duplicateNumbers = duplicateNumbers.filter(n => typeof n === 'number' && !isNaN(n)); |
| 299 | +
|
| 300 | + } catch (error) { |
| 301 | + console.log('Failed to parse AI response as JSON:', error.message); |
| 302 | + console.log('Raw response:', aiResponse); |
| 303 | +
|
| 304 | + // Fallback: try to extract numbers from the response |
| 305 | + const numberMatches = aiResponse.match(/\d+/g); |
| 306 | + duplicateNumbers = numberMatches ? numberMatches.map(n => parseInt(n)) : []; |
| 307 | + console.log('Extracted numbers as fallback:', duplicateNumbers); |
| 308 | + } |
| 309 | +
|
| 310 | + if (!Array.isArray(duplicateNumbers) || duplicateNumbers.length === 0) { |
| 311 | + console.log('No duplicates detected by AI'); |
| 312 | + return; |
| 313 | + } |
| 314 | +
|
| 315 | + console.log(`AI detected ${duplicateNumbers.length} potential duplicates: ${duplicateNumbers.join(', ')}`); |
| 316 | +
|
| 317 | + // Get details of detected duplicates |
| 318 | + const similarIssues = JSON.parse(process.env.SIMILAR_ISSUES); |
| 319 | + const duplicates = similarIssues.filter(issue => duplicateNumbers.includes(issue.number)); |
| 320 | +
|
| 321 | + if (duplicates.length === 0) { |
| 322 | + console.log('No matching issues found for detected numbers'); |
| 323 | + return; |
| 324 | + } |
| 325 | +
|
| 326 | + // Create comment with duplicate detection results |
| 327 | + const duplicateLinks = duplicates.map(issue => `- [#${issue.number}: ${issue.title}](${issue.url})`).join('\n'); |
| 328 | +
|
| 329 | + const commentBody = [ |
| 330 | + '<!-- workflow: detect-duplicate-issues -->', |
| 331 | + '### 🔍 **Potential duplicate detection**', |
| 332 | + '', |
| 333 | + 'I\'ve analyzed similar issues and found the following potential duplicates:', |
| 334 | + '', |
| 335 | + duplicateLinks, |
| 336 | + '', |
| 337 | + '**What to do next:**', |
| 338 | + '1. Please review these issues to see if they match your issue', |
| 339 | + '2. If you find an existing issue that covers your problem:', |
| 340 | + ' - Consider closing this issue', |
| 341 | + ' - Add your findings or 👍 on the existing issue instead', |
| 342 | + '3. If your issue is different or adds new aspects, please clarify how it differs', |
| 343 | + '', |
| 344 | + 'This helps keep our issues organized and ensures similar issues are consolidated for better visibility.', |
| 345 | + '', |
| 346 | + '*This message was generated automatically by our duplicate detection system.*' |
| 347 | + ].join('\n'); |
| 348 | +
|
| 349 | + try { |
| 350 | + await github.rest.issues.createComment({ |
| 351 | + owner: context.repo.owner, |
| 352 | + repo: context.repo.repo, |
| 353 | + issue_number: context.payload.issue.number, |
| 354 | + body: commentBody |
| 355 | + }); |
| 356 | +
|
| 357 | + console.log(`Posted duplicate detection comment with ${duplicates.length} potential duplicates`); |
| 358 | +
|
| 359 | + // Add the potential-duplicate label |
| 360 | + await github.rest.issues.addLabels({ |
| 361 | + owner: context.repo.owner, |
| 362 | + repo: context.repo.repo, |
| 363 | + issue_number: context.payload.issue.number, |
| 364 | + labels: ['potential-duplicate'] |
| 365 | + }); |
| 366 | +
|
| 367 | + console.log('Added potential-duplicate label to the issue'); |
| 368 | + } catch (error) { |
| 369 | + core.error('Failed to post duplicate detection comment or add label:', error.message); |
| 370 | + if (error.status === 403) { |
| 371 | + core.error('Permission denied or rate limit exceeded'); |
| 372 | + } |
| 373 | + // Don't throw - we've done the analysis, just couldn't post the result |
| 374 | + } |