# Workflow header for the duplicate-issue detector. Everything after the
# `script : |` line at the end of this header is the JavaScript run by
# actions/github-script.
1+ ---
2+ name : Detect Duplicate Issues
3+
# Trigger: only the `opened` activity type of the `issues` event.
4+ on :
5+ issues :
6+ types :
7+ - opened
8+
# Token permissions requested for the run. `issues : write` is needed because
# the script posts a comment on the new issue. `models : read` is presumably
# what authorizes the model inference requests the script makes - confirm.
9+ permissions :
10+ contents : read
11+ issues : write
12+ models : read
13+
# Single job with a single step; all logic lives in the inline script.
14+ jobs :
15+ detect-duplicates :
16+ runs-on : ubuntu-latest
17+ steps :
18+ - name : Detect potential duplicate issues
19+ uses : actions/github-script@v7
20+ with :
21+ script : |
22+ const { owner, repo } = context.repo;
23+ const issueNumber = context.issue.number;
24+
25+ // Get the newly created issue
26+ const { data: newIssue } = await github.rest.issues.get({
27+ owner,
28+ repo,
29+ issue_number: issueNumber,
30+ });
31+
32+ // Skip if the issue is a pull request
33+ if (newIssue.pull_request) {
34+ console.log('Skipping pull request');
35+ return;
36+ }
37+
38+ console.log('Analyzing issue #' + issueNumber + ': "' + newIssue.title + '"');
39+
40+ // Get existing open issues (excluding the current one)
41+ const { data: existingIssues } = await github.rest.issues.listForRepo({
42+ owner,
43+ repo,
44+ state: 'open',
45+ per_page: 100,
46+ });
47+
48+ // Filter out pull requests and the current issue
49+ const openIssues = existingIssues.filter(issue =>
50+ !issue.pull_request && issue.number !== issueNumber
51+ );
52+
53+ console.log('Found ' + openIssues.length + ' existing open issues to compare against');
54+
55+ if (openIssues.length === 0) {
56+ console.log('No existing issues to compare against');
57+ return;
58+ }
59+
60+ // Use GitHub Models to find potential duplicates
61+ const duplicates = [];
62+
63+ if (openIssues.length === 0) {
64+ console.log('No existing issues to compare against');
65+ return;
66+ }
67+
68+ console.log('Analyzing ' + openIssues.length + ' existing issues for potential duplicates');
69+
70+ try {
/**
 * Prepare untrusted issue text for inclusion in a model prompt.
 *
 * Backticks, single/double quotes and backslashes are replaced with spaces
 * (they are removed, not escaped), and the result is truncated to at most
 * `maxLength` UTF-16 code units. If the cut lands in the middle of a
 * surrogate pair, the dangling half is dropped so the prompt never contains
 * a lone surrogate.
 *
 * @param {*} content - Issue title or body; non-strings are stringified.
 * @param {number} [maxLength=500] - Maximum length of the returned text.
 * @returns {string} The cleaned text, or 'No description provided' when
 *   `content` is falsy (null, undefined, empty string, ...).
 */
function sanitizeContent(content, maxLength = 500) {
  if (!content) return 'No description provided';

  const truncated = String(content).replace(/[`'"\\]/g, ' ').slice(0, maxLength);

  // A high surrogate (0xD800-0xDBFF) in last position means its partner was
  // cut off by the slice above.
  const lastUnit = truncated.charCodeAt(truncated.length - 1);
  const endsWithHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
  return endsWithHighSurrogate ? truncated.slice(0, -1) : truncated;
}
76+
/**
 * Run `apiCallFn` and retry transient failures with exponential backoff.
 *
 * A call is retried when it throws, or when it resolves to a response whose
 * status is 408, 429 or >= 500. Any other non-ok response (for example 400,
 * 401, 403) is returned immediately, because repeating the same request
 * cannot succeed. The wait before retry number k (0-based) is
 * `baseDelayMs * 2^k`, i.e. 1s then 2s with the defaults.
 *
 * @param {() => Promise<{ok: boolean, status: number}>} apiCallFn - Performs
 *   one attempt and resolves to a fetch-style response.
 * @param {number} [maxRetries=2] - Retries after the first attempt; negative
 *   or non-numeric values are treated as 0.
 * @param {number} [baseDelayMs=1000] - Delay before the first retry.
 * @returns {Promise<object>} The first ok response, the first non-retryable
 *   response, or the last response once retries are exhausted (callers must
 *   still check `.ok`).
 * @throws Whatever `apiCallFn` throws on the final attempt.
 */
async function retryApiCall(apiCallFn, maxRetries = 2, baseDelayMs = 1000) {
  const retries = Math.max(0, Math.trunc(maxRetries) || 0);
  const wait = ms => new Promise(resolve => setTimeout(resolve, ms));

  for (let attempt = 0; attempt <= retries; attempt++) {
    const isLastAttempt = attempt === retries;
    const delay = Math.pow(2, attempt) * baseDelayMs;

    try {
      const response = await apiCallFn();
      const isTransient =
        response.status === 408 || response.status === 429 || response.status >= 500;

      if (response.ok || isLastAttempt || !isTransient) return response;

      console.log('API call failed, retrying in ' + delay + 'ms (attempt ' + (attempt + 1) + '/' + (retries + 1) + ')');
    } catch (error) {
      if (isLastAttempt) throw error;
      console.log('API call error, retrying in ' + delay + 'ms: ' + error.message);
    }

    await wait(delay);
  }
}
99+
100+ // Limit the number of issues to analyze to prevent token overflow
101+ const maxIssuesForAnalysis = Math.min(openIssues.length, 50); // Limit to 50 issues max
102+ const issuesToAnalyze = openIssues.slice(0, maxIssuesForAnalysis);
103+
104+ if (issuesToAnalyze.length < openIssues.length) {
105+ console.log('Limiting analysis to ' + maxIssuesForAnalysis + ' most recent issues (out of ' + openIssues.length + ' total)');
106+ }
107+
108+ // Step 1: Send issue titles and numbers to get top 5 candidates
109+ let titlePrompt = 'Analyze this NEW ISSUE against EXISTING ISSUES and identify the top 5 most similar ones:\n\n';
110+ titlePrompt += 'NEW ISSUE:\n';
111+ titlePrompt += 'Title: ' + sanitizeContent(newIssue.title) + '\n';
112+ titlePrompt += 'Body: ' + sanitizeContent(newIssue.body) + '\n\n';
113+ titlePrompt += 'EXISTING ISSUES:\n';
114+
115+ issuesToAnalyze.forEach((issue, index) => {
116+ titlePrompt += (index + 1) + '. Issue #' + issue.number + ' - ' + sanitizeContent(issue.title) + '\n';
117+ });
118+
119+ titlePrompt += '\nRespond with a JSON object containing the top 5 most similar issues. Format: {"similar_issues": [{"rank": 1, "issue_number": 123, "similarity": "high|medium"}, ...]}';
120+
121+ const titleResponse = await retryApiCall(() =>
122+ fetch('https://models.inference.ai.azure.com/chat/completions', {
123+ method: 'POST',
124+ headers: {
125+ 'Authorization': 'Bearer ' + github.token,
126+ 'Content-Type': 'application/json',
127+ },
128+ body: JSON.stringify({
129+ messages: [
130+ {
131+ role: 'system',
132+ content: 'You are an expert at analyzing GitHub issues to detect duplicates. Compare issue titles and descriptions to identify the most similar ones. Respond only with valid JSON containing the top 5 most similar issues ranked by relevance. Use "high" for likely duplicates and "medium" for related issues.'
133+ },
134+ {
135+ role: 'user',
136+ content: titlePrompt
137+ }
138+ ],
139+ model: 'gpt-4o-mini',
140+ temperature: 0.1,
141+ max_tokens: 200
142+ })
143+ })
144+ );
145+
146+ if (!titleResponse.ok) {
147+ const errorText = await titleResponse.text();
148+ console.log('First AI call failed after retries: ' + titleResponse.status + ' - ' + errorText);
149+ return;
150+ }
151+
152+ const titleResult = await titleResponse.json();
153+ const titleAnalysis = titleResult.choices[0]?.message?.content?.trim();
154+ console.log('AI title analysis result: ' + titleAnalysis);
155+
156+ // Parse JSON response to get top 5 candidates
157+ let candidateIssueNumbers = [];
158+ try {
159+ const jsonMatch = titleAnalysis.match(/\{.*\}/s);
160+ if (jsonMatch) {
161+ const jsonData = JSON.parse(jsonMatch[0]);
162+ candidateIssueNumbers = jsonData.similar_issues || [];
163+ }
164+ } catch (parseError) {
165+ console.log('Failed to parse JSON response, falling back to number extraction');
166+ // Fallback: extract issue numbers from response
167+ const numberMatches = titleAnalysis.match(/#(\d+)/g);
168+ if (numberMatches) {
169+ candidateIssueNumbers = numberMatches.slice(0, 5).map(match => ({
170+ issue_number: parseInt(match.replace('#', '')),
171+ similarity: 'medium'
172+ }));
173+ }
174+ }
175+
176+ if (candidateIssueNumbers.length === 0) {
177+ console.log('No candidate issues identified in first pass');
178+ return;
179+ }
180+
181+ console.log('Found ' + candidateIssueNumbers.length + ' candidate issues from title analysis');
182+
183+ // Step 2: Get full details for top candidates and do detailed analysis
184+ const candidateIssues = [];
185+ for (const candidate of candidateIssueNumbers) {
186+ const issue = openIssues.find(i => i.number === candidate.issue_number);
187+ if (issue) {
188+ candidateIssues.push({
189+ issue,
190+ initialSimilarity: candidate.similarity
191+ });
192+ }
193+ }
194+
195+ if (candidateIssues.length === 0) {
196+ console.log('No valid candidate issues found');
197+ return;
198+ }
199+
// Step 3: Detailed analysis with full issue bodies
let detailPrompt = 'Perform detailed comparison of this NEW ISSUE against the TOP CANDIDATE ISSUES:\n\n';
detailPrompt += 'NEW ISSUE:\n';
detailPrompt += 'Title: ' + sanitizeContent(newIssue.title) + '\n';
detailPrompt += 'Body: ' + sanitizeContent(newIssue.body) + '\n\n';
detailPrompt += 'CANDIDATE ISSUES FOR DETAILED ANALYSIS:\n';

candidateIssues.forEach((candidate, index) => {
  detailPrompt += (index + 1) + '. Issue #' + candidate.issue.number + '\n';
  detailPrompt += '   Title: ' + sanitizeContent(candidate.issue.title) + '\n';
  detailPrompt += '   Body: ' + sanitizeContent(candidate.issue.body) + '\n\n';
});

detailPrompt += 'Respond with JSON format: {"duplicates": [{"issue_number": 123, "classification": "DUPLICATE|SIMILAR|DIFFERENT", "reason": "brief explanation"}]}';

const detailResponse = await retryApiCall(() =>
  fetch('https://models.inference.ai.azure.com/chat/completions', {
    method: 'POST',
    headers: {
      // `github` here is the authenticated API client, which does not expose
      // the token as a property; read the token from the action's
      // `github-token` input instead of sending "Bearer undefined".
      'Authorization': 'Bearer ' + core.getInput('github-token'),
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      messages: [
        {
          role: 'system',
          content: 'You are an expert at analyzing GitHub issues for duplicates. Compare the full content and determine: DUPLICATE (same core problem), SIMILAR (related but different aspects), or DIFFERENT (unrelated). Respond only with valid JSON.'
        },
        {
          role: 'user',
          content: detailPrompt
        }
      ],
      model: 'gpt-4o-mini',
      temperature: 0.1,
      max_tokens: 300
    })
  })
);

if (detailResponse.ok) {
  const detailResult = await detailResponse.json();
  // Missing `choices`/content is treated as an empty reply.
  const detailAnalysis = detailResult.choices?.[0]?.message?.content?.trim() || '';
  console.log('AI detailed analysis result: ' + detailAnalysis);

  // Record one match per issue: DUPLICATE maps to 'high', anything else
  // passed in (SIMILAR) maps to 'medium'. Repeats are ignored so the same
  // issue is never listed twice in the comment.
  const recordMatch = (issue, classification, logPrefix) => {
    if (duplicates.some(d => d.issue.number === issue.number)) return;
    duplicates.push({
      issue,
      similarity: classification === 'DUPLICATE' ? 'high' : 'medium'
    });
    console.log(logPrefix + issue.number + ' - ' + issue.title);
  };

  // Parse detailed analysis JSON
  try {
    const jsonMatch = detailAnalysis.match(/\{.*\}/s);
    if (jsonMatch) {
      const jsonData = JSON.parse(jsonMatch[0]);
      const results = Array.isArray(jsonData.duplicates) ? jsonData.duplicates : [];

      for (const result of results) {
        if (result.classification === 'DUPLICATE' || result.classification === 'SIMILAR') {
          // Normalize in case the number came back as a string.
          const resultNumber = Number(result.issue_number);
          const issue = candidateIssues.find(c => c.issue.number === resultNumber)?.issue;
          if (issue) {
            recordMatch(issue, result.classification, 'Found ' + result.classification.toLowerCase() + ' issue: #');
          }
        }
      }
    }
  } catch (parseError) {
    console.log('Failed to parse detailed analysis JSON, using fallback');
    // Fallback for malformed (e.g. truncated) replies: pair each issue
    // reference with the first classification keyword that follows it inside
    // the same object, instead of letting one "DUPLICATE" anywhere in the
    // text apply to every candidate. Accepts both `#123` and
    // `"issue_number": 123`; `\b` keeps #12 from matching #123.
    const pairPattern = /(?:#|"issue_number"\s*:\s*"?)(\d+)\b[^{}]*?\b(DUPLICATE|SIMILAR|DIFFERENT)\b/g;
    const classificationByNumber = new Map();
    for (const [, numberText, classification] of detailAnalysis.matchAll(pairPattern)) {
      const mentionedNumber = Number.parseInt(numberText, 10);
      if (!classificationByNumber.has(mentionedNumber)) {
        classificationByNumber.set(mentionedNumber, classification);
      }
    }

    candidateIssues.forEach(candidate => {
      const classification = classificationByNumber.get(candidate.issue.number);
      if (classification === 'DUPLICATE' || classification === 'SIMILAR') {
        recordMatch(candidate.issue, classification, 'Found similar issue (fallback): #');
      }
    });
  }
} else {
  const errorText = await detailResponse.text();
  console.log('Detailed analysis failed after retries: ' + detailResponse.status + ' - ' + errorText);
}
284+
285+ } catch (error) {
286+ console.log('Error in AI analysis: ' + error.message);
287+ }
288+
// Post comment if duplicates found
if (duplicates.length > 0) {
  // Issue titles are user-controlled. Backslash-escape the characters that
  // would terminate or corrupt the Markdown link text they are placed in.
  const escapeLinkText = text => String(text).replace(/[\\\[\]]/g, '\\$&');

  // Render one heading plus a bullet per match; empty groups render nothing.
  const renderSection = (heading, matches) => {
    if (matches.length === 0) return '';
    const bullets = matches.map(({ issue }) =>
      '- #' + issue.number + ' - [' + escapeLinkText(issue.title) + '](' + issue.html_url + ')\n'
    );
    return heading + bullets.join('') + '\n';
  };

  const highPriority = duplicates.filter(d => d.similarity === 'high');
  const mediumPriority = duplicates.filter(d => d.similarity === 'medium');

  let commentBody = '👋 **Potential duplicate issues detected**\n\n';
  commentBody += 'This issue appears to be similar to existing open issues:\n\n';
  commentBody += renderSection('### 🚨 Likely Duplicates\n', highPriority);
  commentBody += renderSection('### 🔍 Similar Issues\n', mediumPriority);
  commentBody += 'Please review these issues to see if your issue is already covered. ';
  commentBody += 'If this is indeed a duplicate, consider closing this issue and contributing to the existing discussion.\n\n';
  commentBody += '---\n';
  commentBody += '*This comment was automatically generated using AI to help identify potential duplicates.*';

  await github.rest.issues.createComment({
    owner,
    repo,
    issue_number: issueNumber,
    body: commentBody,
  });

  console.log('Posted comment with ' + duplicates.length + ' potential duplicate(s)');
} else {
  console.log('No potential duplicates found');
}
0 commit comments