@@ -18,10 +18,7 @@ def run_gh_command(args):
1818 """Run a GitHub CLI command and return the output."""
1919 try :
2020 result = subprocess .run (
21- ["gh" ] + args ,
22- capture_output = True ,
23- text = True ,
24- check = True
21+ ["gh" ] + args , capture_output = True , text = True , check = True
2522 )
2623 return result .stdout
2724 except subprocess .CalledProcessError as e :
@@ -33,128 +30,190 @@ def run_gh_command(args):
def get_all_issues(label="conductor:task", limit=200, closed_limit=50):
    """Get open and closed issues that carry the specified label.

    Runs ``gh issue list`` once per state through ``run_gh_command`` and
    concatenates the decoded JSON results.

    Args:
        label: Label passed to ``--label``.
        limit: Maximum number of open issues to request.
        closed_limit: Maximum number of closed issues to request. Defaults
            to 50, the value that used to be hard-coded.

    Returns:
        A list of issue dicts (fields: number, title, body, labels, state),
        open issues first, then closed ones. A query whose output is falsy
        contributes nothing, so the list may be empty.
    """
    all_issues = []

    # One query per state; only the state name and its limit differ.
    for state, state_limit in (("open", limit), ("closed", closed_limit)):
        output = run_gh_command(
            [
                "issue",
                "list",
                "--label",
                label,
                "--state",
                state,
                "--limit",
                str(state_limit),
                "--json",
                "number,title,body,labels,state",
            ]
        )
        # run_gh_command may hand back a falsy value (e.g. on failure);
        # skip it instead of feeding it to json.loads.
        if output:
            all_issues.extend(json.loads(output))

    return all_issues
6071
6172
# Common English words that carry no signal when comparing issues.
# Built once at import time instead of on every call.
_STOP_WORDS = frozenset(
    {
        "the", "a", "an", "and", "or", "but", "in", "on", "at", "to", "for",
        "of", "with", "by", "from", "up", "about", "into", "through",
        "during", "before", "after", "above", "below", "between", "under",
        "again", "further", "then", "once", "is", "are", "was", "were", "be",
        "been", "being", "have", "has", "had", "do", "does", "did", "will",
        "would", "could", "should", "may", "might", "must", "shall", "can",
        "need",
    }
)

# A "word" is a maximal run of \w characters (letters, digits, underscore).
_WORD_RE = re.compile(r"\b\w+\b")


def extract_keywords(text):
    """Extract meaningful keywords from text.

    The text is lower-cased and split into word tokens; stop words and
    tokens of two characters or fewer are discarded.

    Args:
        text: Free-form text. ``None`` or an empty string yields no keywords.

    Returns:
        A set of lower-case keyword strings.
    """
    if not text:
        return set()

    return {
        word
        for word in _WORD_RE.findall(text.lower())
        if len(word) > 2 and word not in _STOP_WORDS
    }
81139
82140
def calculate_similarity(title1, title2, body1="", body2=""):
    """Calculate similarity between two issues.

    Two measures are combined:

    * title similarity - ``SequenceMatcher`` ratio of the lower-cased titles;
    * keyword overlap - Jaccard ratio (intersection over union) of the
      keyword sets extracted from each issue's title plus body.

    Args:
        title1: Title of the first issue.
        title2: Title of the second issue.
        body1: Body of the first issue; ``None`` is treated as empty.
        body2: Body of the second issue; ``None`` is treated as empty.

    Returns:
        A dict with ``title_similarity``, ``keyword_overlap`` and
        ``combined_score`` (0.7 * title + 0.3 * keywords).
    """
    # Title similarity (weighted more heavily)
    title_ratio = SequenceMatcher(None, title1.lower(), title2.lower()).ratio()

    # A None body would otherwise be formatted as the literal text "None"
    # and show up as a bogus shared keyword.
    keywords1 = extract_keywords(f"{title1} {body1 or ''}")
    keywords2 = extract_keywords(f"{title2} {body2 or ''}")

    if keywords1 and keywords2:
        # Both sets are non-empty here, so the union is never empty.
        keyword_ratio = len(keywords1 & keywords2) / len(keywords1 | keywords2)
    else:
        keyword_ratio = 0.0

    # Combined score (title is more important)
    combined_score = (title_ratio * 0.7) + (keyword_ratio * 0.3)

    return {
        "title_similarity": title_ratio,
        "keyword_overlap": keyword_ratio,
        "combined_score": combined_score,
    }
107165
108166
def check_for_duplicates(new_title, new_body="", threshold=0.6):
    """Check if a similar issue already exists.

    Fetches existing issues via ``get_all_issues``, scores each against the
    proposed title/body with ``calculate_similarity`` and keeps those whose
    combined score reaches ``threshold``. Progress is printed to stdout.

    Args:
        new_title: Title of the issue about to be created.
        new_body: Body of the issue about to be created.
        threshold: Minimum ``combined_score`` for an issue to be reported.

    Returns:
        A list of ``{"issue": ..., "similarity": ...}`` dicts sorted by
        combined score, highest first. Empty when no issues were fetched or
        none reached the threshold.
    """
    print(f"🔍 Checking for duplicates of: '{new_title}'")
    print("=" * 80)

    # Get all existing issues
    issues = get_all_issues()

    if not issues:
        # NOTE(review): an empty result is reported as a fetch failure even
        # though it could also mean no issue carries the label.
        print("❌ Could not fetch issues from GitHub")
        return []

    print(f"📊 Analyzing {len(issues)} existing issues...")

    # Find similar issues
    similar_issues = []

    for issue in issues:
        # `.get("body", "")` only covers a missing key; `or ""` also covers
        # a body that is present but null.
        similarity = calculate_similarity(
            new_title, issue["title"], new_body, issue.get("body") or ""
        )

        if similarity["combined_score"] >= threshold:
            similar_issues.append({"issue": issue, "similarity": similarity})

    # Sort by similarity score
    similar_issues.sort(
        key=lambda x: x["similarity"]["combined_score"], reverse=True
    )

    return similar_issues
144196
145197
def search_by_keywords(keywords):
    """Look up issues (any state) that match at least one of the keywords.

    The keywords are joined with `` OR `` into a single search query and
    handed to ``gh issue list --search`` through ``run_gh_command``.

    Args:
        keywords: Iterable of keyword strings.

    Returns:
        The decoded JSON list of matching issues (number, title, state,
        labels), capped at 20 by the query, or ``[]`` when the command
        produced no output.
    """
    gh_args = ["issue", "list"]
    gh_args += ["--search", " OR ".join(keywords)]
    gh_args += ["--state", "all"]
    gh_args += ["--limit", "20"]
    gh_args += ["--json", "number,title,state,labels"]

    raw_output = run_gh_command(gh_args)

    return json.loads(raw_output) if raw_output else []
@@ -164,71 +223,71 @@ def main():
164223 parser = argparse .ArgumentParser (
165224 description = "Check for duplicate GitHub issues before creating a new one"
166225 )
226+ parser .add_argument ("title" , help = "Title of the issue you want to create" )
167227 parser .add_argument (
168- "title" ,
169- help = "Title of the issue you want to create"
170- )
171- parser .add_argument (
172- "--body" , "-b" ,
173- default = "" ,
174- help = "Body/description of the issue"
228+ "--body" , "-b" , default = "" , help = "Body/description of the issue"
175229 )
176230 parser .add_argument (
177- "--threshold" , "-t" ,
231+ "--threshold" ,
232+ "-t" ,
178233 type = float ,
179234 default = 0.6 ,
180- help = "Similarity threshold (0.0-1.0, default: 0.6)"
235+ help = "Similarity threshold (0.0-1.0, default: 0.6)" ,
181236 )
182237 parser .add_argument (
183- "--keywords" , "-k" ,
184- nargs = "+" ,
185- help = "Additional keywords to search for"
238+ "--keywords" , "-k" , nargs = "+" , help = "Additional keywords to search for"
186239 )
187-
240+
188241 args = parser .parse_args ()
189-
242+
190243 # Check for duplicates
191244 similar_issues = check_for_duplicates (args .title , args .body , args .threshold )
192-
245+
193246 if similar_issues :
194247 print ("\n ⚠️ Found potentially similar issues:" )
195248 print ("-" * 80 )
196-
249+
197250 for item in similar_issues :
198- issue = item [' issue' ]
199- sim = item [' similarity' ]
200-
201- state_icon = "🟢" if issue [' state' ] == "OPEN" else "🔴"
251+ issue = item [" issue" ]
252+ sim = item [" similarity" ]
253+
254+ state_icon = "🟢" if issue [" state" ] == "OPEN" else "🔴"
202255 print (f"\n { state_icon } #{ issue ['number' ]} : { issue ['title' ]} " )
203- print (f" Similarity: { sim ['combined_score' ]:.1%} " +
204- f"(title: { sim ['title_similarity' ]:.1%} , " +
205- f"keywords: { sim ['keyword_overlap' ]:.1%} )" )
206-
256+ print (
257+ f" Similarity: { sim ['combined_score' ]:.1%} "
258+ + f"(title: { sim ['title_similarity' ]:.1%} , "
259+ + f"keywords: { sim ['keyword_overlap' ]:.1%} )"
260+ )
261+
207262 # Show labels
208- labels = [label [' name' ] for label in issue .get (' labels' , [])]
263+ labels = [label [" name" ] for label in issue .get (" labels" , [])]
209264 if labels :
210265 print (f" Labels: { ', ' .join (labels )} " )
211-
266+
212267 # Also search by keywords if provided
213268 if args .keywords :
214269 print (f"\n 🔍 Searching for issues with keywords: { ', ' .join (args .keywords )} " )
215270 keyword_results = search_by_keywords (args .keywords )
216-
271+
217272 if keyword_results :
218273 print (f"\n Found { len (keyword_results )} issues with matching keywords:" )
219274 for issue in keyword_results [:5 ]: # Show top 5
220- state_icon = "🟢" if issue [' state' ] == "OPEN" else "🔴"
275+ state_icon = "🟢" if issue [" state" ] == "OPEN" else "🔴"
221276 print (f"{ state_icon } #{ issue ['number' ]} : { issue ['title' ]} " )
222-
277+
223278 # Recommendation
224279 if similar_issues :
225- highest_score = similar_issues [0 ][' similarity' ][ ' combined_score' ]
280+ highest_score = similar_issues [0 ][" similarity" ][ " combined_score" ]
226281 if highest_score >= 0.8 :
227- print ("\n ❌ RECOMMENDATION: Do NOT create this issue - very similar issue exists!" )
282+ print (
283+ "\n ❌ RECOMMENDATION: Do NOT create this issue - very similar issue exists!"
284+ )
228285 print (" Consider adding to the existing issue instead." )
229286 return 1
230287 elif highest_score >= 0.6 :
231- print ("\n ⚠️ RECOMMENDATION: Review similar issues carefully before creating." )
288+ print (
289+ "\n ⚠️ RECOMMENDATION: Review similar issues carefully before creating."
290+ )
232291 print (" Your issue might be a duplicate or subset of an existing one." )
233292 return 2
234293 else :
@@ -237,4 +296,4 @@ def main():
237296
238297
239298if __name__ == "__main__" :
240- sys .exit (main ())
299+ sys .exit (main ())
0 commit comments