Skip to content

Commit 9c5e15d

Browse files
committed
style: Apply black formatting to check-duplicate-issues.py
1 parent b0c47f7 commit 9c5e15d

File tree

1 file changed

+157
-98
lines changed

1 file changed

+157
-98
lines changed

.conductor/scripts/check-duplicate-issues.py

Lines changed: 157 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,7 @@ def run_gh_command(args):
1818
"""Run a GitHub CLI command and return the output."""
1919
try:
2020
result = subprocess.run(
21-
["gh"] + args,
22-
capture_output=True,
23-
text=True,
24-
check=True
21+
["gh"] + args, capture_output=True, text=True, check=True
2522
)
2623
return result.stdout
2724
except subprocess.CalledProcessError as e:
@@ -33,128 +30,190 @@ def run_gh_command(args):
3330
def get_all_issues(label="conductor:task", limit=200):
    """Fetch open and recently closed issues carrying *label* via the gh CLI.

    Args:
        label: GitHub label to filter on.
        limit: Maximum number of open issues to fetch. Closed issues are
            always capped at the 50 most recent (matches prior behavior).

    Returns:
        A list of issue dicts with ``number``, ``title``, ``body``,
        ``labels`` and ``state`` keys; empty if nothing was returned
        for either state.
    """
    all_issues = []
    # The two queries differ only in --state and --limit, so build them
    # in one loop instead of duplicating the 12-line argument list.
    for state, state_limit in (("open", str(limit)), ("closed", "50")):
        raw = run_gh_command(
            [
                "issue",
                "list",
                "--label",
                label,
                "--state",
                state,
                "--limit",
                state_limit,
                "--json",
                "number,title,body,labels,state",
            ]
        )
        # run_gh_command returns falsy output on failure; skip that state.
        if raw:
            all_issues.extend(json.loads(raw))

    return all_issues
6071

6172

6273
# Common English filler words that carry no signal for duplicate detection.
# Built once at import time instead of being rebuilt on every call.
_STOP_WORDS = frozenset(
    {
        "the", "a", "an", "and", "or", "but", "in", "on", "at", "to", "for",
        "of", "with", "by", "from", "up", "about", "into", "through",
        "during", "before", "after", "above", "below", "between", "under",
        "again", "further", "then", "once", "is", "are", "was", "were",
        "be", "been", "being", "have", "has", "had", "do", "does", "did",
        "will", "would", "could", "should", "may", "might", "must", "shall",
        "can", "need",
    }
)


def extract_keywords(text):
    """Extract meaningful keywords from *text*.

    Lowercases the text, splits it on word boundaries, and drops stop
    words as well as words of two characters or fewer.

    Args:
        text: Arbitrary issue title/body text.

    Returns:
        A set of lowercase keyword strings (empty for empty or
        all-stop-word input).
    """
    words = re.findall(r"\b\w+\b", text.lower())
    return {w for w in words if w not in _STOP_WORDS and len(w) > 2}
81139

82140

83141
def calculate_similarity(title1, title2, body1="", body2=""):
    """Score how similar two issues are to each other.

    Combines a fuzzy title comparison (70% weight) with keyword-set
    overlap across title + body (30% weight).

    Args:
        title1: Title of the first issue.
        title2: Title of the second issue.
        body1: Optional body of the first issue.
        body2: Optional body of the second issue.

    Returns:
        Dict with ``title_similarity``, ``keyword_overlap`` and
        ``combined_score`` values in [0.0, 1.0].
    """
    # Fuzzy, case-insensitive title comparison — weighted most heavily.
    title_score = SequenceMatcher(None, title1.lower(), title2.lower()).ratio()

    # Jaccard-style overlap of the keyword sets drawn from title + body;
    # zero when either side yields no keywords at all.
    kw_left = extract_keywords(f"{title1} {body1}")
    kw_right = extract_keywords(f"{title2} {body2}")
    overlap_score = (
        len(kw_left & kw_right) / len(kw_left | kw_right)
        if kw_left and kw_right
        else 0
    )

    return {
        "title_similarity": title_score,
        "keyword_overlap": overlap_score,
        "combined_score": (title_score * 0.7) + (overlap_score * 0.3),
    }
107165

108166

109167
def check_for_duplicates(new_title, new_body="", threshold=0.6):
    """Look for existing issues that resemble a proposed new one.

    Args:
        new_title: Title of the issue about to be created.
        new_body: Optional body text of the proposed issue.
        threshold: Minimum combined similarity score (0.0-1.0) for an
            existing issue to count as a potential duplicate.

    Returns:
        Matches as ``{"issue": ..., "similarity": ...}`` dicts sorted
        most-similar first; empty list when nothing matches or the
        issue fetch failed.
    """
    print(f"🔍 Checking for duplicates of: '{new_title}'")
    print("=" * 80)

    # Pull every candidate issue (open + recently closed) up front.
    issues = get_all_issues()
    if not issues:
        print("❌ Could not fetch issues from GitHub")
        return []

    print(f"📊 Analyzing {len(issues)} existing issues...")

    # Score every candidate, then keep only those at or above threshold.
    scored = (
        {
            "issue": candidate,
            "similarity": calculate_similarity(
                new_title, candidate["title"], new_body, candidate.get("body", "")
            ),
        }
        for candidate in issues
    )
    matches = [m for m in scored if m["similarity"]["combined_score"] >= threshold]

    # Most similar first.
    return sorted(
        matches, key=lambda m: m["similarity"]["combined_score"], reverse=True
    )
144196

145197

146198
def search_by_keywords(keywords):
    """Search GitHub issues whose text matches any of *keywords*.

    Args:
        keywords: Iterable of search terms; they are OR-ed together into
            a single gh search query.

    Returns:
        Up to 20 issue dicts (``number``, ``title``, ``state``,
        ``labels``) across open and closed issues; empty list when the
        gh call produced no output.
    """
    query = " OR ".join(keywords)

    # Assemble the gh invocation flag by flag for readability.
    gh_args = ["issue", "list"]
    gh_args += ["--search", query]
    gh_args += ["--state", "all"]
    gh_args += ["--limit", "20"]
    gh_args += ["--json", "number,title,state,labels"]

    raw = run_gh_command(gh_args)
    return json.loads(raw) if raw else []
@@ -164,71 +223,71 @@ def main():
164223
parser = argparse.ArgumentParser(
165224
description="Check for duplicate GitHub issues before creating a new one"
166225
)
226+
parser.add_argument("title", help="Title of the issue you want to create")
167227
parser.add_argument(
168-
"title",
169-
help="Title of the issue you want to create"
170-
)
171-
parser.add_argument(
172-
"--body", "-b",
173-
default="",
174-
help="Body/description of the issue"
228+
"--body", "-b", default="", help="Body/description of the issue"
175229
)
176230
parser.add_argument(
177-
"--threshold", "-t",
231+
"--threshold",
232+
"-t",
178233
type=float,
179234
default=0.6,
180-
help="Similarity threshold (0.0-1.0, default: 0.6)"
235+
help="Similarity threshold (0.0-1.0, default: 0.6)",
181236
)
182237
parser.add_argument(
183-
"--keywords", "-k",
184-
nargs="+",
185-
help="Additional keywords to search for"
238+
"--keywords", "-k", nargs="+", help="Additional keywords to search for"
186239
)
187-
240+
188241
args = parser.parse_args()
189-
242+
190243
# Check for duplicates
191244
similar_issues = check_for_duplicates(args.title, args.body, args.threshold)
192-
245+
193246
if similar_issues:
194247
print("\n⚠️ Found potentially similar issues:")
195248
print("-" * 80)
196-
249+
197250
for item in similar_issues:
198-
issue = item['issue']
199-
sim = item['similarity']
200-
201-
state_icon = "🟢" if issue['state'] == "OPEN" else "🔴"
251+
issue = item["issue"]
252+
sim = item["similarity"]
253+
254+
state_icon = "🟢" if issue["state"] == "OPEN" else "🔴"
202255
print(f"\n{state_icon} #{issue['number']}: {issue['title']}")
203-
print(f" Similarity: {sim['combined_score']:.1%} " +
204-
f"(title: {sim['title_similarity']:.1%}, " +
205-
f"keywords: {sim['keyword_overlap']:.1%})")
206-
256+
print(
257+
f" Similarity: {sim['combined_score']:.1%} "
258+
+ f"(title: {sim['title_similarity']:.1%}, "
259+
+ f"keywords: {sim['keyword_overlap']:.1%})"
260+
)
261+
207262
# Show labels
208-
labels = [label['name'] for label in issue.get('labels', [])]
263+
labels = [label["name"] for label in issue.get("labels", [])]
209264
if labels:
210265
print(f" Labels: {', '.join(labels)}")
211-
266+
212267
# Also search by keywords if provided
213268
if args.keywords:
214269
print(f"\n🔍 Searching for issues with keywords: {', '.join(args.keywords)}")
215270
keyword_results = search_by_keywords(args.keywords)
216-
271+
217272
if keyword_results:
218273
print(f"\nFound {len(keyword_results)} issues with matching keywords:")
219274
for issue in keyword_results[:5]: # Show top 5
220-
state_icon = "🟢" if issue['state'] == "OPEN" else "🔴"
275+
state_icon = "🟢" if issue["state"] == "OPEN" else "🔴"
221276
print(f"{state_icon} #{issue['number']}: {issue['title']}")
222-
277+
223278
# Recommendation
224279
if similar_issues:
225-
highest_score = similar_issues[0]['similarity']['combined_score']
280+
highest_score = similar_issues[0]["similarity"]["combined_score"]
226281
if highest_score >= 0.8:
227-
print("\n❌ RECOMMENDATION: Do NOT create this issue - very similar issue exists!")
282+
print(
283+
"\n❌ RECOMMENDATION: Do NOT create this issue - very similar issue exists!"
284+
)
228285
print(" Consider adding to the existing issue instead.")
229286
return 1
230287
elif highest_score >= 0.6:
231-
print("\n⚠️ RECOMMENDATION: Review similar issues carefully before creating.")
288+
print(
289+
"\n⚠️ RECOMMENDATION: Review similar issues carefully before creating."
290+
)
232291
print(" Your issue might be a duplicate or subset of an existing one.")
233292
return 2
234293
else:
@@ -237,4 +296,4 @@ def main():
237296

238297

239298
# Script entry point: main()'s return code becomes the process exit status
# (0 = no duplicates, non-zero = review/stop recommendations).
if __name__ == "__main__":
    sys.exit(main())

0 commit comments

Comments
 (0)