Commit a3c1cde

Merge pull request #62 from ryanmac/feature/upgrade-strategy
feat: Task listing fixes, upgrade support, and duplicate prevention
2 parents: 656cb51 + 9c5e15d

8 files changed: +1697 −252 lines

Lines changed: 299 additions & 0 deletions
@@ -0,0 +1,299 @@
#!/usr/bin/env python3
"""
Check for duplicate or similar issues before creating a new one.

This script helps prevent duplicate GitHub issues by searching for similar
existing issues based on title and keywords.
"""

import json
import subprocess
import sys
from difflib import SequenceMatcher
import re
import argparse


def run_gh_command(args):
    """Run a GitHub CLI command and return the output."""
    try:
        result = subprocess.run(
            ["gh"] + args, capture_output=True, text=True, check=True
        )
        return result.stdout
    except subprocess.CalledProcessError as e:
        print(f"Error running gh command: {e}")
        print(f"stderr: {e.stderr}")
        return None


def get_all_issues(label="conductor:task", limit=200):
    """Get all issues with the specified label."""
    # Get open issues
    open_issues = run_gh_command(
        [
            "issue",
            "list",
            "--label",
            label,
            "--state",
            "open",
            "--limit",
            str(limit),
            "--json",
            "number,title,body,labels,state",
        ]
    )

    # Get closed issues (last 50)
    closed_issues = run_gh_command(
        [
            "issue",
            "list",
            "--label",
            label,
            "--state",
            "closed",
            "--limit",
            "50",
            "--json",
            "number,title,body,labels,state",
        ]
    )

    all_issues = []
    if open_issues:
        all_issues.extend(json.loads(open_issues))
    if closed_issues:
        all_issues.extend(json.loads(closed_issues))

    return all_issues


def extract_keywords(text):
    """Extract meaningful keywords from text."""
    # Remove common words and clean up
    stop_words = {
        "the",
        "a",
        "an",
        "and",
        "or",
        "but",
        "in",
        "on",
        "at",
        "to",
        "for",
        "of",
        "with",
        "by",
        "from",
        "up",
        "about",
        "into",
        "through",
        "during",
        "before",
        "after",
        "above",
        "below",
        "between",
        "under",
        "again",
        "further",
        "then",
        "once",
        "is",
        "are",
        "was",
        "were",
        "be",
        "been",
        "being",
        "have",
        "has",
        "had",
        "do",
        "does",
        "did",
        "will",
        "would",
        "could",
        "should",
        "may",
        "might",
        "must",
        "shall",
        "can",
        "need",
    }

    # Convert to lowercase and split
    words = re.findall(r"\b\w+\b", text.lower())

    # Filter out stop words and short words
    keywords = [w for w in words if w not in stop_words and len(w) > 2]

    return set(keywords)


def calculate_similarity(title1, title2, body1="", body2=""):
    """Calculate similarity between two issues."""
    # Title similarity (weighted more heavily)
    title_ratio = SequenceMatcher(None, title1.lower(), title2.lower()).ratio()

    # Keyword overlap
    keywords1 = extract_keywords(f"{title1} {body1}")
    keywords2 = extract_keywords(f"{title2} {body2}")

    if keywords1 and keywords2:
        overlap = len(keywords1.intersection(keywords2))
        total = len(keywords1.union(keywords2))
        keyword_ratio = overlap / total if total > 0 else 0
    else:
        keyword_ratio = 0

    # Combined score (title is more important)
    combined_score = (title_ratio * 0.7) + (keyword_ratio * 0.3)

    return {
        "title_similarity": title_ratio,
        "keyword_overlap": keyword_ratio,
        "combined_score": combined_score,
    }


def check_for_duplicates(new_title, new_body="", threshold=0.6):
    """Check if a similar issue already exists."""
    print(f"🔍 Checking for duplicates of: '{new_title}'")
    print("=" * 80)

    # Get all existing issues
    issues = get_all_issues()

    if not issues:
        print("❌ Could not fetch issues from GitHub")
        return []

    print(f"📊 Analyzing {len(issues)} existing issues...")

    # Find similar issues
    similar_issues = []

    for issue in issues:
        similarity = calculate_similarity(
            new_title, issue["title"], new_body, issue.get("body", "")
        )

        if similarity["combined_score"] >= threshold:
            similar_issues.append({"issue": issue, "similarity": similarity})

    # Sort by similarity score
    similar_issues.sort(key=lambda x: x["similarity"]["combined_score"], reverse=True)

    return similar_issues


def search_by_keywords(keywords):
    """Search for issues containing specific keywords."""
    search_query = " OR ".join(keywords)

    result = run_gh_command(
        [
            "issue",
            "list",
            "--search",
            search_query,
            "--state",
            "all",
            "--limit",
            "20",
            "--json",
            "number,title,state,labels",
        ]
    )

    if result:
        return json.loads(result)
    return []


def main():
    parser = argparse.ArgumentParser(
        description="Check for duplicate GitHub issues before creating a new one"
    )
    parser.add_argument("title", help="Title of the issue you want to create")
    parser.add_argument(
        "--body", "-b", default="", help="Body/description of the issue"
    )
    parser.add_argument(
        "--threshold",
        "-t",
        type=float,
        default=0.6,
        help="Similarity threshold (0.0-1.0, default: 0.6)",
    )
    parser.add_argument(
        "--keywords", "-k", nargs="+", help="Additional keywords to search for"
    )

    args = parser.parse_args()

    # Check for duplicates
    similar_issues = check_for_duplicates(args.title, args.body, args.threshold)

    if similar_issues:
        print("\n⚠️ Found potentially similar issues:")
        print("-" * 80)

        for item in similar_issues:
            issue = item["issue"]
            sim = item["similarity"]

            state_icon = "🟢" if issue["state"] == "OPEN" else "🔴"
            print(f"\n{state_icon} #{issue['number']}: {issue['title']}")
            print(
                f" Similarity: {sim['combined_score']:.1%} "
                + f"(title: {sim['title_similarity']:.1%}, "
                + f"keywords: {sim['keyword_overlap']:.1%})"
            )

            # Show labels
            labels = [label["name"] for label in issue.get("labels", [])]
            if labels:
                print(f" Labels: {', '.join(labels)}")

    # Also search by keywords if provided
    if args.keywords:
        print(f"\n🔍 Searching for issues with keywords: {', '.join(args.keywords)}")
        keyword_results = search_by_keywords(args.keywords)

        if keyword_results:
            print(f"\nFound {len(keyword_results)} issues with matching keywords:")
            for issue in keyword_results[:5]:  # Show top 5
                state_icon = "🟢" if issue["state"] == "OPEN" else "🔴"
                print(f"{state_icon} #{issue['number']}: {issue['title']}")

    # Recommendation
    if similar_issues:
        highest_score = similar_issues[0]["similarity"]["combined_score"]
        if highest_score >= 0.8:
            print(
                "\n❌ RECOMMENDATION: Do NOT create this issue - very similar issue exists!"
            )
            print(" Consider adding to the existing issue instead.")
            return 1
        elif highest_score >= 0.6:
            print(
                "\n⚠️ RECOMMENDATION: Review similar issues carefully before creating."
            )
            print(" Your issue might be a duplicate or subset of an existing one.")
            return 2
    else:
        print("\n✅ No similar issues found. Safe to create new issue.")
        return 0


if __name__ == "__main__":
    sys.exit(main())
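
For reference, a minimal sketch of how the scoring behaves when the module is imported directly rather than run as a CLI. It assumes the new file is saved as check_duplicate_issues.py and is importable from the working directory (the file path is not shown in this view of the diff, so the name is an assumption), and it only exercises calculate_similarity, so no GitHub access or gh CLI is needed.

# Illustrative sketch only. The module name check_duplicate_issues is an
# assumption; this view of the diff does not show the new file's path.
from check_duplicate_issues import calculate_similarity

scores = calculate_similarity(
    "Fix task listing pagination",
    "Task listing pagination is broken",
)

# combined_score blends title similarity (weight 0.7) with keyword overlap
# (weight 0.3); two near-identical titles like these should clear the default
# 0.6 threshold that check_for_duplicates compares against.
print(f"combined: {scores['combined_score']:.1%}")
print(f"title:    {scores['title_similarity']:.1%}")
print(f"keywords: {scores['keyword_overlap']:.1%}")

Run as a command-line tool, the equivalent check would look something like: python check_duplicate_issues.py "Fix task listing pagination" --threshold 0.6 (again assuming that file name).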
