Skip to content

Commit 9c5e15d

Browse files
committed
style: Apply black formatting to check-duplicate-issues.py
1 parent b0c47f7 commit 9c5e15d

File tree

1 file changed

+157
-98
lines changed

1 file changed

+157
-98
lines changed

.conductor/scripts/check-duplicate-issues.py

Lines changed: 157 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,7 @@ def run_gh_command(args):
1818
"""Run a GitHub CLI command and return the output."""
1919
try:
2020
result = subprocess.run(
21-
["gh"] + args,
22-
capture_output=True,
23-
text=True,
24-
check=True
21+
["gh"] + args, capture_output=True, text=True, check=True
2522
)
2623
return result.stdout
2724
except subprocess.CalledProcessError as e:
@@ -33,128 +30,190 @@ def run_gh_command(args):
3330
def get_all_issues(label="conductor:task", limit=200):
    """Fetch open and recently closed issues carrying *label* via the gh CLI.

    Args:
        label: GitHub label to filter on.
        limit: Maximum number of open issues to fetch. Closed issues are
            always capped at the 50 most recent (matches prior behavior).

    Returns:
        A list of issue dicts with ``number``, ``title``, ``body``,
        ``labels`` and ``state`` keys; empty if nothing was returned
        for either state.
    """
    all_issues = []
    # The two queries differ only in --state and --limit, so build them
    # in one loop instead of duplicating the 12-line argument list.
    for state, state_limit in (("open", str(limit)), ("closed", "50")):
        raw = run_gh_command(
            [
                "issue",
                "list",
                "--label",
                label,
                "--state",
                state,
                "--limit",
                state_limit,
                "--json",
                "number,title,body,labels,state",
            ]
        )
        # run_gh_command returns falsy output on failure; skip that state.
        if raw:
            all_issues.extend(json.loads(raw))

    return all_issues
6071

6172

6273
# Common English filler words that carry no signal for duplicate detection.
# Built once at import time instead of being rebuilt on every call.
_STOP_WORDS = frozenset(
    {
        "the", "a", "an", "and", "or", "but", "in", "on", "at", "to", "for",
        "of", "with", "by", "from", "up", "about", "into", "through",
        "during", "before", "after", "above", "below", "between", "under",
        "again", "further", "then", "once", "is", "are", "was", "were",
        "be", "been", "being", "have", "has", "had", "do", "does", "did",
        "will", "would", "could", "should", "may", "might", "must", "shall",
        "can", "need",
    }
)


def extract_keywords(text):
    """Extract meaningful keywords from *text*.

    Lowercases the text, splits it on word boundaries, and drops stop
    words as well as words of two characters or fewer.

    Args:
        text: Arbitrary issue title/body text.

    Returns:
        A set of lowercase keyword strings (empty for empty or
        all-stop-word input).
    """
    words = re.findall(r"\b\w+\b", text.lower())
    return {w for w in words if w not in _STOP_WORDS and len(w) > 2}
81139

82140

83141
def calculate_similarity(title1, title2, body1="", body2=""):
    """Score how similar two issues are to each other.

    Combines a fuzzy title comparison (70% weight) with keyword-set
    overlap across title + body (30% weight).

    Args:
        title1: Title of the first issue.
        title2: Title of the second issue.
        body1: Optional body of the first issue.
        body2: Optional body of the second issue.

    Returns:
        Dict with ``title_similarity``, ``keyword_overlap`` and
        ``combined_score`` values in [0.0, 1.0].
    """
    # Fuzzy, case-insensitive title comparison — weighted most heavily.
    title_score = SequenceMatcher(None, title1.lower(), title2.lower()).ratio()

    # Jaccard-style overlap of the keyword sets drawn from title + body;
    # zero when either side yields no keywords at all.
    kw_left = extract_keywords(f"{title1} {body1}")
    kw_right = extract_keywords(f"{title2} {body2}")
    overlap_score = (
        len(kw_left & kw_right) / len(kw_left | kw_right)
        if kw_left and kw_right
        else 0
    )

    return {
        "title_similarity": title_score,
        "keyword_overlap": overlap_score,
        "combined_score": (title_score * 0.7) + (overlap_score * 0.3),
    }
107165

108166

109167
def check_for_duplicates(new_title, new_body="", threshold=0.6):
    """Look for existing issues that resemble a proposed new one.

    Args:
        new_title: Title of the issue about to be created.
        new_body: Optional body text of the proposed issue.
        threshold: Minimum combined similarity score (0.0-1.0) for an
            existing issue to count as a potential duplicate.

    Returns:
        Matches as ``{"issue": ..., "similarity": ...}`` dicts sorted
        most-similar first; empty list when nothing matches or the
        issue fetch failed.
    """
    print(f"🔍 Checking for duplicates of: '{new_title}'")
    print("=" * 80)

    # Pull every candidate issue (open + recently closed) up front.
    issues = get_all_issues()
    if not issues:
        print("❌ Could not fetch issues from GitHub")
        return []

    print(f"📊 Analyzing {len(issues)} existing issues...")

    # Score every candidate, then keep only those at or above threshold.
    scored = (
        {
            "issue": candidate,
            "similarity": calculate_similarity(
                new_title, candidate["title"], new_body, candidate.get("body", "")
            ),
        }
        for candidate in issues
    )
    matches = [m for m in scored if m["similarity"]["combined_score"] >= threshold]

    # Most similar first.
    return sorted(
        matches, key=lambda m: m["similarity"]["combined_score"], reverse=True
    )
144196

145197

146198
def search_by_keywords(keywords):
    """Search GitHub issues whose text matches any of *keywords*.

    Args:
        keywords: Iterable of search terms; they are OR-ed together into
            a single gh search query.

    Returns:
        Up to 20 issue dicts (``number``, ``title``, ``state``,
        ``labels``) across open and closed issues; empty list when the
        gh call produced no output.
    """
    query = " OR ".join(keywords)

    # Assemble the gh invocation flag by flag for readability.
    gh_args = ["issue", "list"]
    gh_args += ["--search", query]
    gh_args += ["--state", "all"]
    gh_args += ["--limit", "20"]
    gh_args += ["--json", "number,title,state,labels"]

    raw = run_gh_command(gh_args)
    return json.loads(raw) if raw else []
@@ -164,71 +223,71 @@ def main():
164223
parser = argparse.ArgumentParser(
165224
description="Check for duplicate GitHub issues before creating a new one"
166225
)
226+
parser.add_argument("title", help="Title of the issue you want to create")
167227
parser.add_argument(
168-
"title",
169-
help="Title of the issue you want to create"
170-
)
171-
parser.add_argument(
172-
"--body", "-b",
173-
default="",
174-
help="Body/description of the issue"
228+
"--body", "-b", default="", help="Body/description of the issue"
175229
)
176230
parser.add_argument(
177-
"--threshold", "-t",
231+
"--threshold",
232+
"-t",
178233
type=float,
179234
default=0.6,
180-
help="Similarity threshold (0.0-1.0, default: 0.6)"
235+
help="Similarity threshold (0.0-1.0, default: 0.6)",
181236
)
182237
parser.add_argument(
183-
"--keywords", "-k",
184-
nargs="+",
185-
help="Additional keywords to search for"
238+
"--keywords", "-k", nargs="+", help="Additional keywords to search for"
186239
)
187-
240+
188241
args = parser.parse_args()
189-
242+
190243
# Check for duplicates
191244
similar_issues = check_for_duplicates(args.title, args.body, args.threshold)
192-
245+
193246
if similar_issues:
194247
print("\n⚠️ Found potentially similar issues:")
195248
print("-" * 80)
196-
249+
197250
for item in similar_issues:
198-
issue = item['issue']
199-
sim = item['similarity']
200-
201-
state_icon = "🟢" if issue['state'] == "OPEN" else "🔴"
251+
issue = item["issue"]
252+
sim = item["similarity"]
253+
254+
state_icon = "🟢" if issue["state"] == "OPEN" else "🔴"
202255
print(f"\n{state_icon} #{issue['number']}: {issue['title']}")
203-
print(f" Similarity: {sim['combined_score']:.1%} " +
204-
f"(title: {sim['title_similarity']:.1%}, " +
205-
f"keywords: {sim['keyword_overlap']:.1%})")
206-
256+
print(
257+
f" Similarity: {sim['combined_score']:.1%} "
258+
+ f"(title: {sim['title_similarity']:.1%}, "
259+
+ f"keywords: {sim['keyword_overlap']:.1%})"
260+
)
261+
207262
# Show labels
208-
labels = [label['name'] for label in issue.get('labels', [])]
263+
labels = [label["name"] for label in issue.get("labels", [])]
209264
if labels:
210265
print(f" Labels: {', '.join(labels)}")
211-
266+
212267
# Also search by keywords if provided
213268
if args.keywords:
214269
print(f"\n🔍 Searching for issues with keywords: {', '.join(args.keywords)}")
215270
keyword_results = search_by_keywords(args.keywords)
216-
271+
217272
if keyword_results:
218273
print(f"\nFound {len(keyword_results)} issues with matching keywords:")
219274
for issue in keyword_results[:5]: # Show top 5
220-
state_icon = "🟢" if issue['state'] == "OPEN" else "🔴"
275+
state_icon = "🟢" if issue["state"] == "OPEN" else "🔴"
221276
print(f"{state_icon} #{issue['number']}: {issue['title']}")
222-
277+
223278
# Recommendation
224279
if similar_issues:
225-
highest_score = similar_issues[0]['similarity']['combined_score']
280+
highest_score = similar_issues[0]["similarity"]["combined_score"]
226281
if highest_score >= 0.8:
227-
print("\n❌ RECOMMENDATION: Do NOT create this issue - very similar issue exists!")
282+
print(
283+
"\n❌ RECOMMENDATION: Do NOT create this issue - very similar issue exists!"
284+
)
228285
print(" Consider adding to the existing issue instead.")
229286
return 1
230287
elif highest_score >= 0.6:
231-
print("\n⚠️ RECOMMENDATION: Review similar issues carefully before creating.")
288+
print(
289+
"\n⚠️ RECOMMENDATION: Review similar issues carefully before creating."
290+
)
232291
print(" Your issue might be a duplicate or subset of an existing one.")
233292
return 2
234293
else:
@@ -237,4 +296,4 @@ def main():
237296

238297

239298
# Script entry point: main()'s return code becomes the process exit status
# (0 = no duplicates, non-zero = review/stop recommendations).
if __name__ == "__main__":
    sys.exit(main())

0 commit comments

Comments
 (0)