Skip to content

Commit d47d60f

Browse files
committed
up
1 parent 5e181ed commit d47d60f

9 files changed

+423
-1136
lines changed

duplicate_code_finder.py

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
import ast
2+
import argparse
3+
from pathlib import Path
4+
from collections import defaultdict
5+
6+
# This script requires Python 3.9+ for the ast.unparse() function.
7+
8+
class ClassMethodVisitor(ast.NodeVisitor):
    """
    An AST visitor that collects method names and the source code of their bodies.

    After visiting one or more trees, ``methods`` maps each method name to a
    list of ``(class_name, body_source)`` tuples, one tuple per definition
    found, in visiting order. Both ``def`` and ``async def`` methods are
    recorded.
    """

    # ``async def`` is represented by its own node class, so both must be
    # matched or coroutine methods would be silently ignored.
    _METHOD_NODES = (ast.FunctionDef, ast.AsyncFunctionDef)

    def __init__(self):
        # method name -> [(class name, unparsed body source), ...]
        self.methods = defaultdict(list)

    def visit_ClassDef(self, node: ast.ClassDef) -> None:
        """
        Visits a class definition, then inspects its methods.

        Only functions that are direct children of the class body are
        recorded; the walk then continues into the class so that nested
        classes are processed too.

        Args:
            node: The class definition node being visited.
        """
        for item in node.body:
            if not isinstance(item, self._METHOD_NODES):
                continue
            # The body is regenerated from the AST, so comments and the
            # original layout do not influence the comparison.
            body_source = ast.unparse(item.body).strip()
            self.methods[item.name].append((node.name, body_source))

        # Descend so that classes nested inside this one are visited as well.
        self.generic_visit(node)
26+
27+
def find_duplicate_method_content(directory: str, show_code: bool = True):
    """
    Parses all Python files in a directory to find methods with duplicate content.

    Every ``*.py`` path below ``directory`` (searched recursively) is parsed.
    Methods that share both a name and an identical body are grouped and
    reported on standard output. A file that cannot be read or parsed
    produces a warning and the scan continues with the next file.

    Args:
        directory: The path to the directory to inspect.
        show_code: If True, prints the shared code block for each duplicate.

    Returns:
        None. All results are printed.
    """
    target_dir = Path(directory)
    if not target_dir.is_dir():
        print(f"❌ Error: '{directory}' is not a valid directory.")
        return

    visitor = ClassMethodVisitor()

    # Sorted so the report does not depend on filesystem enumeration order.
    for py_file in sorted(target_dir.rglob('*.py')):
        try:
            # Hand the parser raw bytes so it detects a UTF-8 BOM or a
            # PEP 263 coding declaration itself, instead of forcing a plain
            # UTF-8 text decode that rejects such files.
            tree = ast.parse(py_file.read_bytes(), filename=str(py_file))
            visitor.visit(tree)
        except Exception as e:
            # Best effort: one unreadable or invalid file must not abort the scan.
            print(f"⚠️ Warning: Could not process {py_file}. Error: {e}")

    print("\n--- Duplicate Method Content Report ---")
    duplicates_found = False

    for method_name, implementations in sorted(visitor.methods.items()):
        # Bucket the definitions of this method name by their exact body text.
        body_groups = defaultdict(list)
        for class_name, body_source in implementations:
            body_groups[body_source].append(class_name)

        for body_source, class_list in body_groups.items():
            if len(class_list) < 2:
                continue

            duplicates_found = True
            # NOTE(review): entries carry only the class name, so same-named
            # classes (or one class defining the method twice) collapse to a
            # single line here and the count can be lower than the number of
            # definitions that matched.
            unique_classes = sorted(set(class_list))
            print(f"\n[+] Method `def {method_name}(...)` has identical content in {len(unique_classes)} classes:")
            for class_name in unique_classes:
                print(f" - {class_name}")

            # Conditionally print the shared code block based on the flag
            if show_code:
                print("\n Shared Code Block:")
                indented_code = "\n".join(f" {line}" for line in body_source.splitlines())
                print(indented_code)
                print(" " + "-" * 30)

    if not duplicates_found:
        print("\n✅ No methods with identical content were found across classes.")
76+
77+
def main():
    """Parse the command-line arguments and launch the duplicate scan."""
    cli = argparse.ArgumentParser(
        description="Find methods with identical content across Python classes in a directory."
    )

    # Positional argument: the root folder whose Python files get scanned.
    cli.add_argument("directory", type=str, help="The path to the directory to inspect.")

    # Optional switch that suppresses the per-duplicate code listing.
    cli.add_argument(
        "--hide-code",
        action="store_true",
        help="Do not print the shared code block for each duplicate found.",
    )

    options = cli.parse_args()
    code_visible = not options.hide_code
    find_duplicate_method_content(options.directory, show_code=code_visible)
95+
96+
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)