Skip to content

Commit 8f36941

Browse files
committed
! improve show-duplicate-java-classes
- ignore BadZipFile for jar file
- extract functions
- biz function print_duplicate_classes_info
- util function print_box_message
- adjust function layout, move util functions to the front
- from __future__ import print_function
- use start=1 for enumerate
- rename var name: java_class -> class/clazz
1 parent d7d3d7b commit 8f36941

File tree

1 file changed

+79
-61
lines changed

1 file changed

+79
-61
lines changed

bin/show-duplicate-java-classes

Lines changed: 79 additions & 61 deletions
Original file line number | Diff line number | Diff line change
@@ -12,28 +12,51 @@
1212
# @author tg123 (farmer1992 at gmail dot com)
1313
# @author Jerry Lee (oldratlee at gmail dot com)
1414

15+
from __future__ import print_function
16+
1517
__author__ = 'tg123'
1618

1719
import sys
1820
from glob import glob
1921
from os import walk
20-
from zipfile import ZipFile
22+
from zipfile import ZipFile, BadZipfile
2123
from os.path import relpath, isdir
2224
from optparse import OptionParser
2325

2426

25-
def list_jar_file_under_lib_dirs(libs):
27+
################################################################################
28+
# utils functions
29+
################################################################################
30+
31+
def check_python_version():
32+
if sys.version_info < (2, 7):
33+
exit('Using python 2 and its version lower than 2.7, not supported!\n'
34+
'Please use python 3 instead!\nPython version: ' + sys.version)
35+
36+
37+
def print_box_message(msg):
38+
print()
39+
print('=' * 80)
40+
print(msg)
41+
print('=' * 80)
42+
43+
44+
def list_jar_file_under_lib_dirs(lib_dirs):
2645
jar_files = set()
27-
for lib in libs:
28-
if isdir(lib):
29-
jar_files |= {f for f in glob(lib + '/*.jar')}
46+
for lib_dir in lib_dirs:
47+
if isdir(lib_dir):
48+
jar_files |= {f for f in glob(lib_dir + '/*.jar')}
3049
else:
31-
jar_files.add(lib)
50+
jar_files.add(lib_dir)
3251
return jar_files
3352

3453

3554
def list_class_under_jar_file(jar_file):
36-
return {f for f in ZipFile(jar_file).namelist() if f.lower().endswith('.class')}
55+
try:
56+
return {f for f in ZipFile(jar_file).namelist() if f.lower().endswith('.class')}
57+
except BadZipfile:
58+
print('WARN: %s is bad zip file, ignored!' % jar_file, file=sys.stderr)
59+
return set()
3760

3861

3962
def list_class_under_class_dir(class_dir):
@@ -42,89 +65,84 @@ def list_class_under_class_dir(class_dir):
4265
for filename in file_names if filename.lower().endswith('.class')}
4366

4467

45-
def expand_2_class_path(jar_files, class_dirs):
46-
java_class_2_class_paths = {}
68+
################################################################################
69+
# biz functions
70+
################################################################################
71+
72+
def build_index__class_to_class_paths(jar_files, class_dirs):
73+
class_to_class_paths = {}
74+
4775
# list all classes in jar files
4876
for jar_file in jar_files:
4977
for class_file in list_class_under_jar_file(jar_file):
50-
java_class_2_class_paths.setdefault(class_file, set()).add(jar_file)
51-
# list all classes in class dir
78+
class_to_class_paths.setdefault(class_file, set()).add(jar_file)
79+
80+
# list all classes in class dirs
5281
for class_dir in class_dirs:
5382
for class_file in list_class_under_class_dir(class_dir):
54-
java_class_2_class_paths.setdefault(class_file, set()).add(class_dir)
83+
class_to_class_paths.setdefault(class_file, set()).add(class_dir)
5584

56-
return java_class_2_class_paths, jar_files | set(class_dirs)
85+
return class_to_class_paths, jar_files | set(class_dirs)
5786

5887

59-
def find_duplicate_classes(java_class_2_class_paths):
60-
class_path_2_duplicate_classes = {}
88+
def find_duplicate_classes(class_to_class_paths):
89+
class_paths_to_duplicate_classes = {}
6190

62-
for java_class, class_paths in list(java_class_2_class_paths.items()):
91+
for clazz, class_paths in class_to_class_paths.items():
6392
# skip java 9 module-info files
64-
if java_class.endswith('/module-info.class'):
93+
if clazz.endswith('/module-info.class'):
6594
continue
6695
if len(class_paths) > 1:
67-
classes = class_path_2_duplicate_classes.setdefault(frozenset(class_paths), set())
68-
classes.add(java_class)
96+
classes = class_paths_to_duplicate_classes.setdefault(frozenset(class_paths), set())
97+
classes.add(clazz)
6998

70-
return class_path_2_duplicate_classes
99+
return class_paths_to_duplicate_classes
71100

72101

73-
def print_class_paths(class_paths):
74-
print('')
75-
print('=' * 80)
76-
print('class paths to find:')
77-
print('=' * 80)
78-
for idx, class_path in enumerate(class_paths):
79-
print('%-3d: %s' % (idx + 1, class_path))
102+
def print_duplicate_classes_info(class_paths_to_duplicate_classes):
103+
print('Found duplicate classes in below class paths:')
104+
for idx, jars in enumerate(class_paths_to_duplicate_classes, start=1):
105+
print('%-3d(%d@%d): %s' % (idx, len(class_paths_to_duplicate_classes[jars]), len(jars), ' '.join(jars)))
106+
107+
print_box_message('Duplicate classes detail info:')
108+
for idx, (jars, classes) in enumerate(class_paths_to_duplicate_classes.items(), start=1):
109+
print('%-3d(%d@%d): %s' % (idx, len(class_paths_to_duplicate_classes[jars]), len(jars), ' '.join(jars)))
110+
for i, c in enumerate(classes, start=1):
111+
print('%7d %s' % (i, c))
112+
113+
114+
def print_class_paths_info(class_paths):
115+
print_box_message('class paths to find:')
116+
for idx, class_path in enumerate(class_paths, start=1):
117+
print('%-3d: %s' % (idx, class_path))
80118

81119

82120
def main():
83121
option_parser = OptionParser('usage: %prog '
84122
'[-c class-dir1 [-c class-dir2] ...] '
85123
'[lib-dir1|jar-file1 [lib-dir2|jar-file2] ...]')
86124
option_parser.add_option('-c', '--class-dir', dest='class_dirs', default=[], action='append', help='add class dir')
87-
options, libs = option_parser.parse_args()
125+
options, lib_dirs = option_parser.parse_args()
126+
if not options.class_dirs and not lib_dirs:
127+
lib_dirs = ['.']
88128

89-
if not options.class_dirs and not libs:
90-
libs = ['.']
129+
class_to_class_paths, class_paths = build_index__class_to_class_paths(
130+
list_jar_file_under_lib_dirs(lib_dirs), options.class_dirs)
91131

92-
java_class_2_class_paths, class_paths = expand_2_class_path(
93-
list_jar_file_under_lib_dirs(libs), options.class_dirs)
132+
class_paths_to_duplicate_classes = find_duplicate_classes(class_to_class_paths)
94133

95-
class_path_2_duplicate_classes = find_duplicate_classes(java_class_2_class_paths)
96-
97-
if not class_path_2_duplicate_classes:
134+
have_duplicate_classes = bool(class_paths_to_duplicate_classes)
135+
if have_duplicate_classes:
136+
print_duplicate_classes_info(class_paths_to_duplicate_classes)
137+
else:
98138
print('COOL! No duplicate classes found!')
99-
print_class_paths(class_paths)
100-
exit()
101139

102-
print('Found duplicate classes in below class path:')
103-
for idx, jars in enumerate(class_path_2_duplicate_classes):
104-
print('%-3d(%d@%d): %s' % (idx + 1, len(class_path_2_duplicate_classes[jars]), len(jars), ' '.join(jars)))
140+
print_class_paths_info(class_paths)
105141

106-
print('')
107-
print('=' * 80)
108-
print('Duplicate classes detail info:')
109-
print('=' * 80)
110-
for idx, (jars, classes) in enumerate(class_path_2_duplicate_classes.items()):
111-
print('%-3d(%d@%d): %s' % (idx + 1, len(class_path_2_duplicate_classes[jars]), len(jars), ' '.join(jars)))
112-
for i, c in enumerate(classes):
113-
print('\t%-3d %s' % (i + 1, c))
114-
115-
print_class_paths(class_paths)
116-
exit(1)
117-
118-
119-
def check_python_version():
120-
python_version = sys.version_info
121-
if python_version.major > 2:
122-
return
123-
if python_version.minor < 7:
124-
exit('Using python 2 and its version lower than 2.7, not support!\n'
125-
'Please use python 3 instead!\nPython version: ' + sys.version)
142+
return int(have_duplicate_classes)
126143

127144

128145
if __name__ == '__main__':
129146
check_python_version()
130-
main()
147+
148+
exit(main())

0 commit comments

Comments
 (0)