#!/usr/bin/env python3
"""
Java file analysis script for semantic metrics extraction
"""

import csv
import os
import re
import subprocess
import sys
from collections import defaultdict

import javalang

def get_cyclomatic_complexity(method):
    """Approximate the cyclomatic complexity of a method from its AST."""
    complexity = 1
    # Each branching construct adds one decision point.
    for node_type in (javalang.tree.IfStatement, javalang.tree.ForStatement,
                      javalang.tree.WhileStatement, javalang.tree.DoStatement,
                      javalang.tree.CatchClause):
        complexity += sum(1 for _ in method.filter(node_type))
    # Each non-empty case label in a switch adds one decision point.
    for _, node in method.filter(javalang.tree.SwitchStatement):
        complexity += len([case for case in node.cases if case.statements])
    return complexity
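# Worked example (hypothetical Java method, for illustration only):
#
#     public int clamp(int x) {
#         if (x < 0) { return 0; }              // IfStatement  -> +1
#         for (int i = 0; i < x; i++) { }       // ForStatement -> +1
#         return x;
#     }
#
# Starting from the base value of 1, this method gets a cyclomatic complexity of 3.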

def get_bug_count(file_path, repo_dir):
    """Count past commits touching the file whose subject line suggests a bug fix."""
    try:
        relative_path = os.path.relpath(file_path, repo_dir)
        result = subprocess.run(
            # --oneline keeps one subject line per commit, so each commit is counted at most once.
            ['git', '-C', repo_dir, 'log', '--oneline', '--follow', '--', relative_path],
            capture_output=True,
            text=True
        )
        if result.returncode != 0:
            return 0
        pattern = re.compile(r'\b(fix|hotfix|bugfix|chore|refactor|test-fix)\b', re.IGNORECASE)
        return sum(1 for line in result.stdout.splitlines() if pattern.search(line))
    except Exception:
        return 0
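# For example, hypothetical commit subjects such as "Fix NPE in StringUtils" or
# "hotfix: handle null delimiter" match the case-insensitive pattern above, while
# "Add prefix handling" does not, because "prefix" contains no standalone keyword.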

def analyze_file(file_path, project_name, version, repo_dir):
    """Parse a Java source file and return CK-style metrics for its first top-level class."""
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            code = f.read()
    except Exception as e:
        print(f"Error reading file {file_path}: {e}")
        return None

    try:
        tree = javalang.parse.parse(code)
    except Exception as e:
        print(f"Error parsing Java code in {file_path}: {e}")
        return None

    try:
        # Count classes found
        classes = list(tree.filter(javalang.tree.ClassDeclaration))
        if not classes:
            print(f"No classes found in {file_path}")
            return None

        print(f"Found {len(classes)} class(es) in {file_path}")

        # Only the first class declaration is analyzed; the function returns
        # from inside this loop on its first iteration.
        for _, class_node in classes:
            fully_qualified_name = f"{tree.package.name}.{class_node.name}" if tree.package else class_node.name

            metrics = {
                'project_name': project_name,
                'version': version,
                'class_name': fully_qualified_name,
                'wmc': 0,
                'rfc': 0,
                'loc': len(code.splitlines()),
                'max_cc': 0,
                'avg_cc': 0,
                'cbo': 0,
                'ca': 0,   # afferent coupling: not computable from a single file, left at 0
                'ce': 0,   # efferent coupling: not computable from a single file, left at 0
                'ic': 0,
                'cbm': 0,
                'lcom': 0,
                'lcom3': 0,
                'dit': 0,
                'noc': 0,  # number of children: not computable from a single file, left at 0
                'mfa': 0,
                'npm': 0,
                'dam': 0,
                'moa': 0,
                'cam': 0,
                'amc': 0,
                'bug': get_bug_count(file_path, repo_dir)
            }

            # Methods and complexity
            methods = class_node.methods
            metrics['wmc'] = len(methods)
            cc_values = []
            method_names = set()
            for method in methods:
                cc = get_cyclomatic_complexity(method)
                cc_values.append(cc)
                method_names.add(method.name)
                if isinstance(method, javalang.tree.MethodDeclaration):
                    metrics['npm'] += 1 if method.modifiers and 'public' in method.modifiers else 0

            metrics['max_cc'] = max(cc_values) if cc_values else 0
            metrics['avg_cc'] = sum(cc_values) / len(cc_values) if cc_values else 0
            metrics['amc'] = metrics['loc'] / metrics['wmc'] if metrics['wmc'] > 0 else 0

            # Inheritance metrics (approximation: only the immediate superclass is visible here)
            metrics['dit'] = 1 if class_node.extends else 0
            metrics['ic'] = metrics['dit']

            # Coupling and cohesion
            fields = [f for f in class_node.fields if isinstance(f, javalang.tree.FieldDeclaration)]
            metrics['moa'] = sum(1 for f in fields if f.type and isinstance(f.type, javalang.tree.ReferenceType))
            total_fields = len(fields)
            private_fields = sum(1 for f in fields if f.modifiers and ('private' in f.modifiers or 'protected' in f.modifiers))
            metrics['dam'] = private_fields / total_fields if total_fields > 0 else 0
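            # Worked example (hypothetical class, for illustration only): fields
            # `private int count;`, `protected List<String> names;` and `public String label;`
            # give total_fields = 3 and private_fields = 2, so dam = 2 / 3 ≈ 0.67;
            # names and label are reference types while count is not, so moa = 2.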

            # LCOM: count method pairs that share no field of this class.
            field_names = {f.declarators[0].name for f in fields}
            field_usage = defaultdict(set)
            for method in methods:
                for _, node in method.filter(javalang.tree.MemberReference):
                    # MemberReference.member holds the referenced identifier (e.g. a field name).
                    if node.member in field_names:
                        field_usage[method.name].add(node.member)
            lcom = 0
            for i, m1 in enumerate(methods):
                for m2 in methods[i + 1:]:
                    if not (field_usage[m1.name] & field_usage[m2.name]):
                        lcom += 1
            metrics['lcom'] = lcom
            # Normalized variant: fraction of method pairs that share no field.
            metrics['lcom3'] = 2 * lcom / (len(methods) * (len(methods) - 1)) if len(methods) > 1 else 0
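            # Worked example (hypothetical): with methods m1, m2, m3 where only m1 and m2
            # touch the same field, the non-sharing pairs are (m1, m3) and (m2, m3),
            # so lcom = 2 and lcom3 = 2 * 2 / (3 * 2) ≈ 0.67.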

            # RFC and CBO (approximated from the method invocations visible in this file)
            called_methods = set()
            for method in methods:
                for _, node in method.filter(javalang.tree.MethodInvocation):
                    called_methods.add(node.member)
            metrics['rfc'] = len(methods) + len(called_methods)
            metrics['cbo'] = len(called_methods)

            # CBM: count invocations that target a method declared in this class
            intra_class_calls = 0
            for method in methods:
                for _, node in method.filter(javalang.tree.MethodInvocation):
                    if node.member in method_names:
                        intra_class_calls += 1
            metrics['cbm'] = intra_class_calls
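            # Worked example (hypothetical): if the class declares a() and b(), a() calls
            # b() and List.size(), and b() calls nothing, then called_methods = {b, size},
            # rfc = 2 + 2 = 4, cbo = 2, and cbm = 1 (only the call to b() is intra-class).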

            # CAM: cohesion among methods (simplified placeholder)
            metrics['cam'] = 0.5  # Default value

            # MFA: measure of functional abstraction (simplified placeholder)
            metrics['mfa'] = 0.0  # Default value

            print(f"Successfully analyzed class: {fully_qualified_name}")
            return metrics
    except Exception as e:
        print(f"Error analyzing class in {file_path}: {e}")
        return None

    return None

if __name__ == "__main__":
    if len(sys.argv) != 5:
        print("Usage: python analyze_java_file.py <file_path> <project_name> <version> <repo_dir>")
        sys.exit(1)

    file_path = sys.argv[1]
    project_name = sys.argv[2]
    version = sys.argv[3]
    repo_dir = sys.argv[4]

    try:
        metrics = analyze_file(file_path, project_name, version, repo_dir)
        if metrics:
            # Write to CSV only if we have actual data
            with open('temp_metrics.csv', 'w', newline='') as f:
                fieldnames = ['project_name', 'version', 'class_name', 'wmc', 'rfc', 'loc', 'max_cc', 'avg_cc',
                              'cbo', 'ca', 'ce', 'ic', 'cbm', 'lcom', 'lcom3', 'dit', 'noc', 'mfa',
                              'npm', 'dam', 'moa', 'cam', 'amc', 'bug']
                writer = csv.DictWriter(f, fieldnames=fieldnames)
                writer.writeheader()  # Write the header first
                writer.writerow(metrics)
            print("Metrics extracted successfully")
            sys.exit(0)
        else:
            print("No classes found in file or parsing failed")
            sys.exit(1)
    except Exception as e:
        print(f"Unexpected error processing {file_path}: {e}")
        sys.exit(1)
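# Example invocation (paths and names are illustrative, not taken from any particular setup):
#
#   python analyze_java_file.py ./repo/src/main/java/com/example/Foo.java my-project 1.0 ./repo
#
# On success this writes temp_metrics.csv containing the header row from the
# fieldnames list above followed by one row of metric values for the analyzed class.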