1
+ #!/usr/bin/env python3
2
+ # Copyright (c) Microsoft Corporation.
3
+ # Licensed under the MIT License.
4
+
5
+ """
6
+ AntiPatternDetector
7
+ ------------------
8
+ Detects anti-patterns in spec files and related artifacts.
9
+
10
+ This module provides systematic detection of common problems in spec files,
11
+ with configurable severity levels and detailed reporting.
12
+
13
+ Functions:
14
+ ----------
15
+ detect_all():
16
+ Main entry point that runs all anti-pattern detection methods on a spec file.
17
+ Combines results from patch file, CVE, and changelog issue detection.
18
+
19
+ detect_patch_file_issues():
20
+ Detects patch file related problems:
21
+ - Missing patch files referenced in spec but not found in directory
22
+ - Unused patch files present in directory but not referenced in spec
23
+ - CVE patch naming mismatches (CVE-named patches without corresponding CVE documentation)
24
+
25
+ detect_cve_issues():
26
+ Detects CVE reference related problems:
27
+ - Future-dated CVEs (CVE years beyond current expected range)
28
+ - Missing CVE documentation in changelog (CVEs referenced in spec but not in changelog)
29
+ - Validates CVE format and cross-references with changelog entries
30
+
31
+ detect_changelog_issues():
32
+ Detects changelog format and content problems:
33
+ - Missing %changelog section entirely
34
+ - Empty changelog sections with no entries
35
+ - Invalid changelog entry format (non-standard RPM changelog format)
36
+ - Validates standard format: * Day Month DD YYYY User <email> - Version
37
+
38
+ Severity Levels:
39
+ ---------------
40
+ - CRITICAL: Must be fixed before merge
41
+ - ERROR: Should be fixed before merge
42
+ - WARNING: Review recommended but doesn't block merge
43
+ - INFO: Informational only
44
+ """
45
+
46
+ import os
47
+ import re
48
+ import logging
49
+ from enum import Enum , auto
50
+ from typing import List , Dict , Optional , Any , Set , Tuple
51
+ from dataclasses import dataclass
52
+
53
+ # Module-level logger; handlers and levels are left to the application to configure
54
+ logger = logging .getLogger ("anti-pattern-detector" )
55
+
56
class Severity(Enum):
    """Severity levels for anti-patterns, ordered from least to most severe.

    Members compare by their numeric value, so ``Severity.ERROR >
    Severity.WARNING`` holds and ``max()`` / ``sorted()`` work on collections
    of severities. Comparing against a non-``Severity`` object returns
    ``NotImplemented`` (which Python turns into a ``TypeError`` for ordering
    operators).
    """
    INFO = auto()      # Informational only
    WARNING = auto()   # Warning that should be reviewed
    ERROR = auto()     # Error that should be fixed
    CRITICAL = auto()  # Critical issue that must be fixed

    # All four ordering operators are spelled out: defining only ``__ge__``
    # leaves ``>`` and ``<`` unsupported, which breaks max()/sorted().
    def __ge__(self, other):
        if self.__class__ is other.__class__:
            return self.value >= other.value
        return NotImplemented

    def __gt__(self, other):
        if self.__class__ is other.__class__:
            return self.value > other.value
        return NotImplemented

    def __le__(self, other):
        if self.__class__ is other.__class__:
            return self.value <= other.value
        return NotImplemented

    def __lt__(self, other):
        if self.__class__ is other.__class__:
            return self.value < other.value
        return NotImplemented
67
+
68
@dataclass
class AntiPattern:
    """A single anti-pattern finding reported against a spec file.

    Attributes:
        id: Unique identifier for this type of anti-pattern.
        name: Human-readable name/title.
        description: Detailed description of the problem.
        severity: Severity level of the finding.
        file_path: Path to the file with the issue.
        line_number: Line number of the issue, or None when not applicable.
        context: Surrounding context from the file, or None when not applicable.
        recommendation: Suggested fix or improvement.
    """
    id: str
    name: str
    description: str
    severity: Severity
    file_path: str
    line_number: Optional[int]
    context: Optional[str]
    recommendation: str
79
+
80
class AntiPatternDetector:
    """Detects common anti-patterns in spec files.

    Three families of checks are implemented: patch file references
    (``detect_patch_file_issues``), CVE references (``detect_cve_issues``) and
    the ``%changelog`` section (``detect_changelog_issues``).
    ``detect_all`` runs all of them.

    The severity reported for each finding is looked up in
    ``self.severity_map`` by the finding's id. The map also carries entries
    ('patch-without-cve-ref', 'invalid-cve-format', 'duplicate-cve-patch')
    that no check in this class currently emits.
    """

    # "PatchN: <source>" or the unnumbered "Patch: <source>", applied to an
    # already-stripped line.
    _PATCH_TAG_RE = re.compile(r'^Patch\d*:\s*(.+)$')
    # %{name} / %{version} and their brace-less forms (%name, %version).
    _BASIC_MACRO_RE = re.compile(r'%\{(name|version)\}|%(name|version)\b')
    # Preamble tags whose values back the macros above. [ \t]* instead of \s*
    # so that an empty tag cannot swallow the following line.
    _PREAMBLE_TAG_RES = (
        ('name', re.compile(r'^Name:[ \t]*(\S+)', re.MULTILINE)),
        ('version', re.compile(r'^Version:[ \t]*(\S+)', re.MULTILINE)),
    )
    _CVE_RE = re.compile(r'CVE-(\d{4})-(\d{4,})')
    _CVE_PATCH_NAME_RE = re.compile(r'(CVE-\d{4}-\d+)')
    # Section header at the start of a line, so that a "%changelog" mentioned
    # inside a comment or description is not mistaken for the section.
    _CHANGELOG_HEADER_RE = re.compile(r'^[ \t]*%changelog\b', re.MULTILINE)
    # Loose shape of an entry header, then the strict standard shape.
    _CHANGELOG_ENTRY_RE = re.compile(r'\*\s+\w+\s+\w+\s+\d+\s+\d{4}')
    _STANDARD_ENTRY_RE = re.compile(
        r'\*\s+(Mon|Tue|Wed|Thu|Fri|Sat|Sun)\s+[A-Z][a-z]+\s+\d{1,2}\s+\d{4}')

    def __init__(self, repo_root: str, max_cve_year: Optional[int] = None):
        """
        Initialize the anti-pattern detector.

        Args:
            repo_root: Root directory of the repository
            max_cve_year: Latest CVE year that is not considered
                future-dated. When None (the default) the current calendar
                year is used at detection time.
        """
        self.repo_root = repo_root
        self.max_cve_year = max_cve_year
        logger.info("Initialized AntiPatternDetector")

        # Define severity mapping for anti-patterns
        # This allows for easy configuration of severity levels
        self.severity_map = {
            # Patch related issues
            'missing-patch-file': Severity.ERROR,
            'cve-patch-mismatch': Severity.ERROR,
            'unused-patch-file': Severity.WARNING,
            'patch-without-cve-ref': Severity.WARNING,

            # CVE related issues
            'missing-cve-reference': Severity.ERROR,
            'invalid-cve-format': Severity.ERROR,
            'future-dated-cve': Severity.ERROR,
            'duplicate-cve-patch': Severity.WARNING,

            # Changelog related issues
            'missing-changelog-entry': Severity.ERROR,
            'invalid-changelog-format': Severity.WARNING,
            'missing-cve-in-changelog': Severity.ERROR,
        }

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _finding(self, pattern_id: str, default_severity: 'Severity',
                 name: str, description: str, file_path: str,
                 recommendation: str, line_number: Optional[int] = None,
                 context: Optional[str] = None) -> 'AntiPattern':
        """Build an AntiPattern, resolving its severity from severity_map."""
        return AntiPattern(
            id=pattern_id,
            name=name,
            description=description,
            severity=self.severity_map.get(pattern_id, default_severity),
            file_path=file_path,
            line_number=line_number,
            context=context,
            recommendation=recommendation,
        )

    @staticmethod
    def _line_number(text: str, offset: int) -> int:
        """Return the 1-based line number of character ``offset`` in ``text``."""
        return text.count('\n', 0, offset) + 1

    def _changelog_offset(self, file_content: str) -> Optional[int]:
        """Return the offset just past the %changelog header, or None if absent."""
        header = self._CHANGELOG_HEADER_RE.search(file_content)
        return header.end() if header else None

    def _basic_macros(self, file_content: str) -> Dict[str, str]:
        """Collect literal Name/Version tag values usable as %{name}/%{version}."""
        macros = {}
        for macro_name, tag_re in self._PREAMBLE_TAG_RES:
            tag = tag_re.search(file_content)
            # A value that itself contains a macro cannot be resolved here.
            if tag and '%' not in tag.group(1):
                macros[macro_name] = tag.group(1)
        return macros

    def _resolve_patch_name(self, source: str, macros: Dict[str, str]) -> str:
        """Turn a Patch tag value into the file name expected in the directory."""
        def substitute(found):
            key = found.group(1) or found.group(2)
            return macros.get(key, found.group(0))

        expanded = self._BASIC_MACRO_RE.sub(substitute, source.strip())
        # A patch given as a URL (or path) is stored under its last component.
        return expanded.rsplit('/', 1)[-1]

    def _latest_allowed_cve_year(self) -> int:
        """Return the newest CVE year that is not treated as future-dated."""
        if self.max_cve_year is not None:
            return self.max_cve_year
        import datetime  # local import: the module does not import datetime
        return datetime.date.today().year

    # ------------------------------------------------------------------
    # Public detection API
    # ------------------------------------------------------------------

    def detect_all(self, file_path: str, file_content: str,
                   file_list: List[str]) -> List['AntiPattern']:
        """
        Run all anti-pattern detection methods on a spec file.

        Args:
            file_path: Path to the spec file relative to repo root
            file_content: Content of the spec file
            file_list: List of files in the same directory

        Returns:
            List of detected anti-patterns: patch findings first, then CVE
            findings, then changelog findings
        """
        logger.info("Running all anti-pattern detections on %s", file_path)

        all_patterns = []
        all_patterns.extend(
            self.detect_patch_file_issues(file_path, file_content, file_list))
        all_patterns.extend(self.detect_cve_issues(file_path, file_content))
        all_patterns.extend(self.detect_changelog_issues(file_path, file_content))

        logger.info("Found %d anti-patterns in %s", len(all_patterns), file_path)
        return all_patterns

    def detect_patch_file_issues(self, file_path: str, file_content: str,
                                 file_list: List[str]) -> List['AntiPattern']:
        """
        Detect issues related to patch files.

        Reports, in this order:
          * 'missing-patch-file' for every Patch tag whose file is not in
            ``file_list``;
          * 'unused-patch-file' for every ``*.patch`` file in ``file_list``
            that no Patch tag refers to;
          * 'cve-patch-mismatch' for every ``CVE-YYYY-N*.patch`` file whose CVE
            id does not appear anywhere in the spec.

        Patch tag values are compared by file name: ``%{name}`` and
        ``%{version}`` are expanded from the Name/Version tags when those are
        literal, and only the last path/URL component is used. A reference
        that still contains a macro after that cannot be checked and is not
        reported as missing.

        Args:
            file_path: Path to the spec file relative to repo root
            file_content: Content of the spec file
            file_list: List of files in the same directory

        Returns:
            List of detected patch-related anti-patterns
        """
        findings = []
        available = set(file_list)
        macros = self._basic_macros(file_content)
        referenced = {}  # resolved patch file name -> line number of its tag

        for line_num, raw_line in enumerate(file_content.splitlines(), 1):
            line = raw_line.strip()
            tag = self._PATCH_TAG_RE.match(line)
            if not tag:
                continue

            patch_file = self._resolve_patch_name(tag.group(1), macros)
            referenced[patch_file] = line_num

            if '%' in patch_file:
                # Unresolved macro: the real file name is unknown, so a
                # "missing" report would be a guess.
                logger.debug("Cannot resolve macros in patch reference '%s' (%s:%d)",
                             patch_file, file_path, line_num)
                continue

            if patch_file not in available:
                findings.append(self._finding(
                    'missing-patch-file', Severity.ERROR,
                    name="Missing Patch File",
                    description=f"Patch file '{patch_file}' is referenced in the spec but not found in the directory",
                    file_path=file_path,
                    line_number=line_num,
                    context=line,
                    recommendation="Add the missing patch file or update the Patch reference",
                ))

        for candidate in file_list:
            if not candidate.endswith('.patch'):
                continue

            if candidate not in referenced:
                findings.append(self._finding(
                    'unused-patch-file', Severity.WARNING,
                    name="Unused Patch File",
                    description=f"Patch file '{candidate}' exists in directory but is not referenced in spec",
                    file_path=file_path,
                    recommendation="Add a reference to the patch file or remove it if not needed",
                ))

            # Check if CVE patches match CVE references
            if candidate.startswith('CVE-'):
                named = self._CVE_PATCH_NAME_RE.match(candidate)
                if not named:
                    continue
                cve_id = named.group(1)
                # (?!\d) keeps CVE-2023-1234 from matching inside CVE-2023-12345.
                if not re.search(re.escape(cve_id) + r'(?!\d)', file_content):
                    findings.append(self._finding(
                        'cve-patch-mismatch', Severity.ERROR,
                        name="CVE Patch Mismatch",
                        description=f"Patch file '{candidate}' appears to fix {cve_id} but this CVE is not mentioned in the spec",
                        file_path=file_path,
                        recommendation=f"Add {cve_id} to the spec file changelog entry",
                    ))

        return findings

    def detect_cve_issues(self, file_path: str, file_content: str) -> List['AntiPattern']:
        """
        Detect issues related to CVE references.

        Each distinct CVE id found in the spec is examined once, in order of
        first appearance. Reports 'future-dated-cve' when the id's year is
        later than the allowed maximum (``max_cve_year`` or the current year),
        then 'missing-cve-in-changelog' for every id that does not appear as a
        whole id after the %changelog header. The changelog check is skipped
        when the spec has no %changelog section.

        Args:
            file_path: Path to the spec file relative to repo root
            file_content: Content of the spec file

        Returns:
            List of detected CVE-related anti-patterns
        """
        findings = []

        cve_matches = list(self._CVE_RE.finditer(file_content))
        # Skip if no CVE references (may not be a security update)
        if not cve_matches:
            return findings

        latest_year = self._latest_allowed_cve_year()
        # Split on '\n' only so indices agree with _line_number().
        lines = file_content.split('\n')

        # dict keeps first-seen order, which makes the report deterministic.
        seen_cves = {}
        for match in cve_matches:
            cve_id = match.group(0)
            if cve_id in seen_cves:
                continue
            seen_cves[cve_id] = True

            year = int(match.group(1))
            if year > latest_year:
                line_num = self._line_number(file_content, match.start())
                findings.append(self._finding(
                    'future-dated-cve', Severity.ERROR,
                    name="Future-Dated CVE",
                    description=f"{cve_id} appears to be from the future (year {year})",
                    file_path=file_path,
                    line_number=line_num,
                    context=lines[line_num - 1].strip(),
                    recommendation="Check if the CVE year is correct",
                ))

        changelog_start = self._changelog_offset(file_content)
        if changelog_start is not None:
            # Any CVE mentioned anywhere in the changelog counts as documented.
            # Whole ids are collected so that a longer id does not vouch for a
            # shorter one that is merely its prefix.
            documented = {
                found.group(0)
                for found in self._CVE_RE.finditer(file_content, changelog_start)
            }
            for cve_id in seen_cves:
                if cve_id in documented:
                    continue
                findings.append(self._finding(
                    'missing-cve-in-changelog', Severity.ERROR,
                    name="Missing CVE in Changelog",
                    description=f"{cve_id} is referenced in the spec file but not mentioned in any changelog entry",
                    file_path=file_path,
                    recommendation=f"Add {cve_id} to a changelog entry",
                ))

        return findings

    def detect_changelog_issues(self, file_path: str, file_content: str) -> List['AntiPattern']:
        """
        Detect issues related to the changelog.

        Reports 'missing-changelog-entry' when there is no %changelog header
        at the start of a line, or when the section contains nothing that
        looks like an entry header, and 'invalid-changelog-format' for every
        entry header that does not match
        ``* Day Month DD YYYY`` with a three-letter weekday.

        Args:
            file_path: Path to the spec file relative to repo root
            file_content: Content of the spec file

        Returns:
            List of detected changelog-related anti-patterns
        """
        changelog_start = self._changelog_offset(file_content)
        if changelog_start is None:
            # Can't do further changelog checks
            return [self._finding(
                'missing-changelog-entry', Severity.ERROR,
                name="Missing Changelog",
                description="Spec file does not contain a %changelog section",
                file_path=file_path,
                recommendation="Add a %changelog section to the spec file",
            )]

        findings = []
        entry_found = False

        # Search the full file from the header onwards so that match offsets
        # are file offsets and the reported line numbers are real ones.
        for entry in self._CHANGELOG_ENTRY_RE.finditer(file_content, changelog_start):
            entry_found = True
            entry_text = entry.group(0)

            if self._STANDARD_ENTRY_RE.match(entry_text):
                continue
            findings.append(self._finding(
                'invalid-changelog-format', Severity.WARNING,
                name="Invalid Changelog Format",
                description=f"Changelog entry '{entry_text}' does not follow standard format",
                file_path=file_path,
                line_number=self._line_number(file_content, entry.start()),
                context=entry_text,
                recommendation="Use standard format: * Day Month DD YYYY User <email> - Version",
            ))

        if not entry_found:
            findings.append(self._finding(
                'missing-changelog-entry', Severity.ERROR,
                name="Empty Changelog",
                description="Spec file has a %changelog section but no entries",
                file_path=file_path,
                recommendation="Add changelog entries for all changes",
            ))

        return findings
0 commit comments