1+ #!/usr/bin/env python3
2+ # Copyright (c) Microsoft Corporation.
3+ # Licensed under the MIT License.
4+
5+ """
6+ AntiPatternDetector
7+ ------------------
8+ Detects anti-patterns in spec files and related artifacts.
9+
10+ This module provides systematic detection of common problems in spec files,
11+ with configurable severity levels and detailed reporting.
12+
13+ Functions:
14+ ----------
15+ detect_all():
16+ Main entry point that runs all anti-pattern detection methods on a spec file.
17+ Combines results from patch file, CVE, and changelog issue detection.
18+
19+ detect_patch_file_issues():
20+ Detects patch file related problems:
21+ - Missing patch files referenced in spec but not found in directory
22+ - Unused patch files present in directory but not referenced in spec
23+ - CVE patch naming mismatches (CVE-named patches without corresponding CVE documentation)
24+
25+ detect_cve_issues():
26+ Detects CVE reference related problems:
27+ - Future-dated CVEs (CVE years beyond current expected range)
28+ - Missing CVE documentation in changelog (CVEs referenced in spec but not in changelog)
    - Cross-references every well-formed CVE ID (CVE-YYYY-NNNN, four or more trailing digits) with the changelog; malformed IDs are not reported
30+
31+ detect_changelog_issues():
32+ Detects changelog format and content problems:
33+ - Missing %changelog section entirely
34+ - Empty changelog sections with no entries
35+ - Invalid changelog entry format (non-standard RPM changelog format)
36+ - Validates standard format: * Day Month DD YYYY User <email> - Version
37+
38+ Severity Levels:
39+ ---------------
40+ - CRITICAL: Must be fixed before merge
41+ - ERROR: Should be fixed before merge
42+ - WARNING: Review recommended but doesn't block merge
43+ - INFO: Informational only
44+ """
45+
import datetime
import logging
import os
import re
from dataclasses import dataclass
from enum import Enum, auto
from typing import Any, Dict, List, Optional, Set, Tuple
52+
53+ # Configure logging
54+ logger = logging .getLogger ("anti-pattern-detector" )
55+
class Severity(Enum):
    """Severity levels for anti-patterns, ordered from least to most severe.

    Members receive increasing integer values from ``auto()`` in declaration
    order, so the rich comparisons below rank
    ``INFO < WARNING < ERROR < CRITICAL``. Comparing against anything that is
    not a ``Severity`` returns ``NotImplemented`` (Python then raises
    ``TypeError`` for ordering operators).
    """

    INFO = auto()      # Informational only
    WARNING = auto()   # Warning that should be reviewed
    ERROR = auto()     # Error that should be fixed
    CRITICAL = auto()  # Critical issue that must be fixed

    # All four ordering operators are spelled out so that ``>``, ``<``,
    # ``<=``, ``max()`` and ``sorted()`` work in addition to ``>=``.
    # ``__eq__`` is deliberately left to Enum so members stay hashable.
    def __ge__(self, other):
        if self.__class__ is other.__class__:
            return self.value >= other.value
        return NotImplemented

    def __gt__(self, other):
        if self.__class__ is other.__class__:
            return self.value > other.value
        return NotImplemented

    def __le__(self, other):
        if self.__class__ is other.__class__:
            return self.value <= other.value
        return NotImplemented

    def __lt__(self, other):
        if self.__class__ is other.__class__:
            return self.value < other.value
        return NotImplemented
67+
@dataclass
class AntiPattern:
    """One anti-pattern finding reported against a spec file.

    All fields are required and positional, in the order listed.

    Attributes:
        id: Unique identifier for this type of anti-pattern.
        name: Human-readable name/title.
        description: Detailed description of the problem.
        severity: Severity level of the finding.
        file_path: Path to the file with the issue.
        line_number: Line number of the issue, or None if not applicable.
        context: Surrounding context from the file, or None.
        recommendation: Suggested fix or improvement.
    """

    id: str
    name: str
    description: str
    severity: Severity
    file_path: str
    line_number: Optional[int]
    context: Optional[str]
    recommendation: str
79+
class AntiPatternDetector:
    """Detects common anti-patterns in spec files.

    Each ``detect_*`` method inspects the text of one spec file and returns
    a list of ``AntiPattern`` findings; ``detect_all`` runs all of them.
    The severity of every finding is looked up by anti-pattern id in
    ``self.severity_map``, so severities can be re-tuned after construction.
    """

    # Marker that opens the changelog section of a spec file.
    _CHANGELOG_MARKER = '%changelog'

    # "Patch<N>: <file-or-url>" tag, applied to an already-stripped line.
    # The number is optional so an unnumbered "Patch:" tag is recognised too.
    _PATCH_TAG_RE = re.compile(r'^Patch(\d*):\s*(.+?)$')

    # A well-formed CVE identifier anywhere in the text.
    _CVE_RE = re.compile(r'CVE-(\d{4})-(\d{4,})')

    # CVE identifier at the start of a patch file name.
    _CVE_FILENAME_RE = re.compile(r'(CVE-\d{4}-\d+)')

    # Loose shape of a changelog entry header: "* word word digits yyyy".
    _CHANGELOG_ENTRY_RE = re.compile(r'\*\s+\w+\s+\w+\s+\d+\s+\d{4}')

    # Strict shape of a changelog entry header: "* Day Month DD YYYY".
    _CHANGELOG_STANDARD_RE = re.compile(
        r'\*\s+(Mon|Tue|Wed|Thu|Fri|Sat|Sun)\s+[A-Z][a-z]+\s+\d{1,2}\s+\d{4}'
    )

    def __init__(self, repo_root: str, max_cve_year: Optional[int] = None):
        """
        Initialize the anti-pattern detector.

        Args:
            repo_root: Root directory of the repository. Stored on the
                instance; the detection methods in this class do not read it.
            max_cve_year: Latest CVE year that is NOT reported as
                future-dated. When None (the default) the limit is the
                current calendar year plus one, evaluated at detection time.
        """
        self.repo_root = repo_root
        self.max_cve_year = max_cve_year
        logger.info("Initialized AntiPatternDetector")

        # Define severity mapping for anti-patterns
        # This allows for easy configuration of severity levels
        self.severity_map = {
            # Patch related issues
            'missing-patch-file': Severity.ERROR,
            'cve-patch-mismatch': Severity.ERROR,
            'unused-patch-file': Severity.WARNING,
            'patch-without-cve-ref': Severity.WARNING,

            # CVE related issues
            'missing-cve-reference': Severity.ERROR,
            'invalid-cve-format': Severity.ERROR,
            'future-dated-cve': Severity.ERROR,
            'duplicate-cve-patch': Severity.WARNING,

            # Changelog related issues
            'missing-changelog-entry': Severity.ERROR,
            'invalid-changelog-format': Severity.WARNING,
            'missing-cve-in-changelog': Severity.ERROR,
        }

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _report(self, pattern_id: str, default_severity: Severity, name: str,
                description: str, recommendation: str, file_path: str,
                line_number: Optional[int] = None,
                context: Optional[str] = None) -> AntiPattern:
        """Build a finding, taking its severity from ``self.severity_map``
        and falling back to ``default_severity`` for unmapped ids."""
        return AntiPattern(
            id=pattern_id,
            name=name,
            description=description,
            severity=self.severity_map.get(pattern_id, default_severity),
            file_path=file_path,
            line_number=line_number,
            context=context,
            recommendation=recommendation,
        )

    def _latest_plausible_cve_year(self) -> int:
        """Return the last CVE year that is not considered future-dated."""
        if self.max_cve_year is not None:
            return self.max_cve_year
        return datetime.date.today().year + 1

    @classmethod
    def _changelog_offset(cls, file_content: str) -> Optional[int]:
        """Return the offset just past the first ``%changelog`` marker, or
        None when the marker does not occur in ``file_content``."""
        marker_at = file_content.find(cls._CHANGELOG_MARKER)
        if marker_at < 0:
            return None
        return marker_at + len(cls._CHANGELOG_MARKER)

    @staticmethod
    def _line_number(file_content: str, offset: int) -> int:
        """Return the 1-based line number containing ``offset``."""
        return file_content.count('\n', 0, offset) + 1

    @staticmethod
    def _mentions_cve(text: str, cve_id: str) -> bool:
        """Return True if ``text`` contains ``cve_id`` as a complete id, i.e.
        not merely as the prefix of a longer number (CVE-2024-1234 must not
        be satisfied by CVE-2024-12345)."""
        return re.search(re.escape(cve_id) + r'(?!\d)', text) is not None

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def detect_all(self, file_path: str, file_content: str,
                   file_list: List[str]) -> List[AntiPattern]:
        """
        Run all anti-pattern detection methods on a spec file.

        Args:
            file_path: Path to the spec file relative to repo root
            file_content: Content of the spec file
            file_list: List of files in the same directory

        Returns:
            List of detected anti-patterns: patch findings first, then CVE
            findings, then changelog findings.
        """
        logger.info("Running all anti-pattern detections on %s", file_path)

        all_patterns = []
        all_patterns.extend(self.detect_patch_file_issues(file_path, file_content, file_list))
        all_patterns.extend(self.detect_cve_issues(file_path, file_content))
        all_patterns.extend(self.detect_changelog_issues(file_path, file_content))

        logger.info("Found %d anti-patterns in %s", len(all_patterns), file_path)
        return all_patterns

    def detect_patch_file_issues(self, file_path: str, file_content: str,
                                 file_list: List[str]) -> List[AntiPattern]:
        """
        Detect issues related to patch files.

        Reports:
            - ``missing-patch-file``: a ``Patch`` tag names a file that is
              not in ``file_list``.
            - ``unused-patch-file``: a ``*.patch`` file in ``file_list`` is
              not named by any ``Patch`` tag.
            - ``cve-patch-mismatch``: a ``CVE-*.patch`` file whose CVE id
              does not occur anywhere in the spec.

        Args:
            file_path: Path to the spec file relative to repo root
            file_content: Content of the spec file
            file_list: List of files in the same directory

        Returns:
            List of detected patch-related anti-patterns
        """
        patterns = []
        available = set(file_list)  # O(1) membership inside the loop
        referenced = {}             # patch file name -> line of its Patch tag

        for line_num, raw_line in enumerate(file_content.splitlines(), 1):
            line = raw_line.strip()
            tag = self._PATCH_TAG_RE.match(line)
            if not tag:
                continue

            # A Patch tag may hold a URL; the file stored next to the spec
            # is its last path component.
            # NOTE(review): macros such as %{name} are not expanded here, so
            # macro-based patch names are still compared literally.
            patch_file = tag.group(2).strip().rsplit('/', 1)[-1]
            referenced[patch_file] = line_num

            if patch_file not in available:
                patterns.append(self._report(
                    'missing-patch-file', Severity.ERROR,
                    name="Missing Patch File",
                    description=f"Patch file '{patch_file}' is referenced in the spec but not found in the directory",
                    recommendation="Add the missing patch file or update the Patch reference",
                    file_path=file_path,
                    line_number=line_num,
                    context=line,
                ))

        for patch_file in file_list:
            if not patch_file.endswith('.patch'):
                continue

            if patch_file not in referenced:
                patterns.append(self._report(
                    'unused-patch-file', Severity.WARNING,
                    name="Unused Patch File",
                    description=f"Patch file '{patch_file}' exists in directory but is not referenced in spec",
                    recommendation="Add a reference to the patch file or remove it if not needed",
                    file_path=file_path,
                ))

            # A CVE-named patch should have its CVE id mentioned in the spec.
            if patch_file.startswith('CVE-'):
                cve = self._CVE_FILENAME_RE.match(patch_file)
                if cve and not self._mentions_cve(file_content, cve.group(1)):
                    patterns.append(self._report(
                        'cve-patch-mismatch', Severity.ERROR,
                        name="CVE Patch Mismatch",
                        description=f"Patch file '{patch_file}' appears to fix {cve.group(1)} but this CVE is not mentioned in the spec",
                        recommendation=f"Add {cve.group(1)} to the spec file changelog entry",
                        file_path=file_path,
                    ))

        return patterns

    def detect_cve_issues(self, file_path: str, file_content: str) -> List[AntiPattern]:
        """
        Detect issues related to CVE references.

        Reports:
            - ``future-dated-cve``: the year in a CVE id is later than the
              configured limit (see ``max_cve_year`` on the constructor).
            - ``missing-cve-in-changelog``: a CVE id occurs in the spec but
              nowhere after the ``%changelog`` marker. Skipped when the spec
              has no ``%changelog`` marker at all.

        Each distinct CVE id is reported at most once per check, in order of
        first appearance in the file.

        Args:
            file_path: Path to the spec file relative to repo root
            file_content: Content of the spec file

        Returns:
            List of detected CVE-related anti-patterns
        """
        patterns = []

        # First occurrence of every distinct CVE id, in order of appearance
        # (dicts preserve insertion order, which keeps the output stable).
        first_seen = {}
        for match in self._CVE_RE.finditer(file_content):
            first_seen.setdefault(match.group(0), match)

        # Skip if no CVE references (may not be a security update)
        if not first_seen:
            return patterns

        max_year = self._latest_plausible_cve_year()
        lines = file_content.split('\n')  # split once, reused for context

        for cve_id, match in first_seen.items():
            year = int(match.group(1))
            if year <= max_year:
                continue
            line_num = self._line_number(file_content, match.start())
            patterns.append(self._report(
                'future-dated-cve', Severity.ERROR,
                name="Future-Dated CVE",
                description=f"CVE {cve_id} appears to be from the future (year {year})",
                recommendation="Check if the CVE year is correct",
                file_path=file_path,
                line_number=line_num,
                context=lines[line_num - 1].strip(),
            ))

        changelog_start = self._changelog_offset(file_content)
        if changelog_start is not None:
            # Any mention anywhere in the changelog counts as documented,
            # not just a mention in the latest entry. Whole ids are compared
            # so one id cannot be satisfied by a longer one sharing a prefix.
            documented = {
                m.group(0)
                for m in self._CVE_RE.finditer(file_content, changelog_start)
            }
            for cve_id in first_seen:
                if cve_id in documented:
                    continue
                patterns.append(self._report(
                    'missing-cve-in-changelog', Severity.ERROR,
                    name="Missing CVE in Changelog",
                    description=f"{cve_id} is referenced in the spec file but not mentioned in any changelog entry",
                    recommendation=f"Add {cve_id} to a changelog entry",
                    file_path=file_path,
                ))

        return patterns

    def detect_changelog_issues(self, file_path: str, file_content: str) -> List[AntiPattern]:
        """
        Detect issues related to the changelog.

        Reports:
            - ``missing-changelog-entry`` ("Missing Changelog"): the spec
              has no ``%changelog`` marker; no further checks are run.
            - ``missing-changelog-entry`` ("Empty Changelog"): nothing after
              the marker looks like an entry header.
            - ``invalid-changelog-format``: an entry header does not start
              with ``* Day Month DD YYYY`` using a three-letter weekday.

        Args:
            file_path: Path to the spec file relative to repo root
            file_content: Content of the spec file

        Returns:
            List of detected changelog-related anti-patterns
        """
        patterns = []

        changelog_start = self._changelog_offset(file_content)
        if changelog_start is None:
            patterns.append(self._report(
                'missing-changelog-entry', Severity.ERROR,
                name="Missing Changelog",
                description="Spec file does not contain a %changelog section",
                recommendation="Add a %changelog section to the spec file",
                file_path=file_path,
            ))
            return patterns  # Can't do further changelog checks

        entry_found = False
        # Searching the full content from the changelog offset keeps match
        # positions absolute, so reported line numbers refer to the spec
        # file rather than to the extracted changelog text.
        for entry in self._CHANGELOG_ENTRY_RE.finditer(file_content, changelog_start):
            entry_found = True
            entry_text = entry.group(0)
            if self._CHANGELOG_STANDARD_RE.match(entry_text):
                continue
            patterns.append(self._report(
                'invalid-changelog-format', Severity.WARNING,
                name="Invalid Changelog Format",
                description=f"Changelog entry '{entry_text}' does not follow standard format",
                recommendation="Use standard format: * Day Month DD YYYY User <email> - Version",
                file_path=file_path,
                line_number=self._line_number(file_content, entry.start()),
                context=entry_text,
            ))

        if not entry_found:
            patterns.append(self._report(
                'missing-changelog-entry', Severity.ERROR,
                name="Empty Changelog",
                description="Spec file has a %changelog section but no entries",
                recommendation="Add changelog entries for all changes",
                file_path=file_path,
            ))

        return patterns
0 commit comments