77Handles C++ files (.cpp, .hpp, .h) and Python files (.py).
88"""
99
10+ import difflib
1011import os
12+ import re
1113import sys
1214from datetime import datetime
1315from pathlib import Path
@@ -34,6 +36,31 @@ def is_supported_file_type(file_path: str) -> bool:
3436 return get_file_type_info (file_path ) is not None
3537
3638
def get_encoding_line_number(file_path: str) -> int:
    """
    Locate the PEP 263 encoding declaration of a Python file.

    Only paths ending in ``.py`` are inspected. The declaration must be a
    comment on the first or second line; a declaration on the second line is
    honoured only when the first line is blank or comment-only (for example
    a shebang), which mirrors how Python itself detects the source encoding.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        The 0-indexed line number (0 or 1) of the encoding declaration, or
        -1 when the path is not a ``.py`` file, no valid declaration is
        found, or the file cannot be read.
    """
    if not file_path.endswith('.py'):
        return -1

    # Declaration comment pattern as published in PEP 263.
    encoding_pattern = r'^[ \t\f]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)'
    # A line that is empty, whitespace-only, or holds nothing but a comment.
    blank_or_comment_pattern = r'^[ \t\f]*(?:[#\r\n]|$)'

    try:
        # 'utf-8-sig' strips a leading BOM so it cannot hide the comment marker.
        with open(file_path, 'r', encoding='utf-8-sig', errors='ignore') as f:
            for i in range(2):  # Only the first two lines are eligible.
                line = f.readline()
                if not line:
                    break
                if re.match(encoding_pattern, line):
                    return i
                if not re.match(blank_or_comment_pattern, line):
                    # Real code on the first line disqualifies the second line.
                    break
    except (OSError, ValueError):
        # Best effort: an unreadable file is reported as "no declaration".
        pass

    return -1
62+
63+
3764def get_expected_header (file_path : str ) -> str :
3865 """Get the expected copyright header for a file."""
3966 info = get_file_type_info (file_path )
@@ -55,10 +82,18 @@ def should_check_file(file_path: str) -> bool:
5582
5683
def get_file_header(file_path: str, num_lines: int = 3) -> str:
    """
    Return the leading lines of a file where the copyright header is expected.

    The file is opened as 'utf-8-sig', so a byte-order mark is dropped. When
    get_encoding_line_number() reports an encoding declaration, the returned
    text starts on the line right after that declaration.

    Args:
        file_path: Path of the file to read.
        num_lines: Number of header lines to return.

    Returns:
        Up to ``num_lines`` lines joined into one string, or an empty string
        (after printing a warning) if anything goes wrong while reading.
    """
    try:
        with open(file_path, 'r', encoding='utf-8-sig', errors='ignore') as handle:
            # Two spare lines leave room for a declaration on line one or two.
            leading = []
            for _ in range(num_lines + 2):
                leading.append(handle.readline())

        # Start right after the encoding declaration, or at the top without one.
        declaration_index = get_encoding_line_number(file_path)
        first = declaration_index + 1 if declaration_index >= 0 else 0
        return ''.join(leading[first:first + num_lines])
    except Exception as exc:
        print(f"Warning: Could not read {file_path}: {exc}")
        return ""
def generate_diff(file_path: str) -> str:
    """
    Generate a unified diff for fixing the copyright header.

    The corrected header is placed at the top of the file, or directly after
    the encoding declaration reported by get_encoding_line_number(). If a
    copyright or SPDX line is found within the first five lines from that
    position, the existing comment block there is replaced; otherwise the
    header is inserted.

    Args:
        file_path: Path of the file whose header should be fixed.

    Returns:
        The unified diff text, or an empty string when the file type is not
        supported or the file already matches the corrected content.

    Raises:
        IOError: If the file cannot be read.
    """
    try:
        with open(file_path, 'r', encoding='utf-8-sig', errors='ignore') as f:
            original_lines = f.readlines()
    except Exception as e:
        raise IOError(f"Could not read {file_path}: {e}") from e

    info = get_file_type_info(file_path)
    if not info:
        return ""

    comment_style, has_extra_line = info
    current_year = datetime.now().year

    # The header belongs right after the encoding declaration when one exists.
    encoding_line = get_encoding_line_number(file_path)
    header_start = encoding_line + 1 if encoding_line >= 0 else 0

    has_copyright = any(
        'Copyright' in line or 'SPDX-License-Identifier' in line
        for line in original_lines[header_start:header_start + 5]
    )

    correct_header = [
        f"{comment_style} Copyright (C) 2018-{current_year} Intel Corporation\n",
        f"{comment_style} SPDX-License-Identifier: Apache-2.0\n",
    ]
    if has_extra_line:
        correct_header.append(f"{comment_style}\n")
    correct_header.append("\n")

    corrected_lines = list(original_lines)
    if has_copyright:
        # Walk the contiguous comment block so lines beyond it are never
        # overwritten; every consecutive line starting with the comment prefix
        # is treated as part of the existing header.
        block_end = header_start
        while (block_end < len(original_lines)
               and original_lines[block_end].startswith(comment_style)):
            block_end += 1
        # Include the single trailing blank line that separates header and code.
        if block_end < len(original_lines) and original_lines[block_end].strip() == '':
            block_end += 1
        corrected_lines[header_start:block_end] = correct_header
    else:
        # Insert the header at the right position (after encoding line if present).
        corrected_lines[header_start:header_start] = correct_header

    # Leading slashes are dropped so the a/ and b/ prefixes form relative paths.
    normalized_path = file_path.lstrip('/')
    diff_lines = list(difflib.unified_diff(
        [line.rstrip('\n') for line in original_lines],
        [line.rstrip('\n') for line in corrected_lines],
        fromfile=f"a/{normalized_path}",
        tofile=f"b/{normalized_path}",
        lineterm='',
    ))
    if not diff_lines:
        # Nothing to change: report "no diff" the same way as unsupported files.
        return ""
    return '\n'.join(diff_lines) + '\n'
118168
119169
120170def main ():
0 commit comments