Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions dev/tools/fix-boilerplate
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#!/usr/bin/env python3
# Copyright 2025 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import os
import sys

# fix-boilerplate is expected to live in dev/tools, with headers.py in
# dev/tools/shared. realpath() resolves symlinks, so the directory is found
# even when the script is invoked through a link.
script_dir = os.path.split(os.path.realpath(__file__))[0]
# Put dev/tools on the import path so `from shared import headers` resolves.
sys.path += [script_dir]

from shared import headers

def main():
    """Apply license headers to the repository that contains this script."""
    # The script lives in <repo>/dev/tools, so the repo root is two levels up.
    repo_root = os.path.abspath(os.path.join(script_dir, os.pardir, os.pardir))

    # Repo-specific excludes carried over from the original fix-boilerplate
    # script; they are added on top of the defaults in shared.headers.
    excludes = ['_archived/**', 'databases/**', 'web/**']

    print(f"Scanning for license headers in {repo_root}")
    headers.apply_headers_to_tree(repo_root, excludes=excludes)
    print("Done.")


if __name__ == "__main__":
    main()
266 changes: 266 additions & 0 deletions dev/tools/shared/headers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,266 @@
#!/usr/bin/env python3

# Copyright 2025 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import fnmatch
import argparse
import datetime

# The license header to apply, as plain text without comment markers.
# `{year}` is a str.format placeholder that apply_headers_to_tree() fills in
# with the current year; format_header() adds the per-language comment syntax.
APACHE_HEADER = """Copyright {year} The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

# Mapping of file extensions (including the leading dot) to their comment
# syntax, as a tuple of (line_prefix, block_start, block_end).
# Exactly one flavour is populated per entry: either `line_prefix` (every
# header line gets the prefix) or `block_start`/`block_end` (the header is
# wrapped in a single block comment). Extensions missing from this table are
# skipped by apply_license_header().
COMMENT_STYLES = {
    ".go": ("// ", None, None),
    ".sh": ("# ", None, None),
    ".py": ("# ", None, None),
    ".js": ("// ", None, None),
    ".ts": ("// ", None, None),
    ".java": ("// ", None, None),
    ".scala": ("// ", None, None),
    ".c": ("// ", None, None),
    ".h": ("// ", None, None),
    ".cpp": ("// ", None, None),
    ".tf": ("# ", None, None),
    # Block comments for file types that support them
    ".css": (None, "/*", " */"),
    ".xml": (None, "<!--", "-->"),
    ".html": (None, "<!--", "-->"),
}

# Default glob patterns to exclude, relative to the root directory.
# Matching is done by is_path_excluded():
#   - a pattern without '/' is tested against every component of the path,
#     so "*.json" or "go.mod" match at any depth;
#   - a pattern containing '/' is matched from the root, component by
#     component, with '**' standing for zero or more directories.
DEFAULT_EXCLUDES = [
    ".git/**",
    ".idea/**",
    "__pycache__/**",
    "node_modules/**",
    "vendor/**",
    "**/*.yaml",
    "**/*.yml",
    "**/LICENSE",
    "**/*.md",
    "**/OWNERS",
    "**/SECURITY_CONTACTS",
    "go.mod",
    "go.sum",
    "*.json",
    "*.pyc",
    "*.so",
    "*.o",
    "*.a",
    "*.dll",
    "*.exe",
    "*.jar",
    "*.class",
    "*.zip",
    "*.tar.gz",
    "*.tgz",
    "*.rar",
    "*.7z",
    "*.log",
    "*.sum",
    "*.DS_Store",
]

def file_extension_magic(file_path):
    """Determine a file's type, expressed as a typical file extension.

    The real extension is used when the path has one. Otherwise the first
    line is inspected for a shebang and mapped to ".py" or ".sh".

    Args:
        file_path: Path of the file to classify.

    Returns:
        The extension (with leading dot), or None when the type cannot be
        determined: no extension and no recognised shebang, the file is not
        valid UTF-8 (likely binary), or the file cannot be opened.
    """
    # Default to the file extension
    _, ext = os.path.splitext(file_path)
    if ext:
        return ext

    # Look for a shebang line. Opening is inside the try so that unreadable
    # files and broken symlinks are skipped instead of aborting the caller.
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            # Read the first 4k of the file, which should be enough for any header.
            content = f.read(4096)
    except (OSError, UnicodeDecodeError):
        # Unreadable, or likely a binary file
        return None

    # First line is shebang (e.g., #!/usr/bin/env python)
    first_line = content.split('\n', 1)[0]
    if not first_line.startswith("#!"):
        return None

    # Classify by the interpreter's program name rather than by substring, so
    # that a path such as /usr/share/... is not mistaken for a shell.
    tokens = first_line[2:].split()
    interpreter = os.path.basename(tokens[0]) if tokens else ""
    if interpreter == "env":
        # `env [-flags] [VAR=value] program`: take the first plain word.
        interpreter = next(
            (
                os.path.basename(token)
                for token in tokens[1:]
                if not token.startswith("-") and "=" not in token
            ),
            "",
        )

    if interpreter.startswith("python"):
        return ".py"
    if interpreter in ("sh", "bash", "dash", "ash", "ksh", "zsh"):
        return ".sh"

    print((f"unknown shebang in {file_path}: {first_line}"))
    return None

def get_comment_style(file_extension):
    """Look up the comment syntax registered for a file extension.

    Returns the (line_prefix, block_start, block_end) tuple from
    COMMENT_STYLES, or None when the extension has no entry.
    """
    try:
        return COMMENT_STYLES[file_extension]
    except KeyError:
        return None

def format_header(header_text, style):
    """Wrap the header text in the comment syntax described by `style`.

    Args:
        header_text: The plain license text; surrounding whitespace is
            stripped before formatting.
        style: A (line_prefix, block_start, block_end) tuple.

    Returns:
        The commented header followed by one blank line, or None when the
        style provides neither a line prefix nor a complete block pair.
    """
    line_prefix, block_start, block_end = style
    text_lines = header_text.strip().split('\n')

    if line_prefix:
        # Guarantee a space between the comment marker and the text.
        prefix = line_prefix if line_prefix.endswith(' ') else line_prefix + ' '
        # rstrip() keeps blank header lines free of trailing whitespace.
        commented = [(prefix + text).rstrip() for text in text_lines]
        return '\n'.join(commented) + '\n\n'

    if not (block_start and block_end):
        return None

    # Block comment: indent the text by one space between the delimiters.
    indented = '\n'.join((' ' + text).rstrip() for text in text_lines)
    return f"{block_start}\n{indented}\n{block_end}\n\n"


def has_license_header(file_path):
    """Check whether a file already carries the project's Apache header.

    Only the first 4096 characters are examined. Both the Apache 2.0 licence
    line and the "The Kubernetes Authors" copyright holder must be present.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        True if the header is present. True is also returned when the file
        cannot be read or is not valid UTF-8, so that callers leave such
        files untouched. False otherwise.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            # Read the first 4k of the file, which should be enough for any header.
            content = f.read(4096)
    except (OSError, ValueError):
        # ValueError covers UnicodeDecodeError (binary data) and invalid paths.
        return True  # Skip file on error

    if "Licensed under the Apache License, Version 2.0" not in content:
        return False
    return "The Kubernetes Authors" in content


def apply_license_header(file_path, header_text, dry_run=False):
    """Apply the license header to a single file if it doesn't have one.

    The file is left untouched when its type cannot be determined, when its
    type has no registered comment style, when it already has a header, or
    when the header cannot be formatted. A leading shebang line or XML
    declaration stays first in the file; the header is inserted after it.
    Existing line endings are preserved and the header adopts them.

    Args:
        file_path: Path of the file to update in place.
        header_text: Plain license text, without comment markers.
        dry_run: When True, report the file but do not modify it.
    """
    file_extension = file_extension_magic(file_path)
    if not file_extension:
        return

    # Resolve the comment style first: it is a cheap lookup and avoids reading
    # files whose type is not supported anyway.
    style = get_comment_style(file_extension)
    if not style:
        return

    if has_license_header(file_path):
        return

    formatted_header = format_header(header_text, style)
    if not formatted_header:
        return

    print(f"Applying header to: {file_path}")
    if dry_run:
        return

    try:
        # newline='' disables newline translation, so CRLF files are neither
        # silently converted nor shortened while being rewritten in place.
        with open(file_path, 'r+', encoding='utf-8', newline='') as f:
            content = f.read()

            # Use the line ending of the file's first line for the header.
            first_break = content.find('\n')
            if first_break > 0 and content[first_break - 1] == '\r':
                eol = '\r\n'
            else:
                eol = '\n'

            # Offset at which the header is inserted. Anything before it
            # must remain the very first thing in the file.
            insert_at = 0
            if content.startswith("#!"):
                # Handle shebangs (e.g., #!/usr/bin/env python)
                insert_at = len(content) if first_break == -1 else first_break + 1
            elif content.startswith("<?xml"):
                # A comment before the XML declaration makes the document
                # malformed, so insert after the declaration instead.
                declaration_end = content.find("?>")
                if declaration_end != -1:
                    insert_at = declaration_end + 2
                    if content.startswith('\r\n', insert_at):
                        insert_at += 2
                    elif content.startswith('\n', insert_at):
                        insert_at += 1

            preamble = content[:insert_at]
            if preamble and not preamble.endswith('\n'):
                preamble += eol

            f.seek(0)
            f.write(preamble + formatted_header.replace('\n', eol) + content[insert_at:])
            # Guard against stale bytes beyond the newly written content.
            f.truncate()
    except (OSError, ValueError) as e:
        # ValueError covers UnicodeDecodeError past the first 4k of the file.
        print(f"Could not write to file {file_path}: {e}")


def _match_path_parts(path_parts, pattern_parts):
    """Recursively match path components against pattern components.

    `**` stands for zero or more whole components; every other pattern
    component is compared to exactly one path component with fnmatch.
    """
    if not pattern_parts:
        # Pattern used up: a match only if the path is used up as well.
        return not path_parts
    if not path_parts:
        # Path used up: accept a lone trailing `**`, or only empty pieces
        # (as left behind by a trailing slash in the pattern).
        if pattern_parts == ['**']:
            return True
        return not any(piece != '' for piece in pattern_parts)

    head = pattern_parts[0]
    rest = pattern_parts[1:]

    if head != '**':
        return fnmatch.fnmatch(path_parts[0], head) and _match_path_parts(path_parts[1:], rest)

    if not rest:
        return True  # `/**` at the end matches everything remaining

    # `/**/` can match zero or more directories: try every possible skip.
    return any(
        _match_path_parts(path_parts[skip:], rest)
        for skip in range(len(path_parts) + 1)
    )


def is_path_excluded(relative_path, exclude_patterns):
    """Tell whether a relative path matches any .gitignore-style exclude pattern.

    Patterns without a slash are tested against every component of the path;
    patterns containing a slash are matched from the root of the path.
    """
    components = relative_path.replace(os.path.sep, '/').split('/')

    def matches(raw_pattern):
        normalized = raw_pattern.replace(os.path.sep, '/')
        if '/' in normalized:
            # If slash is present, match from the root
            return _match_path_parts(components, normalized.split('/'))
        # If no slash, match against any component of the path
        return any(fnmatch.fnmatch(component, normalized) for component in components)

    return any(matches(pattern) for pattern in exclude_patterns)


def apply_headers_to_tree(root_dir, excludes=None, dry_run=False):
    """
    Walk a directory tree and apply the license header to every eligible file.

    Args:
        root_dir: Directory to scan; exclude patterns are relative to it.
        excludes: Optional list of extra exclude patterns, appended to
            DEFAULT_EXCLUDES.
        dry_run: Passed through to apply_license_header(); when True no file
            is modified.
    """
    header_text = APACHE_HEADER.format(year=datetime.datetime.now().year)

    all_excludes = DEFAULT_EXCLUDES + (excludes or [])
    print(f"Excluding patterns: {all_excludes}")

    def excluded(rel_path):
        return is_path_excluded(rel_path, all_excludes)

    for current_dir, subdirs, filenames in os.walk(root_dir, topdown=True):
        rel_dir = os.path.relpath(current_dir, root_dir)
        if rel_dir == '.':
            # Top level: keep relative paths free of a "./" prefix.
            rel_dir = ''

        # Prune in place so os.walk never descends into excluded directories.
        subdirs[:] = [
            name for name in subdirs
            if not excluded(os.path.join(rel_dir, name))
        ]

        for name in filenames:
            if excluded(os.path.join(rel_dir, name)):
                continue
            apply_license_header(os.path.join(current_dir, name), header_text, dry_run)