Skip to content

Commit 72770d7

Browse files
committed
Add SPDX license header check workflow and script
1 parent 684d8ad commit 72770d7

File tree

2 files changed

+296
-0
lines changed

2 files changed

+296
-0
lines changed
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
name: License Headers

# Run on pushes to main and on pull requests that target main.
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

# The job only needs read access to the repository contents.
permissions:
  contents: read

jobs:
  license-headers:
    name: Check License Headers
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      # --check makes the script exit 1 when any file lacks a header.
      - name: Check SPDX license headers
        run: python scripts/check_license_headers.py --check

scripts/check_license_headers.py

Lines changed: 269 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,269 @@
1+
#!/usr/bin/env python3
2+
3+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4+
# SPDX-License-Identifier: Apache-2.0
5+
6+
"""Check or add SPDX license headers on source files.
7+
8+
Usage:
9+
# Check mode (CI) — exit 1 if any file is missing a header
10+
python scripts/check_license_headers.py --check
11+
12+
# Add/update headers on all source files
13+
python scripts/check_license_headers.py
14+
15+
# Operate on specific files only
16+
python scripts/check_license_headers.py path/to/file.py
17+
"""
18+
19+
from __future__ import annotations
20+
21+
import argparse
22+
import os
23+
import sys
24+
from pathlib import Path
25+
26+
# ---------------------------------------------------------------------------
27+
# Configuration
28+
# ---------------------------------------------------------------------------
29+
30+
# Text placed after "SPDX-FileCopyrightText:" in every generated header.
COPYRIGHT_TEXT = (
    "Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. "
    "All rights reserved."
)
# SPDX identifier placed after "SPDX-License-Identifier:".
LICENSE_ID = "Apache-2.0"

# File extension -> line-comment prefix. Every extension handled so far
# uses "#" as its comment leader.
COMMENT_STYLES: dict[str, str] = {
    extension: "#" for extension in (".py", ".sh", ".yaml", ".yml", ".toml")
}

# Directories (relative to the repo root) that are never scanned.
EXCLUDE_DIRS: set[str] = {".git", ".github", ".venv", "__pycache__"}

# File names that are skipped wherever they occur.
EXCLUDE_FILES: set[str] = {".gitkeep"}
56+
57+
# ---------------------------------------------------------------------------
58+
# Header generation
59+
# ---------------------------------------------------------------------------
60+
61+
62+
def make_header(comment: str) -> str:
    """Build the two-line SPDX header, each line led by *comment*.

    Both lines end with a newline, so the result can be spliced directly
    into file content.
    """
    header_lines = (
        f"{comment} SPDX-FileCopyrightText: {COPYRIGHT_TEXT}",
        f"{comment} SPDX-License-Identifier: {LICENSE_ID}",
    )
    return "".join(line + "\n" for line in header_lines)
68+
69+
70+
# ---------------------------------------------------------------------------
71+
# File discovery
72+
# ---------------------------------------------------------------------------
73+
74+
75+
def find_repo_root() -> Path:
    """Locate the repository root by searching upwards for ``.git``.

    The current working directory is probed first, then each ancestor in
    turn up to and including the filesystem root, so a checkout that lives
    directly at ``/`` (common inside containers) is found as well.

    Returns:
        The nearest directory that contains a ``.git`` entry, or the current
        working directory when no such directory exists.
    """
    start = Path.cwd()
    # Path.parents ends with the filesystem root, so the root is probed too.
    for candidate in (start, *start.parents):
        if (candidate / ".git").exists():
            return candidate
    return start
83+
84+
85+
def is_excluded(rel: Path) -> bool:
    """Return True if a repo-relative path should be skipped.

    A path is skipped when its file name is listed in ``EXCLUDE_FILES`` or
    when it is, or lies beneath, one of the ``EXCLUDE_DIRS`` entries. Those
    entries are anchored at the repo root.

    Args:
        rel: Path relative to the repository root.

    Returns:
        True when the path must be ignored, False otherwise.
    """
    if rel.name in EXCLUDE_FILES:
        return True

    # Compare against the "/"-separated form: str(rel) uses "\\" on Windows,
    # where a plain startswith(exc_dir + "/") would never match.
    rel_posix = rel.as_posix()
    return any(
        rel_posix == exc_dir or rel_posix.startswith(exc_dir + "/")
        for exc_dir in EXCLUDE_DIRS
    )
97+
98+
99+
def is_dockerfile(path: Path) -> bool:
    """Tell whether *path* is named ``Dockerfile`` or ``Dockerfile.<anything>``.

    Only the file name is examined; matching is case-sensitive.
    """
    # Everything before the first "." must be exactly "Dockerfile"; this
    # accepts "Dockerfile" and "Dockerfile.dev" but not "Dockerfiles".
    stem, _, _ = path.name.partition(".")
    return stem == "Dockerfile"
102+
103+
104+
def get_comment_style(path: Path) -> str | None:
    """Look up the line-comment prefix for *path*.

    Dockerfiles are recognised by name and use "#"; every other file is
    looked up by suffix in ``COMMENT_STYLES``. Unsupported files yield None.
    """
    return "#" if is_dockerfile(path) else COMMENT_STYLES.get(path.suffix)
109+
110+
111+
def discover_files(root: Path) -> list[Path]:
    """Collect, in sorted order, every file under *root* that needs a header.

    Excluded directories are pruned from the walk; excluded files and files
    with no known comment style are left out of the result.
    """
    found: list[Path] = []
    for current, subdirs, names in os.walk(root):
        here = Path(current)
        rel_here = here.relative_to(root)

        # Slice assignment mutates the list os.walk iterates over, so the
        # walk never descends into excluded directories.
        subdirs[:] = [sub for sub in subdirs if not is_excluded(rel_here / sub)]

        for name in names:
            candidate = here / name
            rel_candidate = candidate.relative_to(root)
            if is_excluded(rel_candidate):
                continue
            if get_comment_style(rel_candidate) is None:
                continue
            found.append(candidate)

    found.sort()
    return found
128+
129+
130+
# ---------------------------------------------------------------------------
131+
# Header checking and insertion
132+
# ---------------------------------------------------------------------------
133+
134+
# Substring whose presence near the top of a file counts as "has a header".
SPDX_MARKER = "SPDX-License-Identifier"


def has_header(lines: list[str]) -> bool:
    """Report whether any of the first 10 lines contains ``SPDX_MARKER``."""
    return any(SPDX_MARKER in candidate for candidate in lines[:10])
143+
144+
145+
# Dockerfile parser directives; they are only honoured at the very top of
# the file, before any other comment, blank line or instruction.
_DOCKERFILE_DIRECTIVES = frozenset({"syntax", "escape", "check"})


def _is_parser_directive(line: str) -> bool:
    """Return True if *line* has the form ``# <directive>=<value>``."""
    if not line.startswith("#"):
        return False
    key, equals, _ = line[1:].partition("=")
    # Whitespace around the key and letter case are not significant.
    return bool(equals) and key.strip().lower() in _DOCKERFILE_DIRECTIVES


def find_insertion_point(lines: list[str], path: Path) -> int:
    """Determine the line index at which the header must be inserted.

    The header goes at the top of the file unless leading lines have to stay
    first to keep their meaning:

    * a ``#!`` shebang on the first line;
    * in Dockerfiles, the leading run of parser directives (``syntax``,
      ``escape``, ``check``, with or without a space after ``#``). Inserting
      above or between them would turn them into ordinary comments.

    Args:
        lines: File content split into lines.
        path: Path of the file, used to recognise Dockerfiles.

    Returns:
        Index into *lines* before which the header belongs (0 for the top).
    """
    if not lines:
        return 0

    if lines[0].startswith("#!"):
        return 1

    if _is_parser_directive(lines[0]) and is_dockerfile(path):
        # Skip every consecutive directive, not just the first one.
        index = 1
        while index < len(lines) and _is_parser_directive(lines[index]):
            index += 1
        return index

    return 0
159+
160+
161+
def insert_header(content: str, comment: str, path: Path) -> str:
    """Insert the SPDX header into file content, returning the new content.

    The header is placed at the index chosen by ``find_insertion_point``.
    A blank line separates it from any preserved leading lines (for example
    a shebang) and from the content that follows. No blank line is appended
    when nothing follows the header.

    Args:
        content: Full text of the file.
        comment: Line-comment prefix to use for the header lines.
        path: Path of the file, used to pick the insertion point.

    Returns:
        The file text with the header inserted.
    """
    header = make_header(comment)
    lines = content.splitlines(keepends=True)
    insert_at = find_insertion_point(lines, path)

    preamble = "".join(lines[:insert_at])
    remainder = "".join(lines[insert_at:])

    pieces = []
    if preamble:
        # A shebang that is also the last line of the file may lack its
        # newline; terminate it so the separator below stays a blank line.
        if not preamble.endswith(("\n", "\r")):
            preamble += "\n"
        pieces += [preamble, "\n"]
    pieces.append(header)
    if remainder:
        pieces += ["\n", remainder]
    return "".join(pieces)
175+
176+
177+
# ---------------------------------------------------------------------------
178+
# Main logic
179+
# ---------------------------------------------------------------------------
180+
181+
182+
def process_file(path: Path, root: Path, *, check: bool, verbose: bool) -> bool:
    """Check one file for an SPDX header and, unless checking, add it.

    The file is read and written without newline translation so that adding
    a header does not change the line endings of the rest of the file; the
    inserted header adopts CRLF when the file already uses it.

    Args:
        path: Absolute path of the file; must be located under *root*.
        root: Repository root, used to derive the path that is printed.
        check: When True, only report a missing header and never write.
        verbose: When True, also print files that are ok or were updated.

    Returns:
        True if the file is compliant (already had a header, is of an
        unsupported type, or was just fixed); False if *check* is set and
        the header is missing.
    """
    rel = path.relative_to(root)
    comment = get_comment_style(rel)
    if comment is None:
        return True

    # newline="" turns off universal-newline translation.
    with path.open(encoding="utf-8", newline="") as source:
        content = source.read()

    if has_header(content.splitlines()):
        if verbose:
            print(f"  ok: {rel}")
        return True

    if check:
        print(f"  MISSING: {rel}")
        return False

    # Insert using "\n" internally, then restore CRLF if the file used it
    # (a file with mixed endings is normalised to CRLF).
    uses_crlf = "\r\n" in content
    new_content = insert_header(content.replace("\r\n", "\n"), comment, rel)
    if uses_crlf:
        new_content = new_content.replace("\n", "\r\n")

    with path.open("w", encoding="utf-8", newline="") as target:
        target.write(new_content)
    if verbose:
        print(f"  added: {rel}")
    return True
206+
207+
208+
def main(argv: list[str] | None = None) -> int:
    """Run the header checker/fixer from the command line.

    Args:
        argv: Argument list to parse; defaults to ``sys.argv[1:]`` when None.

    Returns:
        Process exit status: 1 in ``--check`` mode when at least one file is
        missing its header, otherwise 0.
    """
    parser = argparse.ArgumentParser(
        description="Check or add SPDX license headers on source files.",
    )
    parser.add_argument(
        "--check",
        action="store_true",
        help="Check mode: exit 1 if any file is missing a header.",
    )
    parser.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        help="Print status for every file processed.",
    )
    parser.add_argument(
        "paths",
        nargs="*",
        type=Path,
        help="Specific files to process (default: all files under repo root).",
    )
    args = parser.parse_args(argv)

    root = find_repo_root()

    if args.paths:
        files = []
        for given in args.paths:
            resolved = given.resolve()
            if not resolved.is_file():
                # Report instead of dropping silently, so a mistyped path
                # does not look like a clean run.
                print(f"warning: skipping {given}: not a file", file=sys.stderr)
                continue
            try:
                rel = resolved.relative_to(root)
            except ValueError:
                print(
                    f"warning: skipping {given}: outside repository root {root}",
                    file=sys.stderr,
                )
                continue
            if is_excluded(rel):
                continue
            if get_comment_style(rel) is not None:
                files.append(resolved)
    else:
        files = discover_files(root)

    if args.check:
        print(f"Checking {len(files)} files for SPDX headers...")
    else:
        print(f"Processing {len(files)} files...")

    missing = []
    for f in files:
        if not process_file(f, root, check=args.check, verbose=args.verbose):
            missing.append(f)

    if args.check:
        if missing:
            print(f"\n{len(missing)} file(s) missing SPDX headers.")
            return 1
        print("All files have SPDX headers.")
        return 0

    print("Done.")
    return 0


if __name__ == "__main__":
    sys.exit(main())

0 commit comments

Comments
 (0)