Skip to content

Commit a717586

Browse files
committed
[gdb/contrib] Add script to license check new files
While reading through gdb-patches backlog after a return from PTO, I noticed that a newly added file was licensed with "MIT", and that license was not listed in Fedora's gdb.spec file. [Fedora no longer supports "effective" licenses.] That led me to this simple script which generates a list of all the newly added files between two given commits and scans these files for licenses. Example usage: bash$ cd /path/to/binutils-gdb/gdb bash$ ./contrib/license-check-new-files.sh -s gdb-15-branchpoint gdb-16-branchpoint Scanning directories gdb*/... gdb/contrib/common-misspellings.txt: no longer in repo? gdb/contrib/spellcheck.sh: no longer in repo? gdbsupport/unordered_dense.h: MIT I don't think anything in here is Fedora- or RPM-specific, so I'd like to submit this for consideration for inclusion in contrib/. I believe other distros may find it useful. Approved-By: Tom Tromey <[email protected]>
1 parent f79a8e5 commit a717586

File tree

1 file changed

+149
-0
lines changed

1 file changed

+149
-0
lines changed
Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
#!/usr/bin/env python3
2+
3+
# Copyright (C) 2025 Free Software Foundation, Inc.
4+
#
5+
# This file is part of GDB.
6+
#
7+
# This program is free software; you can redistribute it and/or modify
8+
# it under the terms of the GNU General Public License as published by
9+
# the Free Software Foundation; either version 3 of the License, or
10+
# (at your option) any later version.
11+
#
12+
# This program is distributed in the hope that it will be useful,
13+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
14+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15+
# GNU General Public License for more details.
16+
#
17+
# You should have received a copy of the GNU General Public License
18+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
19+
20+
# This program requires the python modules GitPython (git) and scancode-toolkit.
21+
# It builds a list of all the newly added files to the repository and scans
22+
# each file for a license, printing it to the terminal. If "--skip" is used,
23+
# it will only output non-"common" licenses, e.g., omitting "GPL-3.0-or-later".
24+
# This makes it a little bit easier to detect any possible new licenses.
25+
#
26+
# Example:
27+
# bash$ cd /path/to/binutils-gdb/gdb
28+
# bash$ ./contrib/license-check-new-files.sh -s gdb-15-branchpoint gdb-16-branchpoint
29+
# Scanning directories gdb*/...
30+
# gdb/contrib/common-misspellings.txt: no longer in repo?
31+
# gdb/contrib/spellcheck.sh: no longer in repo?
32+
# gdbsupport/unordered_dense.h: MIT
33+
34+
import os
35+
import sys
36+
import argparse
37+
from pathlib import PurePath
38+
from git import Repo
39+
from scancode import api
40+
41+
# SPDX identifiers treated as "common" licenses.  When "--skip" is
# given, a file whose detected license appears in this list is left
# out of the output.
COMMON_LICENSES = [
    "GPL-2.0-or-later",
    "GPL-3.0-or-later",
]

# Value used for "--paths" when the option is not given (main splits
# it on ",").  The default is restricted to the gdb-specific git
# directories because much of the rest of binutils-gdb is actually
# owned by other projects/packages.
DEFAULT_SCAN_DIRS = "gdb*"
49+
50+
51+
# Look up the commit named by the string CSTR in the git repository
# REPO (any object providing a commit() method).
#
# Returns the commit object.  If the lookup fails, prints an
# "unknown commit" error and exits with status 2.  KeyboardInterrupt
# and SystemExit are not lookup failures and propagate unchanged.
def get_commit(repo, cstr):
    try:
        return repo.commit(cstr)
    except Exception:
        # Catch Exception rather than using a bare "except:" so that
        # Ctrl-C or an explicit exit is not misreported as a bad commit.
        # NOTE(review): the exact exception types GitPython raises for
        # an unresolvable revision are not visible here, hence the
        # broad (but not bare) handler.
        print(f'unknown commit "{cstr}"')
        sys.exit(2)
61+
62+
63+
# Scan FILE for licenses with the scancode-toolkit API.
#
# Returns whatever api.get_licenses() returns for FILE (the full
# scancode license dict on success).  No exceptions are handled here;
# anything raised by the scan propagates to the caller.
def get_licenses_for_file(file):
    licenses = api.get_licenses(file)
    return licenses
68+
69+
70+
# Output helper for "--skip" mode: when SKIP is true, print FILE
# followed by ": " with no trailing newline, so the caller can finish
# the line with the result.  When SKIP is false, print nothing.
def skip_print_file(skip, file):
    if not skip:
        return
    prefix = "{}: ".format(file)
    print(prefix, end="")
75+
76+
77+
# Program entry point.
#
# ARGV is a full argument vector with the program name in ARGV[0]
# (i.e. the shape of sys.argv).  Parses the command line, finds the git
# repository whose root is the current directory or one of its parents,
# and prints the license detected for every file added between the
# FROM_COMMIT and TO_COMMIT given on the command line, restricted to
# the "--paths" pathspecs.  With "--skip", files whose single detected
# license is in COMMON_LICENSES are omitted.
#
# Exits with status 2 if no git repository is found or if either
# commit cannot be resolved.
def main(argv):
    parser = argparse.ArgumentParser()
    parser.add_argument("from_commit")
    parser.add_argument("to_commit")
    parser.add_argument(
        "-s", "--skip", help="skip common licenses in output", action="store_true"
    )
    parser.add_argument(
        "-p",
        "--paths",
        help=f'paths to scan (default is "{DEFAULT_SCAN_DIRS}")',
        type=str,
        default=DEFAULT_SCAN_DIRS,
    )
    # Parse the vector we were handed (minus the program name) instead
    # of silently re-reading sys.argv and ignoring the ARGV parameter.
    args = parser.parse_args(argv[1:])

    # Find the git repository.  Try the current directory, then each
    # parent in turn up to the filesystem root.
    cwd = PurePath(os.getcwd())
    for repo_root in (cwd, *cwd.parents):
        try:
            repo = Repo(repo_root)
            break
        except Exception:
            # REPO_ROOT is not the root of a git repository; keep
            # walking up.  Exception (not a bare "except:") so that
            # Ctrl-C is not swallowed here.
            continue
    else:
        # The loop ran out of candidates without a "break".  Using
        # for/else avoids comparing against cwd.parents[-1], which is
        # empty when cwd is the root, needs Python 3.10+ for negative
        # indexing, and misfires when the repository is the top-most
        # directory.
        print(f'not a git repository (or any parent up to mount point "{repo_root}")')
        sys.exit(2)

    # Resolve the commit boundaries to search for new files.
    fc = get_commit(repo, args.from_commit)
    tc = get_commit(repo, args.to_commit)

    # Loop over the files added ("A") between the two commits.
    scan_paths = args.paths.split(",")
    scan_list = ",".join(f"{s}/" for s in scan_paths)
    print(f"Scanning directories {scan_list}...")
    for added in fc.diff(tc, paths=scan_paths).iter_change_type("A"):
        filename = added.a_path
        if not args.skip:
            print(f"checking licenses for {filename}... ", end="", flush=True)
        try:
            # Paths in the diff are relative to the repository root.
            lic = get_licenses_for_file((repo_root / filename).as_posix())
            if len(lic["license_clues"]) > 1:
                # In "--skip" mode nothing has been printed for this
                # file yet, so emit the filename first; otherwise the
                # message cannot be attributed to any file.
                skip_print_file(args.skip, filename)
                print("multiple licenses detected")
                continue
            spdx = lic["detected_license_expression_spdx"]
            if not args.skip or spdx not in COMMON_LICENSES:
                skip_print_file(args.skip, filename)
                print(f"{spdx}")
        except OSError:
            # Likely hit a file that was added to the repo and
            # subsequently removed.
            skip_print_file(args.skip, filename)
            print("no longer in repo?")
        except KeyboardInterrupt:
            print("interrupted")
            break
        except Exception as e:
            # If scanning fails, there is little we can do but print
            # the error and move on to the next file.
            skip_print_file(args.skip, filename)
            print(e)
146+
147+
148+
# Run main() with the process argument vector only when this file is
# executed as a script, not when it is imported.
if __name__ == "__main__":
    main(argv=sys.argv)

0 commit comments

Comments
 (0)