|
| 1 | +#!/usr/bin/env python3 |
| 2 | + |
| 3 | +# Copyright (C) 2025 Free Software Foundation, Inc. |
| 4 | +# |
| 5 | +# This file is part of GDB. |
| 6 | +# |
| 7 | +# This program is free software; you can redistribute it and/or modify |
| 8 | +# it under the terms of the GNU General Public License as published by |
| 9 | +# the Free Software Foundation; either version 3 of the License, or |
| 10 | +# (at your option) any later version. |
| 11 | +# |
| 12 | +# This program is distributed in the hope that it will be useful, |
| 13 | +# but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 | +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 15 | +# GNU General Public License for more details. |
| 16 | +# |
| 17 | +# You should have received a copy of the GNU General Public License |
| 18 | +# along with this program. If not, see <http://www.gnu.org/licenses/>. |
| 19 | + |
| 20 | +# This program requires the python modules GitPython (git) and scancode-toolkit. |
| 21 | +# It builds a list of all the newly added files to the repository and scans |
| 22 | +# each file for a license, printing it to the terminal. If "--skip" is used, |
| 23 | +# it will only output non-"common" licenses, e.g., omitting "GPL-3.0-or-later". |
| 24 | +# This makes it a little bit easier to detect any possible new licenses. |
| 25 | +# |
| 26 | +# Example: |
| 27 | +# bash$ cd /path/to/binutils-gdb/gdb |
| 28 | +# bash$ ./contrib/license-check-new-files.sh -s gdb-15-branchpoint gdb-16-branchpoint |
| 29 | +# Scanning directories gdb*/... |
| 30 | +# gdb/contrib/common-misspellings.txt: no longer in repo? |
| 31 | +# gdb/contrib/spellcheck.sh: no longer in repo? |
| 32 | +# gdbsupport/unordered_dense.h: MIT |
| 33 | + |
| 34 | +import os |
| 35 | +import sys |
| 36 | +import argparse |
| 37 | +from pathlib import PurePath |
| 38 | +from git import Repo |
| 39 | +from scancode import api |
| 40 | + |
# A list of "common" licenses.  If "--skip" is used, any file
# with a license in this list will be omitted from the output.
# Entries are compared against the SPDX license expression that
# scancode reports for each file.
COMMON_LICENSES = ["GPL-2.0-or-later", "GPL-3.0-or-later"]

# Default list of directories to scan.  Default scans are limited to
# gdb-specific git directories because much of the rest of binutils-gdb
# is actually owned by other projects/packages.
# This is the default value of "--paths", which is split on commas.
DEFAULT_SCAN_DIRS = "gdb*"
| 49 | + |
| 50 | + |
# Get the commit object associated with the string commit CSTR
# from the git repository REPO.
#
# Returns the commit object.  If REPO cannot resolve CSTR, prints an
# error and exits with status 2.
def get_commit(repo, cstr):
    try:
        return repo.commit(cstr)
    except Exception:
        # Catch Exception rather than using a bare "except" so that
        # KeyboardInterrupt and SystemExit are not misreported as an
        # unknown commit.
        print(f'unknown commit "{cstr}"')
        sys.exit(2)
| 61 | + |
| 62 | + |
# Scan FILE for licenses with the scancode-toolkit package.
# On success the complete license dict produced by scancode is
# returned; any exception raised by the scan is passed on to the
# caller.
def get_licenses_for_file(file):
    licenses = api.get_licenses(file)
    return licenses
| 68 | + |
| 69 | + |
# Emit the "FILE: " prefix (without a trailing newline) ahead of a
# result line.  Nothing is printed unless SKIP is set, i.e. only when
# common licenses are being skipped.
def skip_print_file(skip, file):
    if not skip:
        return
    prefix = f"{file}: "
    print(prefix, end="")
| 75 | + |
| 76 | + |
# Entry point.  Parses the command line in ARGV (ARGV[0] is the program
# name), locates the git repository containing the current directory,
# and prints the license that scancode detects for every file added
# between the from/to commits under the requested paths.
#
# Exits with status 2 if no git repository is found or if either
# commit is unknown.
def main(argv):
    parser = argparse.ArgumentParser()
    parser.add_argument("from_commit")
    parser.add_argument("to_commit")
    parser.add_argument(
        "-s", "--skip", help="skip common licenses in output", action="store_true"
    )
    parser.add_argument(
        "-p",
        "--paths",
        help=f'paths to scan (default is "{DEFAULT_SCAN_DIRS}")',
        type=str,
        default=DEFAULT_SCAN_DIRS,
    )
    # Parse the arguments we were actually given instead of silently
    # falling back to sys.argv.
    args = parser.parse_args(argv[1:])

    # Find the git repository.  Try the current directory, then each
    # parent in turn up to the root.
    cwd = PurePath(os.getcwd())
    for repo_dir in [cwd, *cwd.parents]:
        try:
            repo = Repo(repo_dir)
            break
        except Exception:
            # Not a usable repository here; try the next directory up.
            # Catching Exception (not a bare "except") lets
            # KeyboardInterrupt through.
            continue
    else:
        # The loop ran out of candidates without finding a repository.
        # REPO_DIR is the last directory tried (the root).
        print(f'not a git repository (or any parent up to mount point "{repo_dir}")')
        sys.exit(2)

    # Get from/to commits (commit boundaries to search for new files).
    fc = get_commit(repo, args.from_commit)
    tc = get_commit(repo, args.to_commit)

    # Loop over new files ("A" selects added files in the diff).
    paths = args.paths.split(",")
    print(f'Scanning directories {",".join(f"{s}/" for s in paths)}...')
    for change in fc.diff(tc, paths=paths).iter_change_type("A"):
        filename = change.a_path
        if not args.skip:
            print(f"checking licenses for {filename}... ", end="", flush=True)
        try:
            # Diff paths are joined onto the directory the repository
            # was opened from to get a path that can be scanned.
            lic = get_licenses_for_file(repo_dir.joinpath(filename).as_posix())

            # NOTE(review): "license_clues" is used here as the signal
            # for multiple licenses -- confirm against the scancode API
            # that this is the intended key.
            if len(lic["license_clues"]) > 1:
                # In --skip mode no "checking licenses for ..." prefix
                # was printed, so name the file here as well.
                skip_print_file(args.skip, filename)
                print("multiple licenses detected")
                continue

            spdx = lic["detected_license_expression_spdx"]
            if not args.skip or spdx not in COMMON_LICENSES:
                skip_print_file(args.skip, filename)
                print(f"{spdx}")
        except OSError:
            # Likely hit a file that was added to the repo and subsequently removed.
            skip_print_file(args.skip, filename)
            print("no longer in repo?")
        except KeyboardInterrupt:
            print("interrupted")
            break
        except Exception as e:
            # If scanning fails, there is little we can do but print an error.
            skip_print_file(args.skip, filename)
            print(e)
| 147 | + |
# Run the scan only when this file is executed as a script; the full
# sys.argv (including the program name) is handed to main.
if __name__ == "__main__":
    main(sys.argv)
0 commit comments