Skip to content

Commit 69b149e

Browse files
Biscgit authored and tiborsimko committed
ci(check-licenses): add license value checker
1 parent 29183a0 commit 69b149e

File tree

3 files changed

+126
-0
lines changed

3 files changed

+126
-0
lines changed

.github/workflows/ci.yml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,3 +118,19 @@ jobs:
118118
pip install --upgrade pip
119119
pip install isort
120120
./run-tests.sh --check-isort
121+
122+
check-licenses:
  runs-on: ubuntu-20.04
  steps:
    - name: Checkout
      uses: actions/checkout@v2

    - name: Setup Python
      uses: actions/setup-python@v2
      with:
        # Quoted so YAML keeps the version as a string; an unquoted value is
        # parsed as a float (e.g. 3.10 would silently become 3.1).
        python-version: "3.9"

    - name: Check licenses
      run: |
        pip install --upgrade pip
        ./run-tests.sh --check-licenses

run-tests.sh

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,13 +99,18 @@ check_isort () {
9999
isort -rc -c -df --profile black -- **/*.py
100100
}
101101

102+
check_licenses () {
103+
scripts/check_licenses.py
104+
}
105+
102106
# Run every individual check, one after another, in a fixed order.
check_all () {
    local check
    for check in \
        check_script \
        check_fixtures \
        check_pycodestyle \
        check_black \
        check_pydocstyle \
        check_isort \
        check_licenses
    do
        "$check"
    done
}
110115

111116
if [ $# -eq 0 ]; then
@@ -121,6 +126,7 @@ do
121126
--check-pycodestyle) check_pycodestyle;;
122127
--check-pydocstyle) check_pydocstyle;;
123128
--check-isort) check_isort;;
129+
--check-licenses) check_licenses;;
124130
*)
125131
esac
126132
done

scripts/check_licenses.py

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
#!/usr/bin/env python
2+
3+
"""Check if license fields are valid in all records."""
4+
5+
import asyncio
6+
import json
7+
import logging
8+
import os
9+
import pathlib
10+
import time
11+
12+
VALID_LICENSE_IDENTIFIERS = [
13+
"CC0-1.0",
14+
"GPL-3.0-only",
15+
"MIT",
16+
"Apache-2.0",
17+
"BSD-3-Clause",
18+
]
19+
20+
logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s")
21+
22+
23+
async def validate_file(path: pathlib.Path) -> int:
    """Validate the license field of every record in a single JSON file.

    The file is read and parsed in the default executor so the blocking I/O
    does not stall the event loop. A record is inspected only when its
    ``license`` field is present and truthy; its ``attribution`` value must
    then be one of ``VALID_LICENSE_IDENTIFIERS``.

    :param path: Path of the JSON file to check.
    :returns: Number of records whose license passed validation.
    :raises ValueError: If at least one record failed validation; the
        exception argument is the number of failures found in this file.
    """

    def load_records():
        # ``with`` closes the handle even if JSON parsing fails.
        with open(path, "rb") as stream:
            return json.load(stream)

    records = await asyncio.get_running_loop().run_in_executor(None, load_records)

    checks = 0
    errors = 0

    for record in records:
        rec_licenses = record.get("license")
        if not rec_licenses:
            # Records without a license are not subject to this check.
            continue

        recid = record.get("recid", "UNSET")

        try:
            attr = rec_licenses["attribution"]
        except (KeyError, TypeError):
            # TypeError covers a license value that is not a mapping (for
            # example a bare string); report it like a missing attribution
            # instead of aborting the remaining records of the file.
            message = f"License field set but without attribution in file {path.name} with recid {recid}!"
            logging.error(message)
            errors += 1
            continue

        if attr not in VALID_LICENSE_IDENTIFIERS:
            message = f"Invalid license identifier `{attr}` in file {path.name} for recid {recid}! "
            logging.error(message)
            errors += 1
        else:
            checks += 1

    if errors:
        # The count travels in the exception so the caller can total it.
        raise ValueError(errors)

    logging.info(f"Successfully validated file {path.name}")
    return checks
57+
58+
59+
async def check_all_paths():
    """Validate every ``data/records/*.json`` file under the working directory.

    One ``validate_file`` call is scheduled per file and all of them are
    awaited together. Failures are collected rather than aborting at the
    first one, so a single run reports every problem.

    :raises SystemExit: With status 1 when any file failed validation.
    """
    start_time = time.perf_counter()

    root_path = pathlib.Path(os.getcwd()) / "data" / "records"
    all_paths = list(root_path.glob("*.json"))

    # ``gather`` wraps each coroutine in a task on the running loop.
    results = await asyncio.gather(
        *(validate_file(file_path) for file_path in all_paths),
        return_exceptions=True,
    )

    finish_time = time.perf_counter() - start_time
    logging.info(f"Processed {len(all_paths)} files within {finish_time:.2f} seconds.")

    failures = [
        (file_path, result)
        for file_path, result in zip(all_paths, results)
        if isinstance(result, Exception)
    ]

    if failures:
        errors = 0
        for file_path, result in failures:
            text = str(result)
            if text.isdigit():
                # ``validate_file`` reports its per-file error count this way.
                errors += int(text)
            else:
                # Anything else (unreadable file, invalid JSON, ...) was not
                # logged by ``validate_file``; surface it so the cause of the
                # failure is visible, and count it as a single error.
                logging.error(f"Could not validate file {file_path.name}: {result!r}")
                errors += 1

        # NOTE(review): the contact address below looks obfuscated in this
        # view of the file; confirm it against the repository.
        logging.error(
            f"Validation completed with {errors} errors!\n"
            f"\tPlease ensure the licenses are one of the following: {VALID_LICENSE_IDENTIFIERS}.\n"
            f"\tIf you are using a valid SPDX license string that is not in the above list, "
            f"please contact `[email protected]`."
        )
        # Raise directly instead of calling ``exit()``, which is a helper
        # injected by the ``site`` module and is not guaranteed to exist.
        raise SystemExit(1)

    logging.info(f"Successfully validated {sum(results)} records. No errors found.")
92+
93+
94+
def main():
    """Run the license validation over all record files.

    ``asyncio.run`` creates a fresh event loop, drives ``check_all_paths``
    to completion and closes the loop afterwards, also when the coroutine
    terminates the program through ``SystemExit``.
    """
    asyncio.run(check_all_paths())
101+
102+
103+
if __name__ == "__main__":
104+
main()

0 commit comments

Comments
 (0)