Skip to content

Commit 8031f33

Browse files
committed
scripts: west: spdx: extract copyright info
use REUSE to extract copyright text from source files and include in SBOM documents Signed-off-by: Benjamin Cabé <[email protected]>
1 parent ffc8ec8 commit 8031f33

File tree

5 files changed

+72
-5
lines changed

5 files changed

+72
-5
lines changed

doc/develop/west/zephyr-cmds.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,13 @@ Each file in the bill-of-materials is scanned, so that its hashes (SHA256 and
136136
SHA1) can be recorded, along with any detected licenses if an
137137
``SPDX-License-Identifier`` comment appears in the file.
138138

139+
Copyright notices are extracted using the third-party :command:`reuse` tool from the REUSE group.
140+
When found, these notices are added to SPDX documents as ``FileCopyrightText`` fields.
141+
142+
.. note::
143+
Copyright extraction uses heuristics that may not capture complete notice text, so
144+
``FileCopyrightText`` content is best-effort. This aligns with SPDX specification recommendations.
145+
139146
SPDX Relationships are created to indicate dependencies between
140147
CMake build targets, build targets that are linked together, and
141148
source files that are compiled to generate the built library files.

scripts/requirements-actions.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ pytest
2828
python-magic-bin; sys_platform == "win32"
2929
python-magic; sys_platform != "win32"
3030
pyyaml
31+
reuse
3132
ruff==0.11.11
3233
setuptools>=70.2.0
3334
spdx-tools

scripts/requirements-actions.txt

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@ astroid==3.3.10 \
1212
--hash=sha256:104fb9cb9b27ea95e847a94c003be03a9e039334a8ebca5ee27dafaf5c5711eb \
1313
--hash=sha256:c332157953060c6deb9caa57303ae0d20b0fbdb2e59b4a4f2a6ba49d0a7961ce
1414
# via pylint
15+
attrs==25.3.0 \
16+
--hash=sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3 \
17+
--hash=sha256:75d7cefc7fb576747b2c81b4442d4d4a1ce0900973527c011d1030fd3bf4af1b
18+
# via reuse
1519
awscli==1.40.38 \
1620
--hash=sha256:3f90f2815e3b72691785d2bbd4f2c3dcbde311c55e1f5633a37cc3e514ba6fdb \
1721
--hash=sha256:ec9b69ff4600ef151df3336f0e7af4cf30280d27f9d8e67e09c4b386ceb1f154
@@ -20,10 +24,16 @@ beartype==0.21.0 \
2024
--hash=sha256:b6a1bd56c72f31b0a496a36cc55df6e2f475db166ad07fa4acc7e74f4c7f34c0 \
2125
--hash=sha256:f9a5078f5ce87261c2d22851d19b050b64f6a805439e8793aecf01ce660d3244
2226
# via spdx-tools
27+
binaryornot==0.4.4 \
28+
--hash=sha256:359501dfc9d40632edc9fac890e19542db1a287bbcfa58175b66658392018061 \
29+
--hash=sha256:b8b71173c917bddcd2c16070412e369c3ed7f0528926f70cac18a6c97fd563e4
30+
# via reuse
2331
boolean-py==5.0 \
2432
--hash=sha256:60cbc4bad079753721d32649545505362c754e121570ada4658b852a3a318d95 \
2533
--hash=sha256:ef28a70bd43115208441b53a045d1549e2f0ec6e3d08a9d142cbc41c1938e8d9
26-
# via license-expression
34+
# via
35+
# license-expression
36+
# reuse
2737
botocore==1.38.39 \
2838
--hash=sha256:2305f688e9328af473a504197584112f228513e06412038d83205ce8d1456f40 \
2939
--hash=sha256:ee3aa03af1dabed4f3710cd64f6d9d488281eee720710bf1cf9f2b2fd30025ae
@@ -118,7 +128,9 @@ cffi==1.17.1 \
118128
chardet==5.2.0 \
119129
--hash=sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7 \
120130
--hash=sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970
121-
# via tox
131+
# via
132+
# binaryornot
133+
# tox
122134
charset-normalizer==3.4.2 \
123135
--hash=sha256:005fa3432484527f9732ebd315da8da8001593e2cf46a3d817669f062c3d9ed4 \
124136
--hash=sha256:046595208aae0120559a67693ecc65dd75d46f7bf687f159127046628178dc45 \
@@ -212,7 +224,9 @@ charset-normalizer==3.4.2 \
212224
--hash=sha256:fb707f3e15060adf5b7ada797624a6c6e0138e2a26baa089df64c68ee98e040f \
213225
--hash=sha256:fcbe676a55d7445b22c10967bceaaf0ee69407fbe0ece4d032b6eb8d4565982a \
214226
--hash=sha256:fdb20a30fe1175ecabed17cbf7812f7b804b8a315a25f24678bcdf120a90077f
215-
# via requests
227+
# via
228+
# python-debian
229+
# requests
216230
clang-format==20.1.6 \
217231
--hash=sha256:0ea008a20951527d35a1e2b8febdca3c47c6f8e9a1bd174601c891e20053ef2e \
218232
--hash=sha256:11530ff352c64176ba4297ad398452d9fcd442b4a8bb2a804cc7915bc94b96e1 \
@@ -236,6 +250,7 @@ click==8.1.3 \
236250
--hash=sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48
237251
# via
238252
# gitlint-core
253+
# reuse
239254
# spdx-tools
240255
colorama==0.4.6 \
241256
--hash=sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44 \
@@ -452,6 +467,7 @@ jinja2==3.1.6 \
452467
# via
453468
# gcovr
454469
# junit2html
470+
# reuse
455471
jmespath==1.0.1 \
456472
--hash=sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980 \
457473
--hash=sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe
@@ -466,7 +482,9 @@ junitparser==3.2.0 \
466482
license-expression==30.4.1 \
467483
--hash=sha256:679646bc3261a17690494a3e1cada446e5ee342dbd87dcfa4a0c24cc5dce13ee \
468484
--hash=sha256:9f02105f9e0fcecba6a85dfbbed7d94ea1c3a70cf23ddbfb5adf3438a6f6fce0
469-
# via spdx-tools
485+
# via
486+
# reuse
487+
# spdx-tools
470488
lxml==5.4.0 \
471489
--hash=sha256:00b8686694423ddae324cf614e1b9659c2edb754de617703c3d29ff568448df5 \
472490
--hash=sha256:073eb6dcdf1f587d9b88c8c93528b57eccda40209cf9be549d469b942b41d70b \
@@ -906,6 +924,10 @@ python-dateutil==2.9.0.post0 \
906924
# botocore
907925
# elasticsearch
908926
# pykwalify
927+
python-debian==1.0.1 \
928+
--hash=sha256:3ada9b83a3d671b58081782c0969cffa0102f6ce433fbbc7cf21275b8b5cc771 \
929+
--hash=sha256:8f137c230c1d9279c2ac892b35915068b2aca090c9fd3da5671ff87af32af12c
930+
# via reuse
909931
python-magic==0.4.27 ; sys_platform != 'win32' \
910932
--hash=sha256:c1ba14b08e4a5f5c31a302b7721239695b2f0f058d125bd5ce1ee36b9d9d3c3b \
911933
--hash=sha256:c212960ad306f700aa0d01e5d7a325d20548ff97eb9920dcd29513174f0294d3
@@ -1079,6 +1101,10 @@ requests==2.32.4 \
10791101
--hash=sha256:27babd3cda2a6d50b30443204ee89830707d396671944c998b5975b031ac2b2c \
10801102
--hash=sha256:27d0316682c8a29834d3264820024b62a36942083d52caf2f14c0591336d3422
10811103
# via pygithub
1104+
reuse==5.0.2 \
1105+
--hash=sha256:7a680f00324e87a72061677a892d8cbabfddf7adcf7a5376aeeed2d78995bbbb \
1106+
--hash=sha256:878016ae5dd29c10bad4606d6676c12a268c12aa9fcfea66403598e16eed085c
1107+
# via -r requirements-actions.in
10821108
rsa==4.7.2 \
10831109
--hash=sha256:78f9a9bf4e7be0c5ded4583326e7461e3a3c5aae24073648b4bdfa797d78c9d2 \
10841110
--hash=sha256:9d689e6ca1b3038bc82bf8d23e944b6b6037bc02301a574935b2dd946e0353b9
@@ -1227,7 +1253,9 @@ tomli==2.2.1 \
12271253
tomlkit==0.13.3 \
12281254
--hash=sha256:430cf247ee57df2b94ee3fbe588e71d362a941ebb545dec29b53961d61add2a1 \
12291255
--hash=sha256:c89c649d79ee40629a9fda55f8ace8c6a1b42deb912b2a8fd8d942ddadb606b0
1230-
# via pylint
1256+
# via
1257+
# pylint
1258+
# reuse
12311259
tox==4.27.0 \
12321260
--hash=sha256:2b8a7fb986b82aa2c830c0615082a490d134e0626dbc9189986da46a313c4f20 \
12331261
--hash=sha256:b97d5ecc0c0d5755bcc5348387fef793e1bfa68eb33746412f4c60881d7f5f57

scripts/requirements-base.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ pyserial
2424
requests>=2.32.0
2525
semver
2626
tqdm>=4.67.1
27+
reuse
2728

2829
# for ram/rom reports
2930
anytree

scripts/west_commands/zspdx/scanner.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import os
77
import re
88

9+
from reuse.project import Project
910
from west import log
1011

1112
from zspdx.licenses import LICENSES
@@ -177,6 +178,32 @@ def normalizeExpression(licsConcluded):
177178
return " AND ".join(revised)
178179

179180

181+
def getCopyrightInfo(filePath):
    """
    Scans the specified file for copyright information using REUSE tools.

    Arguments:
        - filePath: path to file to scan

    Returns: sorted list of unique copyright statements if found; empty list
             if none are found or if the REUSE lookup raises an error
    """
    log.dbg(f" - getting copyright info for {filePath}")

    # Extraction is best-effort: any failure inside REUSE is reported as a
    # warning and treated as "no copyright info" so the scan can continue.
    try:
        project = Project(os.path.dirname(filePath))
        infos = project.reuse_info_of(filePath)

        # Collect into a set so a notice reported by more than one REUSE
        # source is only emitted once.
        copyrights = set()
        for info in infos:
            if info.copyright_lines:
                copyrights.update(info.copyright_lines)

        # REUSE hands back the notices as an unordered collection (a set in
        # reuse 5.x), so sort them to keep the generated SPDX output
        # reproducible from one run to the next.
        return sorted(copyrights)
    except Exception as e:
        log.wrn(f"Error getting copyright info for {filePath}: {e}")
        return []
205+
206+
180207
def scanDocument(cfg, doc):
181208
"""
182209
Scan for licenses and calculate hashes for all Files and Packages
@@ -213,6 +240,9 @@ def scanDocument(cfg, doc):
213240
f.concludedLicense = expression
214241
f.licenseInfoInFile = splitExpression(expression)
215242

243+
if copyrights := getCopyrightInfo(f.abspath):
244+
f.copyrightText = f"<text>\n{'\n'.join(copyrights)}\n</text>"
245+
216246
# check if any custom license IDs should be flagged for document
217247
for lic in f.licenseInfoInFile:
218248
checkLicenseValid(lic, doc)

0 commit comments

Comments
 (0)