Skip to content

Commit bfb4110

Browse files
authored
Merge pull request #2351 from alpianon/alpianon-patch-documentnamespace
added DocumentNamespace tag
2 parents 5c7208a + e6c3957 commit bfb4110

File tree

12 files changed

+140
-61
lines changed

12 files changed

+140
-61
lines changed

CHANGELOG.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ Outputs:
3131
the JSON output
3232
- Add new Debian machine readable copyright output.
3333
- The CSV output "Resource" column has been renamed to "path".
34+
- The SPDX output now has the mandatory DocumentNamespace attribute per SPDX specs #2344
3435

3536

3637
Copyright detection:

setup-mini.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,6 @@ python_functions=test
234234

235235
addopts =
236236
-rfExXw
237-
--strict
237+
--strict-markers
238238
--ignore setup.py
239239
--doctest-modules

setup.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,6 @@ python_functions=test
235235

236236
addopts =
237237
-rfExXw
238-
--strict
238+
--strict-markers
239239
--ignore setup.py
240240
--doctest-modules

src/formattedcode/output_spdx.py

Lines changed: 111 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -6,34 +6,31 @@
66
# See https://github.com/nexB/scancode-toolkit for support or download.
77
# See https://aboutcode.org for more information about nexB OSS projects.
88
#
9-
from os.path import abspath
10-
from os.path import basename
11-
from os.path import dirname
12-
from os.path import isdir
139
import sys
14-
10+
import uuid
1511
from io import BytesIO
16-
try:
17-
from StringIO import StringIO
18-
except ImportError:
19-
from io import StringIO
12+
from io import StringIO
2013

2114
from spdx.checksum import Algorithm
2215
from spdx.creationinfo import Tool
16+
from spdx.document import ExtractedLicense
2317
from spdx.document import Document
2418
from spdx.document import License
25-
from spdx.document import ExtractedLicense
2619
from spdx.file import File
2720
from spdx.package import Package
2821
from spdx.utils import NoAssert
2922
from spdx.utils import SPDXNone
3023
from spdx.version import Version
3124

32-
from formattedcode import FileOptionType
3325
from commoncode.cliutils import OUTPUT_GROUP
3426
from commoncode.cliutils import PluggableCommandLineOption
27+
from commoncode.fileutils import file_name
28+
from commoncode.text import python_safe_name
29+
from formattedcode import FileOptionType
3530
from plugincode.output import output_impl
3631
from plugincode.output import OutputPlugin
32+
from commoncode.fileutils import parent_directory
33+
import os
3734

3835
# Tracing flags
3936
TRACE = False
@@ -64,8 +61,8 @@ def logger_debug(*args):
6461

6562
def _patch_license_list():
6663
"""
67-
Patch the SPDX library license list to match the list of ScanCode known SPDX
68-
licenses.
64+
Patch the SPDX Python library license list to match the list of ScanCode
65+
known SPDX licenses.
6966
"""
7067
global _spdx_list_is_patched
7168
if not _spdx_list_is_patched:
@@ -79,8 +76,8 @@ def _patch_license_list():
7976

8077
def get_licenses_by_spdx_key(licenses):
8178
"""
82-
Return a mapping of {spdx_key: license object} given a sequence of License
83-
objects.
79+
Return a mapping of {spdx_key: license object} given a ``licenses`` sequence
80+
of License objects.
8481
"""
8582
spdx_licenses = {}
8683
for lic in licenses:
@@ -118,16 +115,14 @@ def is_enabled(self, spdx_tv, **kwargs):
118115
return spdx_tv
119116

120117
def process_codebase(self, codebase, spdx_tv, **kwargs):
121-
check_sha1(codebase)
122-
files = self.get_files(codebase, **kwargs)
123-
header = codebase.get_or_create_current_header()
124-
tool_name = header.tool_name
125-
tool_version = header.tool_version
126-
notice = header.notice
127-
input = kwargs.get('input', '') # NOQA
128-
129-
write_spdx(
130-
spdx_tv, files, tool_name, tool_version, notice, input, as_tagvalue=True)
118+
_process_codebase(
119+
spdx_plugin=self,
120+
codebase=codebase,
121+
input_path=kwargs.get('input', ''),
122+
output_file=spdx_tv,
123+
as_tagvalue=True,
124+
**kwargs
125+
)
131126

132127

133128
@output_impl
@@ -145,16 +140,55 @@ def is_enabled(self, spdx_rdf, **kwargs):
145140
return spdx_rdf
146141

147142
def process_codebase(self, codebase, spdx_rdf, **kwargs):
148-
check_sha1(codebase)
149-
files = self.get_files(codebase, **kwargs)
150-
header = codebase.get_or_create_current_header()
151-
tool_name = header.tool_name
152-
tool_version = header.tool_version
153-
notice = header.notice
154-
input = kwargs.get('input', '') # NOQA
143+
_process_codebase(
144+
spdx_plugin=self,
145+
codebase=codebase,
146+
input_path=kwargs.get('input', ''),
147+
output_file=spdx_rdf,
148+
as_tagvalue=False,
149+
**kwargs
150+
)
155151

156-
write_spdx(
157-
spdx_rdf, files, tool_name, tool_version, notice, input, as_tagvalue=False)
152+
153+
def _process_codebase(
154+
spdx_plugin,
155+
codebase,
156+
input_path,
157+
output_file,
158+
as_tagvalue=True,
159+
**kwargs,
160+
):
161+
check_sha1(codebase)
162+
files = spdx_plugin.get_files(codebase, **kwargs)
163+
header = codebase.get_or_create_current_header()
164+
tool_name = header.tool_name
165+
tool_version = header.tool_version
166+
notice = header.notice
167+
package_name = build_package_name(input_path)
168+
169+
write_spdx(
170+
output_file=output_file,
171+
files=files,
172+
tool_name=tool_name,
173+
tool_version=tool_version,
174+
notice=notice,
175+
package_name=package_name,
176+
as_tagvalue=as_tagvalue,
177+
)
178+
179+
180+
def build_package_name(input_path):
181+
"""
182+
Return a package name built from an ``input_path`` path.
183+
184+
"""
185+
if input_path:
186+
absinput = absinput = os.path.abspath(input_path)
187+
if os.path.isfile(absinput):
188+
input_path = parent_directory(absinput)
189+
return python_safe_name(file_name(input_path))
190+
191+
return 'scancode-toolkit-analyzed-package'
158192

159193

160194
def check_sha1(codebase):
@@ -166,31 +200,45 @@ def check_sha1(codebase):
166200
'WARNING: Files are missing a SHA1 attribute. '
167201
'Incomplete SPDX document created.',
168202
err=True,
169-
fg='red')
203+
fg='red',
204+
)
170205

171206

172-
def write_spdx(output_file, files, tool_name, tool_version, notice, input_file, as_tagvalue=True):
207+
def write_spdx(
208+
output_file,
209+
files,
210+
tool_name,
211+
tool_version,
212+
notice,
213+
package_name='',
214+
download_location=NoAssert(),
215+
as_tagvalue=True,
216+
):
173217
"""
174-
Write scan output as SPDX Tag/value or RDF.
218+
Write scan output as SPDX Tag/value to ``output_file`` file-like
219+
object using the ``files`` list of scanned file data.
220+
Write as RDF XML if ``as_tagvalue`` is False.
221+
222+
Use the ``notice`` string as a notice included in a document comment.
223+
Include the ``tool_name`` and ``tool_version`` to indicate which tool is
224+
producing this SPDX document.
225+
Use ``package_name`` as a Package name and as a namespace prefix base.
175226
"""
176227
as_rdf = not as_tagvalue
177228
_patch_license_list()
178-
absinput = abspath(input_file)
179229

180-
if isdir(absinput):
181-
input_path = absinput
182-
else:
183-
input_path = dirname(absinput)
230+
ns_prefix = '_'.join(package_name.lower().split())
184231

185232
doc = Document(Version(2, 1), License.from_identifier('CC0-1.0'))
186233
doc.comment = notice
234+
doc.namespace = f'http://spdx.org/spdxdocs/{ns_prefix}-{uuid.uuid4()}'
187235
tool_name = tool_name or 'ScanCode'
188-
doc.creation_info.add_creator(Tool(tool_name + ' ' + tool_version))
236+
doc.creation_info.add_creator(Tool(f'{tool_name} {tool_version}'))
189237
doc.creation_info.set_created_now()
190238

191239
package = doc.package = Package(
192-
name=basename(input_path),
193-
download_location=NoAssert()
240+
name=package_name,
241+
download_location=download_location
194242
)
195243

196244
# Use a set of unique copyrights for the package.
@@ -222,26 +270,30 @@ def write_spdx(output_file, files, tool_name, tool_version, notice, input_file,
222270

223271
spdx_id = file_license.get('spdx_license_key')
224272
if not spdx_id:
225-
spdx_id = 'LicenseRef-scancode-' + license_key
273+
spdx_id = f'LicenseRef-scancode-{license_key}'
226274
is_license_ref = spdx_id.lower().startswith('licenseref-')
227275

228276
if not is_license_ref:
229277
spdx_license = License.from_identifier(spdx_id)
230278
else:
231279
spdx_license = ExtractedLicense(spdx_id)
232280
spdx_license.name = file_license.get('short_name')
233-
comment = ('See details at https://github.com/nexB/scancode-toolkit'
234-
'/blob/develop/src/licensedcode/data/licenses/%s.yml\n' % license_key)
281+
# FIXME: replace this with the licensedb URL
282+
comment = (
283+
f'See details at https://github.com/nexB/scancode-toolkit'
284+
f'/blob/develop/src/licensedcode/data/licenses/{license_key}.yml\n'
285+
)
235286
spdx_license.comment = comment
236287
text = file_license.get('matched_text')
237-
# always set some text, even if we did not extract the matched text
288+
# always set some text, even if we did not extract the
289+
# matched text
238290
if not text:
239291
text = comment
240292
spdx_license.text = text
241293
doc.add_extr_lic(spdx_license)
242294

243-
# Add licenses in the order they appear in the file. Maintaining the order
244-
# might be useful for provenance purposes.
295+
# Add licenses in the order they appear in the file. Maintaining
296+
# the order might be useful for provenance purposes.
245297
file_entry.add_lics(spdx_license)
246298
package.add_lics_from_file(spdx_license)
247299

@@ -263,8 +315,9 @@ def write_spdx(output_file, files, tool_name, tool_version, notice, input_file,
263315

264316
package.cr_text.update(file_entry.copyright)
265317

266-
# Create a text of copyright statements in the order they appear in the file.
267-
# Maintaining the order might be useful for provenance purposes.
318+
# Create a text of copyright statements in the order they appear in
319+
# the file. Maintaining the order might be useful for provenance
320+
# purposes.
268321
file_entry.copyright = '\n'.join(file_entry.copyright) + '\n'
269322

270323
elif file_copyrights is None:
@@ -285,7 +338,7 @@ def write_spdx(output_file, files, tool_name, tool_version, notice, input_file,
285338
output_file.write(msg)
286339

287340
# Remove duplicate licenses from the list for the package.
288-
unique_licenses = {(l.identifier, l.full_name): l for l in package.licenses_from_files}
341+
unique_licenses = {l.identifier: l for l in package.licenses_from_files}
289342
unique_licenses = list(unique_licenses.values())
290343
if not len(package.licenses_from_files):
291344
if all_files_have_no_license:
@@ -294,7 +347,10 @@ def write_spdx(output_file, files, tool_name, tool_version, notice, input_file,
294347
package.licenses_from_files = [NoAssert()]
295348
else:
296349
# List license identifiers alphabetically for the package.
297-
package.licenses_from_files = sorted(unique_licenses, key=lambda x: x.identifier)
350+
package.licenses_from_files = sorted(
351+
unique_licenses,
352+
key=lambda x: x.identifier,
353+
)
298354

299355
if len(package.cr_text) == 0:
300356
if all_files_have_no_copyright:

tests/formattedcode/data/spdx/license_known/expected.tv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
SPDXVersion: SPDX-2.1
33
DataLicense: CC0-1.0
44
SPDXID: SPDXRef-DOCUMENT
5+
DocumentNamespace: http://spdx.org/spdxdocs/scan-ab8a6f7e-3a9a-466f-964f-be56571a3c19
56
DocumentComment: <text>Generated with ScanCode and provided on an "AS IS" BASIS, WITHOUT WARRANTIES
67
OR CONDITIONS OF ANY KIND, either express or implied. No content created from
78
ScanCode should be considered or used as legal advice. Consult an Attorney

tests/formattedcode/data/spdx/license_known/expected_with_text.tv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
SPDXVersion: SPDX-2.1
33
DataLicense: CC0-1.0
44
SPDXID: SPDXRef-DOCUMENT
5+
DocumentNamespace: http://spdx.org/spdxdocs/scan-8f1122d0-e528-466c-a22e-04bc0b174984
56
DocumentComment: <text>Generated with ScanCode and provided on an "AS IS" BASIS, WITHOUT WARRANTIES
67
OR CONDITIONS OF ANY KIND, either express or implied. No content created from
78
ScanCode should be considered or used as legal advice. Consult an Attorney

tests/formattedcode/data/spdx/license_ref/expected.tv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
SPDXVersion: SPDX-2.1
33
DataLicense: CC0-1.0
44
SPDXID: SPDXRef-DOCUMENT
5+
DocumentNamespace: http://spdx.org/spdxdocs/scan-25498bb4-1cfc-461c-a7cf-389008e49b5a
56
DocumentComment: <text>Generated with ScanCode and provided on an "AS IS" BASIS, WITHOUT WARRANTIES
67
OR CONDITIONS OF ANY KIND, either express or implied. No content created from
78
ScanCode should be considered or used as legal advice. Consult an Attorney

tests/formattedcode/data/spdx/license_ref/expected_with_text.tv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
SPDXVersion: SPDX-2.1
33
DataLicense: CC0-1.0
44
SPDXID: SPDXRef-DOCUMENT
5+
DocumentNamespace: http://spdx.org/spdxdocs/scan-ff644ef5-7b50-4793-ac2e-69cb1a0bece3
56
DocumentComment: <text>Generated with ScanCode and provided on an "AS IS" BASIS, WITHOUT WARRANTIES
67
OR CONDITIONS OF ANY KIND, either express or implied. No content created from
78
ScanCode should be considered or used as legal advice. Consult an Attorney

tests/formattedcode/data/spdx/simple/expected.tv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
SPDXVersion: SPDX-2.1
33
DataLicense: CC0-1.0
44
SPDXID: SPDXRef-DOCUMENT
5+
DocumentNamespace: http://spdx.org/spdxdocs/simple
56
DocumentComment: <text>Generated with ScanCode and provided on an "AS IS" BASIS, WITHOUT WARRANTIES
67
OR CONDITIONS OF ANY KIND, either express or implied. No content created from
78
ScanCode should be considered or used as legal advice. Consult an Attorney

tests/formattedcode/data/spdx/tree/expected.tv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
SPDXVersion: SPDX-2.1
33
DataLicense: CC0-1.0
44
SPDXID: SPDXRef-DOCUMENT
5+
DocumentNamespace: http://spdx.org/spdxdocs/scan-3ef222be-8dfb-4d4f-b12c-a42e51156151
56
DocumentComment: <text>Generated with ScanCode and provided on an "AS IS" BASIS, WITHOUT WARRANTIES
67
OR CONDITIONS OF ANY KIND, either express or implied. No content created from
78
ScanCode should be considered or used as legal advice. Consult an Attorney

0 commit comments

Comments
 (0)