66# See https://github.com/nexB/scancode-toolkit for support or download.
77# See https://aboutcode.org for more information about nexB OSS projects.
88#
9- from os .path import abspath
10- from os .path import basename
11- from os .path import dirname
12- from os .path import isdir
139import sys
14-
10+ import uuid
1511from io import BytesIO
16- try :
17- from StringIO import StringIO
18- except ImportError :
19- from io import StringIO
12+ from io import StringIO
2013
2114from spdx .checksum import Algorithm
2215from spdx .creationinfo import Tool
16+ from spdx .document import ExtractedLicense
2317from spdx .document import Document
2418from spdx .document import License
25- from spdx .document import ExtractedLicense
2619from spdx .file import File
2720from spdx .package import Package
2821from spdx .utils import NoAssert
2922from spdx .utils import SPDXNone
3023from spdx .version import Version
3124
32- from formattedcode import FileOptionType
3325from commoncode .cliutils import OUTPUT_GROUP
3426from commoncode .cliutils import PluggableCommandLineOption
27+ from commoncode .fileutils import file_name
28+ from commoncode .text import python_safe_name
29+ from formattedcode import FileOptionType
3530from plugincode .output import output_impl
3631from plugincode .output import OutputPlugin
32+ from commoncode .fileutils import parent_directory
33+ import os
3734
3835# Tracing flags
3936TRACE = False
@@ -64,8 +61,8 @@ def logger_debug(*args):
6461
6562def _patch_license_list ():
6663 """
67- Patch the SPDX library license list to match the list of ScanCode known SPDX
68- licenses.
64+ Patch the SPDX Python library license list to match the list of ScanCode
65+ known SPDX licenses.
6966 """
7067 global _spdx_list_is_patched
7168 if not _spdx_list_is_patched :
@@ -79,8 +76,8 @@ def _patch_license_list():
7976
8077def get_licenses_by_spdx_key (licenses ):
8178 """
82- Return a mapping of {spdx_key: license object} given a sequence of License
83- objects.
79+ Return a mapping of {spdx_key: license object} given a ``licenses`` sequence
80+ of License objects.
8481 """
8582 spdx_licenses = {}
8683 for lic in licenses :
@@ -118,16 +115,14 @@ def is_enabled(self, spdx_tv, **kwargs):
118115 return spdx_tv
119116
def process_codebase(self, codebase, spdx_tv, **kwargs):
    """
    Write the scanned ``codebase`` to the ``spdx_tv`` output file in the
    SPDX Tag/value format.

    The scan input path is read from the ``input`` keyword argument and
    defaults to an empty string when it is not provided.
    """
    # Tag/value is selected with as_tagvalue=True; all remaining options
    # are forwarded untouched to the shared implementation.
    scan_input = kwargs.get('input', '')
    _process_codebase(
        spdx_plugin=self,
        codebase=codebase,
        input_path=scan_input,
        output_file=spdx_tv,
        as_tagvalue=True,
        **kwargs
    )
131126
132127
133128@output_impl
@@ -145,16 +140,55 @@ def is_enabled(self, spdx_rdf, **kwargs):
145140 return spdx_rdf
146141
def process_codebase(self, codebase, spdx_rdf, **kwargs):
    """
    Write the scanned ``codebase`` to the ``spdx_rdf`` output file in the
    SPDX RDF format.

    The scan input path is read from the ``input`` keyword argument and
    defaults to an empty string when it is not provided.
    """
    # RDF is selected with as_tagvalue=False; all remaining options are
    # forwarded untouched to the shared implementation.
    scan_input = kwargs.get('input', '')
    _process_codebase(
        spdx_plugin=self,
        codebase=codebase,
        input_path=scan_input,
        output_file=spdx_rdf,
        as_tagvalue=False,
        **kwargs
    )
152+
def _process_codebase(
    spdx_plugin,
    codebase,
    input_path,
    output_file,
    as_tagvalue=True,
    **kwargs,
):
    """
    Write the ``codebase`` scan results as an SPDX document to the
    ``output_file`` file-like object on behalf of the ``spdx_plugin`` output
    plugin.

    The files to report are obtained from ``spdx_plugin.get_files()`` using
    the extra ``kwargs``. The tool name, tool version and notice come from the
    current codebase header. The package name is derived from ``input_path``.
    Write as Tag/value if ``as_tagvalue`` is True, otherwise as RDF.
    """
    # Run the SHA1 check on the codebase before anything is written.
    check_sha1(codebase)

    scanned_files = spdx_plugin.get_files(codebase, **kwargs)
    current_header = codebase.get_or_create_current_header()

    # Collect the writer arguments in the same order they were originally
    # computed: header attributes first, then the package name.
    spdx_args = dict(
        output_file=output_file,
        files=scanned_files,
        tool_name=current_header.tool_name,
        tool_version=current_header.tool_version,
        notice=current_header.notice,
        package_name=build_package_name(input_path),
        as_tagvalue=as_tagvalue,
    )
    write_spdx(**spdx_args)
178+
179+
def build_package_name(input_path):
    """
    Return a package name built from an ``input_path`` path.

    The name is derived from the last segment of the absolute form of
    ``input_path`` when it is not an existing file, or from its parent
    directory when it is an existing file. The segment is then passed through
    ``python_safe_name``.

    Return the generic 'scancode-toolkit-analyzed-package' name when
    ``input_path`` is empty or None.
    """
    if not input_path:
        return 'scancode-toolkit-analyzed-package'

    # Always work from the absolute path: a relative input such as "." or
    # ".." has no meaningful last segment of its own and would otherwise
    # yield a name built from the dots rather than the real directory name.
    absinput = os.path.abspath(input_path)
    if os.path.isfile(absinput):
        # A single scanned file is named after the directory that contains it.
        absinput = parent_directory(absinput)
    return python_safe_name(file_name(absinput))
158192
159193
160194def check_sha1 (codebase ):
@@ -166,31 +200,45 @@ def check_sha1(codebase):
166200 'WARNING: Files are missing a SHA1 attribute. '
167201 'Incomplete SPDX document created.' ,
168202 err = True ,
169- fg = 'red' )
203+ fg = 'red' ,
204+ )
170205
171206
172- def write_spdx (output_file , files , tool_name , tool_version , notice , input_file , as_tagvalue = True ):
207+ def write_spdx (
208+ output_file ,
209+ files ,
210+ tool_name ,
211+ tool_version ,
212+ notice ,
213+ package_name = '' ,
214+ download_location = NoAssert (),
215+ as_tagvalue = True ,
216+ ):
173217 """
174- Write scan output as SPDX Tag/value or RDF.
218+ Write scan output as SPDX Tag/value to ``output_file`` file-like
219+ object using the ``files`` list of scanned file data.
220+ Write as RDF XML if ``as_tagvalue`` is False.
221+
222+ Use the ``notice`` string as a notice included in a document comment.
223+ Include the ``tool_name`` and ``tool_version`` to indicate which tool is
224+ producing this SPDX document.
225+ Use ``package_name`` as a Package name and as a namespace prefix base.
175226 """
176227 as_rdf = not as_tagvalue
177228 _patch_license_list ()
178- absinput = abspath (input_file )
179229
180- if isdir (absinput ):
181- input_path = absinput
182- else :
183- input_path = dirname (absinput )
230+ ns_prefix = '_' .join (package_name .lower ().split ())
184231
185232 doc = Document (Version (2 , 1 ), License .from_identifier ('CC0-1.0' ))
186233 doc .comment = notice
234+ doc .namespace = f'http://spdx.org/spdxdocs/{ ns_prefix } -{ uuid .uuid4 ()} '
187235 tool_name = tool_name or 'ScanCode'
188- doc .creation_info .add_creator (Tool (tool_name + ' ' + tool_version ))
236+ doc .creation_info .add_creator (Tool (f' { tool_name } { tool_version } ' ))
189237 doc .creation_info .set_created_now ()
190238
191239 package = doc .package = Package (
192- name = basename ( input_path ) ,
193- download_location = NoAssert ()
240+ name = package_name ,
241+ download_location = download_location
194242 )
195243
196244 # Use a set of unique copyrights for the package.
@@ -222,26 +270,30 @@ def write_spdx(output_file, files, tool_name, tool_version, notice, input_file,
222270
223271 spdx_id = file_license .get ('spdx_license_key' )
224272 if not spdx_id :
225- spdx_id = 'LicenseRef-scancode-' + license_key
273+ spdx_id = f 'LicenseRef-scancode-{ license_key } '
226274 is_license_ref = spdx_id .lower ().startswith ('licenseref-' )
227275
228276 if not is_license_ref :
229277 spdx_license = License .from_identifier (spdx_id )
230278 else :
231279 spdx_license = ExtractedLicense (spdx_id )
232280 spdx_license .name = file_license .get ('short_name' )
233- comment = ('See details at https://github.com/nexB/scancode-toolkit'
234- '/blob/develop/src/licensedcode/data/licenses/%s.yml\n ' % license_key )
281+ # FIXME: replace this with the licensedb URL
282+ comment = (
283+ f'See details at https://github.com/nexB/scancode-toolkit'
284+ f'/blob/develop/src/licensedcode/data/licenses/{ license_key } .yml\n '
285+ )
235286 spdx_license .comment = comment
236287 text = file_license .get ('matched_text' )
237- # always set some text, even if we did not extract the matched text
288+ # always set some text, even if we did not extract the
289+ # matched text
238290 if not text :
239291 text = comment
240292 spdx_license .text = text
241293 doc .add_extr_lic (spdx_license )
242294
243- # Add licenses in the order they appear in the file. Maintaining the order
244- # might be useful for provenance purposes.
295+ # Add licenses in the order they appear in the file. Maintaining
296+ # the order might be useful for provenance purposes.
245297 file_entry .add_lics (spdx_license )
246298 package .add_lics_from_file (spdx_license )
247299
@@ -263,8 +315,9 @@ def write_spdx(output_file, files, tool_name, tool_version, notice, input_file,
263315
264316 package .cr_text .update (file_entry .copyright )
265317
266- # Create a text of copyright statements in the order they appear in the file.
267- # Maintaining the order might be useful for provenance purposes.
318+ # Create a text of copyright statements in the order they appear in
319+ # the file. Maintaining the order might be useful for provenance
320+ # purposes.
268321 file_entry .copyright = '\n ' .join (file_entry .copyright ) + '\n '
269322
270323 elif file_copyrights is None :
@@ -285,7 +338,7 @@ def write_spdx(output_file, files, tool_name, tool_version, notice, input_file,
285338 output_file .write (msg )
286339
287340 # Remove duplicate licenses from the list for the package.
288- unique_licenses = {( l .identifier , l . full_name ) : l for l in package .licenses_from_files }
341+ unique_licenses = {l .identifier : l for l in package .licenses_from_files }
289342 unique_licenses = list (unique_licenses .values ())
290343 if not len (package .licenses_from_files ):
291344 if all_files_have_no_license :
@@ -294,7 +347,10 @@ def write_spdx(output_file, files, tool_name, tool_version, notice, input_file,
294347 package .licenses_from_files = [NoAssert ()]
295348 else :
296349 # List license identifiers alphabetically for the package.
297- package .licenses_from_files = sorted (unique_licenses , key = lambda x : x .identifier )
350+ package .licenses_from_files = sorted (
351+ unique_licenses ,
352+ key = lambda x : x .identifier ,
353+ )
298354
299355 if len (package .cr_text ) == 0 :
300356 if all_files_have_no_copyright :
0 commit comments