@@ -262,6 +262,10 @@ def scancode(ctx, input, output_file, copyright, license, package,
262262 license = True
263263 package = True
264264
265+ # A hack to force info being exposed for SPDX output in order to reuse calculated file SHA1s.
266+ if format in ('spdx-tv' , 'spdx-rdf' ):
267+ info = True
268+
265269 scans_cache_class = get_scans_cache_class ()
266270 try :
267271 files_count , results = scan (input_path = input ,
@@ -628,7 +632,7 @@ def save_results(files_count, scanned_files, format, input, output_file):
628632 output_file .write (unicode (json .dumps (meta , separators = (',' , ':' ), iterable_as_array = True , encoding = 'utf-8' )))
629633 output_file .write ('\n ' )
630634
631- elif format == 'spdx-tv' or format == 'spdx-rdf' :
635+ elif format in ( 'spdx-tv' , 'spdx-rdf' ) :
632636 from spdx .checksum import Algorithm
633637 from spdx .creationinfo import Tool
634638 from spdx .document import Document , License
@@ -645,10 +649,15 @@ def save_results(files_count, scanned_files, format, input, output_file):
645649 doc .package = Package (input , NoAssert ())
646650
647651 for file_data in scanned_files :
648- file_entry = File (file_data ['path' ])
649- # FIXME: should we really compue the checcksum here rather than get it from the scan?
650- file_entry .chk_sum = Algorithm ('SHA1' , file_entry .calc_chksum ())
651- for file_license in file_data ['licenses' ]:
652+ file_sha1 = file_data .get ('sha1' )
653+ if not file_sha1 :
654+ # Skip directories.
655+ continue
656+
657+ file_entry = File (file_data .get ('path' ))
658+ file_entry .chk_sum = Algorithm ('SHA1' , file_sha1 )
659+
660+ for file_license in file_data .get ('licenses' ):
652661 spdx_id = file_license .get ('spdx_license_key' )
653662 # TODO: we should create a "LicenseRef:xxx" identifier
654663 # if the license is not known to SPDX
0 commit comments