@@ -312,6 +312,10 @@ def scancode(ctx, input, output_file, copyright, license, package,
312312 license = True
313313 package = True
314314
315+ # HACK: force the `info` option on for SPDX output so that the already-calculated file SHA1s can be reused.
316+ if format in ('spdx-tv' , 'spdx-rdf' ):
317+ info = True
318+
315319 scans_cache_class = get_scans_cache_class ()
316320 try :
317321 files_count , results = scan (input_path = input ,
@@ -678,7 +682,7 @@ def save_results(files_count, scanned_files, format, input, output_file):
678682 output_file .write (unicode (json .dumps (meta , separators = (',' , ':' ), iterable_as_array = True , encoding = 'utf-8' )))
679683 output_file .write ('\n ' )
680684
681- elif format == 'spdx-tv' or format == 'spdx-rdf' :
685+ elif format in ( 'spdx-tv' , 'spdx-rdf' ) :
682686 from spdx .checksum import Algorithm
683687 from spdx .creationinfo import Tool
684688 from spdx .document import Document , License
@@ -695,10 +699,15 @@ def save_results(files_count, scanned_files, format, input, output_file):
695699 doc .package = Package (input , NoAssert ())
696700
697701 for file_data in scanned_files :
698- file_entry = File (file_data ['path' ])
699- # FIXME: should we really compute the checksum here rather than getting it from the scan?
700- file_entry .chk_sum = Algorithm ('SHA1' , file_entry .calc_chksum ())
701- for file_license in file_data ['licenses' ]:
702+ file_sha1 = file_data .get ('sha1' )
703+ if not file_sha1 :
704+ # Skip directories.
705+ continue
706+
707+ file_entry = File (file_data .get ('path' ))
708+ file_entry .chk_sum = Algorithm ('SHA1' , file_sha1 )
709+
710+ for file_license in file_data .get ('licenses' ):
702711 spdx_id = file_license .get ('spdx_license_key' )
703712 # TODO: we should create a "LicenseRef:xxx" identifier
704713 # if the license is not known to SPDX
0 commit comments