Skip to content

Commit c044d08

Browse files
authored
Merge pull request #937 from nexB/919-load-codebase-from-scan
Add the ability to load a codebase from a scan to ScanCode #919
2 parents 7054c23 + cad3882 commit c044d08

File tree

8 files changed

+669
-73
lines changed

8 files changed

+669
-73
lines changed

src/scancode/cli.py

Lines changed: 64 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@
7979
from scancode.interrupt import interruptible
8080
from scancode.resource import Codebase
8181
from scancode.resource import Resource
82+
from scancode.resource import VirtualCodebase
8283
from scancode.utils import BaseCommand
8384
from scancode.utils import path_progress_message
8485
from scancode.utils import progressmanager
@@ -282,10 +283,15 @@ def print_plugins(ctx, param, value):
282283
@click.option('--verbose',
283284
is_flag=True,
284285
conflicts=['quiet'],
285-
help='Print progress as file-by-file path instead of a progress bar. '
286+
help='Print progress as file-by-file path instead of a progress bar. '
286287
'Print a verbose scan summary.',
287288
help_group=CORE_GROUP, sort_order=20, cls=CommandLineOption)
288289

290+
@click.option('--from-json',
291+
is_flag=True,
292+
help='Load codebase from an existing JSON scan',
293+
help_group=CORE_GROUP, sort_order=25, cls=CommandLineOption)
294+
289295
@click.option('--cache-dir',
290296
type=click.Path(
291297
exists=True, file_okay=False, dir_okay=True,
@@ -374,6 +380,7 @@ def scancode(ctx, input, # NOQA
374380
strip_root, full_root,
375381
processes, timeout,
376382
quiet, verbose,
383+
from_json,
377384
cache_dir, temp_dir,
378385
timing,
379386
max_in_memory,
@@ -463,6 +470,7 @@ def scancode(ctx, input, # NOQA
463470
timeout=timeout,
464471
quiet=quiet,
465472
verbose=verbose,
473+
from_json=from_json,
466474
cache_dir=cache_dir,
467475
temp_dir=temp_dir,
468476
timing=timing,
@@ -522,10 +530,15 @@ def scancode(ctx, input, # NOQA
522530
output_filter_plugins = enabled_plugins[output_filter.stage]
523531
output_plugins = enabled_plugins[output.stage]
524532

525-
if not scanner_plugins:
526-
msg = ('Missing scan option(s): at least one scan '
527-
'option is required.')
528-
raise click.UsageError(msg)
533+
if from_json:
534+
if scanner_plugins:
535+
msg = ('Data loaded from JSON: no scan options can be selected.')
536+
raise click.UsageError(msg)
537+
else:
538+
if not scanner_plugins:
539+
msg = ('Missing scan option(s): at least one scan '
540+
'option is required.')
541+
raise click.UsageError(msg)
529542

530543
if not output_plugins:
531544
msg = ('Missing output option(s): at least one output '
@@ -565,7 +578,7 @@ def scancode(ctx, input, # NOQA
565578
setup_timings['setup'] = time() - plugins_setup_start
566579

567580
########################################################################
568-
# 2.5. Create a new Resource subclass for this scan
581+
# 2.5. Collect attributes requested for this scan
569582
########################################################################
570583
# Craft a new Resource class with the attributes contributed by plugins
571584
sortable_attributes = []
@@ -602,9 +615,6 @@ def scancode(ctx, input, # NOQA
602615
for a in attributes.items():
603616
logger_debug(a)
604617

605-
resource_class = attr.make_class(
606-
name=b'ScannedResource', attrs=attributes, bases=(Resource,))
607-
608618
########################################################################
609619
# 3. collect codebase inventory
610620
########################################################################
@@ -614,23 +624,39 @@ def scancode(ctx, input, # NOQA
614624
if not quiet:
615625
echo_stderr('Collect file inventory...', fg='green')
616626

617-
# TODO: add progress indicator
618-
# note: inventory timing collection is built in Codebase initialization
619-
# TODO: this should also collect the basic size/dates
620-
try:
621-
codebase = Codebase(
622-
location=input,
623-
resource_class=resource_class,
624-
full_root=full_root,
625-
strip_root=strip_root,
626-
temp_dir=temp_dir,
627-
max_in_memory=max_in_memory
628-
)
629-
except:
630-
msg = 'ERROR: failed to collect codebase at: %(input)r' % locals()
631-
echo_stderr(msg, fg='red')
632-
echo_stderr(traceback.format_exc())
633-
ctx.exit(2)
627+
if not from_json:
628+
resource_class = attr.make_class(name=b'ScannedResource',
629+
attrs=attributes, bases=(Resource,))
630+
# TODO: add progress indicator
631+
# note: inventory timing collection is built in Codebase initialization
632+
# TODO: this should also collect the basic size/dates
633+
try:
634+
codebase = Codebase(
635+
location=input,
636+
resource_class=resource_class,
637+
full_root=full_root,
638+
strip_root=strip_root,
639+
temp_dir=temp_dir,
640+
max_in_memory=max_in_memory
641+
)
642+
except:
643+
msg = 'ERROR: failed to collect codebase at: %(input)r' % locals()
644+
echo_stderr(msg, fg='red')
645+
echo_stderr(traceback.format_exc())
646+
ctx.exit(2)
647+
else:
648+
try:
649+
codebase = VirtualCodebase(
650+
json_scan_location=input,
651+
plugin_attributes=attributes,
652+
temp_dir=temp_dir,
653+
max_in_memory=max_in_memory
654+
)
655+
except:
656+
msg = 'ERROR: failed to load codebase from scan: %(input)r' % locals()
657+
echo_stderr(msg, fg='red')
658+
echo_stderr(traceback.format_exc())
659+
ctx.exit(2)
634660

635661
# TODO: this is weird: maybe the timings should NOT be stored on the
636662
# codebase, since they exist independently of it?
@@ -652,9 +678,9 @@ def scancode(ctx, input, # NOQA
652678
pre_scan_plugins.values(), scanner_plugins)
653679

654680
success = success and run_scanners(early_scan_plugins , codebase,
655-
processes, timeout, timing,
656-
quiet, verbose,
657-
stage='pre-scan-scan', kwargs=kwargs)
681+
processes, timeout, timing,
682+
quiet, verbose,
683+
stage='pre-scan-scan', kwargs=kwargs)
658684

659685
########################################################################
660686
# 5. run prescans
@@ -676,9 +702,9 @@ def scancode(ctx, input, # NOQA
676702
if p not in early_scan_plugins]
677703

678704
success = success and run_scanners(scan_plugins, codebase,
679-
processes, timeout, timing,
680-
quiet, verbose,
681-
stage='scan', kwargs=kwargs)
705+
processes, timeout, timing,
706+
quiet, verbose,
707+
stage='scan', kwargs=kwargs)
682708

683709
########################################################################
684710
# 7. run postscans
@@ -1018,7 +1044,7 @@ def display_summary(codebase, scan_names, processes, verbose):
10181044
initial_size_count = codebase.summary.get('initial:size_count', 0)
10191045
if initial_size_count:
10201046
initial_size_count = format_size(initial_size_count)
1021-
initial_size_count = 'for %(initial:size_count)s' % locals()
1047+
initial_size_count = 'for %(initial_size_count)s' % locals()
10221048
else:
10231049
initial_size_count = ''
10241050

@@ -1045,7 +1071,11 @@ def display_summary(codebase, scan_names, processes, verbose):
10451071
scan_time = codebase.timings.get('scan', 0.)
10461072

10471073
scan_files_count = codebase.summary.get('scan:files_count', 0)
1048-
scan_file_speed = round(float(scan_files_count) / scan_time , 2)
1074+
1075+
if scan_time > 0:
1076+
scan_file_speed = round(float(scan_files_count) / scan_time , 2)
1077+
else:
1078+
scan_file_speed = 0
10491079

10501080
scan_size_count = codebase.summary.get('scan:size_count', 0)
10511081

0 commit comments

Comments
 (0)