Skip to content

Commit 08982b7

Browse files
Show license detection issues using --todo option
Signed-off-by: Ayan Sinha Mahapatra <[email protected]>
1 parent 645d5ae commit 08982b7

23 files changed

+634
-241
lines changed

scanpipe/filters.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -857,6 +857,7 @@ class LicenseFilterSet(FilterSetUtilsMixin, django_filters.FilterSet):
857857
choices=[(EMPTY_VAR, "None")] + CodebaseResource.Compliance.choices,
858858
)
859859
is_license_clue = StrictBooleanFilter()
860+
needs_review = StrictBooleanFilter()
860861

861862
class Meta:
862863
model = DiscoveredLicense
@@ -868,6 +869,7 @@ class Meta:
868869
"license_expression_spdx",
869870
"compliance_alert",
870871
"is_license_clue",
872+
"needs_review",
871873
]
872874

873875

scanpipe/migrations/0074_discovered_license_models.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,20 @@ class Migration(migrations.Migration):
102102
help_text='True if this is not a proper license detection which should be considered in the license_expression for the parent resource/package. A license match is considered as a clue if it could be a possiblefalse positives or the matched rule is tagged as a clue explicitly.',
103103
),
104104
),
105+
(
106+
"from_package",
107+
models.BooleanField(
108+
default=False,
109+
help_text='True if this was discovered in a extracted license statement and False if this was discovered in a file.',
110+
),
111+
),
112+
(
113+
"needs_review",
114+
models.BooleanField(
115+
default=False,
116+
help_text='True if this was license detection needs to be reviewed as there might be a license detection issue.',
117+
),
118+
),
105119
(
106120
"project",
107121
models.ForeignKey(
@@ -111,6 +125,15 @@ class Migration(migrations.Migration):
111125
to="scanpipe.project",
112126
),
113127
),
128+
(
129+
"review_comments",
130+
models.JSONField(
131+
blank=True,
132+
default=list,
133+
help_text='A list of review comments for license detection issues which needs review. These descriptive comments are based on ambigous detection types and could also offers helpful suggestions on how to review/report these detection issues.',
134+
verbose_name='Review Comments',
135+
),
136+
),
114137
],
115138
options={
116139
"ordering": ["detection_count", "identifier"],
@@ -130,6 +153,18 @@ class Migration(migrations.Migration):
130153
fields=["detection_count"],
131154
name="scanpipe_di_detecti_d87ff1_idx",
132155
),
156+
models.Index(
157+
fields=['is_license_clue'],
158+
name='scanpipe_di_is_lice_f4922a_idx'
159+
),
160+
models.Index(
161+
fields=['from_package'],
162+
name='scanpipe_di_from_pa_6485b2_idx'
163+
),
164+
models.Index(
165+
fields=['needs_review'],
166+
name='scanpipe_di_needs_r_5cff82_idx'
167+
),
133168
],
134169
},
135170
bases=(

scanpipe/models.py

Lines changed: 40 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4146,6 +4146,12 @@ class DiscoveredLicenseQuerySet(
41464146
ComplianceAlertQuerySetMixin,
41474147
ProjectRelatedQuerySet,
41484148
):
4149+
def needs_review(self):
    """Return the license detections flagged with `needs_review=True`."""
    return self.filter(needs_review=True)
4151+
4152+
def does_not_need_review(self):
    """Return the license detections flagged with `needs_review=False`."""
    return self.filter(needs_review=False)
4154+
41494155
def order_by_count_and_expression(self):
41504156
"""Order by detection count and license expression (identifier) fields."""
41514157
return self.order_by("-detection_count", "identifier")
@@ -4214,9 +4220,13 @@ class DiscoveredLicense(
42144220

42154221
license_expression_field = "license_expression"
42164222

4217-
# If this license was discovered in a extracted license statement
4218-
# this is True, and False if this was discovered in a file.
4219-
from_package = None
4223+
from_package = models.BooleanField(
4224+
default=False,
4225+
help_text=_(
4226+
"True if this was discovered in a extracted license statement "
4227+
"and False if this was discovered in a file."
4228+
),
4229+
)
42204230

42214231
is_license_clue = models.BooleanField(
42224232
default=False,
@@ -4246,6 +4256,26 @@ class DiscoveredLicense(
42464256
),
42474257
)
42484258

4259+
needs_review = models.BooleanField(
4260+
default=False,
4261+
help_text=_(
4262+
"True if this was license detection needs to be reviewed "
4263+
"as there might be a license detection issue."
4264+
),
4265+
)
4266+
4267+
review_comments = models.JSONField(
4268+
_("Review Comments"),
4269+
default=list,
4270+
blank=True,
4271+
help_text=_(
4272+
"A list of review comments for license detection issues which "
4273+
"needs review. These descriptive comments are based on ambigous "
4274+
"detection types and could also offers helpful suggestions on "
4275+
"how to review/report these detection issues."
4276+
),
4277+
)
4278+
42494279
objects = DiscoveredLicenseQuerySet.as_manager()
42504280

42514281
class Meta:
@@ -4255,6 +4285,9 @@ class Meta:
42554285
models.Index(fields=["license_expression"]),
42564286
models.Index(fields=["license_expression_spdx"]),
42574287
models.Index(fields=["detection_count"]),
4288+
models.Index(fields=["is_license_clue"]),
4289+
models.Index(fields=["from_package"]),
4290+
models.Index(fields=["needs_review"]),
42584291
]
42594292
constraints = [
42604293
models.UniqueConstraint(
@@ -4268,7 +4301,7 @@ def __str__(self):
42684301
return self.identifier
42694302

42704303
@classmethod
4271-
def create_from_data(cls, project, detection_data):
4304+
def create_from_data(cls, project, detection_data, from_package=False):
42724305
"""
42734306
Create and returns a DiscoveredLicense for a `project` from the
42744307
`detection_data`. If one of the values of the required fields is not
@@ -4302,7 +4335,9 @@ def create_from_data(cls, project, detection_data):
43024335
if field_name in cls.model_fields() and value not in EMPTY_VALUES
43034336
}
43044337

4305-
discovered_license = cls(project=project, **cleaned_data)
4338+
discovered_license = cls(
4339+
project=project, from_package=from_package, **cleaned_data
4340+
)
43064341
# Using save_error=False to not capture potential errors at this level but
43074342
# rather in the CodebaseResource.create_and_add_license_data method so
43084343
# resource data can be injected in the ProjectMessage record.

scanpipe/pipelines/scan_single_package.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,11 +61,14 @@ def steps(cls):
6161
"info": True,
6262
"license": True,
6363
"license_text": True,
64+
"license_diagnostics": True,
65+
"license_text_diagnostics": True,
6466
"license_references": True,
6567
"package": True,
6668
"url": True,
6769
"classify": True,
6870
"summary": True,
71+
"todo": True,
6972
}
7073

7174
def get_package_input(self):

scanpipe/pipes/__init__.py

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,7 @@ def update_or_create_license_detection(
329329
from_package=False,
330330
count_detection=True,
331331
is_license_clue=False,
332+
check_todo=False,
332333
):
333334
"""
334335
Get, update or create a DiscoveredLicense object then return it.
@@ -355,8 +356,9 @@ def update_or_create_license_detection(
355356
license_detection.update_from_data(detection_data)
356357
else:
357358
license_detection = DiscoveredLicense.create_from_data(
358-
project,
359-
detection_data,
359+
project=project,
360+
detection_data=detection_data,
361+
from_package=from_package,
360362
)
361363

362364
if not license_detection:
@@ -377,7 +379,9 @@ def update_or_create_license_detection(
377379
count_detection=count_detection,
378380
)
379381

380-
license_detection.from_package = from_package
382+
if check_todo:
383+
scancode.check_license_detection_for_issues(license_detection)
384+
381385
return license_detection
382386

383387

@@ -399,6 +403,23 @@ def _clean_license_detection_data(detection_data):
399403
return detection_data
400404

401405

406+
def update_license_detection_with_issue(project, todo_issue):
    """
    Flag the license detection of `project` referenced by `todo_issue` as
    needing review and store the review comments of the issue on it.

    The `todo_issue` mapping is read for a "detection" mapping holding the
    detection "identifier" and for a "review_comments" mapping whose values
    are stored as a list on the detection.

    Nothing is done when the detection data or its identifier is missing, or
    when the project has no license detection with this identifier.
    """
    # An issue without any "detection" entry is handled like an issue without
    # identifier, rather than failing on a membership test against None.
    detection_data = todo_issue.get("detection") or {}
    if "identifier" not in detection_data:
        return

    license_detection = project.discoveredlicenses.get_or_none(
        identifier=detection_data.get("identifier"),
    )
    if not license_detection:
        return

    # The issue still requires a review when it carries no comments, so a
    # missing "review_comments" entry results in an empty list of comments.
    review_comments = todo_issue.get("review_comments") or {}
    license_detection.update(
        needs_review=True,
        review_comments=list(review_comments.values()),
    )
421+
422+
402423
def get_dependencies(project, dependency_data):
403424
"""
404425
Given a `dependency_data` mapping, get a list of DiscoveredDependency objects

scanpipe/pipes/input.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ def load_inventory_from_toolkit_scan(project, input_location):
108108
scancode.create_discovered_dependencies(
109109
project, scanned_codebase, strip_datafile_path_root=True
110110
)
111+
scancode.load_todo_issues(project, scanned_codebase)
111112

112113

113114
def load_inventory_from_scanpipe(project, scan_data, extra_data_prefix=None):

scanpipe/pipes/scancode.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,12 +43,16 @@
4343
from licensedcode.detection import FileRegion
4444
from licensedcode.detection import LicenseDetectionFromResult
4545
from licensedcode.detection import LicenseMatchFromResult
46+
from licensedcode.detection import UniqueDetection
47+
from licensedcode.detection import get_ambiguous_license_detections_by_type
4648
from packagedcode import get_package_handler
4749
from packagedcode import models as packagedcode_models
4850
from scancode import Scanner
4951
from scancode import api as scancode_api
5052
from scancode import cli as scancode_cli
5153
from scancode.cli import run_scan as scancode_run_scan
54+
from summarycode.todo import ReviewComments
55+
from summarycode.todo import get_review_comments
5256

5357
from aboutcode.pipeline import LoopProgress
5458
from scanpipe import pipes
@@ -489,6 +493,7 @@ def collect_and_create_license_detections(project):
489493
project=project,
490494
detection_data=detection_data,
491495
resource_path=resource.path,
496+
check_todo=True,
492497
)
493498

494499
for clue_data in resource.license_clues:
@@ -497,6 +502,7 @@ def collect_and_create_license_detections(project):
497502
detection_data=clue_data,
498503
resource_path=resource.path,
499504
is_license_clue=True,
505+
check_todo=True,
500506
)
501507

502508
for resource in project.codebaseresources.has_package_data():
@@ -511,6 +517,7 @@ def collect_and_create_license_detections(project):
511517
detection_data=detection,
512518
resource_path=resource.path,
513519
from_package=True,
520+
check_todo=True,
514521
)
515522

516523
for detection in package_data.other_license_detections:
@@ -519,6 +526,7 @@ def collect_and_create_license_detections(project):
519526
detection_data=detection,
520527
resource_path=resource.path,
521528
from_package=True,
529+
check_todo=True,
522530
)
523531

524532

@@ -1022,6 +1030,54 @@ def create_discovered_licenses(project, scanned_codebase):
10221030
pipes.update_or_create_license_detection(project, detection_data)
10231031

10241032

1033+
def load_todo_issues(project, scanned_codebase):
    """
    Load the "todo" issues found in the `scanned_codebase` attributes into
    the license detections of the `project`, then flag the license clues of
    the project as needing review.
    """
    # The "todo" attribute may be absent, or present but unset (None);
    # both cases are treated as "no issues to load".
    todo_issues = getattr(scanned_codebase.attributes, "todo", None) or []
    for todo_issue in todo_issues:
        pipes.update_license_detection_with_issue(project, todo_issue)

    # NOTE(review): the clue update is assumed to run regardless of whether
    # the scan carries todo issues (indentation was not visible) -- confirm.
    license_clues = project.discoveredlicenses.filter(
        is_license_clue=True,
    )
    license_clues.update(
        needs_review=True,
        review_comments=[ReviewComments.LICENSE_CLUES.value],
    )
1045+
1046+
1047+
def check_license_detection_for_issues(discovered_license):
    """
    Check the `discovered_license` for ambiguous detection issues and, when
    an issue type is reported, flag it as needing review and store the
    matching review comments on it.
    """
    regions = []
    for region_data in discovered_license.file_regions:
        region = FileRegion(
            path=region_data.get("path"),
            start_line=region_data.get("start_line"),
            end_line=region_data.get("end_line"),
        )
        regions.append(region)

    license_matches = []
    for match_data in discovered_license.matches:
        license_matches.append(LicenseMatchFromResult.from_dict(match_data))

    # Rebuild the detection object from the stored values so that it can be
    # passed to the ambiguity check.
    detection = UniqueDetection(
        identifier=discovered_license.identifier,
        license_expression=discovered_license.license_expression,
        license_expression_spdx=discovered_license.license_expression_spdx,
        detection_count=discovered_license.detection_count,
        detection_log=discovered_license.detection_log,
        matches=license_matches,
        file_regions=regions,
    )

    issues_by_type = get_ambiguous_license_detections_by_type(
        unique_license_detections=[detection],
    )
    if not issues_by_type:
        return

    # Only the first reported issue type is used to build the comments.
    first_issue_type = next(iter(issues_by_type))
    comments_by_type = get_review_comments(detection_log=[first_issue_type])
    discovered_license.update(
        needs_review=True,
        review_comments=list(comments_by_type.values()),
    )
1079+
1080+
10251081
def set_codebase_resource_for_package(codebase_resource, discovered_package):
10261082
"""
10271083
Assign the `discovered_package` to the `codebase_resource` and set its

scanpipe/templates/scanpipe/license_detection_list.html

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,11 @@
2424
<td style="min-width: 300px;" title="{{ license_detection.identifier }}">
2525
{# CAUTION: Avoid relying on get_absolute_url to prevent unnecessary query triggers #}
2626
<a href="{% url 'license_detail' project.slug license_detection.identifier %}">{{ license_detection.identifier }}</a>
27+
{% if license_detection.needs_review %}
28+
<a href="{% url 'license_detail' project.slug license_detection.identifier %}#detection">
29+
<i class="fa-solid fa-magnifying-glass fa-sm has-text-danger" title="License Detection Issues"></i>
30+
</a>
31+
{% endif %}
2732
{% if license_detection.has_compliance_alert %}
2833
<a href="{% url 'license_detail' project.slug license_detection.identifier %}#detection">
2934
<i class="fa-solid fa-scale-balanced fa-sm has-text-danger" title="License Compliance Alerts"></i>
@@ -42,6 +47,9 @@
4247
<td>
4348
{{ license_detection.is_license_clue }}
4449
</td>
50+
<td>
51+
{{ license_detection.needs_review }}
52+
</td>
4553
{% if display_compliance_alert %}
4654
<td>
4755
<a href="?compliance_alert={{ license_detection.compliance_alert }}" class="is-black-link">

scanpipe/templates/scanpipe/panels/license_detections_summary.html

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,12 @@
1818
<a class="panel-block is-align-items-flex-start break-word is-flex is-align-items-center" href="{{ project_licenses_url }}?is_license_clue=False" target="_blank">
1919
See all license detections
2020
<span class="tag is-rounded ml-1">{{ total_counts.all|intcomma }}</span>
21+
{% if total_counts.needs_review %}
22+
<span class="has-text-danger is-size-6 ml-2">
23+
<i class="fa-solid fa-magnifying-glass"></i>
24+
{{ total_counts.needs_review|intcomma }}
25+
</span>
26+
{% endif %}
2127
{% if total_counts.with_compliance_error %}
2228
<span class="has-text-danger is-size-6 ml-2">
2329
<i class="fa-solid fa-scale-balanced fa-sm"></i>
@@ -30,6 +36,12 @@
3036
<a class="panel-block is-align-items-flex-start break-word is-flex is-align-items-center" href="{{ project_licenses_url }}?is_license_clue=True" target="_blank">
3137
See all license clues
3238
<span class="tag is-rounded ml-1">{{ clue_counts.all|intcomma }}</span>
39+
{% if clue_counts.needs_review %}
40+
<span class="has-text-danger is-size-6 ml-2">
41+
<i class="fa-solid fa-magnifying-glass"></i>
42+
{{ clue_counts.needs_review|intcomma }}
43+
</span>
44+
{% endif %}
3345
{% if clue_counts.with_compliance_error %}
3446
<span class="has-text-danger is-size-6 ml-2">
3547
<i class="fa-solid fa-scale-balanced fa-sm"></i>

0 commit comments

Comments
 (0)