Skip to content

Commit b32166e

Browse files
authored
feat: add option to infer_download_urls on product importers (#444)
Signed-off-by: tdruez <[email protected]>
1 parent 0a0bb1f commit b32166e

File tree

13 files changed

+233
-42
lines changed

13 files changed

+233
-42
lines changed

component_catalog/models.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1719,6 +1719,10 @@ def has_package_url(self):
17191719
"""Return objects with Package URL defined."""
17201720
return self.filter(~models.Q(type="") & ~models.Q(name=""))
17211721

1722+
def has_download_url(self):
    """Return the objects whose ``download_url`` is not an empty string."""
    empty_download_url = models.Q(download_url="")
    return self.filter(~empty_download_url)
1725+
17221726
def annotate_sortable_identifier(self):
17231727
"""
17241728
Annotate the QuerySet with a `sortable_identifier` value that combines
@@ -2036,9 +2040,14 @@ def package_url_filename(self):
20362040

20372041
@property
20382042
def inferred_repo_url(self):
2039-
"""Return the URL deduced from the information available in a Package URL (purl)."""
2043+
"""Return the repo URL deduced from the Package URL (purl)."""
20402044
return purl2url.get_repo_url(self.package_url)
20412045

2046+
def infer_download_url(self):
    """
    Return the download URL inferred from the Package URL (purl).

    Return ``None`` when no Package URL is set on this object. Otherwise,
    return whatever ``download.infer_download_url`` yields for that purl.
    """
    # Guard clause: nothing can be inferred without a Package URL.
    if not self.package_url:
        return None
    return download.infer_download_url(self.package_url)
2050+
20422051
def get_url(self, name, params=None, include_identifier=False):
20432052
if not params:
20442053
params = [self.dataspace.name, quote_plus(str(self.uuid))]

component_catalog/tests/test_models.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2576,6 +2576,15 @@ def test_package_model_inferred_repo_url_property(self):
25762576
expected = "https://github.com/package-url/packageurl-python/tree/v0.10.4"
25772577
self.assertEqual(expected, package1.inferred_repo_url)
25782578

2579+
def test_package_model_infer_download_url(self):
2580+
package1 = make_package(self.dataspace, filename="package")
2581+
self.assertIsNone(package1.infer_download_url())
2582+
2583+
package1.set_package_url("pkg:nuget/Azure.Core@1.45.0")
2584+
package1.save()
2585+
expected_download_url = "https://www.nuget.org/api/v2/package/Azure.Core/1.45.0"
2586+
self.assertEqual(expected_download_url, package1.infer_download_url())
2587+
25792588
@mock.patch("dejacode_toolkit.purldb.PurlDB.find_packages")
25802589
def test_package_model_get_purldb_entries(self, mock_find_packages):
25812590
purl1 = "pkg:pypi/[email protected]"
@@ -2758,6 +2767,12 @@ def test_package_queryset_has_package_url(self):
27582767
qs = Package.objects.has_package_url()
27592768
self.assertQuerySetEqual(qs, [package1])
27602769

2770+
def test_package_queryset_has_download_url(self):
    """Check that ``has_download_url`` only returns packages with a download URL."""
    with_download_url = make_package(self.dataspace, download_url="https://download.url")
    # Second package is created without a download URL and must be filtered out.
    make_package(self.dataspace)
    self.assertQuerySetEqual(Package.objects.has_download_url(), [with_download_url])
2775+
27612776
def test_package_queryset_annotate_sortable_identifier(self):
27622777
package1 = make_package(self.dataspace, package_url="pkg:pypi/[email protected]")
27632778
package2 = make_package(self.dataspace)

product_portfolio/api.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,12 @@ class LoadSBOMsFormSerializer(serializers.Serializer):
227227
required=True,
228228
help_text=LoadSBOMsForm.base_fields["input_file"].label,
229229
)
230+
infer_download_urls = serializers.BooleanField(
231+
required=False,
232+
initial=True,
233+
default=True,
234+
help_text=LoadSBOMsForm.base_fields["infer_download_urls"].help_text,
235+
)
230236
update_existing_packages = serializers.BooleanField(
231237
required=False,
232238
default=False,
@@ -246,6 +252,12 @@ class ImportManifestsFormSerializer(serializers.Serializer):
246252
required=True,
247253
help_text=ImportManifestsForm.base_fields["input_file"].label,
248254
)
255+
infer_download_urls = serializers.BooleanField(
256+
required=False,
257+
initial=True,
258+
default=True,
259+
help_text=ImportManifestsForm.base_fields["infer_download_urls"].help_text,
260+
)
249261
update_existing_packages = serializers.BooleanField(
250262
required=False,
251263
default=False,

product_portfolio/forms.py

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -641,6 +641,15 @@ class BaseProductImportFormView(forms.Form):
641641
"for all of the packages assigned to your product."
642642
),
643643
)
644+
infer_download_urls = forms.BooleanField(
645+
label=_("Infer missing download URLs"),
646+
required=False,
647+
initial=True,
648+
help_text=_(
649+
"When a download URL is missing from the input data, attempt to infer it "
650+
"from the Package URL (purl). A download URL is required for package scanning."
651+
),
652+
)
644653

645654
@property
646655
def helper(self):
@@ -652,6 +661,7 @@ def helper(self):
652661
Fieldset(
653662
None,
654663
"input_file",
664+
"infer_download_urls",
655665
"update_existing_packages",
656666
"scan_all_packages",
657667
StrictSubmit("submit", _("Import"), css_class="btn-success col-2"),
@@ -667,6 +677,7 @@ def submit(self, product, user):
667677
input_file=self.cleaned_data.get("input_file"),
668678
update_existing_packages=self.cleaned_data.get("update_existing_packages"),
669679
scan_all_packages=self.cleaned_data.get("scan_all_packages"),
680+
infer_download_urls=self.cleaned_data.get("infer_download_urls"),
670681
created_by=user,
671682
)
672683

@@ -716,7 +727,7 @@ class LoadSBOMsForm(BaseProductImportFormView):
716727

717728
class ImportManifestsForm(BaseProductImportFormView):
718729
project_type = ScanCodeProject.ProjectType.IMPORT_FROM_MANIFEST
719-
pipeline_name = "resolve_dependencies"
730+
pipeline_name = "resolve_dependencies:StaticResolver,DynamicResolver"
720731

721732
input_file = SmartFileField(
722733
label=_("Manifest file or zip archive"),
@@ -1005,3 +1016,25 @@ def submit(self, product, user):
10051016
scancodeproject_uuid=scancode_project.uuid,
10061017
)
10071018
)
1019+
1020+
1021+
class ScanAllPackagesForm(forms.Form):
    """Form exposing the ``infer_download_urls`` option of a "scan all packages" request."""

    # Optional checkbox, ticked by default.
    infer_download_urls = forms.BooleanField(
        label=_("Infer missing download URLs"),
        required=False,
        initial=True,
        help_text=_(
            "When a download URL is missing for packages, attempt to infer it "
            "from the Package URL (purl). A download URL is required for package scanning."
        ),
    )

    @property
    def helper(self):
        """Return a ``FormHelper`` set up for POST, with a fixed form id and a one-field layout."""
        form_helper = FormHelper()
        form_helper.form_method = "post"
        form_helper.form_id = "scan-all-packages-form"
        form_helper.attrs = {"autocomplete": "off"}
        form_helper.layout = Layout("infer_download_urls")
        return form_helper

product_portfolio/importers.py

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
from component_catalog.models import PACKAGE_URL_FIELDS
2727
from component_catalog.models import Component
2828
from component_catalog.models import Package
29+
from dejacode_toolkit import download
2930
from dejacode_toolkit.scancodeio import ScanCodeIO
3031
from dje.copier import copy_object
3132
from dje.importers import BaseImporter
@@ -649,7 +650,15 @@ class ImportPackageFromScanCodeIO:
649650
"filename",
650651
]
651652

652-
def __init__(self, user, project_uuid, product, update_existing=False, scan_all_packages=False):
653+
def __init__(
654+
self,
655+
user,
656+
project_uuid,
657+
product,
658+
update_existing=False,
659+
scan_all_packages=False,
660+
infer_download_urls=False,
661+
):
653662
self.licensing = Licensing()
654663
self.created = defaultdict(list)
655664
self.existing = defaultdict(list)
@@ -662,12 +671,13 @@ def __init__(self, user, project_uuid, product, update_existing=False, scan_all_
662671
self.product = product
663672
self.update_existing = update_existing
664673
self.scan_all_packages = scan_all_packages
674+
self.infer_download_urls = infer_download_urls
665675

666676
scancodeio = ScanCodeIO(user.dataspace)
667677
self.packages = scancodeio.fetch_project_packages(self.project_uuid)
668-
if not self.packages:
669-
raise Exception("Packages could not be fetched from ScanCode.io")
670678
self.dependencies = scancodeio.fetch_project_dependencies(self.project_uuid)
679+
if not self.packages and not self.dependencies:
680+
raise Exception("Packages could not be fetched from ScanCode.io")
671681

672682
def save(self):
673683
self.import_packages()
@@ -696,6 +706,15 @@ def import_package(self, package_data):
696706
# Check if the package already exists to prevent duplication.
697707
package = self.look_for_existing_package(package_data)
698708

709+
# Infer a download URL from the Package URL
710+
if (
711+
self.infer_download_urls
712+
and not package_data.get("download_url")
713+
and (purl := package_data.get("purl"))
714+
and (download_url := download.infer_download_url(purl))
715+
):
716+
package_data["download_url"] = download_url
717+
699718
if license_expression := package_data.get("declared_license_expression"):
700719
license_expression = str(self.licensing.dedup(license_expression))
701720
package_data["license_expression"] = license_expression
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Generated by Django 5.2.8 on 2025-12-16 04:14
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
7+
8+
dependencies = [
9+
("product_portfolio", "0013_productstatus_is_locked_and_more"),
10+
]
11+
12+
operations = [
13+
migrations.AddField(
14+
model_name="scancodeproject",
15+
name="infer_download_urls",
16+
field=models.BooleanField(default=False),
17+
),
18+
]

product_portfolio/models.py

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -557,25 +557,40 @@ def assign_objects(self, related_objects, user, replace_version=False):
557557

558558
return created_count, updated_count, unchanged_count
559559

560-
def scan_all_packages_task(self, user):
560+
def scan_all_packages_task(self, user, infer_download_urls=False):
561561
"""
562562
Submit a Scan request to ScanCode.io for each package assigned to this Product.
563563
Only packages with a proper download URL are sent.
564564
"""
565-
package_urls = [
565+
if infer_download_urls:
566+
self.improve_packages_from_purl()
567+
568+
package_download_urls = [
566569
package.download_url
567-
for package in self.all_packages
570+
for package in self.all_packages.has_download_url()
568571
if package.download_url.startswith(("http", "https"))
569572
]
570573

571574
tasks.scancodeio_submit_scan.delay(
572-
uris=package_urls,
575+
uris=package_download_urls,
573576
user_uuid=user.uuid,
574577
dataspace_uuid=user.dataspace.uuid,
575578
)
576579

580+
def improve_packages_from_purl(self):
    """
    Fill in missing package download URLs using their Package URL when possible.

    Only the packages of ``self.all_packages`` that have a Package URL and an
    empty ``download_url`` are considered. Return the list of packages that
    were updated.
    """
    updated_packages = []

    missing_download_url = models.Q(download_url="")
    candidates = self.all_packages.has_package_url().filter(missing_download_url)
    for package in candidates:
        inferred_url = package.infer_download_url()
        # Skip packages for which no download URL could be inferred.
        if not inferred_url:
            continue
        package.update(download_url=inferred_url)
        updated_packages.append(package)

    return updated_packages
591+
577592
def improve_packages_from_purldb(self, user):
578-
"""Update all Packages assigned to the Product using PurlDB data."""
593+
"""Update all packages assigned to the Product using PurlDB data."""
579594
updated_packages = []
580595
for package in self.packages.all():
581596
updated_fields = package.update_from_purldb(user)
@@ -1555,6 +1570,9 @@ class Status(models.TextChoices):
15551570
scan_all_packages = models.BooleanField(
15561571
default=False,
15571572
)
1573+
infer_download_urls = models.BooleanField(
1574+
default=False,
1575+
)
15581576
status = models.CharField(
15591577
max_length=10,
15601578
choices=Status.choices,
@@ -1615,6 +1633,7 @@ def import_data_from_scancodeio(self):
16151633
product=self.product,
16161634
update_existing=self.update_existing_packages,
16171635
scan_all_packages=self.scan_all_packages,
1636+
infer_download_urls=self.infer_download_urls,
16181637
)
16191638
created, existing, errors = importer.save()
16201639

Lines changed: 31 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,35 @@
1+
{% load crispy_forms_tags %}
12
<div class="modal" tabindex="-1" role="dialog" id="scan-all-packages-modal">
2-
<div class="modal-dialog" role="document">
3-
<div class="modal-content">
4-
<div class="modal-header">
5-
<h5 class="modal-title">Scan all Packages</h5>
6-
<button type="button" class="btn-close" data-bs-dismiss="modal" aria-label="Close"></button>
7-
</div>
8-
<div class="modal-body bg-body-tertiary">
9-
You are about to initiate multiple scans on the ScanCode.io server for all of the
10-
Packages assigned to your Product, either by direct assignment or by assignment to a
11-
Component assigned to your Product.<br><br>
12-
<strong>Note that this may take some time to complete.</strong><br><br>
13-
You can view the status of all the scans by selecting the
14-
<a target="_blank" href="{% url 'component_catalog:scan_list' %}">Scans</a> option
15-
from the DejaCode Tools dropdown menu, where you can also select each Package in the list
16-
to view scan results details in the "Scan" tab of that Package.
17-
</div>
18-
<div class="modal-footer">
19-
<button type="button" class="btn btn-secondary" data-bs-dismiss="modal">Close</button>
20-
<a id="scan_all_packages_submit" href="{{ object.get_scan_all_packages_url }}" class="btn btn-success">Submit Scan Request</a>
3+
<form autocomplete="off" method="{{ scan_all_packages_form.helper.form_method }}" action="{{ object.get_scan_all_packages_url }}" id="{{ scan_all_packages_form.helper.form_id }}">
4+
<div class="modal-dialog" role="document">
5+
<div class="modal-content">
6+
<div class="modal-header">
7+
<h5 class="modal-title">Scan all Packages</h5>
8+
<button type="button" class="btn-close" data-bs-dismiss="modal" aria-label="Close"></button>
9+
</div>
10+
<div class="modal-body bg-body-tertiary">
11+
<p>
12+
You are about to initiate multiple scans on the ScanCode.io server for all the
13+
Packages assigned to your Product, either by direct assignment or by assignment to a
14+
Component assigned to your Product.
15+
</p>
16+
<p>
17+
<strong>Note that this may take some time to complete.</strong>
18+
</p>
19+
<p>
20+
You can view the status of all the scans by selecting the
21+
<a target="_blank" href="{% url 'component_catalog:scan_list' %}">Scans</a> option
22+
from the DejaCode Tools dropdown menu, where you can also select each Package in the list
23+
to view scan results details in the "Scan" tab of that Package.
24+
</p>
25+
<hr>
26+
{% crispy scan_all_packages_form %}
27+
</div>
28+
<div class="modal-footer">
29+
<input type="button" name="close" value="Close" class="btn btn-secondary" data-bs-dismiss="modal">
30+
<input type="submit" id="scan_all_packages_submit" value="Submit Scan Request" class="btn btn-primary btn-success">
31+
</div>
2132
</div>
2233
</div>
23-
</div>
34+
</form>
2435
</div>

product_portfolio/tests/test_api.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -407,6 +407,7 @@ def test_api_product_endpoint_load_sboms_action(self):
407407

408408
data = {
409409
"input_file": ContentFile("{}", name="sbom.json"),
410+
"infer_download_urls": True,
410411
"update_existing_packages": False,
411412
"scan_all_packages": False,
412413
}
@@ -436,6 +437,7 @@ def test_api_product_endpoint_import_manifests_action(self):
436437

437438
data = {
438439
"input_file": ContentFile("Content", name="requirements.txt"),
440+
"infer_download_urls": True,
439441
"update_existing_packages": False,
440442
"scan_all_packages": False,
441443
}

0 commit comments

Comments
 (0)