From 1485a94bf271b93697b0dc72bb0ebbd909aaf944 Mon Sep 17 00:00:00 2001 From: tdruez Date: Fri, 17 Oct 2025 16:24:05 +0400 Subject: [PATCH 1/4] Add ability to download all output results formats #1880 Signed-off-by: tdruez --- CHANGELOG.rst | 6 +++ scanpipe/pipes/output.py | 42 +++++++++++++++++++ .../dropdowns/project_download_dropdown.html | 4 ++ .../scanpipe/includes/project_downloads.html | 24 +++++------ scanpipe/tests/pipes/test_output.py | 19 +++++++++ scanpipe/views.py | 19 ++++----- 6 files changed, 90 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 33b411a875..65c7e95336 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,12 @@ Changelog ========= +v35.4.1 (unreleased) +-------------------- + +- Add ability to download all output results formats as a zipfile for a given project. + https://github.com/aboutcode-org/scancode.io/issues/1880 + v35.4.0 (2025-09-30) -------------------- diff --git a/scanpipe/pipes/output.py b/scanpipe/pipes/output.py index ccf5fc70e7..a932438b52 100644 --- a/scanpipe/pipes/output.py +++ b/scanpipe/pipes/output.py @@ -26,10 +26,12 @@ import json import re import uuid +import zipfile from operator import attrgetter from pathlib import Path from django.apps import apps +from django.core.files.base import ContentFile from django.core.serializers.json import DjangoJSONEncoder from django.forms.models import model_to_dict from django.template import Context @@ -1138,3 +1140,43 @@ def to_ort_package_list_yml(project): "attribution": to_attribution, "ort-package-list": to_ort_package_list_yml, } + + +def make_zip_from_files(files): + """Return an in-memory zipfile given a list of (filename, file_path) pairs.""" + zip_buffer = io.BytesIO() + with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zip_file: + for filename, file_path in files: + with open(file_path, "rb") as f: + zip_file.writestr(filename, f.read()) + zip_buffer.seek(0) + return zip_buffer + + +# def to_all_formats(project): +# """Generate all output formats for a project and return a zipfile.""" +# files = [] +# for output_function in FORMAT_TO_FUNCTION_MAPPING.values(): +# output_file = output_function(project) +# filename = safe_filename(f"{project.name}_{output_file.name}") +# files.append((filename, output_file)) +# zip_buffer = make_zip_from_files(files) +# zip_buffer.name = "scancodeio_output_files.zip" +# return zip_buffer + + +def to_all_formats(project): + """Generate all output formats for a project and return a Django File-like zip.""" + files = [] + for output_function in FORMAT_TO_FUNCTION_MAPPING.values(): + output_file = output_function(project) + filename = safe_filename(f"{project.name}_{output_file.name}") + files.append((filename, output_file)) + + zip_buffer = make_zip_from_files(files) + + # Wrap it into a Django File-like object + zip_file = ContentFile(zip_buffer.getvalue()) + zip_file.name = safe_filename(f"{project.name}_outputs.zip") + + return zip_file diff --git a/scanpipe/templates/scanpipe/dropdowns/project_download_dropdown.html b/scanpipe/templates/scanpipe/dropdowns/project_download_dropdown.html index b2de6d49d0..1b89786966 100644 --- a/scanpipe/templates/scanpipe/dropdowns/project_download_dropdown.html +++ b/scanpipe/templates/scanpipe/dropdowns/project_download_dropdown.html @@ -40,6 +40,10 @@ ORT (package-list) + + + All formats + \ No newline at end of file diff --git a/scanpipe/templates/scanpipe/includes/project_downloads.html b/scanpipe/templates/scanpipe/includes/project_downloads.html index 409da4c7b7..51e43fb6d1 100644 --- a/scanpipe/templates/scanpipe/includes/project_downloads.html +++ b/scanpipe/templates/scanpipe/includes/project_downloads.html @@ -1,18 +1,16 @@ \ No newline at end of file diff --git a/scanpipe/tests/pipes/test_output.py b/scanpipe/tests/pipes/test_output.py index fd8b897e46..5e85fe94b6 100644 --- a/scanpipe/tests/pipes/test_output.py +++ b/scanpipe/tests/pipes/test_output.py @@ -26,6 +26,7 @@ import shutil import tempfile import uuid +import zipfile from dataclasses import dataclass from pathlib import Path from unittest import mock @@ -633,6 +634,24 @@ def test_scanpipe_pipes_outputs_to_to_ort_package_list_yml(self): expected_file = self.data / "asgiref" / "asgiref-3.3.0.package-list.yml" self.assertResultsEqual(expected_file, output_file.read_text()) + def test_scanpipe_pipes_outputs_to_all_formats(self): + fixtures = self.data / "asgiref" / "asgiref-3.3.0_fixtures.json" + call_command("loaddata", fixtures, **{"verbosity": 0}) + project = Project.objects.get(name="asgiref") + + with self.assertNumQueries(35): + output_file = output.to_all_formats(project=project) + + self.assertEqual("asgiref_outputs.zip", output_file.name) + + output_file.seek(0) # Important for reading from start + with zipfile.ZipFile(output_file, "r") as zip_ref: + zip_contents = zip_ref.namelist() + file_count = len(zip_contents) + + expected_file_count = len(output.FORMAT_TO_FUNCTION_MAPPING) + self.assertEqual(file_count, expected_file_count) + def test_scanpipe_pipes_outputs_make_unknown_license_object(self): licensing = get_licensing() parsed_expression = licensing.parse("some-unknown-license") diff --git a/scanpipe/views.py b/scanpipe/views.py index 470b115c63..5f3d4a4f3b 100644 --- a/scanpipe/views.py +++ b/scanpipe/views.py @@ -24,7 +24,6 @@ import io import json import operator -import zipfile from collections import Counter from contextlib import suppress from pathlib import Path @@ -1444,19 +1443,17 @@ def get_project_queryset(selected_project_ids=None, action_form=None): @staticmethod def download_outputs_zip_response(project_qs, action_form): + """Generate and return a zip file response for selected projects.""" output_format = action_form.cleaned_data["output_format"] output_function = output.FORMAT_TO_FUNCTION_MAPPING.get(output_format) - # In-memory file storage for the zip archive - zip_buffer = io.BytesIO() - with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zip_file: - for project in project_qs: - output_file = output_function(project) - filename = output.safe_filename(f"{project.name}_{output_file.name}") - with open(output_file, "rb") as f: - zip_file.writestr(filename, f.read()) + files = [] + for project in project_qs: + output_file = output_function(project) + filename = output.safe_filename(f"{project.name}_{output_file.name}") + files.append((filename, output_file)) - zip_buffer.seek(0) + zip_buffer = output.make_zip_from_files(files) return FileResponse( zip_buffer, as_attachment=True, @@ -1633,6 +1630,8 @@ def get(self, request, *args, **kwargs): output_file = output.to_attribution(project) elif format == "ort-package-list": output_file = output.to_ort_package_list_yml(project) + elif format == "all": + output_file = output.to_all_formats(project) else: raise Http404("Format not supported.") From 1fbe437ba25561fe9d2d0073299ce9b0b7d02655 Mon Sep 17 00:00:00 2001 From: tdruez Date: Mon, 20 Oct 2025 17:55:06 +0400 Subject: [PATCH 2/4] Add ability to download all existing outputs for a project #1880 Signed-off-by: tdruez --- scanpipe/pipes/output.py | 24 +++++++++---------- .../dropdowns/project_download_dropdown.html | 2 +- .../scanpipe/includes/project_downloads.html | 2 +- .../scanpipe/panels/project_outputs.html | 6 +++++ scanpipe/tests/pipes/test_output.py | 19 +++++++++++++-- scanpipe/views.py | 4 +++- 6 files changed, 40 insertions(+), 17 deletions(-) diff --git a/scanpipe/pipes/output.py b/scanpipe/pipes/output.py index a932438b52..03fc4ca51c 100644 --- a/scanpipe/pipes/output.py +++ b/scanpipe/pipes/output.py @@ -1153,18 +1153,6 @@ def make_zip_from_files(files): return zip_buffer -# def to_all_formats(project): -# """Generate all output formats for a project and return a zipfile.""" -# files = [] -# for output_function in FORMAT_TO_FUNCTION_MAPPING.values(): -# output_file = output_function(project) -# filename = safe_filename(f"{project.name}_{output_file.name}") -# files.append((filename, output_file)) -# zip_buffer = make_zip_from_files(files) -# zip_buffer.name = "scancodeio_output_files.zip" -# return zip_buffer - - def to_all_formats(project): """Generate all output formats for a project and return a Django File-like zip.""" files = [] @@ -1180,3 +1168,15 @@ def to_all_formats(project): zip_file.name = safe_filename(f"{project.name}_outputs.zip") return zip_file + + +def to_all_outputs(project): + """Return a Django File-like zip containing all existing project's output/ files.""" + files = [(path.name, path) for path in project.output_path.glob("*")] + zip_buffer = make_zip_from_files(files) + + # Wrap it into a Django File-like object + zip_file = ContentFile(zip_buffer.getvalue()) + zip_file.name = safe_filename(f"{project.name}_outputs.zip") + + return zip_file diff --git a/scanpipe/templates/scanpipe/dropdowns/project_download_dropdown.html b/scanpipe/templates/scanpipe/dropdowns/project_download_dropdown.html index 1b89786966..e7c9137865 100644 --- a/scanpipe/templates/scanpipe/dropdowns/project_download_dropdown.html +++ b/scanpipe/templates/scanpipe/dropdowns/project_download_dropdown.html @@ -41,7 +41,7 @@ ORT (package-list) - + All formats diff --git a/scanpipe/templates/scanpipe/includes/project_downloads.html b/scanpipe/templates/scanpipe/includes/project_downloads.html index 51e43fb6d1..e7fed550a8 100644 --- a/scanpipe/templates/scanpipe/includes/project_downloads.html +++ b/scanpipe/templates/scanpipe/includes/project_downloads.html @@ -72,7 +72,7 @@ | - + All formats diff --git a/scanpipe/templates/scanpipe/panels/project_outputs.html b/scanpipe/templates/scanpipe/panels/project_outputs.html index f0d9da8bcc..dba58ca041 100644 --- a/scanpipe/templates/scanpipe/panels/project_outputs.html +++ b/scanpipe/templates/scanpipe/panels/project_outputs.html @@ -18,4 +18,10 @@ {% endfor %} + \ No newline at end of file diff --git a/scanpipe/tests/pipes/test_output.py b/scanpipe/tests/pipes/test_output.py index 5e85fe94b6..ef5a31d2a8 100644 --- a/scanpipe/tests/pipes/test_output.py +++ b/scanpipe/tests/pipes/test_output.py @@ -644,13 +644,28 @@ def test_scanpipe_pipes_outputs_to_all_formats(self): self.assertEqual("asgiref_outputs.zip", output_file.name) - output_file.seek(0) # Important for reading from start with zipfile.ZipFile(output_file, "r") as zip_ref: zip_contents = zip_ref.namelist() file_count = len(zip_contents) expected_file_count = len(output.FORMAT_TO_FUNCTION_MAPPING) - self.assertEqual(file_count, expected_file_count) + self.assertEqual(expected_file_count, file_count) + + def test_scanpipe_pipes_outputs_to_all_outputs(self): + fixtures = self.data / "asgiref" / "asgiref-3.3.0_fixtures.json" + call_command("loaddata", fixtures, **{"verbosity": 0}) + project = Project.objects.get(name="asgiref") + + with self.assertNumQueries(0): + output_file = output.to_all_outputs(project=project) + + self.assertEqual("asgiref_outputs.zip", output_file.name) + + with zipfile.ZipFile(output_file, "r") as zip_ref: + zip_contents = zip_ref.namelist() + file_count = len(zip_contents) + + self.assertEqual(len(project.output_root), file_count) def test_scanpipe_pipes_outputs_make_unknown_license_object(self): licensing = get_licensing() diff --git a/scanpipe/views.py b/scanpipe/views.py index 5f3d4a4f3b..a9b7de1375 100644 --- a/scanpipe/views.py +++ b/scanpipe/views.py @@ -1630,8 +1630,10 @@ def get(self, request, *args, **kwargs): output_file = output.to_attribution(project) elif format == "ort-package-list": output_file = output.to_ort_package_list_yml(project) - elif format == "all": + elif format == "all_formats": output_file = output.to_all_formats(project) + elif format == "all_outputs": + output_file = output.to_all_outputs(project) else: raise Http404("Format not supported.") From ced13f720a0eeb5d14d4b3f5619b3e56b3b2e7e5 Mon Sep 17 00:00:00 2001 From: tdruez Date: Mon, 20 Oct 2025 18:08:52 +0400 Subject: [PATCH 3/4] Add support for all_formats/all_outputs in REST API #1880 Signed-off-by: tdruez --- docs/command-line-interface.rst | 7 ++++--- docs/output-files.rst | 2 -- docs/rest-api.rst | 8 +++++++- scanpipe/api/views.py | 4 ++++ scanpipe/tests/test_api.py | 10 ++++++++++ 5 files changed, 25 insertions(+), 6 deletions(-) diff --git a/docs/command-line-interface.rst b/docs/command-line-interface.rst index 5200d1c52d..3ff114410b 100644 --- a/docs/command-line-interface.rst +++ b/docs/command-line-interface.rst @@ -419,10 +419,11 @@ Displays status information about the ``PROJECT`` project. .. _cli_output: -`$ scanpipe output --project PROJECT --format {json,csv,xlsx,spdx,cyclonedx,attribution}` ------------------------------------------------------------------------------------------ +`$ scanpipe output --project PROJECT --format {json,csv,xlsx,spdx,cyclonedx,attribution,ort-package-list}` +---------------------------------------------------------------------------------------------------------- -Outputs the ``PROJECT`` results as JSON, XLSX, CSV, SPDX, CycloneDX, and Attribution. +Outputs the ``PROJECT`` results as JSON, XLSX, CSV, SPDX, CycloneDX, +ORT package-list.yml, and Attribution. The output files are created in the ``PROJECT`` :guilabel:`output/` directory. Multiple formats can be provided at once:: diff --git a/docs/output-files.rst b/docs/output-files.rst index 00a43333d7..c5ffb4b1e0 100644 --- a/docs/output-files.rst +++ b/docs/output-files.rst @@ -285,7 +285,6 @@ Additional sheets are included **only when relevant** (i.e., when data is availa SPDX ^^^^ - ScanCode.io can generate Software Bill of Materials (SBOM) in the **SPDX** format, which is an open standard for communicating software component information. SPDX is widely used for license compliance, security analysis, and software supply @@ -309,7 +308,6 @@ The SPDX output includes: CycloneDX ^^^^^^^^^ - ScanCode.io can generate **CycloneDX** SBOMs, a lightweight standard designed for security and dependency management. CycloneDX is optimized for vulnerability analysis and software supply chain risk assessment. diff --git a/docs/rest-api.rst b/docs/rest-api.rst index 84e9068053..dcfc55c315 100644 --- a/docs/rest-api.rst +++ b/docs/rest-api.rst @@ -694,10 +694,16 @@ Finally, use this action to download the project results in the provided ``output_format`` as an attachment file. Data: - - ``output_format``: ``json``, ``xlsx``, ``spdx``, ``cyclonedx``, ``attribution`` + - ``output_format``: ``json``, ``xlsx``, ``spdx``, ``cyclonedx``, ``attribution``, + ``all_formats``, ``all_outputs`` ``GET /api/projects/d4ed9405-5568-45ad-99f6-782a9b82d1d2/results_download/?output_format=cyclonedx`` +.. note:: + Use ``all_formats`` to generate a zip file containing all output formats for a + project, while ``all_outputs`` can be used to obtain a zip file of all existing + output files for that project. + .. tip:: Refer to :ref:`output_files` to learn more about the available output formats. diff --git a/scanpipe/api/views.py b/scanpipe/api/views.py index c46eaebbdf..b07ea8772f 100644 --- a/scanpipe/api/views.py +++ b/scanpipe/api/views.py @@ -171,6 +171,10 @@ def results_download(self, request, *args, **kwargs): output_file = output.to_attribution(project) elif format == "ort-package-list": output_file = output.to_ort_package_list_yml(project) + elif format == "all_formats": + output_file = output.to_all_formats(project) + elif format == "all_outputs": + output_file = output.to_all_outputs(project) else: message = {"status": f"Format {format} not supported."} return Response(message, status=status.HTTP_400_BAD_REQUEST) diff --git a/scanpipe/tests/test_api.py b/scanpipe/tests/test_api.py index 41ab650aaf..8362e6a3ff 100644 --- a/scanpipe/tests/test_api.py +++ b/scanpipe/tests/test_api.py @@ -669,6 +669,16 @@ def test_scanpipe_api_project_action_results_download_output_formats(self): # to prevent a "ResourceWarning: unclosed file" self.assertTrue(response.getvalue().startswith(b"PK")) + data = {"output_format": "all_formats"} + response = self.csrf_client.get(url, data=data) + expected = ["application/zip"] + self.assertIn(response["Content-Type"], expected) + + data = {"output_format": "all_outputs"} + response = self.csrf_client.get(url, data=data) + expected = ["application/zip"] + self.assertIn(response["Content-Type"], expected) + def test_scanpipe_api_project_action_pipelines(self): url = reverse("project-pipelines") response = self.csrf_client.get(url) From 0063203f8d1276fd4fe15d2074cc2946281840b3 Mon Sep 17 00:00:00 2001 From: tdruez Date: Mon, 20 Oct 2025 18:19:36 +0400 Subject: [PATCH 4/4] Fix format #1880 Signed-off-by: tdruez --- docs/command-line-interface.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/command-line-interface.rst b/docs/command-line-interface.rst index 3ff114410b..1f2f75dab1 100644 --- a/docs/command-line-interface.rst +++ b/docs/command-line-interface.rst @@ -419,8 +419,8 @@ Displays status information about the ``PROJECT`` project. .. _cli_output: -`$ scanpipe output --project PROJECT --format {json,csv,xlsx,spdx,cyclonedx,attribution,ort-package-list}` ----------------------------------------------------------------------------------------------------------- +`$ scanpipe output --project PROJECT --format {json,csv,xlsx,spdx,cyclonedx,attribution,...}` +--------------------------------------------------------------------------------------------- Outputs the ``PROJECT`` results as JSON, XLSX, CSV, SPDX, CycloneDX, ORT package-list.yml, and Attribution.