Skip to content

Commit c482498

Browse files
Merge branch 'main' into update-federatedcode-pipes
2 parents f3a5811 + 473c6fe commit c482498

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+3756
-242
lines changed
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
name: Generate SBOM with ORT and load into ScanCode.io
2+
3+
# This workflow:
4+
# 1. Generates a CycloneDX SBOM for a requirements.txt file using ORT.
5+
# 2. Uploads the SBOM as a GitHub artifact for future inspection.
6+
# 3. Loads the SBOM into ScanCode.io for further analysis.
7+
# 4. Runs assertions to verify that the SBOM was properly processed in ScanCode.io.
8+
#
9+
# It runs on demand, and once a week (scheduled).
10+
11+
on:
12+
workflow_dispatch:
13+
schedule:
14+
# Run once a week (every 7 days) at 00:00 UTC on Sunday
15+
- cron: "0 0 * * 0"
16+
pull_request:
17+
push:
18+
branches:
19+
- main
20+
21+
permissions:
22+
contents: read
23+
24+
jobs:
25+
generate-and-load-sbom:
26+
runs-on: ubuntu-24.04
27+
steps:
28+
- name: Create a Python requirements.txt
29+
run: |
30+
cat << 'EOF' > requirements.txt
31+
amqp==5.1.1
32+
appdirs==1.4.4
33+
asgiref==3.5.2
34+
urllib3==1.26.0
35+
EOF
36+
37+
- name: Run GitHub Action for ORT
38+
uses: oss-review-toolkit/ort-ci-github-action@v1
39+
40+
- name: Import SBOM into ScanCode.io
41+
uses: aboutcode-org/scancode-action@main
42+
with:
43+
pipelines: "load_sbom"
44+
inputs-path: "${{ env.ORT_RESULTS_PATH }}/bom.cyclonedx.json"
45+
scancodeio-repo-branch: "main"
46+
47+
- name: Verify SBOM Analysis Results in ScanCode.io
48+
shell: bash
49+
run: |
50+
scanpipe shell --command "from scanpipe.models import DiscoveredPackage, DiscoveredDependency; package_manager = DiscoveredPackage.objects; assert package_manager.count() >= 5; assert package_manager.vulnerable().count() >= 1; assert DiscoveredDependency.objects.count() >= 1"

CHANGELOG.rst

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,38 @@ Changelog
44
v35.4.0 (unreleased)
55
--------------------
66

7+
- Use deterministic UID/GID in Dockerfile.
8+
A temporary ``chown`` service is now started in the ``docker-compose`` stack
9+
to fix the permissions. This process is only fully run once.
10+
You may manually run this process using the following:
11+
``$ chown -R 1000:1000 /var/scancodeio/``
12+
https://github.com/aboutcode-org/scancode.io/issues/1555
13+
714
- Resolve and load dependencies from SPDX SBOMs.
815
https://github.com/aboutcode-org/scancode.io/issues/1145
916

17+
- Display the optional steps in the Pipelines autodoc.
18+
https://github.com/aboutcode-org/scancode.io/issues/1822
19+
20+
- Add new ``benchmark_purls`` pipeline.
21+
https://github.com/aboutcode-org/scancode.io/issues/1804
22+
23+
- Add a Resources tree view.
24+
https://github.com/aboutcode-org/scancode.io/issues/1682
25+
26+
- Improve CycloneDX SBOM support.
27+
* Upgrade the cyclonedx-python-lib to 11.0.0
28+
* Fix the validate_document following library upgrade.
29+
* Add support when the "components" entry is missing.
30+
https://github.com/aboutcode-org/scancode.io/issues/1727
31+
1032
- Split the functionality of
1133
``scanpipe.pipes.federatedcode.commit_and_push_changes`` into
1234
``scanpipe.pipes.federatedcode.commit_changes`` and
1335
``scanpipe.pipes.federatedcode.push_changes``. Add
1436
``scanpipe.pipes.federatedcode.write_data_as_yaml``.
1537

38+
1639
v35.3.0 (2025-08-20)
1740
--------------------
1841

Dockerfile

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -26,17 +26,23 @@ LABEL org.opencontainers.image.source="https://github.com/aboutcode-org/scancode
2626
LABEL org.opencontainers.image.description="ScanCode.io"
2727
LABEL org.opencontainers.image.licenses="Apache-2.0"
2828

29-
ENV APP_NAME scancodeio
30-
ENV APP_USER app
31-
ENV APP_DIR /opt/$APP_NAME
32-
ENV VENV_LOCATION /opt/$APP_NAME/.venv
29+
# Set default values for APP_UID and APP_GID at build-time
30+
ARG APP_UID=1000
31+
ARG APP_GID=1000
32+
33+
ENV APP_NAME=scancodeio
34+
ENV APP_USER=app
35+
ENV APP_UID=${APP_UID}
36+
ENV APP_GID=${APP_GID}
37+
ENV APP_DIR=/opt/$APP_NAME
38+
ENV VENV_LOCATION=/opt/$APP_NAME/.venv
3339

3440
# Force Python unbuffered stdout and stderr (they are flushed to terminal immediately)
35-
ENV PYTHONUNBUFFERED 1
41+
ENV PYTHONUNBUFFERED=1
3642
# Do not write Python .pyc files
37-
ENV PYTHONDONTWRITEBYTECODE 1
43+
ENV PYTHONDONTWRITEBYTECODE=1
3844
# Add the app dir in the Python path for entry points availability
39-
ENV PYTHONPATH $PYTHONPATH:$APP_DIR
45+
ENV PYTHONPATH=$PYTHONPATH:$APP_DIR
4046

4147
# OS requirements as per
4248
# https://scancode-toolkit.readthedocs.io/en/latest/getting-started/install.html
@@ -64,27 +70,24 @@ RUN apt-get update \
6470
&& apt-get clean \
6571
&& rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
6672

67-
# Create the APP_USER group and user
68-
RUN addgroup --system $APP_USER \
69-
&& adduser --system --group --home=$APP_DIR $APP_USER \
70-
&& chown $APP_USER:$APP_USER $APP_DIR
71-
72-
# Create the /var/APP_NAME directory with proper permission for APP_USER
73-
RUN mkdir -p /var/$APP_NAME \
73+
# Create the APP_USER group, user, and directory with specific UID and GID
74+
RUN groupadd --gid $APP_GID --system $APP_USER \
75+
&& useradd --uid $APP_UID --gid $APP_GID --home-dir $APP_DIR --system --create-home $APP_USER \
76+
&& chown $APP_USER:$APP_USER $APP_DIR \
77+
&& mkdir -p /var/$APP_NAME \
7478
&& chown $APP_USER:$APP_USER /var/$APP_NAME
7579

7680
# Setup the work directory and the user as APP_USER for the remaining stages
7781
WORKDIR $APP_DIR
7882
USER $APP_USER
7983

84+
# Create static/ and workspace/ directories
85+
RUN mkdir -p /var/$APP_NAME/static/ /var/$APP_NAME/workspace/
86+
8087
# Create the virtualenv
8188
RUN python -m venv $VENV_LOCATION
8289
# Enable the virtualenv, similar effect as "source activate"
83-
ENV PATH $VENV_LOCATION/bin:$PATH
84-
85-
# Create static/ and workspace/ directories
86-
RUN mkdir -p /var/$APP_NAME/static/ \
87-
&& mkdir -p /var/$APP_NAME/workspace/
90+
ENV PATH=$VENV_LOCATION/bin:$PATH
8891

8992
# Install the dependencies before the codebase COPY for proper Docker layer caching
9093
COPY --chown=$APP_USER:$APP_USER pyproject.toml $APP_DIR/

docker-compose.yml

Lines changed: 48 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,11 @@ services:
88
- db_data:/var/lib/postgresql/data/
99
shm_size: "1gb"
1010
restart: always
11+
healthcheck:
12+
test: [ "CMD-SHELL", "pg_isready -U $${POSTGRES_USER} -d $${POSTGRES_DB}" ]
13+
interval: 10s
14+
timeout: 5s
15+
retries: 5
1116

1217
redis:
1318
image: docker.io/library/redis:latest
@@ -18,12 +23,45 @@ services:
1823
- redis_data:/data
1924
restart: always
2025

26+
# This service is responsible for ensuring the correct ownership of files
27+
# in the shared volumes used by the application (static and workspace).
28+
# It ensures that all files inside the `/var/scancodeio/` directory are owned
29+
# by the user and group with the UID and GID defined in the environment variables
30+
# APP_UID and APP_GID, which default to 1000 if not set.
31+
#
32+
# The service runs only once (due to "restart: no") and performs a `chown` operation
33+
# to change the ownership of the static and workspace directories, ensuring proper
34+
# file access rights for the running application containers.
35+
#
36+
# Volumes mounted:
37+
# - static: Ensures the ownership of static files in the /var/scancodeio/static directory
38+
# - workspace: Ensures the ownership of workspace files in the /var/scancodeio/workspace directory
39+
#
40+
# Note: This service can be removed in a future ScanCode.io release.
41+
chown:
42+
image: docker.io/library/alpine:latest
43+
restart: "no"
44+
command: sh -c "
45+
if [ ! -f /var/scancodeio/workspace/.chown_done ]; then
46+
chown -R ${APP_UID:-1000}:${APP_GID:-1000} /var/scancodeio/ &&
47+
touch /var/scancodeio/workspace/.chown_done &&
48+
echo 'Chown applied!';
49+
else
50+
echo 'Chown already applied, skipping...';
51+
fi"
52+
env_file:
53+
- docker.env
54+
volumes:
55+
- static:/var/scancodeio/static/
56+
- workspace:/var/scancodeio/workspace/
57+
2158
web:
2259
build: .
23-
command: wait-for-it --strict --timeout=60 db:5432 -- sh -c "
60+
command: sh -c "
2461
./manage.py migrate &&
2562
./manage.py collectstatic --no-input --verbosity 0 --clear &&
26-
gunicorn scancodeio.wsgi:application --bind :8000 --timeout 600 --workers 8 ${GUNICORN_RELOAD_FLAG:-}"
63+
gunicorn scancodeio.wsgi:application --bind :8000 --timeout 600 \
64+
--workers 8 ${GUNICORN_RELOAD_FLAG:-}"
2765
env_file:
2866
- docker.env
2967
expose:
@@ -34,12 +72,17 @@ services:
3472
- workspace:/var/scancodeio/workspace/
3573
- static:/var/scancodeio/static/
3674
depends_on:
37-
- db
75+
db:
76+
condition: service_healthy
77+
redis:
78+
condition: service_started
79+
chown:
80+
condition: service_completed_successfully
3881

3982
worker:
4083
build: .
4184
# Ensure that potential db migrations run first by waiting until "web" is up
42-
command: wait-for-it --strict --timeout=120 web:8000 -- sh -c "
85+
command: wait-for-it --strict --timeout=600 web:8000 -- sh -c "
4386
./manage.py rqworker --worker-class scancodeio.worker.ScanCodeIOWorker
4487
--queue-class scancodeio.worker.ScanCodeIOQueue
4588
--verbosity 1"
@@ -53,6 +96,7 @@ services:
5396
- redis
5497
- db
5598
- web
99+
- chown
56100

57101
nginx:
58102
image: docker.io/library/nginx:alpine

docs/built-in-pipelines.rst

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,47 @@ Analyse Docker Windows Image
4646
:members:
4747
:member-order: bysource
4848

49+
.. _pipeline_benchmark_purls:
50+
51+
Benchmark PURLs (addon)
52+
-----------------------
53+
54+
To check an **SBOM against a list of expected Package URLs (PURLs)**:
55+
56+
1. **Create a new project** and provide two inputs:
57+
58+
* The SBOM file you want to check.
59+
* A list of expected PURLs in a ``*-purls.txt`` file with one PURL per line.
60+
61+
.. tip:: You may also flag any filename using the ``purls`` input tag.
62+
63+
2. **Run the pipelines**:
64+
65+
* Select and run the ``load_sbom`` pipeline to load the SBOM.
66+
* Run the ``benchmark_purls`` pipeline to validate against the expected PURLs.
67+
68+
3. **Download the results** from the "output" section of the project.
69+
70+
The output file contains only the differences between the discovered PURLs and
71+
the expected PURLs:
72+
73+
* Lines starting with ``-`` are missing from the project.
74+
* Lines starting with ``+`` are unexpected in the project.
75+
76+
.. note::
77+
The ``load_sbom`` pipeline is provided as an example to benchmark external
78+
tools using SBOMs as inputs. You can also run ``benchmark_purls`` directly
79+
after any ScanCode.io pipeline to validate the discovered PURLs.
80+
81+
.. tip::
82+
You can provide multiple expected PURLs files.
83+
84+
85+
.. autoclass:: scanpipe.pipelines.benchmark_purls.BenchmarkPurls()
86+
:members:
87+
:member-order: bysource
88+
89+
4990
.. _pipeline_collect_strings_gettext:
5091

5192
Collect string with Xgettext (addon)

docs/conf.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,3 +81,23 @@
8181

8282
# user starts in light mode (Default Mode)
8383
default_dark_mode = False
84+
85+
86+
# Display the optional steps in the Pipelines autodoc.
87+
def autodoc_process_docstring(app, what, name, obj, options, lines):
88+
"""
89+
Sphinx autodoc extension hook to insert `@optional_step` groups
90+
into the generated documentation.
91+
92+
If a function or method has been decorated with ``@optional_step``,
93+
the decorator attaches a ``.groups`` attribute to the object.
94+
This hook inspects that attribute during autodoc processing and prepends
95+
a short note at the top of the function’s docstring.
96+
"""
97+
if hasattr(obj, "groups"):
98+
groups_str = " ".join(f":guilabel:`{group}`" for group in sorted(obj.groups))
99+
lines[:0] = [f"**Optional step:** {groups_str}", ""]
100+
101+
102+
def setup(app):
103+
app.connect("autodoc-process-docstring", autodoc_process_docstring)

docs/faq.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -376,9 +376,10 @@ are actively supported and tested::
376376
- Anchore: https://anchore.com/sbom/
377377
- CycloneDX cdxgen: https://cyclonedx.github.io/cdxgen/
378378
- OWASP dep-scan: https://owasp.org/www-project-dep-scan/
379+
- OSS Review Toolkit (ORT): https://oss-review-toolkit.org/ort/
380+
- OSV-Scanner: https://osv.dev/
379381
- SBOM tool: https://github.com/microsoft/sbom-tool/
380382
- Trivy: https://trivy.dev/
381-
- OSV-Scanner: https://osv.dev/
382383

383384
.. note:: Imported SBOMs must follow the SPDX or CycloneDX standards, in JSON format.
384385
You can use the ``load_sbom`` pipeline to process and enhance these SBOMs in your

docs/scanpipe-pipes.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,11 @@ Generic
88
.. automodule:: scanpipe.pipes
99
:members:
1010

11+
Benchmark
12+
---------
13+
.. automodule:: scanpipe.pipes.benchmark
14+
:members:
15+
1116
ClamAV
1217
------
1318
.. automodule:: scanpipe.pipes.clamav

pyproject.toml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,8 +82,8 @@ dependencies = [
8282
# Profiling
8383
"pyinstrument==5.1.1",
8484
# CycloneDX
85-
"cyclonedx-python-lib==10.2.0",
86-
"jsonschema==4.24.0",
85+
"cyclonedx-python-lib==11.0.0",
86+
"jsonschema==4.25.1",
8787
# MatchCode-toolkit
8888
"matchcode-toolkit==7.2.2",
8989
# Univers
@@ -135,6 +135,7 @@ run = "scancodeio:combined_run"
135135
analyze_docker_image = "scanpipe.pipelines.analyze_docker:Docker"
136136
analyze_root_filesystem_or_vm_image = "scanpipe.pipelines.analyze_root_filesystem:RootFS"
137137
analyze_windows_docker_image = "scanpipe.pipelines.analyze_docker_windows:DockerWindows"
138+
benchmark_purls = "scanpipe.pipelines.benchmark_purls:BenchmarkPurls"
138139
collect_strings_gettext = "scanpipe.pipelines.collect_strings_gettext:CollectStringsGettext"
139140
collect_symbols_ctags = "scanpipe.pipelines.collect_symbols_ctags:CollectSymbolsCtags"
140141
collect_symbols_pygments = "scanpipe.pipelines.collect_symbols_pygments:CollectSymbolsPygments"

0 commit comments

Comments
 (0)