From 610689c2c01db4d1aaa09753b9b9aab34748c86c Mon Sep 17 00:00:00 2001 From: Ayan Sinha Mahapatra Date: Fri, 25 Oct 2024 16:18:33 +0700 Subject: [PATCH 1/8] Reference: https://github.com/aboutcode-org/scancode-toolkit/issues/3954 Signed-off-by: Ayan Sinha Mahapatra Signed-off-by: Jono Yang Signed-off-by: Jono Yang addded support to parse labels in dockerfile Signed-off-by: Varsha U N --- requirements-linux.txt | 2 +- requirements.txt | 2 +- setup-mini.cfg | 2 +- setup.cfg | 4 +- src/licensedcode/tokenize.py | 1 + src/packagedcode/__init__.py | 2 + src/packagedcode/dockerfile.py | 59 +++++++++++++ src/packagedcode/recognize.py | 8 ++ ...ct_toml.cpython-312-pytest-8.3.3.pyc.26520 | Bin 0 -> 2086 bytes .../data/docker/containerfile-expected.json | 13 +++ .../data/docker/psql-expected.json | 10 +++ .../packagedcode/data/docker/psql.dockerfile | 5 ++ .../data/docker/test-dockerfile-expected.json | 18 ++++ .../data/docker/test.containerfile | 80 ++++++++++++++++++ .../packagedcode/data/docker/test.dockerfile | 36 ++++++++ tests/packagedcode/test_dockerfile.py | 60 +++++++++++++ 16 files changed, 297 insertions(+), 5 deletions(-) create mode 100644 src/packagedcode/dockerfile.py create mode 100644 tests/packagedcode/__pycache__/test_parse_pyproject_toml.cpython-312-pytest-8.3.3.pyc.26520 create mode 100644 tests/packagedcode/data/docker/containerfile-expected.json create mode 100644 tests/packagedcode/data/docker/psql-expected.json create mode 100644 tests/packagedcode/data/docker/psql.dockerfile create mode 100644 tests/packagedcode/data/docker/test-dockerfile-expected.json create mode 100644 tests/packagedcode/data/docker/test.containerfile create mode 100644 tests/packagedcode/data/docker/test.dockerfile create mode 100644 tests/packagedcode/test_dockerfile.py diff --git a/requirements-linux.txt b/requirements-linux.txt index cd7dbabc2fe..9269a7bd04b 100644 --- a/requirements-linux.txt +++ b/requirements-linux.txt @@ -1,4 +1,4 @@ packagedcode-msitools==0.101.210706 regipy==3.1.0 rpm-inspector-rpm==4.16.1.3.210404 -go-inspector==0.3.1 +go-inspector==0.5.0 diff --git a/requirements.txt b/requirements.txt index 38dfdc65b7c..fb3ec03fb7f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -35,7 +35,7 @@ jaraco.functools==4.1.0 javaproperties==0.8.1 Jinja2==3.1.3 jsonstreams==0.6.0 -license-expression==30.3.0 +license-expression==30.4.0 lxml==5.1.0 MarkupSafe==2.1.5 more-itertools==8.13.0 diff --git a/setup-mini.cfg b/setup-mini.cfg index e435bd76c75..cd48f30d97c 100644 --- a/setup-mini.cfg +++ b/setup-mini.cfg @@ -149,7 +149,7 @@ packages = rpm_inspector_rpm >= 4.16.1.3; platform_system == 'Linux' regipy >= 3.1.0; platform_system == 'Linux' packagedcode_msitools >= 0.101.210706; platform_system == 'Linux' - go-inspector >= 0.3.1; platform_system == 'Linux' + go-inspector >= 0.5.0; platform_system == 'Linux' [options.entry_points] diff --git a/setup.cfg b/setup.cfg index b73f1d931db..8d1b28e381f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -84,7 +84,7 @@ install_requires = javaproperties >= 0.5 jinja2 >= 2.7.0 jsonstreams >= 0.5.0 - license_expression >= 30.1.1 + license_expression >= 30.4.0 lxml >= 4.9.2 MarkupSafe >= 2.1.2 packageurl_python >= 0.9.0 @@ -149,7 +149,7 @@ packages = rpm_inspector_rpm >= 4.16.1.3; platform_system == 'Linux' regipy >= 3.1.0; platform_system == 'Linux' packagedcode_msitools >= 0.101.210706; platform_system == 'Linux' - go-inspector >= 0.3.1; platform_system == 'Linux' + go-inspector >= 0.5.0; platform_system == 'Linux' [options.entry_points] diff --git a/src/licensedcode/tokenize.py b/src/licensedcode/tokenize.py index 9ac885941db..fdcc06fb45d 100644 --- a/src/licensedcode/tokenize.py +++ b/src/licensedcode/tokenize.py @@ -336,6 +336,7 @@ def select_ngrams(ngrams, with_pos=False): >>> list(select_ngrams(x for x in [(2, 1, 3), (1, 1, 3), (5, 1, 3), (2, 6, 1), (7, 3, 4)])) [(2, 1, 3), (1, 1, 3), (5, 1, 3), (2, 6, 1), (7, 3, 4)] """ + ngram = None last = None for pos, ngram in enumerate(ngrams): # FIXME: use a proper hash diff --git a/src/packagedcode/__init__.py b/src/packagedcode/__init__.py index 6692087aa4d..69fc770bec4 100644 --- a/src/packagedcode/__init__.py +++ b/src/packagedcode/__init__.py @@ -20,6 +20,7 @@ from packagedcode import debian from packagedcode import debian_copyright from packagedcode import distro +from packagedcode import dockerfile from packagedcode import conda from packagedcode import conan from packagedcode import cocoapods @@ -97,6 +98,7 @@ debian.DebianSourcePackageTarballHandler, distro.EtcOsReleaseHandler, + dockerfile.DockerfileHandler, freebsd.CompactManifestHandler, diff --git a/src/packagedcode/dockerfile.py b/src/packagedcode/dockerfile.py new file mode 100644 index 00000000000..cc97bdc1477 --- /dev/null +++ b/src/packagedcode/dockerfile.py @@ -0,0 +1,59 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# ScanCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/scancode-toolkit for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + + + +import io +from pathlib import Path +from dockerfile_parse import DockerfileParser +from packagedcode import models +from packagedcode import utils +import fnmatch + + +class DockerfileHandler(models.DatafileHandler): + datasource_id = 'dockerfile_oci_labels' + + @classmethod + def is_datafile(cls, path): + patterns = ['Dockerfile', 'containerfile', '*.dockerfile'] + filename=os.path.basename(path) + for pattern in patterns: + if fnmatch.fnmatch(filename, pattern): + return True + return False + + @classmethod + def parse(cls, location, package_only=False): + """ + Parse a Dockerfile and yield one or more PackageData objects with OCI labels and metadata. + """ + labels = cls.extract_oci_labels_from_dockerfile(location) + package_data = { + 'datasource_id': cls.datasource_id, + 'type': cls.default_package_type, + 'name': labels.get('name', 'None'), + 'version': labels.get('version', 'None'), + 'license_expression': labels.get('license', 'None'), + 'labels': labels, + } + + yield models.PackageData.from_data(package_data, package_only) + + @classmethod + def extract_oci_labels_from_dockerfile(cls, dockerfile_path): + """ + Extract OCI labels from the Dockerfile using DockerfileParser. + """ + labels = {} + parser = DockerfileParser() + with open(dockerfile_path, 'r') as dockerfile: + parser.content = dockerfile.read() + labels = parser.labels + return labels diff --git a/src/packagedcode/recognize.py b/src/packagedcode/recognize.py index 9dd7c643482..9c98b414e19 100644 --- a/src/packagedcode/recognize.py +++ b/src/packagedcode/recognize.py @@ -113,3 +113,11 @@ def _parse( if TRACE: raise + + except Exception as e: + # We should continue when an Exception has occured when trying to + # recognize a package + if TRACE: + logger_debug(f'_parse: Exception: {str(e)}') + + continue diff --git a/tests/packagedcode/__pycache__/test_parse_pyproject_toml.cpython-312-pytest-8.3.3.pyc.26520 b/tests/packagedcode/__pycache__/test_parse_pyproject_toml.cpython-312-pytest-8.3.3.pyc.26520 new file mode 100644 index 0000000000000000000000000000000000000000..1eb645b2da6cf3b0de46e3c067534f642a59b285 GIT binary patch literal 2086 zcmdT_&2Jk;6rb4-+v~*na6?E@ft8w;_)w=UEr+V9KwznnP~ienElaDZXB|6P@2+Mh zO>M2!3VO%^Au0|n3Wyv*`4f=1^~9x8Z$qUbIB^R~rAR*UX6=cs0)hh^7<=cF+Ues}Ft1EF7(q6g^dIJgYrKB9=?09A1+jkZ}gFlt*>8xK60Y9{q$ ztJzfMsyS-hMOCMROXjaC(6xFRZa8H@n(!Qa59dI1+(XppVSzO$LDbwEa=3_khEV8q ztXq0Dl8Lc5+}ShXeE8|T7>&im&&5=O|8EPpu6k zwGz95shyaK)iCyk>}f_6G;V!LuSRljxV(w@3uEzTMhLx1Or1L$M;5hzBuB^)(+ti2 zYYeP^%-{$K46-!$rP0km6*FTQ&W-r$6?GDG2hn_D?VzuYBG8F00O(|rY2h?VR=lWp zjKy2fwvkM-ny;20L0|Xm4myd>A&$48x341%zt8X|?k)2=+QNV0F$bPf;aayUd@lHM zOzInC&8PK<`pc9EvK)8fmYP_!?$^Z`5w(M5E=WiTqhJ@!DD-01x=DoZ4R?>=Wd-jx z|GU^{{97^5@@aXUN1?>5g!{(IN(pz1rw zXf>VhzVSX`+^4R>xis>*8%Dxy6H#xvqUpPOX9_6i2k1*~cOqXIk^yBVe}!=FGm##C z0S<@Q7fJLds}=EtlawPV73ISFp>#%3!J zt&q!15CMYA1&=qQ&48v*Z!L&!kiZK`+n0{VNyAqWAX+10K;Ih>YV$QY^;XKQ6@?YX zA|{V%2;>%!kO=UUa>IJXL^NelXYDDraMNJ38m+)*(Ef<2_mG@W90!M-8z_T=UmsU4 z@*H5@;pmgMFV#MPI`P_vQ1vFM@j3}r_d%yc!A48`(U5jiZM;)!Y2lzw9C}c{o9m-|_ zCXd5ysN$CgO#w|+X8smy3FyxwlgtinY#R13^(A4GYPS*b^n$)cKnRSlz|_xOZbx)8 m@UO7fKvkbiuHKLPCdT+FLU+-}6F(yOI|tS@zG|@3;QSqlyYlP+ literal 0 HcmV?d00001 diff --git a/tests/packagedcode/data/docker/containerfile-expected.json b/tests/packagedcode/data/docker/containerfile-expected.json new file mode 100644 index 00000000000..92fe52b8421 --- /dev/null +++ b/tests/packagedcode/data/docker/containerfile-expected.json @@ -0,0 +1,13 @@ +[ + { + "datasource_id": "dockerfile_oci_labels", + "type": "default", + "name": "Unknown", + "version": "Unknown", + "license_expression": "GPL-2.0-only AND BSD-2-Clause", + "labels": { + "source": "https://github.com/kubernetes-sigs/blixt", + "licenses": "GPL-2.0-only,BSD-2-Clause" + } + } +] diff --git a/tests/packagedcode/data/docker/psql-expected.json b/tests/packagedcode/data/docker/psql-expected.json new file mode 100644 index 00000000000..d9f8bb4c5ce --- /dev/null +++ b/tests/packagedcode/data/docker/psql-expected.json @@ -0,0 +1,10 @@ +[ + { + "datasource_id": "dockerfile_oci_labels", + "type": "default", + "license_expression": "MIT", + "labels": { + "source": "https://github.com/kreneskyp/ix" + } + } +] diff --git a/tests/packagedcode/data/docker/psql.dockerfile b/tests/packagedcode/data/docker/psql.dockerfile new file mode 100644 index 00000000000..cbdd9e54eb5 --- /dev/null +++ b/tests/packagedcode/data/docker/psql.dockerfile @@ -0,0 +1,5 @@ +FROM postgres:15.3 +LABEL org.opencontainers.image.source https://github.com/kreneskyp/ix + +RUN apt update -y && \ + apt install -y postgresql-15-pgvector \ diff --git a/tests/packagedcode/data/docker/test-dockerfile-expected.json b/tests/packagedcode/data/docker/test-dockerfile-expected.json new file mode 100644 index 00000000000..2a843acee2d --- /dev/null +++ b/tests/packagedcode/data/docker/test-dockerfile-expected.json @@ -0,0 +1,18 @@ +[ + { + "datasource_id": "dockerfile_oci_labels", + "type": "default", + "name": "Kanboard", + "version": "1.2.42", + "license_expression": "MIT", + "labels": { + "source": "https://github.com/kanboard/kanboard", + "title": "Kanboard", + "description": "Kanboard is project management software that focuses on the Kanban methodology", + "vendor": "Kanboard", + "licenses": "MIT", + "url": "https://kanboard.org", + "documentation": "https://docs.kanboard.org" + } + } +] diff --git a/tests/packagedcode/data/docker/test.containerfile b/tests/packagedcode/data/docker/test.containerfile new file mode 100644 index 00000000000..2145e4476d0 --- /dev/null +++ b/tests/packagedcode/data/docker/test.containerfile @@ -0,0 +1,80 @@ +#Copied from https://github.com/kubernetes-sigs/blixt/blob + + +FROM rust:1.79-slim-bookworm as builder + +ARG TARGETARCH +ARG LLVM_VERSION=19 + +RUN apt-get update +RUN apt-get install --yes \ + build-essential \ + protobuf-compiler \ + pkg-config \ + musl-tools \ + clang \ + wget + +RUN apt install --yes lsb-release software-properties-common gnupg +RUN wget -O /tmp/llvm.sh https://apt.llvm.org/llvm.sh +RUN chmod +x /tmp/llvm.sh +RUN /bin/sh -c "/tmp/llvm.sh ${LLVM_VERSION} all" + +RUN rustup default stable +RUN rustup install nightly +RUN rustup component add rust-src --toolchain nightly +RUN --mount=type=cache,target=/root/.cargo/registry \ + cargo install bpf-linker + +WORKDIR /workspace +# Docker uses the amd64/arm64 convention while Rust uses the x86_64/aarch64 convention. +# Since Dockerfile doesn't support conditional variables (sigh), write the arch in Rust's +# convention to a file for later usage. +RUN if [ "$TARGETARCH" = "amd64" ]; \ + then echo "x86_64" >> arch; \ + else echo "aarch64" >> arch; \ + fi +RUN rustup target add $(eval cat arch)-unknown-linux-musl + +COPY dataplane dataplane +COPY tools/udp-test-server tools/udp-test-server +COPY xtask xtask +COPY Cargo.toml Cargo.toml +COPY Cargo.lock Cargo.lock +COPY .cargo .cargo + +# We need to tell bpf-linker where it can find LLVM's shared library file. +# Ref: https://github.com/aya-rs/rustc-llvm-proxy/blob/cbcb3c6/src/lib.rs#L48 +ENV LD_LIBRARY_PATH="/usr/lib/llvm-$LLVM_VERSION/lib" +ENV CC_aarch64_unknown_linux_musl="/usr/lib/llvm-$LLVM_VERSION/bin/clang" +ENV AR_aarch64_unknown_linux_musl="/usr/lib/llvm-$LLVM_VERSION/bin/llvm-ar" +ENV CC_x86_64_unknown_linux_musl="/usr/lib/llvm-$LLVM_VERSION/bin/clang" +ENV AR_x86_64_unknown_linux_musl="/usr/lib/llvm-$LLVM_VERSION/bin/llvm-ar" +ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_MUSL_RUSTFLAGS="-Clink-self-contained=yes -Clinker=rust-lld" + +RUN --mount=type=cache,target=/workspace/target/ \ + --mount=type=cache,target=/root/.cargo/registry \ + cargo xtask build-ebpf --release +RUN --mount=type=cache,target=/workspace/target/ \ + --mount=type=cache,target=/root/.cargo/registry \ + RUSTFLAGS=-Ctarget-feature=+crt-static cargo build \ + --workspace \ + --exclude ebpf \ + --release \ + --target=$(eval cat arch)-unknown-linux-musl +RUN --mount=type=cache,target=/workspace/target/ \ + cp /workspace/target/$(eval cat arch)-unknown-linux-musl/release/loader /workspace/dataplane-release + +FROM alpine + +LABEL org.opencontainers.image.source=https://github.com/kubernetes-sigs/blixt +LABEL org.opencontainers.image.licenses=GPL-2.0-only,BSD-2-Clause + +WORKDIR /opt/blixt/ + +COPY --from=builder /workspace/dataplane-release /opt/blixt/dataplane + +COPY dataplane/LICENSE.GPL-2.0 /opt/blixt/LICENSE.GPL-2.0 +COPY dataplane/LICENSE.BSD-2-Clause /opt/blixt/LICENSE.BSD-2-Clause + +ENTRYPOINT ["/opt/blixt/dataplane"] \ No newline at end of file diff --git a/tests/packagedcode/data/docker/test.dockerfile b/tests/packagedcode/data/docker/test.dockerfile new file mode 100644 index 00000000000..bb5efa2459e --- /dev/null +++ b/tests/packagedcode/data/docker/test.dockerfile @@ -0,0 +1,36 @@ +#Copied from https://github.com/kanboard/kanboard + +FROM alpine:3.21 + +LABEL org.opencontainers.image.source https://github.com/kanboard/kanboard +LABEL org.opencontainers.image.title=Kanboard +LABEL org.opencontainers.image.description="Kanboard is project management software that focuses on the Kanban methodology" +LABEL org.opencontainers.image.vendor=Kanboard +LABEL org.opencontainers.image.licenses=MIT +LABEL org.opencontainers.image.url=https://kanboard.org +LABEL org.opencontainers.image.documentation=https://docs.kanboard.org + +VOLUME /var/www/app/data +VOLUME /var/www/app/plugins +VOLUME /etc/nginx/ssl + +EXPOSE 80 443 + +ARG VERSION + +RUN apk --no-cache --update add \ + tzdata openssl unzip nginx bash ca-certificates s6 curl ssmtp mailx php83 php83-phar php83-curl \ + php83-fpm php83-json php83-zlib php83-xml php83-dom php83-ctype php83-opcache php83-zip php83-iconv \ + php83-pdo php83-pdo_mysql php83-pdo_sqlite php83-pdo_pgsql php83-mbstring php83-session php83-bcmath \ + php83-gd php83-openssl php83-sockets php83-posix php83-ldap php83-simplexml php83-xmlwriter && \ + rm -rf /var/www/localhost && \ + rm -f /etc/php83/php-fpm.d/www.conf && \ + ln -sf /usr/bin/php83 /usr/bin/php + +ADD . /var/www/app +ADD docker/ / + +RUN rm -rf /var/www/app/docker && echo $VERSION > /var/www/app/app/version.txt + +ENTRYPOINT ["/usr/local/bin/entrypoint.sh"] +CMD [] \ No newline at end of file diff --git a/tests/packagedcode/test_dockerfile.py b/tests/packagedcode/test_dockerfile.py new file mode 100644 index 00000000000..34353f056e5 --- /dev/null +++ b/tests/packagedcode/test_dockerfile.py @@ -0,0 +1,60 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# ScanCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/scancode-toolkit for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +from packagedcode import dockerfile +import pytest +import os.path +import json +from pathlib import Path +from packagedcode.dockerfile import DockerfileHandler + +class TestDockerfileHandler: + + def get_test_loc(self, path): + return Path(os.path.join(os.path.dirname(__file__), 'data')) + + def load_expected(self, expected_file): + with open(expected_file) as f: + return json.load(f) + + def test_is_datafile(self): + dockerfiles = [ + 'test.dockerfile', + 'test.containerfile', + 'psql.dockerfile' + ] + for dockerfile in dockerfiles: + test_file = self.get_test_loc(f'data/docker/{dockerfile}') + assert DockerfileHandler.is_datafile(str(test_file)) + + def test_parse_dockerfile(self): + test_files = [ + ('test.dockerfile', 'test-dockerfile-expected.json'), + ('test.containerfile', 'containerfile-expected.json'), + ('psql.dockerfile', 'psql-expected.json') + ] + for dockerfile, expected in test_files: + test_file = self.get_test_loc(f'data/docker/{dockerfile}') + expected_loc = self.get_test_loc(f'data/docker/{expected}') + packages = list(DockerfileHandler.parse(str(test_file))) + expected_packages = self.load_expected(expected_loc) + assert packages == expected_packages + + def test_extract_oci_labels_from_dockerfile(self, mocker): + dockerfiles = [ + 'test.dockerfile', + 'test.containerfile', + 'psql.dockerfile' + ] + for dockerfile in dockerfiles: + dockerfile_path = self.get_test_loc(f'data/docker/{dockerfile}') + labels = DockerfileHandler.extract_oci_labels_from_dockerfile(str(dockerfile_path)) + expected_loc = self.get_test_loc(f'data/docker/{dockerfile.replace(".dockerfile", "-expected.json").replace(".containerfile", "-expected.json")}') + expected_labels = self.load_expected(expected_loc)[0]['labels'] + assert labels == expected_labels From c70cde100120514f41de481eac525a8e90034251 Mon Sep 17 00:00:00 2001 From: Varsha U N Date: Tue, 18 Mar 2025 22:26:04 +0530 Subject: [PATCH 2/8] Update setup-mini.cfg --- setup-mini.cfg | 2 -- 1 file changed, 2 deletions(-) diff --git a/setup-mini.cfg b/setup-mini.cfg index 9d24ba15ba7..b335a995f1d 100644 --- a/setup-mini.cfg +++ b/setup-mini.cfg @@ -150,9 +150,7 @@ packages = regipy >= 3.1.0; platform_system == 'Linux' packagedcode_msitools >= 0.101.210706; platform_system == 'Linux' go-inspector >= 0.5.0; platform_system == 'Linux' - rust-inspector >= 0.1.0; platform_system == 'Linux' - develop [options.entry_points] console_scripts = From 1e62f9995d5e28697103f44268281b0a5bfa30d4 Mon Sep 17 00:00:00 2001 From: Varsha U N Date: Tue, 18 Mar 2025 22:26:53 +0530 Subject: [PATCH 3/8] Update setup.cfg --- setup.cfg | 2 -- 1 file changed, 2 deletions(-) diff --git a/setup.cfg b/setup.cfg index 962943e75e5..c6b5d10e835 100644 --- a/setup.cfg +++ b/setup.cfg @@ -150,10 +150,8 @@ packages = regipy >= 3.1.0; platform_system == 'Linux' packagedcode_msitools >= 0.101.210706; platform_system == 'Linux' go-inspector >= 0.5.0; platform_system == 'Linux' - support-OCI-labels rust-inspector >= 0.1.0; platform_system == 'Linux' - [options.entry_points] console_scripts = scancode = scancode.cli:scancode From 50aa7540935ba0763544feb6c4361b698367e369 Mon Sep 17 00:00:00 2001 From: Varsha U N Date: Tue, 18 Mar 2025 22:28:09 +0530 Subject: [PATCH 4/8] Update setup.cfg From 468f3de71c885424baa7e396b26d5d11bfedb1f0 Mon Sep 17 00:00:00 2001 From: Varsha U N Date: Wed, 19 Mar 2025 08:24:12 +0530 Subject: [PATCH 5/8] Update setup.cfg --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index c6b5d10e835..31269077d12 100644 --- a/setup.cfg +++ b/setup.cfg @@ -151,7 +151,7 @@ packages = packagedcode_msitools >= 0.101.210706; platform_system == 'Linux' go-inspector >= 0.5.0; platform_system == 'Linux' rust-inspector >= 0.1.0; platform_system == 'Linux' - + [options.entry_points] console_scripts = scancode = scancode.cli:scancode From accb43b233ceeb031cfcb5d502aa2018ce679928 Mon Sep 17 00:00:00 2001 From: Varsha U N Date: Thu, 17 Apr 2025 18:16:12 +0530 Subject: [PATCH 6/8] Extend parsing logic to other handlers Signed-off-by: Varsha U N --- ...{dockerfile.py => dockerfile_ocilabels.py} | 23 ++--- ...ct_toml.cpython-312-pytest-8.3.3.pyc.26520 | Bin 2086 -> 0 bytes .../data/docker/psql-expected.json | 10 --- .../packagedcode/data/docker/psql.dockerfile | 5 -- .../test-containerfile/test.containerfile | 11 +++ .../test.containerfile-expected.json} | 0 .../test.containerfile-package.expected.json | 9 ++ .../test.containerfile-scan.expected.json | 7 ++ .../data/docker/test-dockerfile-expected.json | 18 ---- .../{ => test-dockerfile}/test.dockerfile | 19 +---- .../test.dockerfile-expected.json | 9 ++ .../test.dockerfile-package.expected.json | 18 ++++ .../test.dockerfile-scan.expected.json | 18 ++++ .../data/docker/test.containerfile | 80 ------------------ tests/packagedcode/test_dockerfile.py | 60 ------------- .../packagedcode/test_dockerfile_ocilabels.py | 66 +++++++++++++++ 16 files changed, 148 insertions(+), 205 deletions(-) rename src/packagedcode/{dockerfile.py => dockerfile_ocilabels.py} (67%) delete mode 100644 tests/packagedcode/__pycache__/test_parse_pyproject_toml.cpython-312-pytest-8.3.3.pyc.26520 delete mode 100644 tests/packagedcode/data/docker/psql-expected.json delete mode 100644 tests/packagedcode/data/docker/psql.dockerfile create mode 100644 tests/packagedcode/data/docker/test-containerfile/test.containerfile rename tests/packagedcode/data/docker/{containerfile-expected.json => test-containerfile/test.containerfile-expected.json} (100%) create mode 100644 tests/packagedcode/data/docker/test-containerfile/test.containerfile-package.expected.json create mode 100644 tests/packagedcode/data/docker/test-containerfile/test.containerfile-scan.expected.json delete mode 100644 tests/packagedcode/data/docker/test-dockerfile-expected.json rename tests/packagedcode/data/docker/{ => test-dockerfile}/test.dockerfile (51%) create mode 100644 tests/packagedcode/data/docker/test-dockerfile/test.dockerfile-expected.json create mode 100644 tests/packagedcode/data/docker/test-dockerfile/test.dockerfile-package.expected.json create mode 100644 tests/packagedcode/data/docker/test-dockerfile/test.dockerfile-scan.expected.json delete mode 100644 tests/packagedcode/data/docker/test.containerfile delete mode 100644 tests/packagedcode/test_dockerfile.py create mode 100644 tests/packagedcode/test_dockerfile_ocilabels.py diff --git a/src/packagedcode/dockerfile.py b/src/packagedcode/dockerfile_ocilabels.py similarity index 67% rename from src/packagedcode/dockerfile.py rename to src/packagedcode/dockerfile_ocilabels.py index cc97bdc1477..07cf221bd04 100644 --- a/src/packagedcode/dockerfile.py +++ b/src/packagedcode/dockerfile_ocilabels.py @@ -11,23 +11,16 @@ import io from pathlib import Path -from dockerfile_parse import DockerfileParser +from dockerfile_parse import DockerfileParser from packagedcode import models from packagedcode import utils +from packagedcode.models import NonAssemblableDatafileHandler import fnmatch -class DockerfileHandler(models.DatafileHandler): +class DockerOCILabelsHandler(NonAssemblableDatafileHandler): datasource_id = 'dockerfile_oci_labels' - - @classmethod - def is_datafile(cls, path): - patterns = ['Dockerfile', 'containerfile', '*.dockerfile'] - filename=os.path.basename(path) - for pattern in patterns: - if fnmatch.fnmatch(filename, pattern): - return True - return False + path_patterns = ('Dockerfile', 'containerfile', '*.dockerfile') @classmethod def parse(cls, location, package_only=False): @@ -38,10 +31,10 @@ def parse(cls, location, package_only=False): package_data = { 'datasource_id': cls.datasource_id, 'type': cls.default_package_type, - 'name': labels.get('name', 'None'), - 'version': labels.get('version', 'None'), - 'license_expression': labels.get('license', 'None'), - 'labels': labels, + 'name': labels.get('name', 'None'), + 'version': labels.get('version', 'None'), + 'license_expression': labels.get('license'), + 'labels': labels, } yield models.PackageData.from_data(package_data, package_only) diff --git a/tests/packagedcode/__pycache__/test_parse_pyproject_toml.cpython-312-pytest-8.3.3.pyc.26520 b/tests/packagedcode/__pycache__/test_parse_pyproject_toml.cpython-312-pytest-8.3.3.pyc.26520 deleted file mode 100644 index 1eb645b2da6cf3b0de46e3c067534f642a59b285..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2086 zcmdT_&2Jk;6rb4-+v~*na6?E@ft8w;_)w=UEr+V9KwznnP~ienElaDZXB|6P@2+Mh zO>M2!3VO%^Au0|n3Wyv*`4f=1^~9x8Z$qUbIB^R~rAR*UX6=cs0)hh^7<=cF+Ues}Ft1EF7(q6g^dIJgYrKB9=?09A1+jkZ}gFlt*>8xK60Y9{q$ ztJzfMsyS-hMOCMROXjaC(6xFRZa8H@n(!Qa59dI1+(XppVSzO$LDbwEa=3_khEV8q ztXq0Dl8Lc5+}ShXeE8|T7>&im&&5=O|8EPpu6k zwGz95shyaK)iCyk>}f_6G;V!LuSRljxV(w@3uEzTMhLx1Or1L$M;5hzBuB^)(+ti2 zYYeP^%-{$K46-!$rP0km6*FTQ&W-r$6?GDG2hn_D?VzuYBG8F00O(|rY2h?VR=lWp zjKy2fwvkM-ny;20L0|Xm4myd>A&$48x341%zt8X|?k)2=+QNV0F$bPf;aayUd@lHM zOzInC&8PK<`pc9EvK)8fmYP_!?$^Z`5w(M5E=WiTqhJ@!DD-01x=DoZ4R?>=Wd-jx z|GU^{{97^5@@aXUN1?>5g!{(IN(pz1rw zXf>VhzVSX`+^4R>xis>*8%Dxy6H#xvqUpPOX9_6i2k1*~cOqXIk^yBVe}!=FGm##C z0S<@Q7fJLds}=EtlawPV73ISFp>#%3!J zt&q!15CMYA1&=qQ&48v*Z!L&!kiZK`+n0{VNyAqWAX+10K;Ih>YV$QY^;XKQ6@?YX zA|{V%2;>%!kO=UUa>IJXL^NelXYDDraMNJ38m+)*(Ef<2_mG@W90!M-8z_T=UmsU4 z@*H5@;pmgMFV#MPI`P_vQ1vFM@j3}r_d%yc!A48`(U5jiZM;)!Y2lzw9C}c{o9m-|_ zCXd5ysN$CgO#w|+X8smy3FyxwlgtinY#R13^(A4GYPS*b^n$)cKnRSlz|_xOZbx)8 m@UO7fKvkbiuHKLPCdT+FLU+-}6F(yOI|tS@zG|@3;QSqlyYlP+ diff --git a/tests/packagedcode/data/docker/psql-expected.json b/tests/packagedcode/data/docker/psql-expected.json deleted file mode 100644 index d9f8bb4c5ce..00000000000 --- a/tests/packagedcode/data/docker/psql-expected.json +++ /dev/null @@ -1,10 +0,0 @@ -[ - { - "datasource_id": "dockerfile_oci_labels", - "type": "default", - "license_expression": "MIT", - "labels": { - "source": "https://github.com/kreneskyp/ix" - } - } -] diff --git a/tests/packagedcode/data/docker/psql.dockerfile b/tests/packagedcode/data/docker/psql.dockerfile deleted file mode 100644 index cbdd9e54eb5..00000000000 --- a/tests/packagedcode/data/docker/psql.dockerfile +++ /dev/null @@ -1,5 +0,0 @@ -FROM postgres:15.3 -LABEL org.opencontainers.image.source https://github.com/kreneskyp/ix - -RUN apt update -y && \ - apt install -y postgresql-15-pgvector \ diff --git a/tests/packagedcode/data/docker/test-containerfile/test.containerfile b/tests/packagedcode/data/docker/test-containerfile/test.containerfile new file mode 100644 index 00000000000..a1ed5fdab59 --- /dev/null +++ b/tests/packagedcode/data/docker/test-containerfile/test.containerfile @@ -0,0 +1,11 @@ +#Copied from https://github.com/kubernetes-sigs/blixt/blob + +FROM alpine + +WORKDIR /opt/blixt/ + +LABEL org.opencontainers.image.source=https://github.com/kubernetes-sigs/blixt +LABEL org.opencontainers.image.licenses=GPL-2.0-only,BSD-2-Clause + +COPY dataplane/LICENSE.GPL-2.0 /opt/blixt/LICENSE.GPL-2.0 +COPY dataplane/LICENSE.BSD-2-Clause /opt/blixt/LICENSE.BSD-2-Clause \ No newline at end of file diff --git a/tests/packagedcode/data/docker/containerfile-expected.json b/tests/packagedcode/data/docker/test-containerfile/test.containerfile-expected.json similarity index 100% rename from tests/packagedcode/data/docker/containerfile-expected.json rename to tests/packagedcode/data/docker/test-containerfile/test.containerfile-expected.json diff --git a/tests/packagedcode/data/docker/test-containerfile/test.containerfile-package.expected.json b/tests/packagedcode/data/docker/test-containerfile/test.containerfile-package.expected.json new file mode 100644 index 00000000000..1a33d46f6e7 --- /dev/null +++ b/tests/packagedcode/data/docker/test-containerfile/test.containerfile-package.expected.json @@ -0,0 +1,9 @@ +[ + { + "datasource_id": "docker_oci_labels", + "labels": { + "org.opencontainers.image.source": "https://github.com/kubernetes-sigs/blixt", + "org.opencontainers.image.licenses": "GPL-2.0-only,BSD-2-Clause" + } + } +] \ No newline at end of file diff --git a/tests/packagedcode/data/docker/test-containerfile/test.containerfile-scan.expected.json b/tests/packagedcode/data/docker/test-containerfile/test.containerfile-scan.expected.json new file mode 100644 index 00000000000..d129f48022a --- /dev/null +++ b/tests/packagedcode/data/docker/test-containerfile/test.containerfile-scan.expected.json @@ -0,0 +1,7 @@ +{ + "datasource_id": "docker_oci_labels", + "labels": { + "org.opencontainers.image.source": "https://github.com/kubernetes-sigs/blixt", + "org.opencontainers.image.licenses": "GPL-2.0-only,BSD-2-Clause" + } +} \ No newline at end of file diff --git a/tests/packagedcode/data/docker/test-dockerfile-expected.json b/tests/packagedcode/data/docker/test-dockerfile-expected.json deleted file mode 100644 index 2a843acee2d..00000000000 --- a/tests/packagedcode/data/docker/test-dockerfile-expected.json +++ /dev/null @@ -1,18 +0,0 @@ -[ - { - "datasource_id": "dockerfile_oci_labels", - "type": "default", - "name": "Kanboard", - "version": "1.2.42", - "license_expression": "MIT", - "labels": { - "source": "https://github.com/kanboard/kanboard", - "title": "Kanboard", - "description": "Kanboard is project management software that focuses on the Kanban methodology", - "vendor": "Kanboard", - "licenses": "MIT", - "url": "https://kanboard.org", - "documentation": "https://docs.kanboard.org" - } - } -] diff --git a/tests/packagedcode/data/docker/test.dockerfile b/tests/packagedcode/data/docker/test-dockerfile/test.dockerfile similarity index 51% rename from tests/packagedcode/data/docker/test.dockerfile rename to tests/packagedcode/data/docker/test-dockerfile/test.dockerfile index bb5efa2459e..edbcbe058c1 100644 --- a/tests/packagedcode/data/docker/test.dockerfile +++ b/tests/packagedcode/data/docker/test-dockerfile/test.dockerfile @@ -1,36 +1,21 @@ #Copied from https://github.com/kanboard/kanboard FROM alpine:3.21 - -LABEL org.opencontainers.image.source https://github.com/kanboard/kanboard +LABEL org.opencontainers.image.source=https://github.com/kanboard/kanboard LABEL org.opencontainers.image.title=Kanboard LABEL org.opencontainers.image.description="Kanboard is project management software that focuses on the Kanban methodology" LABEL org.opencontainers.image.vendor=Kanboard LABEL org.opencontainers.image.licenses=MIT LABEL org.opencontainers.image.url=https://kanboard.org LABEL org.opencontainers.image.documentation=https://docs.kanboard.org - VOLUME /var/www/app/data VOLUME /var/www/app/plugins VOLUME /etc/nginx/ssl - EXPOSE 80 443 - ARG VERSION - -RUN apk --no-cache --update add \ - tzdata openssl unzip nginx bash ca-certificates s6 curl ssmtp mailx php83 php83-phar php83-curl \ - php83-fpm php83-json php83-zlib php83-xml php83-dom php83-ctype php83-opcache php83-zip php83-iconv \ - php83-pdo php83-pdo_mysql php83-pdo_sqlite php83-pdo_pgsql php83-mbstring php83-session php83-bcmath \ - php83-gd php83-openssl php83-sockets php83-posix php83-ldap php83-simplexml php83-xmlwriter && \ - rm -rf /var/www/localhost && \ - rm -f /etc/php83/php-fpm.d/www.conf && \ - ln -sf /usr/bin/php83 /usr/bin/php - +RUN apk --no-cache --update add ... ADD . /var/www/app ADD docker/ / - RUN rm -rf /var/www/app/docker && echo $VERSION > /var/www/app/app/version.txt - ENTRYPOINT ["/usr/local/bin/entrypoint.sh"] CMD [] \ No newline at end of file diff --git a/tests/packagedcode/data/docker/test-dockerfile/test.dockerfile-expected.json b/tests/packagedcode/data/docker/test-dockerfile/test.dockerfile-expected.json new file mode 100644 index 00000000000..26e97c47ab4 --- /dev/null +++ b/tests/packagedcode/data/docker/test-dockerfile/test.dockerfile-expected.json @@ -0,0 +1,9 @@ +{ + "org.opencontainers.image.source": "https://github.com/kanboard/kanboard", + "org.opencontainers.image.title": "Kanboard", + "org.opencontainers.image.description": "Kanboard is project management software that focuses on the Kanban methodology", + "org.opencontainers.image.vendor": "Kanboard", + "org.opencontainers.image.licenses": "MIT", + "org.opencontainers.image.url": "https://kanboard.org", + "org.opencontainers.image.documentation": "https://docs.kanboard.org" +} \ No newline at end of file diff --git a/tests/packagedcode/data/docker/test-dockerfile/test.dockerfile-package.expected.json b/tests/packagedcode/data/docker/test-dockerfile/test.dockerfile-package.expected.json new file mode 100644 index 00000000000..9926733d0a6 --- /dev/null +++ b/tests/packagedcode/data/docker/test-dockerfile/test.dockerfile-package.expected.json @@ -0,0 +1,18 @@ +[ + { + "datasource_id": "docker_oci_labels", + "type": "default", + "name": "Kanboard", + "version": "1.2.42", + "license_expression": "MIT", + "labels": { + "org.opencontainers.image.source": "https://github.com/kanboard/kanboard", + "org.opencontainers.image.title": "Kanboard", + "org.opencontainers.image.description": "Kanboard is project management software that focuses on the Kanban methodology", + "org.opencontainers.image.vendor": "Kanboard", + "org.opencontainers.image.licenses": "MIT", + "org.opencontainers.image.url": "https://kanboard.org", + "org.opencontainers.image.documentation": "https://docs.kanboard.org" + } + } +] \ No newline at end of file diff --git a/tests/packagedcode/data/docker/test-dockerfile/test.dockerfile-scan.expected.json b/tests/packagedcode/data/docker/test-dockerfile/test.dockerfile-scan.expected.json new file mode 100644 index 00000000000..9926733d0a6 --- /dev/null +++ b/tests/packagedcode/data/docker/test-dockerfile/test.dockerfile-scan.expected.json @@ -0,0 +1,18 @@ +[ + { + "datasource_id": "docker_oci_labels", + "type": "default", + "name": "Kanboard", + "version": "1.2.42", + "license_expression": "MIT", + "labels": { + "org.opencontainers.image.source": "https://github.com/kanboard/kanboard", + "org.opencontainers.image.title": "Kanboard", + "org.opencontainers.image.description": "Kanboard is project management software that focuses on the Kanban methodology", + "org.opencontainers.image.vendor": "Kanboard", + "org.opencontainers.image.licenses": "MIT", + "org.opencontainers.image.url": "https://kanboard.org", + "org.opencontainers.image.documentation": "https://docs.kanboard.org" + } + } +] \ No newline at end of file diff --git a/tests/packagedcode/data/docker/test.containerfile b/tests/packagedcode/data/docker/test.containerfile deleted file mode 100644 index 2145e4476d0..00000000000 --- a/tests/packagedcode/data/docker/test.containerfile +++ /dev/null @@ -1,80 +0,0 @@ -#Copied from https://github.com/kubernetes-sigs/blixt/blob - - -FROM rust:1.79-slim-bookworm as builder - -ARG TARGETARCH -ARG LLVM_VERSION=19 - -RUN apt-get update -RUN apt-get install --yes \ - build-essential \ - protobuf-compiler \ - pkg-config \ - musl-tools \ - clang \ - wget - -RUN apt install --yes lsb-release software-properties-common gnupg -RUN wget -O /tmp/llvm.sh https://apt.llvm.org/llvm.sh -RUN chmod +x /tmp/llvm.sh -RUN /bin/sh -c "/tmp/llvm.sh ${LLVM_VERSION} all" - -RUN rustup default stable -RUN rustup install nightly -RUN rustup component add rust-src --toolchain nightly -RUN --mount=type=cache,target=/root/.cargo/registry \ - cargo install bpf-linker - -WORKDIR /workspace -# Docker uses the amd64/arm64 convention while Rust uses the x86_64/aarch64 convention. -# Since Dockerfile doesn't support conditional variables (sigh), write the arch in Rust's -# convention to a file for later usage. -RUN if [ "$TARGETARCH" = "amd64" ]; \ - then echo "x86_64" >> arch; \ - else echo "aarch64" >> arch; \ - fi -RUN rustup target add $(eval cat arch)-unknown-linux-musl - -COPY dataplane dataplane -COPY tools/udp-test-server tools/udp-test-server -COPY xtask xtask -COPY Cargo.toml Cargo.toml -COPY Cargo.lock Cargo.lock -COPY .cargo .cargo - -# We need to tell bpf-linker where it can find LLVM's shared library file. -# Ref: https://github.com/aya-rs/rustc-llvm-proxy/blob/cbcb3c6/src/lib.rs#L48 -ENV LD_LIBRARY_PATH="/usr/lib/llvm-$LLVM_VERSION/lib" -ENV CC_aarch64_unknown_linux_musl="/usr/lib/llvm-$LLVM_VERSION/bin/clang" -ENV AR_aarch64_unknown_linux_musl="/usr/lib/llvm-$LLVM_VERSION/bin/llvm-ar" -ENV CC_x86_64_unknown_linux_musl="/usr/lib/llvm-$LLVM_VERSION/bin/clang" -ENV AR_x86_64_unknown_linux_musl="/usr/lib/llvm-$LLVM_VERSION/bin/llvm-ar" -ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_MUSL_RUSTFLAGS="-Clink-self-contained=yes -Clinker=rust-lld" - -RUN --mount=type=cache,target=/workspace/target/ \ - --mount=type=cache,target=/root/.cargo/registry \ - cargo xtask build-ebpf --release -RUN --mount=type=cache,target=/workspace/target/ \ - --mount=type=cache,target=/root/.cargo/registry \ - RUSTFLAGS=-Ctarget-feature=+crt-static cargo build \ - --workspace \ - --exclude ebpf \ - --release \ - --target=$(eval cat arch)-unknown-linux-musl -RUN --mount=type=cache,target=/workspace/target/ \ - cp /workspace/target/$(eval cat arch)-unknown-linux-musl/release/loader /workspace/dataplane-release - -FROM alpine - -LABEL org.opencontainers.image.source=https://github.com/kubernetes-sigs/blixt -LABEL org.opencontainers.image.licenses=GPL-2.0-only,BSD-2-Clause - -WORKDIR /opt/blixt/ - -COPY --from=builder /workspace/dataplane-release /opt/blixt/dataplane - -COPY dataplane/LICENSE.GPL-2.0 /opt/blixt/LICENSE.GPL-2.0 -COPY dataplane/LICENSE.BSD-2-Clause /opt/blixt/LICENSE.BSD-2-Clause - -ENTRYPOINT ["/opt/blixt/dataplane"] \ No newline at end of file diff --git a/tests/packagedcode/test_dockerfile.py b/tests/packagedcode/test_dockerfile.py deleted file mode 100644 index 34353f056e5..00000000000 --- a/tests/packagedcode/test_dockerfile.py +++ /dev/null @@ -1,60 +0,0 @@ -# -# Copyright (c) nexB Inc. and others. All rights reserved. -# ScanCode is a trademark of nexB Inc. -# SPDX-License-Identifier: Apache-2.0 -# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/scancode-toolkit for support or download. -# See https://aboutcode.org for more information about nexB OSS projects. -# - -from packagedcode import dockerfile -import pytest -import os.path -import json -from pathlib import Path -from packagedcode.dockerfile import DockerfileHandler - -class TestDockerfileHandler: - - def get_test_loc(self, path): - return Path(os.path.join(os.path.dirname(__file__), 'data')) - - def load_expected(self, expected_file): - with open(expected_file) as f: - return json.load(f) - - def test_is_datafile(self): - dockerfiles = [ - 'test.dockerfile', - 'test.containerfile', - 'psql.dockerfile' - ] - for dockerfile in dockerfiles: - test_file = self.get_test_loc(f'data/docker/{dockerfile}') - assert DockerfileHandler.is_datafile(str(test_file)) - - def test_parse_dockerfile(self): - test_files = [ - ('test.dockerfile', 'test-dockerfile-expected.json'), - ('test.containerfile', 'containerfile-expected.json'), - ('psql.dockerfile', 'psql-expected.json') - ] - for dockerfile, expected in test_files: - test_file = self.get_test_loc(f'data/docker/{dockerfile}') - expected_loc = self.get_test_loc(f'data/docker/{expected}') - packages = list(DockerfileHandler.parse(str(test_file))) - expected_packages = self.load_expected(expected_loc) - assert packages == expected_packages - - def test_extract_oci_labels_from_dockerfile(self, mocker): - dockerfiles = [ - 'test.dockerfile', - 'test.containerfile', - 'psql.dockerfile' - ] - for dockerfile in dockerfiles: - dockerfile_path = self.get_test_loc(f'data/docker/{dockerfile}') - labels = DockerfileHandler.extract_oci_labels_from_dockerfile(str(dockerfile_path)) - expected_loc = self.get_test_loc(f'data/docker/{dockerfile.replace(".dockerfile", "-expected.json").replace(".containerfile", "-expected.json")}') - expected_labels = self.load_expected(expected_loc)[0]['labels'] - assert labels == expected_labels diff --git a/tests/packagedcode/test_dockerfile_ocilabels.py b/tests/packagedcode/test_dockerfile_ocilabels.py new file mode 100644 index 00000000000..0bb608c7cba --- /dev/null +++ b/tests/packagedcode/test_dockerfile_ocilabels.py @@ -0,0 +1,66 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# ScanCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/scancode-toolkit for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import pytest +import json +from commoncode.testcase import FileDrivenTesting +from scancode.cli_test_utils import run_scan_click +from packagedcode.models import DockerOCILabelsHandler + +class TestDockerOCILabelsHandler(FileDrivenTesting): + + @pytest.mark.parametrize('test_file, expected', [ + ('docker/test.dockerfile', True), + ('docker/test.containerfile', True), + ]) + def test_is_datafile(self, test_file, expected): + test_file_path = self.get_test_loc(test_file) + assert DockerOCILabelsHandler.is_datafile(test_file_path) == expected + + def test_parse_dockerfile(self): + test_files = [ + ('test.dockerfile', 'test.dockerfile-package.expected.json'), + ('test.containerfile', 'test.containerfile-package.expected.json'), + ] + for dockerfile, expected in test_files: + test_file = self.get_test_loc(f'docker/{dockerfile}') + expected_loc = self.get_test_loc(f'docker/{expected}') + packages = list(DockerOCILabelsHandler.parse(test_file)) + expected_packages = self.load_expected(expected_loc) + assert packages == expected_packages + + def test_extract_oci_labels_from_dockerfile(self): + test_files = [ + ('test.dockerfile', 'test.dockerfile-expected.json'), + ('test.containerfile', 'test.containerfile-expected.json'), + ] + for dockerfile, expected in test_files: + dockerfile_path = self.get_test_loc(f'docker/{dockerfile}') + labels = DockerOCILabelsHandler.extract_oci_labels_from_dockerfile(dockerfile_path) + expected_loc = self.get_test_loc(f'docker/{expected}') + expected_labels = self.load_expected(expected_loc) + assert labels == expected_labels + + def test_full_scan_docker_oci_labels_containerfile(self): + test_file = self.get_test_loc('docker/test.containerfile') + result_file = self.get_temp_file('json') + run_scan_click(['--package', test_file, '--json-pp', result_file]) + result = json.load(open(result_file)) + package_data = result.get('package_data', []) + assert len(package_data) == 1 + package = package_data[0] + assert package['datasource_id'] == 'docker_oci_labels' + assert package['labels'] == { + 'org.opencontainers.image.source': 'https://github.com/kubernetes-sigs/blixt', + 'org.opencontainers.image.licenses': 'GPL-2.0-only,BSD-2-Clause' + } + + def load_expected(self, expected_file): + with open(expected_file) as f: + return json.load(f) \ No newline at end of file From 0055fa7e67b8a3f6f8729a5da4ed976e4e334bf1 Mon Sep 17 00:00:00 2001 From: Varsha U N Date: Sun, 27 Apr 2025 11:22:05 +0530 Subject: [PATCH 7/8] Fix expectation files to match packageData Signed-off-by: Varsha U N --- Dockerfile | 53 +++---------------- src/packagedcode/__init__.py | 4 +- src/packagedcode/dockerfile_ocilabels.py | 14 +++-- .../test.containerfile-expected.json | 13 ----- .../test.containerfile-package.expected.json | 9 ---- .../test.containerfile-scan.expected.json | 7 --- .../test.dockerfile-scan.expected.json | 18 ------- .../test.containerfile | 0 .../docker/test.containerfile-expected.json | 4 ++ .../test.containerfile-package.expected.json | 17 ++++++ .../test.containerfile-scan.expected.json | 43 +++++++++++++++ .../{test-dockerfile => }/test.dockerfile | 0 .../test.dockerfile-expected.json | 0 .../test.dockerfile-package.expected.json | 23 ++++---- .../docker/test.dockerfile-scan.expected.json | 36 +++++++++++++ .../packagedcode/test_dockerfile_ocilabels.py | 13 +++-- 16 files changed, 138 insertions(+), 116 deletions(-) delete mode 100644 tests/packagedcode/data/docker/test-containerfile/test.containerfile-expected.json delete mode 100644 tests/packagedcode/data/docker/test-containerfile/test.containerfile-package.expected.json delete mode 100644 tests/packagedcode/data/docker/test-containerfile/test.containerfile-scan.expected.json delete mode 100644 tests/packagedcode/data/docker/test-dockerfile/test.dockerfile-scan.expected.json rename tests/packagedcode/data/docker/{test-containerfile => }/test.containerfile (100%) create mode 100644 tests/packagedcode/data/docker/test.containerfile-expected.json create mode 100644 tests/packagedcode/data/docker/test.containerfile-package.expected.json create mode 100644 tests/packagedcode/data/docker/test.containerfile-scan.expected.json rename tests/packagedcode/data/docker/{test-dockerfile => }/test.dockerfile (100%) rename tests/packagedcode/data/docker/{test-dockerfile => }/test.dockerfile-expected.json (100%) rename tests/packagedcode/data/docker/{test-dockerfile => }/test.dockerfile-package.expected.json (62%) create mode 100644 tests/packagedcode/data/docker/test.dockerfile-scan.expected.json diff --git a/Dockerfile b/Dockerfile index d4c641d7a2f..037a6f1b6d8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,50 +1,11 @@ -# -# Copyright (c) nexB Inc. and others. All rights reserved. -# ScanCode is a trademark of nexB Inc. -# SPDX-License-Identifier: Apache-2.0 -# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/scancode-toolkit for support or download. -# See https://aboutcode.org for more information about nexB OSS projects. -# +#Copied from https://github.com/kubernetes-sigs/blixt/blob -FROM --platform=linux/amd64 python:3.12-slim-bookworm +FROM alpine -# Python settings: Force unbuffered stdout and stderr (i.e. they are flushed to terminal immediately) -ENV PYTHONUNBUFFERED 1 -# Python settings: do not write pyc files -ENV PYTHONDONTWRITEBYTECODE 1 +WORKDIR /opt/blixt/ -# OS requirements as per -# https://scancode-toolkit.readthedocs.io/en/latest/getting-started/install.html -RUN apt-get update \ - && apt-get install -y --no-install-recommends \ - bzip2 \ - xz-utils \ - zlib1g \ - libxml2-dev \ - libxslt1-dev \ - libgomp1 \ - libsqlite3-0 \ - libgcrypt20 \ - libpopt0 \ - libzstd1 \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* +LABEL org.opencontainers.image.source=https://github.com/kubernetes-sigs/blixt +LABEL org.opencontainers.image.licenses=GPL-2.0-only,BSD-2-Clause -# Create directory for scancode sources -WORKDIR /scancode-toolkit - -# Copy sources into docker container -COPY . /scancode-toolkit - -# Initial configuration using ./configure, scancode-reindex-licenses to build -# the base license index -RUN ./configure \ - && ./venv/bin/scancode-reindex-licenses - -# Add scancode to path -ENV PATH=/scancode-toolkit:$PATH - -# Set entrypoint to be the scancode command, allows to run the generated docker -# image directly with the scancode arguments: `docker run (...) ` -ENTRYPOINT ["./scancode"] +COPY dataplane/LICENSE.GPL-2.0 /opt/blixt/LICENSE.GPL-2.0 +COPY dataplane/LICENSE.BSD-2-Clause /opt/blixt/LICENSE.BSD-2-Clause \ No newline at end of file diff --git a/src/packagedcode/__init__.py b/src/packagedcode/__init__.py index 168b987caca..88683109166 100644 --- a/src/packagedcode/__init__.py +++ b/src/packagedcode/__init__.py @@ -18,7 +18,7 @@ from packagedcode import debian from packagedcode import debian_copyright from packagedcode import distro -from packagedcode import dockerfile +from packagedcode import dockerfile_ocilabels from packagedcode import conda from packagedcode import conan from packagedcode import cocoapods @@ -96,7 +96,7 @@ debian.DebianSourcePackageTarballHandler, distro.EtcOsReleaseHandler, - dockerfile.DockerfileHandler, + dockerfile_ocilabels.DockerOCILabelsHandler, freebsd.CompactManifestHandler, diff --git a/src/packagedcode/dockerfile_ocilabels.py b/src/packagedcode/dockerfile_ocilabels.py index 07cf221bd04..4f1351e315a 100644 --- a/src/packagedcode/dockerfile_ocilabels.py +++ b/src/packagedcode/dockerfile_ocilabels.py @@ -20,7 +20,8 @@ class DockerOCILabelsHandler(NonAssemblableDatafileHandler): datasource_id = 'dockerfile_oci_labels' - path_patterns = ('Dockerfile', 'containerfile', '*.dockerfile') + default_package_type = 'docker' + path_patterns = ('Dockerfile', 'containerfile', '*.dockerfile','*.containerfile') @classmethod def parse(cls, location, package_only=False): @@ -28,13 +29,16 @@ def parse(cls, location, package_only=False): Parse a Dockerfile and yield one or more PackageData objects with OCI labels and metadata. """ labels = cls.extract_oci_labels_from_dockerfile(location) + license_value = labels.get('org.opencontainers.image.licenses') + if license_value: + license_value = license_value.strip() package_data = { 'datasource_id': cls.datasource_id, 'type': cls.default_package_type, - 'name': labels.get('name', 'None'), - 'version': labels.get('version', 'None'), - 'license_expression': labels.get('license'), - 'labels': labels, + 'name': labels.get('org.opencontainers.image.title'), + 'version': None, + 'extracted_license_statement':license_value, + 'extra_data': {'labels': labels}, } yield models.PackageData.from_data(package_data, package_only) diff --git a/tests/packagedcode/data/docker/test-containerfile/test.containerfile-expected.json b/tests/packagedcode/data/docker/test-containerfile/test.containerfile-expected.json deleted file mode 100644 index 92fe52b8421..00000000000 --- a/tests/packagedcode/data/docker/test-containerfile/test.containerfile-expected.json +++ /dev/null @@ -1,13 +0,0 @@ -[ - { - "datasource_id": "dockerfile_oci_labels", - "type": "default", - "name": "Unknown", - "version": "Unknown", - "license_expression": "GPL-2.0-only AND BSD-2-Clause", - "labels": { - "source": "https://github.com/kubernetes-sigs/blixt", - "licenses": "GPL-2.0-only,BSD-2-Clause" - } - } -] diff --git a/tests/packagedcode/data/docker/test-containerfile/test.containerfile-package.expected.json b/tests/packagedcode/data/docker/test-containerfile/test.containerfile-package.expected.json deleted file mode 100644 index 1a33d46f6e7..00000000000 --- a/tests/packagedcode/data/docker/test-containerfile/test.containerfile-package.expected.json +++ /dev/null @@ -1,9 +0,0 @@ -[ - { - "datasource_id": "docker_oci_labels", - "labels": { - "org.opencontainers.image.source": "https://github.com/kubernetes-sigs/blixt", - "org.opencontainers.image.licenses": "GPL-2.0-only,BSD-2-Clause" - } - } -] \ No newline at end of file diff --git a/tests/packagedcode/data/docker/test-containerfile/test.containerfile-scan.expected.json b/tests/packagedcode/data/docker/test-containerfile/test.containerfile-scan.expected.json deleted file mode 100644 index d129f48022a..00000000000 --- a/tests/packagedcode/data/docker/test-containerfile/test.containerfile-scan.expected.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "datasource_id": "docker_oci_labels", - "labels": { - "org.opencontainers.image.source": "https://github.com/kubernetes-sigs/blixt", - "org.opencontainers.image.licenses": "GPL-2.0-only,BSD-2-Clause" - } -} \ No newline at end of file diff --git a/tests/packagedcode/data/docker/test-dockerfile/test.dockerfile-scan.expected.json b/tests/packagedcode/data/docker/test-dockerfile/test.dockerfile-scan.expected.json deleted file mode 100644 index 9926733d0a6..00000000000 --- a/tests/packagedcode/data/docker/test-dockerfile/test.dockerfile-scan.expected.json +++ /dev/null @@ -1,18 +0,0 @@ -[ - { - "datasource_id": "docker_oci_labels", - "type": "default", - "name": "Kanboard", - "version": "1.2.42", - "license_expression": "MIT", - "labels": { - "org.opencontainers.image.source": "https://github.com/kanboard/kanboard", - "org.opencontainers.image.title": "Kanboard", - "org.opencontainers.image.description": "Kanboard is project management software that focuses on the Kanban methodology", - "org.opencontainers.image.vendor": "Kanboard", - "org.opencontainers.image.licenses": "MIT", - "org.opencontainers.image.url": "https://kanboard.org", - "org.opencontainers.image.documentation": "https://docs.kanboard.org" - } - } -] \ No newline at end of file diff --git a/tests/packagedcode/data/docker/test-containerfile/test.containerfile b/tests/packagedcode/data/docker/test.containerfile similarity index 100% rename from tests/packagedcode/data/docker/test-containerfile/test.containerfile rename to tests/packagedcode/data/docker/test.containerfile diff --git a/tests/packagedcode/data/docker/test.containerfile-expected.json b/tests/packagedcode/data/docker/test.containerfile-expected.json new file mode 100644 index 00000000000..e08951101e2 --- /dev/null +++ b/tests/packagedcode/data/docker/test.containerfile-expected.json @@ -0,0 +1,4 @@ +{ + "org.opencontainers.image.source": "https://github.com/kubernetes-sigs/blixt", + "org.opencontainers.image.licenses": "GPL-2.0-only,BSD-2-Clause" +} \ No newline at end of file diff --git a/tests/packagedcode/data/docker/test.containerfile-package.expected.json b/tests/packagedcode/data/docker/test.containerfile-package.expected.json new file mode 100644 index 00000000000..75be712fa22 --- /dev/null +++ b/tests/packagedcode/data/docker/test.containerfile-package.expected.json @@ -0,0 +1,17 @@ +[{ + "datasource_id": "dockerfile_oci_labels", + "type": "docker", + "namespace": null, + "name": null, + "version": null, + "qualifiers": null, + "subpath": null, + "extracted_license_statement": "GPL-2.0-only,BSD-2-Clause", + "primary_language": null, + "extra_data": { + "labels": { + "org.opencontainers.image.source": "https://github.com/kubernetes-sigs/blixt", + "org.opencontainers.image.licenses": "GPL-2.0-only,BSD-2-Clause" + } + } +}] \ No newline at end of file diff --git a/tests/packagedcode/data/docker/test.containerfile-scan.expected.json b/tests/packagedcode/data/docker/test.containerfile-scan.expected.json new file mode 100644 index 00000000000..7435523fcb6 --- /dev/null +++ b/tests/packagedcode/data/docker/test.containerfile-scan.expected.json @@ -0,0 +1,43 @@ +[{ + "datasource_id": "dockerfile_oci_labels", + "type": "docker", + "namespace": null, + "name": null, + "version": null, + "qualifiers": null, + "subpath": null, + "extracted_license_statement": "GPL-2.0-only,BSD-2-Clause", + "declared_license_expression": "gpl-2.0 AND bsd-2-clause", + "declared_license_expression_spdx": "GPL-2.0 AND BSD-2-Clause", + "license_detections": [ + { + "license_expression": "gpl-2.0", + "matches": [{ + "license_expression": "gpl-2.0", + "start_line": 1, + "end_line": 1, + "from_file": "tests/packagedcode/data/docker/test.containerfile", + "score": 100.0, + "matched_text": "GPL-2.0-only" + }] + }, + { + "license_expression": "bsd-2-clause", + "matches": [{ + "license_expression": "bsd-2-clause", + "start_line": 1, + "end_line": 1, + "from_file": "tests/packagedcode/data/docker/test.containerfile", + "score": 100.0, + "matched_text": "BSD-2-Clause" + }] + } + ], + "primary_language": null, + "extra_data": { + "labels": { + "org.opencontainers.image.source": "https://github.com/kubernetes-sigs/blixt", + "org.opencontainers.image.licenses": "GPL-2.0-only,BSD-2-Clause" + } + } +}] \ No newline at end of file diff --git a/tests/packagedcode/data/docker/test-dockerfile/test.dockerfile b/tests/packagedcode/data/docker/test.dockerfile similarity index 100% rename from tests/packagedcode/data/docker/test-dockerfile/test.dockerfile rename to tests/packagedcode/data/docker/test.dockerfile diff --git a/tests/packagedcode/data/docker/test-dockerfile/test.dockerfile-expected.json b/tests/packagedcode/data/docker/test.dockerfile-expected.json similarity index 100% rename from tests/packagedcode/data/docker/test-dockerfile/test.dockerfile-expected.json rename to tests/packagedcode/data/docker/test.dockerfile-expected.json diff --git a/tests/packagedcode/data/docker/test-dockerfile/test.dockerfile-package.expected.json b/tests/packagedcode/data/docker/test.dockerfile-package.expected.json similarity index 62% rename from tests/packagedcode/data/docker/test-dockerfile/test.dockerfile-package.expected.json rename to tests/packagedcode/data/docker/test.dockerfile-package.expected.json index 9926733d0a6..bbcc9287516 100644 --- a/tests/packagedcode/data/docker/test-dockerfile/test.dockerfile-package.expected.json +++ b/tests/packagedcode/data/docker/test.dockerfile-package.expected.json @@ -1,10 +1,14 @@ -[ - { - "datasource_id": "docker_oci_labels", - "type": "default", - "name": "Kanboard", - "version": "1.2.42", - "license_expression": "MIT", +[{ + "datasource_id": "dockerfile_oci_labels", + "type": "docker", + "namespace": null, + "name": "Kanboard", + "version": null, + "qualifiers": null, + "subpath": null, + "extracted_license_statement": "MIT", + "primary_language": null, + "extra_data": { "labels": { "org.opencontainers.image.source": "https://github.com/kanboard/kanboard", "org.opencontainers.image.title": "Kanboard", @@ -14,5 +18,6 @@ "org.opencontainers.image.url": "https://kanboard.org", "org.opencontainers.image.documentation": "https://docs.kanboard.org" } - } -] \ No newline at end of file + }, + "purl": "pkg:docker/Kanboard" +}] \ No newline at end of file diff --git a/tests/packagedcode/data/docker/test.dockerfile-scan.expected.json b/tests/packagedcode/data/docker/test.dockerfile-scan.expected.json new file mode 100644 index 00000000000..ac926f173ad --- /dev/null +++ b/tests/packagedcode/data/docker/test.dockerfile-scan.expected.json @@ -0,0 +1,36 @@ +[{ + "datasource_id": "dockerfile_oci_labels", + "type": "docker", + "namespace": null, + "name": "Kanboard", + "version": null, + "qualifiers": null, + "subpath": null, + "extracted_license_statement": "MIT", + "declared_license_expression": "mit", + "declared_license_expression_spdx": "MIT", + "license_detections": [{ + "license_expression": "mit", + "matches": [{ + "license_expression": "mit", + "start_line": 1, + "end_line": 1, + "from_file": "tests/packagedcode/data/docker/test.dockerfile", + "score": 100.0, + "matched_text": "MIT" + }] + }], + "primary_language": null, + "extra_data": { + "labels": { + "org.opencontainers.image.source": "https://github.com/kanboard/kanboard", + "org.opencontainers.image.title": "Kanboard", + "org.opencontainers.image.description": "Kanboard is project management software that focuses on the Kanban methodology", + "org.opencontainers.image.vendor": "Kanboard", + "org.opencontainers.image.licenses": "MIT", + "org.opencontainers.image.url": "https://kanboard.org", + "org.opencontainers.image.documentation": "https://docs.kanboard.org" + } + }, + "purl": "pkg:docker/Kanboard" +}] \ No newline at end of file diff --git a/tests/packagedcode/test_dockerfile_ocilabels.py b/tests/packagedcode/test_dockerfile_ocilabels.py index 0bb608c7cba..d71e5d3ee45 100644 --- a/tests/packagedcode/test_dockerfile_ocilabels.py +++ b/tests/packagedcode/test_dockerfile_ocilabels.py @@ -9,11 +9,13 @@ import pytest import json +import os from commoncode.testcase import FileDrivenTesting from scancode.cli_test_utils import run_scan_click -from packagedcode.models import DockerOCILabelsHandler +from packagedcode.dockerfile_ocilabels import DockerOCILabelsHandler class TestDockerOCILabelsHandler(FileDrivenTesting): + test_data_dir = os.path.join(os.path.dirname(__file__), 'data') @pytest.mark.parametrize('test_file, expected', [ ('docker/test.dockerfile', True), @@ -52,14 +54,11 @@ def test_full_scan_docker_oci_labels_containerfile(self): result_file = self.get_temp_file('json') run_scan_click(['--package', test_file, '--json-pp', result_file]) result = json.load(open(result_file)) + expected_loc = self.get_test_loc('docker/test.containerfile-scan.expected.json') + expected_package_data = json.load(open(expected_loc)) package_data = result.get('package_data', []) assert len(package_data) == 1 - package = package_data[0] - assert package['datasource_id'] == 'docker_oci_labels' - assert package['labels'] == { - 'org.opencontainers.image.source': 'https://github.com/kubernetes-sigs/blixt', - 'org.opencontainers.image.licenses': 'GPL-2.0-only,BSD-2-Clause' - } + assert package_data == expected_package_data def load_expected(self, expected_file): with open(expected_file) as f: From ae8a099792d94386334af3b26798dd215f866090 Mon Sep 17 00:00:00 2001 From: Varsha U N Date: Wed, 30 Apr 2025 19:35:03 +0530 Subject: [PATCH 8/8] Restore modified Dockerfile Signed-off-by: Varsha U N --- Dockerfile | 53 ++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 46 insertions(+), 7 deletions(-) diff --git a/Dockerfile b/Dockerfile index 037a6f1b6d8..d4c641d7a2f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,11 +1,50 @@ -#Copied from https://github.com/kubernetes-sigs/blixt/blob +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# ScanCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/scancode-toolkit for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# -FROM alpine +FROM --platform=linux/amd64 python:3.12-slim-bookworm -WORKDIR /opt/blixt/ +# Python settings: Force unbuffered stdout and stderr (i.e. they are flushed to terminal immediately) +ENV PYTHONUNBUFFERED 1 +# Python settings: do not write pyc files +ENV PYTHONDONTWRITEBYTECODE 1 -LABEL org.opencontainers.image.source=https://github.com/kubernetes-sigs/blixt -LABEL org.opencontainers.image.licenses=GPL-2.0-only,BSD-2-Clause +# OS requirements as per +# https://scancode-toolkit.readthedocs.io/en/latest/getting-started/install.html +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + bzip2 \ + xz-utils \ + zlib1g \ + libxml2-dev \ + libxslt1-dev \ + libgomp1 \ + libsqlite3-0 \ + libgcrypt20 \ + libpopt0 \ + libzstd1 \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* -COPY dataplane/LICENSE.GPL-2.0 /opt/blixt/LICENSE.GPL-2.0 -COPY dataplane/LICENSE.BSD-2-Clause /opt/blixt/LICENSE.BSD-2-Clause \ No newline at end of file +# Create directory for scancode sources +WORKDIR /scancode-toolkit + +# Copy sources into docker container +COPY . /scancode-toolkit + +# Initial configuration using ./configure, scancode-reindex-licenses to build +# the base license index +RUN ./configure \ + && ./venv/bin/scancode-reindex-licenses + +# Add scancode to path +ENV PATH=/scancode-toolkit:$PATH + +# Set entrypoint to be the scancode command, allows to run the generated docker +# image directly with the scancode arguments: `docker run (...) ` +ENTRYPOINT ["./scancode"]