Add JSON output for pip-audit for security pipeline

ArBridgeman · ArBridgeman · commit ebdbe4afc18c · 2025-03-25T09:04:05.000+01:00
diff --git a/doc/changes/unreleased.md b/doc/changes/unreleased.md
@@ -5,3 +5,4 @@
 * [#73](https://github.com/exasol/python-toolbox/issues/73): Added nox target for auditing work spaces in regard to known vulnerabilities
 * [#65](https://github.com/exasol/python-toolbox/issues/65): Added a Nox task for checking if the changelog got updated.
 * [#369](https://github.com/exasol/python-toolbox/issues/369): Removed option `-v` for `isort`
+* [#372](https://github.com/exasol/python-toolbox/issues/372): Added conversion from audited dependencies JSON to the expected GitHub issue format
diff --git a/exasol/toolbox/nox/_dependencies.py b/exasol/toolbox/nox/_dependencies.py
@@ -1,8 +1,14 @@
 from __future__ import annotations
 
+import argparse
+import json
 import subprocess
 import tempfile
 from dataclasses import dataclass
+from enum import (
+    Enum,
+    auto,
+)
 from inspect import cleandoc
 from json import loads
 from pathlib import Path
@@ -211,8 +217,87 @@ def _normalize_package_name(name: str) -> str:
     return template.format(heading=heading(), rows=rows)
 
 
-def _audit(session: Session) -> None:
-    session.run("poetry", "run", "pip-audit")
+class PipAuditFormat(Enum):
+    """Output formats that the audit task can request from ``pip-audit``.
+
+    The member name is the value handed to ``pip-audit -f``.
+    """
+
+    columns = auto()
+    json = auto()
+
+    @classmethod
+    def _missing_(cls, value):
+        """Map a string to the member named like its lower-cased form.
+
+        ``Enum`` calls this hook when a lookup by value fails. Returning
+        ``None`` lets the regular ``ValueError`` be raised.
+        """
+        if not isinstance(value, str):
+            return None
+        return cls.__members__.get(value.lower())
+
+    @classmethod
+    def name_tuple(cls) -> tuple:
+        """Return the names of all members in definition order."""
+        return tuple(cls.__members__)
+
+
+class Audit:
+    """Runs ``pip-audit`` via poetry and reports packages with vulnerabilities.
+
+    Options are read from the positional arguments of the nox session.
+    """
+
+    @staticmethod
+    def _filter_json_for_vulnerabilities(audit_json_bytes: bytes) -> dict:
+        """Filter a ``pip-audit`` JSON report for only packages with vulnerabilities.
+
+        Args:
+            audit_json_bytes: UTF-8 encoded JSON object with a top-level
+                ``dependencies`` list.
+
+        Returns:
+            A dict with the single key ``dependencies`` that lists ``name``,
+            ``version`` and ``vulns`` of every entry having at least one
+            vulnerability.
+        """
+        audit_dict = json.loads(audit_json_bytes.decode("utf-8"))
+        return {
+            "dependencies": [
+                {
+                    "name": entry["name"],
+                    "version": entry["version"],
+                    "vulns": entry["vulns"],
+                }
+                for entry in audit_dict["dependencies"]
+                # ``.get`` instead of indexing: an entry may lack "vulns"
+                # altogether (pip-audit reports dependencies it could not
+                # audit with only "name" and "skip_reason"); such entries
+                # carry no findings and must not raise KeyError.
+                if entry.get("vulns")
+            ]
+        }
+
+    @staticmethod
+    def _parse_format(session: Session) -> argparse.Namespace:
+        """Parse ``session.posargs`` into the audit options.
+
+        Returns:
+            A namespace with ``format`` (name of a ``PipAuditFormat`` member,
+            defaulting to ``columns``) and ``output`` (a ``Path`` or ``None``).
+        """
+        parser = argparse.ArgumentParser(
+            description="Audits dependencies for security vulnerabilities",
+            usage="nox -s dependency:audit -- -- [options]",
+        )
+        parser.add_argument(
+            "-f",
+            "--format",
+            type=str,
+            default=PipAuditFormat.columns.name,
+            help="Format to emit audit results in",
+            choices=PipAuditFormat.name_tuple(),
+        )
+        parser.add_argument(
+            "-o",
+            "--output",
+            type=Path,
+            default=None,
+            help="Output results to the given file",
+        )
+        return parser.parse_args(args=session.posargs)
+
+    def run(self, session: Session) -> None:
+        """Run ``pip-audit`` in the requested format.
+
+        ``columns``: the command is run through ``session.run`` and, if an
+        output file was given, ``pip-audit`` writes it itself via ``-o``.
+
+        ``json``: the command is run with its output captured, the report is
+        reduced to the vulnerable packages and then written to the output file
+        or printed to stdout as JSON. A non-zero exit code only produces a
+        warning.
+        """
+        args = self._parse_format(session)
+        audit_format = PipAuditFormat[args.format]
+
+        command = ["poetry", "run", "pip-audit", "-f", audit_format.name]
+        if audit_format == PipAuditFormat.columns:
+            if args.output:
+                # Keep the command a plain list of strings.
+                command.extend(["-o", str(args.output)])
+            session.run(*command)
+
+        elif audit_format == PipAuditFormat.json:
+            # Run outside of ``session.run`` so stdout can be captured and
+            # post-processed before anything is emitted.
+            output = subprocess.run(command, capture_output=True)
+            audit_json = self._filter_json_for_vulnerabilities(output.stdout)
+
+            if args.output:
+                with open(args.output, "w", encoding="utf-8") as file:
+                    json.dump(audit_json, file)
+            else:
+                # Serialise explicitly: printing the dict would emit a Python
+                # repr (single quotes), which JSON consumers cannot parse.
+                print(json.dumps(audit_json))
+
+            # NOTE(review): pip-audit presumably exits non-zero when it finds
+            # vulnerabilities, hence a warning rather than a failure - confirm.
+            if output.returncode != 0:
+                session.warn(
+                    f"Command {' '.join(command)} failed with exit code {output.returncode}",
+                )
 
 
 @nox.session(name="dependency:licenses", python=False)
@@ -227,4 +312,4 @@ def dependency_licenses(session: Session) -> None:
 @nox.session(name="dependency:audit", python=False)
 def audit(session: Session) -> None:
     """Check for known vulnerabilities"""
-    _audit(session)
+    # Output format and target file are read from the session's positional
+    # arguments, see Audit._parse_format.
+    Audit().run(session=session)
diff --git a/exasol/toolbox/tools/security.py b/exasol/toolbox/tools/security.py
@@ -16,7 +16,6 @@
 from functools import partial
 from inspect import cleandoc
 from pathlib import Path
-from typing import Tuple
 
 import typer
 
@@ -102,6 +101,63 @@ def from_maven(report: str) -> Iterable[Issue]:
             )
 
 
+class VulnerabilitySource(str, Enum):
+    """Identifier families of vulnerability references and their web links.
+
+    Each value is the upper-case prefix that identifiers of the family
+    start with.
+    """
+
+    CVE = "CVE"
+    CWE = "CWE"
+    GHSA = "GHSA"
+    PYSEC = "PYSEC"
+
+    def get_link(self, package: str, vuln_id: str) -> str:
+        """Build the URL under which ``vuln_id`` can be looked up.
+
+        Args:
+            package: name of the affected package; only used for PYSEC links.
+            vuln_id: identifier belonging to this source.
+
+        Returns:
+            The link for this source. For CWE the ``CWE-`` prefix is removed
+            from the upper-cased identifier, so only the remainder ends up
+            in the URL.
+        """
+        if self is VulnerabilitySource.CWE:
+            cwe_id = vuln_id.upper().replace(f"{VulnerabilitySource.CWE.value}-", "")
+            return f"https://cwe.mitre.org/data/definitions/{cwe_id}.html"
+
+        if self is VulnerabilitySource.CVE:
+            template = "https://nvd.nist.gov/vuln/detail/{vuln_id}"
+        elif self is VulnerabilitySource.GHSA:
+            template = "https://github.com/advisories/{vuln_id}"
+        else:
+            # Only PYSEC is left at this point.
+            template = "https://github.com/pypa/advisory-database/blob/main/vulns/{package}/{vuln_id}.yaml"
+        return template.format(package=package, vuln_id=vuln_id)
+
+
+def identify_pypi_references(
+    references: list[str], package_name: str
+) -> tuple[list[str], list[str], list[str]]:
+    """Sort vulnerability identifiers into CVEs, CWEs and reference links.
+
+    An identifier is matched case-insensitively against the prefixes in
+    ``VulnerabilitySource``; identifiers matching none of them are ignored.
+
+    Args:
+        references: identifiers such as ``CVE-...``, ``CWE-...``, ``GHSA-...``
+            or ``PYSEC-...``.
+        package_name: package the identifiers belong to, needed for the links.
+
+    Returns:
+        A tuple ``(cves, cwes, links)``: the CVE identifiers, the CWE
+        identifiers and one link for every recognised identifier.
+    """
+    ref_cves: list[str] = []
+    ref_cwes: list[str] = []
+    ref_links: list[str] = []
+    for reference in references:
+        normalized = reference.upper()
+        for source in VulnerabilitySource:
+            if not normalized.startswith(source.value):
+                continue
+            if source == VulnerabilitySource.CVE:
+                ref_cves.append(reference)
+            elif source == VulnerabilitySource.CWE:
+                ref_cwes.append(reference)
+            ref_links.append(source.get_link(package=package_name, vuln_id=reference))
+            # No prefix is the start of another one, so at most one source
+            # can match: stop looking once it is found.
+            break
+    return ref_cves, ref_cwes, ref_links
+
+
+def from_python(report: str) -> Iterable[Issue]:
+    """Convert a ``pip-audit`` JSON report into issues.
+
+    Args:
+        report: JSON text of an object with an optional ``dependencies`` list.
+
+    Yields:
+        One ``Issue`` per vulnerability of every dependency. ``cve`` and
+        ``cwe`` hold the first matching identifier or the string ``"None"``.
+    """
+    # Note: Consider adding warnings if there is the same cve with multiple coordinates
+    report_dict = json.loads(report)
+    for dependency in report_dict.get("dependencies", []):
+        # Entries without "vulns" (pip-audit reports dependencies it skipped
+        # that way) have nothing to convert and may lack "version" as well.
+        vulnerabilities = dependency.get("vulns") or []
+        if not vulnerabilities:
+            continue
+        package = dependency["name"]
+        coordinates = f"{package}:{dependency['version']}"
+        for v in vulnerabilities:
+            refs = [v["id"], *(v.get("aliases") or [])]
+            cves, cwes, links = identify_pypi_references(
+                references=refs, package_name=package
+            )
+            yield Issue(
+                cve=cves[0] if cves else "None",
+                cwe=cwes[0] if cwes else "None",
+                description=v["description"],
+                coordinates=coordinates,
+                references=tuple(links),
+            )
+
+
 @dataclass(frozen=True)
 class SecurityIssue:
     file_name: str
@@ -220,6 +276,7 @@ def create_security_issue(issue: Issue, project="") -> tuple[str, str]:
 
 class Format(str, Enum):
+    # Report formats selectable as input; the `actions` table further down
+    # maps each member to the function that converts such a report.
     Maven = "maven"
+    Python = "python"
 
 
 # pylint: disable=redefined-builtin
@@ -243,7 +300,13 @@ def _maven(infile):
             stdout(issue)
         raise typer.Exit(code=0)
 
-    actions = {Format.Maven: _maven}
+    def _python(infile):
+        """Convert a report read from ``infile`` and write each issue to stdout."""
+        report = infile.read()
+        for line in _issues_as_json_str(from_python(report)):
+            stdout(line)
+        raise typer.Exit(code=0)
+
+    actions = {Format.Maven: _maven, Format.Python: _python}
     action = actions[format]
     action(input_file)
 
diff --git a/test/unit/conftest.py b/test/unit/conftest.py
@@ -0,0 +1,90 @@
+from inspect import cleandoc
+
+import pytest
+
+from exasol.toolbox.tools import security
+
+
+@pytest.fixture(scope="session")
+def pip_audit_jinja2_issue():
+    """Expected ``Issue`` for the jinja2 3.1.5 vulnerability of the sample report."""
+    return security.Issue(
+        cve="CVE-2025-27516",
+        cwe="None",
+        # cleandoc only removes the indentation: the trailing space on each
+        # line stays part of the description.
+        description=cleandoc(
+            """An oversight in how the Jinja sandboxed environment interacts with the 
+            `|attr` filter allows an attacker that controls the content of a template 
+            to execute arbitrary Python code.  To exploit the vulnerability, an 
+            attacker needs to control the content of a template. Whether that is the 
+            case depends on the type of application using Jinja. This vulnerability 
+            impacts users of applications which execute untrusted templates. Jinja's 
+            sandbox does catch calls to `str.format` and ensures they don't escape the 
+            sandbox. However, it's possible to use the `|attr` filter to get a 
+            reference to a string's plain format method, bypassing the sandbox. After 
+            the fix, the `|attr` filter no longer bypasses the environment's attribute 
+            lookup."""
+        ),
+        coordinates="jinja2:3.1.5",
+        references=(
+            "https://github.com/advisories/GHSA-cpwx-vrp4-4pq7",
+            "https://nvd.nist.gov/vuln/detail/CVE-2025-27516",
+        ),
+    )
+
+
+@pytest.fixture(scope="session")
+def pip_audit_cryptography_issue():
+    """Expected ``Issue`` for the cryptography 43.0.3 vulnerability of the sample report."""
+    return security.Issue(
+        cve="CVE-2024-12797",
+        cwe="None",
+        # cleandoc only removes the indentation: the trailing space on each
+        # line stays part of the description.
+        description=cleandoc(
+            """pyca / cryptography's wheels include a statically linked copy of 
+            OpenSSL. The versions of OpenSSL included in  cryptography 42.0.0 - 44.0.0 
+            are vulnerable to a security issue. More details about the vulnerability 
+            itself can be found in https://openssl-library.org/news/secadv/20250211.txt. 
+            If you are building cryptography source(\"sdist\") then you are responsible 
+            for upgrading your copy of OpenSSL. Only users installing from wheels built 
+            by the cryptography project(i.e., those distributed on PyPI) need to update 
+            their cryptography versions."""
+        ),
+        coordinates="cryptography:43.0.3",
+        references=(
+            "https://github.com/advisories/GHSA-79v4-65xg-pq4g",
+            "https://nvd.nist.gov/vuln/detail/CVE-2024-12797",
+        ),
+    )
+
+
+@pytest.fixture(scope="session")
+def pip_audit_report(pip_audit_jinja2_issue, pip_audit_cryptography_issue):
+    """Sample report in the shape of ``pip-audit`` JSON output.
+
+    It is derived from the two issue fixtures, so the report and the issues
+    expected from it cannot drift apart.
+    """
+
+    def _entry(issue, advisory_id, fixed_in):
+        # Coordinates have the form "<name>:<version>".
+        name, version = issue.coordinates.split(":")
+        return {
+            "name": name,
+            "version": version,
+            "vulns": [
+                {
+                    "id": advisory_id,
+                    "fix_versions": [fixed_in],
+                    "aliases": [issue.cve],
+                    "description": issue.description,
+                }
+            ],
+        }
+
+    return {
+        "dependencies": [
+            _entry(pip_audit_jinja2_issue, "GHSA-cpwx-vrp4-4pq7", "3.1.6"),
+            _entry(pip_audit_cryptography_issue, "GHSA-79v4-65xg-pq4g", "44.0.1"),
+        ]
+    }
diff --git a/test/unit/dependencies_test.py b/test/unit/dependencies_test.py
@@ -1,6 +1,9 @@
+import json
+
 import pytest
 
 from exasol.toolbox.nox._dependencies import (
+    Audit,
     Package,
     _dependencies,
     _normalize,
@@ -157,3 +160,24 @@ def test_packages_from_json(json, expected):
 def test_packages_to_markdown(dependencies, packages, expected):
     actual = _packages_to_markdown(dependencies, packages)
     assert actual == expected
+
+
+class TestFilterJsonForVulnerabilities:
+    """Unit tests for ``Audit._filter_json_for_vulnerabilities``."""
+
+    @staticmethod
+    def test_no_vulnerability_returns_empty_list():
+        """A package with an empty ``vulns`` list is dropped from the result."""
+        clean_package = {"name": "alabaster", "version": "0.7.16", "vulns": []}
+        raw_report = json.dumps({"dependencies": [clean_package]}).encode("utf-8")
+
+        filtered = Audit._filter_json_for_vulnerabilities(raw_report)
+
+        assert filtered == {"dependencies": []}
+
+    @staticmethod
+    def test_vulnerabilities_returned_in_list(pip_audit_report):
+        """A report containing only vulnerable packages is returned unchanged."""
+        raw_report = json.dumps(pip_audit_report).encode("utf-8")
+
+        filtered = Audit._filter_json_for_vulnerabilities(raw_report)
+
+        assert filtered == pip_audit_report
diff --git a/test/unit/security_test.py b/test/unit/security_test.py
@@ -462,3 +462,60 @@ def test_from_json(json_file, expected):
         references=expected["references"],
     )
     assert list(actual) == [expected_issue]
+
+
+@pytest.mark.parametrize(
+    "reference, expected",
+    [
+        pytest.param(
+            "CVE-2025-27516",
+            (
+                ["CVE-2025-27516"],
+                [],
+                ["https://nvd.nist.gov/vuln/detail/CVE-2025-27516"],
+            ),
+            id="CVE_identified_with_link",
+        ),
+        pytest.param(
+            "CWE-611",
+            (
+                [],
+                ["CWE-611"],
+                ["https://cwe.mitre.org/data/definitions/611.html"],
+            ),
+            id="CWE_identified_with_link",
+        ),
+        pytest.param(
+            "GHSA-cpwx-vrp4-4pq7",
+            (
+                [],
+                [],
+                ["https://github.com/advisories/GHSA-cpwx-vrp4-4pq7"],
+            ),
+            id="GHSA_link",
+        ),
+        pytest.param(
+            "PYSEC-2025-9",
+            (
+                [],
+                [],
+                [
+                    "https://github.com/pypa/advisory-database/blob/main/vulns/dummy/PYSEC-2025-9.yaml"
+                ],
+            ),
+            id="PYSEC_link",
+        ),
+    ],
+)
+def test_identify_pypi_references(reference: str, expected):
+    """Each kind of identifier lands in the expected lists, with its link."""
+    cves, cwes, links = security.identify_pypi_references(
+        [reference], package_name="dummy"
+    )
+    assert (cves, cwes, links) == expected
+
+
+class TestFromPython:
+    """Unit tests for ``security.from_python``."""
+
+    @staticmethod
+    def test_no_vulnerability_returns_empty_list():
+        """An empty JSON object yields no issues."""
+        issues = set(security.from_python("{}"))
+        assert issues == set()
+
+    @staticmethod
+    def test_convert_vulnerability_to_issue(
+        pip_audit_report, pip_audit_jinja2_issue, pip_audit_cryptography_issue
+    ):
+        """Every vulnerability of the sample report becomes its expected issue."""
+        report_text = json.dumps(pip_audit_report)
+
+        issues = set(security.from_python(report_text))
+
+        assert issues == {pip_audit_jinja2_issue, pip_audit_cryptography_issue}