diff --git a/classes/create-spdx-2.2.bbclass b/classes/create-spdx-2.2.bbclass
new file mode 100644
index 0000000..d9b5a18
--- /dev/null
+++ b/classes/create-spdx-2.2.bbclass
@@ -0,0 +1,973 @@
+#
+# Copyright OpenEmbedded Contributors
+#
+# SPDX-License-Identifier: GPL-2.0-only
+#
+
+inherit spdx-common
+
+SPDX_VERSION = "2.2"
+
+SPDX_ORG ??= "OpenEmbedded ()"
+SPDX_SUPPLIER ??= "Organization: ${SPDX_ORG}"
+SPDX_SUPPLIER[doc] = "The SPDX PackageSupplier field for SPDX packages created from \
+    this recipe. For SPDX documents created using this class during the build, this \
+    is the contact information for the person or organization who is doing the \
+    build."
+
+SPDX_ARCHIVE_SOURCES ??= "0"
+SPDX_ARCHIVE_PACKAGED ??= "0"
+
+def get_namespace(d, name):
+    import uuid
+    namespace_uuid = uuid.uuid5(uuid.NAMESPACE_DNS, d.getVar("SPDX_UUID_NAMESPACE"))
+    return "%s/%s-%s" % (d.getVar("SPDX_NAMESPACE_PREFIX"), name, str(uuid.uuid5(namespace_uuid, name)))
+
+SPDX_PACKAGE_VERSION ??= "${PV}"
+SPDX_PACKAGE_VERSION[doc] = "The version of a package; used as versionInfo for the recipe, package, and image SPDX packages"
+
+def create_annotation(d, comment):
+    import oe_sbom.spdx
+    from datetime import datetime, timezone
+
+    creation_time = datetime.now(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
+    annotation = oe_sbom.spdx.SPDXAnnotation()
+    annotation.annotationDate = creation_time
+    annotation.annotationType = "OTHER"
+    annotation.annotator = "Tool: %s - %s" % (d.getVar("SPDX_TOOL_NAME"), d.getVar("SPDX_TOOL_VERSION"))
+    annotation.comment = comment
+    return annotation
+
+def recipe_spdx_is_native(d, recipe):
+    return any(a.annotationType == "OTHER" and
+               a.annotator == "Tool: %s - %s" % (d.getVar("SPDX_TOOL_NAME"), d.getVar("SPDX_TOOL_VERSION")) and
+               a.comment == "isNative" for a in recipe.annotations)
+
+def get_json_indent(d):
+    if d.getVar("SPDX_PRETTY") == "1":
+        return 2
+    return None
+
+
+def convert_license_to_spdx(lic, license_data, document, d, existing={}):
+    from pathlib import Path
+    import oe_sbom.spdx
+
+    extracted = {}
+
+    def add_extracted_license(ident, name):
+        nonlocal document
+
+        if name in extracted:
+            return
+
+        extracted_info = oe_sbom.spdx.SPDXExtractedLicensingInfo()
+        extracted_info.name = name
+        extracted_info.licenseId = ident
+        extracted_info.extractedText = None
+
+        if name == "PD":
+            # Special-case this.
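+            # "PD" is the OpenEmbedded name for public-domain code, which has
+            # no license text file to read, so a fixed description is used.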
+            extracted_info.extractedText = "Software released to the public domain"
+        else:
+            # Search for the license in COMMON_LICENSE_DIR and LICENSE_PATH
+            for directory in [d.getVar('COMMON_LICENSE_DIR')] + (d.getVar('LICENSE_PATH') or '').split():
+                try:
+                    with (Path(directory) / name).open(errors="replace") as f:
+                        extracted_info.extractedText = f.read()
+                        break
+                except FileNotFoundError:
+                    pass
+            if extracted_info.extractedText is None:
+                # If it's not SPDX or PD, then NO_GENERIC_LICENSE must be set
+                entry = d.getVarFlag('NO_GENERIC_LICENSE', name).split(';')
+                filename = entry[0]
+                params = {i.split('=')[0]: i.split('=')[1] for i in entry[1:] if '=' in i}
+                beginline = int(params.get('beginline', 1))
+                endline = params.get('endline', None)
+                if endline:
+                    endline = int(endline)
+                if filename:
+                    filename = d.expand("${S}/" + filename)
+                    with open(filename, errors="replace") as f:
+                        extracted_info.extractedText = "".join(line for idx, line in enumerate(f, 1) if beginline <= idx and idx <= (endline or idx))
+                else:
+                    bb.fatal("Cannot find any text for license %s" % name)
+
+        extracted[name] = extracted_info
+        document.hasExtractedLicensingInfos.append(extracted_info)
+
+    def convert(l):
+        from oe_sbom.spdx_license_map import spdx_license_map
+
+        if l == "(" or l == ")":
+            return l
+
+        if l == "&":
+            return "AND"
+
+        if l == "|":
+            return "OR"
+
+        if l == "CLOSED":
+            return "NONE"
+
+        spdx_license = d.getVarFlag("SPDXLICENSEMAP", l) or l
+        # Some licenses are deprecated and need to be converted to formal SPDX licenses
+        if spdx_license in spdx_license_map.keys():
+            return spdx_license_map[spdx_license]
+
+        if spdx_license in license_data["licenses"]:
+            return spdx_license
+
+        try:
+            spdx_license = existing[l]
+        except KeyError:
+            spdx_license = "LicenseRef-" + l
+            add_extracted_license(spdx_license, l)
+
+        return spdx_license
+
+    lic_split = lic.replace("(", " ( ").replace(")", " ) ").replace("|", " | ").replace("&", " & ").split()
+
+    return ' '.join(convert(l) for l in lic_split)
+
+def add_package_files(d, doc, spdx_pkg, topdir, get_spdxid, get_types, *, archive=None, ignore_dirs=[], ignore_top_level_dirs=[]):
+    from pathlib import Path
+    import oe_sbom.spdx
+    import oe_sbom.spdx_common
+    import hashlib
+
+    source_date_epoch = d.getVar("SOURCE_DATE_EPOCH")
+    if source_date_epoch:
+        source_date_epoch = int(source_date_epoch)
+
+    sha1s = []
+    spdx_files = []
+
+    file_counter = 1
+
+    check_compiled_sources = d.getVar("SPDX_INCLUDE_COMPILED_SOURCES") == "1"
+    if check_compiled_sources:
+        compiled_sources, types = oe_sbom.spdx_common.get_compiled_sources(d)
+        bb.debug(1, "Total compiled files: %d" % len(compiled_sources))
+    for subdir, dirs, files in os.walk(str(topdir)):
+        dirs[:] = [d for d in dirs if d not in ignore_dirs]
+        if subdir == str(topdir):
+            dirs[:] = [d for d in dirs if d not in ignore_top_level_dirs]
+
+        for file in files:
+            filepath = Path(subdir) / file
+            filename = str(filepath.relative_to(topdir))
+
+            if not filepath.is_symlink() and filepath.is_file():
+                # Check if file is compiled
+                if check_compiled_sources:
+                    if not oe_sbom.spdx_common.is_compiled_source(filename, compiled_sources, types):
+                        continue
+                spdx_file = oe_sbom.spdx.SPDXFile()
+                spdx_file.SPDXID = get_spdxid(file_counter)
+                for t in get_types(filepath):
+                    spdx_file.fileTypes.append(t)
+                spdx_file.fileName = filename
+
+                if archive is not None:
+                    with filepath.open("rb") as f:
+                        info = archive.gettarinfo(fileobj=f)
+                        info.name = filename
+                        info.uid = 0
+                        info.gid = 0
+                        info.uname = "root"
+                        info.gname = "root"
+
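+                        # Clamp timestamps to SOURCE_DATE_EPOCH so the
+                        # generated source archive is reproducible.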
+ if source_date_epoch is not None and info.mtime > source_date_epoch: + info.mtime = source_date_epoch + + archive.addfile(info, f) + + sha1 = bb.utils.sha1_file(filepath) + sha1s.append(sha1) + spdx_file.checksums.append(oe_sbom.spdx.SPDXChecksum( + algorithm="SHA1", + checksumValue=sha1, + )) + spdx_file.checksums.append(oe_sbom.spdx.SPDXChecksum( + algorithm="SHA256", + checksumValue=bb.utils.sha256_file(filepath), + )) + + if "SOURCE" in spdx_file.fileTypes: + extracted_lics = oe_sbom.spdx_common.extract_licenses(filepath) + if extracted_lics: + spdx_file.licenseInfoInFiles = extracted_lics + + doc.files.append(spdx_file) + doc.add_relationship(spdx_pkg, "CONTAINS", spdx_file) + spdx_pkg.hasFiles.append(spdx_file.SPDXID) + + spdx_files.append(spdx_file) + + file_counter += 1 + + sha1s.sort() + verifier = hashlib.sha1() + for v in sha1s: + verifier.update(v.encode("utf-8")) + spdx_pkg.packageVerificationCode.packageVerificationCodeValue = verifier.hexdigest() + + return spdx_files + + +def add_package_sources_from_debug(d, package_doc, spdx_package, package, package_files, sources): + from pathlib import Path + import hashlib + import oe.packagedata + import oe_sbom.spdx + + debug_search_paths = [ + Path(d.getVar('PKGD')), + Path(d.getVar('STAGING_DIR_TARGET')), + Path(d.getVar('STAGING_DIR_NATIVE')), + Path(d.getVar('STAGING_KERNEL_DIR')), + ] + + pkg_data = oe.packagedata.read_subpkgdata_extended(package, d) + + if pkg_data is None: + return + + for file_path, file_data in pkg_data["files_info"].items(): + if not "debugsrc" in file_data: + continue + + for pkg_file in package_files: + if file_path.lstrip("/") == pkg_file.fileName.lstrip("/"): + break + else: + bb.fatal("No package file found for %s in %s; SPDX found: %s" % (str(file_path), package, + " ".join(p.fileName for p in package_files))) + continue + + for debugsrc in file_data["debugsrc"]: + ref_id = "NOASSERTION" + for search in debug_search_paths: + if debugsrc.startswith("/usr/src/kernel"): + debugsrc_path = search / debugsrc.replace('/usr/src/kernel/', '') + else: + debugsrc_path = search / debugsrc.lstrip("/") + # We can only hash files below, skip directories, links, etc. 
+                if not os.path.isfile(debugsrc_path):
+                    continue
+
+                file_sha256 = bb.utils.sha256_file(debugsrc_path)
+
+                if file_sha256 in sources:
+                    source_file = sources[file_sha256]
+
+                    doc_ref = package_doc.find_external_document_ref(source_file.doc.documentNamespace)
+                    if doc_ref is None:
+                        doc_ref = oe_sbom.spdx.SPDXExternalDocumentRef()
+                        doc_ref.externalDocumentId = "DocumentRef-dependency-" + source_file.doc.name
+                        doc_ref.spdxDocument = source_file.doc.documentNamespace
+                        doc_ref.checksum.algorithm = "SHA1"
+                        doc_ref.checksum.checksumValue = source_file.doc_sha1
+                        package_doc.externalDocumentRefs.append(doc_ref)
+
+                    ref_id = "%s:%s" % (doc_ref.externalDocumentId, source_file.file.SPDXID)
+                else:
+                    bb.debug(1, "Debug source %s with SHA256 %s not found in any dependency" % (str(debugsrc_path), file_sha256))
+                break
+            else:
+                bb.debug(1, "Debug source %s not found" % debugsrc)
+
+            package_doc.add_relationship(pkg_file, "GENERATED_FROM", ref_id, comment=debugsrc)
+
+add_package_sources_from_debug[vardepsexclude] += "STAGING_KERNEL_DIR"
+
+def collect_dep_recipes(d, doc, spdx_recipe):
+    import json
+    from pathlib import Path
+    import oe_sbom.sbom
+    import oe_sbom.spdx
+    import oe_sbom.spdx_common
+
+    deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX"))
+    package_archs = d.getVar("SPDX_MULTILIB_SSTATE_ARCHS").split()
+    package_archs.reverse()
+
+    dep_recipes = []
+
+    deps = oe_sbom.spdx_common.get_spdx_deps(d)
+
+    for dep in deps:
+        # If this dependency is not calculated in the taskhash, skip it.
+        # Otherwise, it can result in broken links since this task won't
+        # rebuild and see the new SPDX ID if the dependency changes.
+        if not dep.in_taskhash:
+            continue
+
+        dep_recipe_path = oe_sbom.sbom.doc_find_by_hashfn(deploy_dir_spdx, package_archs, "recipe-" + dep.pn, dep.hashfn)
+        if not dep_recipe_path:
+            bb.fatal("Cannot find any SPDX file for recipe %s, %s" % (dep.pn, dep.hashfn))
+
+        spdx_dep_doc, spdx_dep_sha1 = oe_sbom.sbom.read_doc(dep_recipe_path)
+
+        for pkg in spdx_dep_doc.packages:
+            if pkg.name == dep.pn:
+                spdx_dep_recipe = pkg
+                break
+        else:
+            continue
+
+        dep_recipes.append(oe_sbom.sbom.DepRecipe(spdx_dep_doc, spdx_dep_sha1, spdx_dep_recipe))
+
+        dep_recipe_ref = oe_sbom.spdx.SPDXExternalDocumentRef()
+        dep_recipe_ref.externalDocumentId = "DocumentRef-dependency-" + spdx_dep_doc.name
+        dep_recipe_ref.spdxDocument = spdx_dep_doc.documentNamespace
+        dep_recipe_ref.checksum.algorithm = "SHA1"
+        dep_recipe_ref.checksum.checksumValue = spdx_dep_sha1
+
+        doc.externalDocumentRefs.append(dep_recipe_ref)
+
+        doc.add_relationship(
+            "%s:%s" % (dep_recipe_ref.externalDocumentId, spdx_dep_recipe.SPDXID),
+            "BUILD_DEPENDENCY_OF",
+            spdx_recipe
+        )
+
+    return dep_recipes
+
+collect_dep_recipes[vardepsexclude] = "SPDX_MULTILIB_SSTATE_ARCHS"
+
+def collect_dep_sources(d, dep_recipes):
+    import oe_sbom.sbom
+
+    sources = {}
+    for dep in dep_recipes:
+        # Don't collect sources from native recipes as they
+        # match non-native sources also.
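+        # (the file checksums are identical, so native entries would shadow
+        # the target entries in the sources map)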
+ if recipe_spdx_is_native(d, dep.recipe): + continue + recipe_files = set(dep.recipe.hasFiles) + + for spdx_file in dep.doc.files: + if spdx_file.SPDXID not in recipe_files: + continue + + if "SOURCE" in spdx_file.fileTypes: + for checksum in spdx_file.checksums: + if checksum.algorithm == "SHA256": + sources[checksum.checksumValue] = oe_sbom.sbom.DepSource(dep.doc, dep.doc_sha1, dep.recipe, spdx_file) + break + + return sources + +def add_download_packages(d, doc, recipe): + import os.path + from bb.fetch2 import decodeurl, CHECKSUM_LIST + import bb.process + import oe_sbom.spdx + import oe_sbom.sbom + + for download_idx, src_uri in enumerate(d.getVar('SRC_URI').split()): + f = bb.fetch2.FetchData(src_uri, d) + + package = oe_sbom.spdx.SPDXPackage() + package.name = "%s-source-%d" % (d.getVar("PN"), download_idx + 1) + package.SPDXID = oe_sbom.sbom.get_download_spdxid(d, download_idx + 1) + + if f.type == "file": + continue + + if f.method.supports_checksum(f): + for checksum_id in CHECKSUM_LIST: + if checksum_id.upper() not in oe_sbom.spdx.SPDXPackage.ALLOWED_CHECKSUMS: + continue + + expected_checksum = getattr(f, "%s_expected" % checksum_id) + if expected_checksum is None: + continue + + c = oe_sbom.spdx.SPDXChecksum() + c.algorithm = checksum_id.upper() + c.checksumValue = expected_checksum + package.checksums.append(c) + + package.downloadLocation = oe_sbom.spdx_common.fetch_data_to_uri(f, f.name) + doc.packages.append(package) + doc.add_relationship(doc, "DESCRIBES", package) + # In the future, we might be able to do more fancy dependencies, + # but this should be sufficient for now + doc.add_relationship(package, "BUILD_DEPENDENCY_OF", recipe) + +def get_license_list_version(license_data, d): + # Newer versions of the SPDX license list are SemVer ("MAJOR.MINOR.MICRO"), + # but SPDX 2 only uses "MAJOR.MINOR". + return ".".join(license_data["licenseListVersion"].split(".")[:2]) + + +python do_create_spdx() { + from datetime import datetime, timezone + import oe.packagedata + import oe_sbom.sbom + import oe_sbom.spdx + import oe_sbom.spdx_common + import uuid + from pathlib import Path + from contextlib import contextmanager + import oe_sbom.cve_check + + license_data = oe_sbom.spdx_common.load_spdx_license_data(d) + + @contextmanager + def optional_tarfile(name, guard, mode="w"): + import tarfile + + if guard: + name.parent.mkdir(parents=True, exist_ok=True) + with gzip.open(str(name), mode=mode + "b", compresslevel=3) as f: + with tarfile.open(fileobj=f, mode=mode + "|") as tf: + yield tf + else: + yield None + + + deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX")) + spdx_workdir = Path(d.getVar("SPDXWORK")) + include_sources = d.getVar("SPDX_INCLUDE_SOURCES") == "1" + archive_sources = d.getVar("SPDX_ARCHIVE_SOURCES") == "1" + archive_packaged = d.getVar("SPDX_ARCHIVE_PACKAGED") == "1" + pkg_arch = d.getVar("SSTATE_PKGARCH") + + creation_time = datetime.now(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") + + doc = oe_sbom.spdx.SPDXDocument() + + doc.name = "recipe-" + d.getVar("PN") + doc.documentNamespace = get_namespace(d, doc.name) + doc.creationInfo.created = creation_time + doc.creationInfo.comment = "This document was created by analyzing recipe files during the build." 
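+    # SPDX 2 uses a MAJOR.MINOR license list version, so any patch level is dropped here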
+    doc.creationInfo.licenseListVersion = get_license_list_version(license_data, d)
+    doc.creationInfo.creators.append("Tool: OpenEmbedded Core create-spdx.bbclass")
+    doc.creationInfo.creators.append("Organization: %s" % d.getVar("SPDX_ORG"))
+    doc.creationInfo.creators.append("Person: N/A ()")
+
+    recipe = oe_sbom.spdx.SPDXPackage()
+    recipe.name = d.getVar("PN")
+    recipe.versionInfo = d.getVar("SPDX_PACKAGE_VERSION")
+    recipe.SPDXID = oe_sbom.sbom.get_recipe_spdxid(d)
+    recipe.supplier = d.getVar("SPDX_SUPPLIER")
+    if bb.data.inherits_class("native", d) or bb.data.inherits_class("cross", d):
+        recipe.annotations.append(create_annotation(d, "isNative"))
+
+    homepage = d.getVar("HOMEPAGE")
+    if homepage:
+        recipe.homepage = homepage
+
+    license = d.getVar("LICENSE")
+    if license:
+        recipe.licenseDeclared = convert_license_to_spdx(license, license_data, doc, d)
+
+    summary = d.getVar("SUMMARY")
+    if summary:
+        recipe.summary = summary
+
+    description = d.getVar("DESCRIPTION")
+    if description:
+        recipe.description = description
+
+    if d.getVar("SPDX_CUSTOM_ANNOTATION_VARS"):
+        for var in d.getVar('SPDX_CUSTOM_ANNOTATION_VARS').split():
+            recipe.annotations.append(create_annotation(d, var + "=" + d.getVar(var)))
+
+    # Some CVEs may be patched during the build process without incrementing the version number,
+    # so querying for CVEs based on the CPE id can lead to false positives. To account for this,
+    # save the CVEs fixed by patches to the source information field in the SPDX.
+    patched_cves = oe_sbom.cve_check.get_patched_cves(d)
+    patched_cves = list(patched_cves)
+    patched_cves = ' '.join(patched_cves)
+    if patched_cves:
+        recipe.sourceInfo = "CVEs fixed: " + patched_cves
+
+    cpe_ids = oe_sbom.cve_check.get_cpe_ids(d.getVar("CVE_PRODUCT"), d.getVar("CVE_VERSION"))
+    if cpe_ids:
+        for cpe_id in cpe_ids:
+            cpe = oe_sbom.spdx.SPDXExternalReference()
+            cpe.referenceCategory = "SECURITY"
+            cpe.referenceType = "cpe23Type"
+            cpe.referenceLocator = cpe_id
+            recipe.externalRefs.append(cpe)
+
+    doc.packages.append(recipe)
+    doc.add_relationship(doc, "DESCRIBES", recipe)
+
+    add_download_packages(d, doc, recipe)
+
+    if oe_sbom.spdx_common.process_sources(d) and include_sources:
+        recipe_archive = deploy_dir_spdx / "recipes" / (doc.name + ".tar.gz")
+        with optional_tarfile(recipe_archive, archive_sources) as archive:
+            oe_sbom.spdx_common.get_patched_src(d)
+
+            add_package_files(
+                d,
+                doc,
+                recipe,
+                spdx_workdir,
+                lambda file_counter: "SPDXRef-SourceFile-%s-%d" % (d.getVar("PN"), file_counter),
+                lambda filepath: ["SOURCE"],
+                ignore_dirs=[".git"],
+                ignore_top_level_dirs=["temp"],
+                archive=archive,
+            )
+
+            if archive is not None:
+                recipe.packageFileName = str(recipe_archive.name)
+
+    dep_recipes = collect_dep_recipes(d, doc, recipe)
+
+    doc_sha1 = oe_sbom.sbom.write_doc(d, doc, pkg_arch, "recipes", indent=get_json_indent(d))
+    dep_recipes.append(oe_sbom.sbom.DepRecipe(doc, doc_sha1, recipe))
+
+    recipe_ref = oe_sbom.spdx.SPDXExternalDocumentRef()
+    recipe_ref.externalDocumentId = "DocumentRef-recipe-" + recipe.name
+    recipe_ref.spdxDocument = doc.documentNamespace
+    recipe_ref.checksum.algorithm = "SHA1"
+    recipe_ref.checksum.checksumValue = doc_sha1
+
+    sources = collect_dep_sources(d, dep_recipes)
+    found_licenses = {license.name:recipe_ref.externalDocumentId + ":" + license.licenseId for license in doc.hasExtractedLicensingInfos}
+
+    if not recipe_spdx_is_native(d, recipe):
+        bb.build.exec_func("read_subpackage_metadata", d)
+
+        pkgdest = Path(d.getVar("PKGDEST"))
+        for package in d.getVar("PACKAGES").split():
+            if not oe.packagedata.packaged(package, d):
+                continue
+
+            package_doc = oe_sbom.spdx.SPDXDocument()
+            pkg_name = d.getVar("PKG_%s" % package) or package
+            package_doc.name = pkg_name
+            package_doc.documentNamespace = get_namespace(d, package_doc.name)
+            package_doc.creationInfo.created = creation_time
+            package_doc.creationInfo.comment = "This document was created by analyzing packages created during the build."
+            package_doc.creationInfo.licenseListVersion = get_license_list_version(license_data, d)
+            package_doc.creationInfo.creators.append("Tool: OpenEmbedded Core create-spdx.bbclass")
+            package_doc.creationInfo.creators.append("Organization: %s" % d.getVar("SPDX_ORG"))
+            package_doc.creationInfo.creators.append("Person: N/A ()")
+            package_doc.externalDocumentRefs.append(recipe_ref)
+
+            package_license = d.getVar("LICENSE_%s" % package) or d.getVar("LICENSE")
+
+            spdx_package = oe_sbom.spdx.SPDXPackage()
+
+            spdx_package.SPDXID = oe_sbom.sbom.sanitize_spdx_id(oe_sbom.sbom.get_package_spdxid(pkg_name))
+            spdx_package.name = pkg_name
+            spdx_package.versionInfo = d.getVar("SPDX_PACKAGE_VERSION")
+            spdx_package.licenseDeclared = convert_license_to_spdx(package_license, license_data, package_doc, d, found_licenses)
+            spdx_package.supplier = d.getVar("SPDX_SUPPLIER")
+
+            package_doc.packages.append(spdx_package)
+
+            package_doc.add_relationship(spdx_package, "GENERATED_FROM", "%s:%s" % (recipe_ref.externalDocumentId, recipe.SPDXID))
+            package_doc.add_relationship(package_doc, "DESCRIBES", spdx_package)
+
+            package_archive = deploy_dir_spdx / "packages" / (package_doc.name + ".tar.gz")
+            with optional_tarfile(package_archive, archive_packaged) as archive:
+                package_files = add_package_files(
+                    d,
+                    package_doc,
+                    spdx_package,
+                    pkgdest / package,
+                    lambda file_counter: oe_sbom.sbom.sanitize_spdx_id(oe_sbom.sbom.get_packaged_file_spdxid(pkg_name, file_counter)),
+                    lambda filepath: ["BINARY"],
+                    ignore_top_level_dirs=['CONTROL', 'DEBIAN'],
+                    archive=archive,
+                )
+
+                if archive is not None:
+                    spdx_package.packageFileName = str(package_archive.name)
+
+            add_package_sources_from_debug(d, package_doc, spdx_package, package, package_files, sources)
+
+            oe_sbom.sbom.write_doc(d, package_doc, pkg_arch, "packages", indent=get_json_indent(d))
+}
+do_create_spdx[vardepsexclude] += "BB_NUMBER_THREADS"
+# NOTE: depending on do_unpack is a hack that is necessary to get its dependencies in order to archive the source
+addtask do_create_spdx after do_package do_packagedata do_unpack do_collect_spdx_deps before do_populate_sdk do_build do_rm_work
+
+SSTATETASKS += "do_create_spdx"
+do_create_spdx[sstate-inputdirs] = "${SPDXDEPLOY}"
+do_create_spdx[sstate-outputdirs] = "${DEPLOY_DIR_SPDX}"
+
+python do_create_spdx_setscene () {
+    sstate_setscene(d)
+}
+addtask do_create_spdx_setscene
+
+do_create_spdx[dirs] = "${SPDXWORK}"
+do_create_spdx[cleandirs] = "${SPDXDEPLOY} ${SPDXWORK}"
+do_create_spdx[depends] += " \
+    ${PATCHDEPENDENCY} \
+    ${@create_spdx_source_deps(d)} \
+"
+
+python do_create_runtime_spdx() {
+    from datetime import datetime, timezone
+    import oe_sbom.sbom
+    import oe_sbom.spdx
+    import oe_sbom.spdx_common
+    import oe.packagedata
+    from pathlib import Path
+
+    deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX"))
+    spdx_deploy = Path(d.getVar("SPDXRUNTIMEDEPLOY"))
+    is_native = bb.data.inherits_class("native", d) or bb.data.inherits_class("cross", d)
+
+    creation_time = datetime.now(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
+
+    license_data =
oe_sbom.spdx_common.load_spdx_license_data(d) + + providers = oe_sbom.spdx_common.collect_package_providers(d) + pkg_arch = d.getVar("SSTATE_PKGARCH") + package_archs = d.getVar("SPDX_MULTILIB_SSTATE_ARCHS").split() + package_archs.reverse() + + if not is_native: + bb.build.exec_func("read_subpackage_metadata", d) + + dep_package_cache = {} + + pkgdest = Path(d.getVar("PKGDEST")) + for package in d.getVar("PACKAGES").split(): + localdata = bb.data.createCopy(d) + pkg_name = d.getVar("PKG_%s" % package) or package + localdata.setVar("PKG", pkg_name) + localdata.setVar('OVERRIDES', d.getVar("OVERRIDES", False) + ":" + package) + + if not oe.packagedata.packaged(package, localdata): + continue + + pkg_spdx_path = oe_sbom.sbom.doc_path(deploy_dir_spdx, pkg_name, pkg_arch, "packages") + + package_doc, package_doc_sha1 = oe_sbom.sbom.read_doc(pkg_spdx_path) + + for p in package_doc.packages: + if p.name == pkg_name: + spdx_package = p + break + else: + bb.fatal("Package '%s' not found in %s" % (pkg_name, pkg_spdx_path)) + + runtime_doc = oe_sbom.spdx.SPDXDocument() + runtime_doc.name = "runtime-" + pkg_name + runtime_doc.documentNamespace = get_namespace(localdata, runtime_doc.name) + runtime_doc.creationInfo.created = creation_time + runtime_doc.creationInfo.comment = "This document was created by analyzing package runtime dependencies." + runtime_doc.creationInfo.licenseListVersion = get_license_list_version(license_data, d) + runtime_doc.creationInfo.creators.append("Tool: OpenEmbedded Core create-spdx.bbclass") + runtime_doc.creationInfo.creators.append("Organization: %s" % d.getVar("SPDX_ORG")) + runtime_doc.creationInfo.creators.append("Person: N/A ()") + + package_ref = oe_sbom.spdx.SPDXExternalDocumentRef() + package_ref.externalDocumentId = "DocumentRef-package-" + package + package_ref.spdxDocument = package_doc.documentNamespace + package_ref.checksum.algorithm = "SHA1" + package_ref.checksum.checksumValue = package_doc_sha1 + + runtime_doc.externalDocumentRefs.append(package_ref) + + runtime_doc.add_relationship( + runtime_doc.SPDXID, + "AMENDS", + "%s:%s" % (package_ref.externalDocumentId, package_doc.SPDXID) + ) + + deps = bb.utils.explode_dep_versions2(localdata.getVar("RDEPENDS") or "") + seen_deps = set() + for dep, _ in deps.items(): + if dep in seen_deps: + continue + + if dep not in providers: + continue + + (dep, dep_hashfn) = providers[dep] + + if not oe.packagedata.packaged(dep, localdata): + continue + + dep_pkg_data = oe.packagedata.read_subpkgdata_dict(dep, d) + dep_pkg = dep_pkg_data["PKG"] + + if dep in dep_package_cache: + (dep_spdx_package, dep_package_ref) = dep_package_cache[dep] + else: + dep_path = oe_sbom.sbom.doc_find_by_hashfn(deploy_dir_spdx, package_archs, dep_pkg, dep_hashfn) + if not dep_path: + bb.fatal("No SPDX file found for package %s, %s" % (dep_pkg, dep_hashfn)) + + spdx_dep_doc, spdx_dep_sha1 = oe_sbom.sbom.read_doc(dep_path) + + for pkg in spdx_dep_doc.packages: + if pkg.name == dep_pkg: + dep_spdx_package = pkg + break + else: + bb.fatal("Package '%s' not found in %s" % (dep_pkg, dep_path)) + + dep_package_ref = oe_sbom.spdx.SPDXExternalDocumentRef() + dep_package_ref.externalDocumentId = "DocumentRef-runtime-dependency-" + spdx_dep_doc.name + dep_package_ref.spdxDocument = spdx_dep_doc.documentNamespace + dep_package_ref.checksum.algorithm = "SHA1" + dep_package_ref.checksum.checksumValue = spdx_dep_sha1 + + dep_package_cache[dep] = (dep_spdx_package, dep_package_ref) + + runtime_doc.externalDocumentRefs.append(dep_package_ref) + + 
runtime_doc.add_relationship( + "%s:%s" % (dep_package_ref.externalDocumentId, dep_spdx_package.SPDXID), + "RUNTIME_DEPENDENCY_OF", + "%s:%s" % (package_ref.externalDocumentId, spdx_package.SPDXID) + ) + seen_deps.add(dep) + + oe_sbom.sbom.write_doc(d, runtime_doc, pkg_arch, "runtime", spdx_deploy, indent=get_json_indent(d)) +} + +do_create_runtime_spdx[vardepsexclude] += "OVERRIDES SPDX_MULTILIB_SSTATE_ARCHS" + +addtask do_create_runtime_spdx after do_create_spdx before do_build do_rm_work +SSTATETASKS += "do_create_runtime_spdx" +do_create_runtime_spdx[sstate-inputdirs] = "${SPDXRUNTIMEDEPLOY}" +do_create_runtime_spdx[sstate-outputdirs] = "${DEPLOY_DIR_SPDX}" + +python do_create_runtime_spdx_setscene () { + sstate_setscene(d) +} +addtask do_create_runtime_spdx_setscene + +do_create_runtime_spdx[dirs] = "${SPDXRUNTIMEDEPLOY}" +do_create_runtime_spdx[cleandirs] = "${SPDXRUNTIMEDEPLOY}" +do_create_runtime_spdx[rdeptask] = "do_create_spdx" + +do_rootfs[recrdeptask] += "do_create_spdx do_create_runtime_spdx" +do_rootfs[cleandirs] += "${SPDXIMAGEWORK}" + +ROOTFS_POSTUNINSTALL_COMMAND =+ "image_combine_spdx" + +do_populate_sdk[recrdeptask] += "do_create_spdx do_create_runtime_spdx" +do_populate_sdk[cleandirs] += "${SPDXSDKWORK}" +POPULATE_SDK_POST_HOST_COMMAND_append_task-populate-sdk = " sdk_host_combine_spdx" +POPULATE_SDK_POST_TARGET_COMMAND_append_task-populate-sdk = " sdk_target_combine_spdx" + +python image_combine_spdx() { + import os + import oe_sbom.sbom + from pathlib import Path + from oe.rootfs import image_list_installed_packages + + image_name = d.getVar("IMAGE_NAME") + image_link_name = d.getVar("IMAGE_LINK_NAME") + imgdeploydir = Path(d.getVar("IMGDEPLOYDIR")) + img_spdxid = oe_sbom.sbom.get_image_spdxid(image_name) + packages = image_list_installed_packages(d) + + combine_spdx(d, image_name, imgdeploydir, img_spdxid, packages, Path(d.getVar("SPDXIMAGEWORK"))) + + def make_image_link(target_path, suffix): + if image_link_name: + link = imgdeploydir / (image_link_name + suffix) + if link != target_path: + link.symlink_to(os.path.relpath(str(target_path), str(link.parent))) + + spdx_tar_path = imgdeploydir / (image_name + ".spdx.tar.gz") + make_image_link(spdx_tar_path, ".spdx.tar.gz") +} + +python sdk_host_combine_spdx() { + sdk_combine_spdx(d, "host") +} + +python sdk_target_combine_spdx() { + sdk_combine_spdx(d, "target") +} + +def sdk_combine_spdx(d, sdk_type): + import oe_sbom.sbom + from pathlib import Path + from oe.sdk import sdk_list_installed_packages + + sdk_name = d.getVar("TOOLCHAIN_OUTPUTNAME") + "-" + sdk_type + sdk_deploydir = Path(d.getVar("SDKDEPLOYDIR")) + sdk_spdxid = oe_sbom.sbom.get_sdk_spdxid(sdk_name) + sdk_packages = sdk_list_installed_packages(d, sdk_type == "target") + combine_spdx(d, sdk_name, sdk_deploydir, sdk_spdxid, sdk_packages, Path(d.getVar('SPDXSDKWORK'))) + +def combine_spdx(d, rootfs_name, rootfs_deploydir, rootfs_spdxid, packages, spdx_workdir): + import os + import oe_sbom.spdx + import oe_sbom.sbom + import oe_sbom.spdx_common + import io + import json + from datetime import timezone, datetime + from pathlib import Path + import tarfile + import gzip + + license_data = oe_sbom.spdx_common.load_spdx_license_data(d) + + providers = oe_sbom.spdx_common.collect_package_providers(d) + package_archs = d.getVar("SPDX_MULTILIB_SSTATE_ARCHS").split() + package_archs.reverse() + + creation_time = datetime.now(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") + deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX")) + source_date_epoch = 
d.getVar("SOURCE_DATE_EPOCH") + + doc = oe_sbom.spdx.SPDXDocument() + doc.name = rootfs_name + doc.documentNamespace = get_namespace(d, doc.name) + doc.creationInfo.created = creation_time + doc.creationInfo.comment = "This document was created by analyzing the source of the Yocto recipe during the build." + doc.creationInfo.licenseListVersion = get_license_list_version(license_data, d) + doc.creationInfo.creators.append("Tool: OpenEmbedded Core create-spdx.bbclass") + doc.creationInfo.creators.append("Organization: %s" % d.getVar("SPDX_ORG")) + doc.creationInfo.creators.append("Person: N/A ()") + + image = oe_sbom.spdx.SPDXPackage() + image.name = d.getVar("PN") + image.versionInfo = d.getVar("SPDX_PACKAGE_VERSION") + image.SPDXID = rootfs_spdxid + image.supplier = d.getVar("SPDX_SUPPLIER") + + doc.packages.append(image) + + if packages: + for name in sorted(packages.keys()): + if name not in providers: + bb.fatal("Unable to find SPDX provider for '%s'" % name) + + pkg_name, pkg_hashfn = providers[name] + + pkg_spdx_path = oe_sbom.sbom.doc_find_by_hashfn(deploy_dir_spdx, package_archs, pkg_name, pkg_hashfn) + if not pkg_spdx_path: + bb.fatal("No SPDX file found for package %s, %s" % (pkg_name, pkg_hashfn)) + + pkg_doc, pkg_doc_sha1 = oe_sbom.sbom.read_doc(pkg_spdx_path) + + for p in pkg_doc.packages: + if p.name == name: + pkg_ref = oe_sbom.spdx.SPDXExternalDocumentRef() + pkg_ref.externalDocumentId = "DocumentRef-%s" % pkg_doc.name + pkg_ref.spdxDocument = pkg_doc.documentNamespace + pkg_ref.checksum.algorithm = "SHA1" + pkg_ref.checksum.checksumValue = pkg_doc_sha1 + + doc.externalDocumentRefs.append(pkg_ref) + doc.add_relationship(image, "CONTAINS", "%s:%s" % (pkg_ref.externalDocumentId, p.SPDXID)) + break + else: + bb.fatal("Unable to find package with name '%s' in SPDX file %s" % (name, pkg_spdx_path)) + + runtime_spdx_path = oe_sbom.sbom.doc_find_by_hashfn(deploy_dir_spdx, package_archs, "runtime-" + name, pkg_hashfn) + if not runtime_spdx_path: + bb.fatal("No runtime SPDX document found for %s, %s" % (name, pkg_hashfn)) + + runtime_doc, runtime_doc_sha1 = oe_sbom.sbom.read_doc(runtime_spdx_path) + + runtime_ref = oe_sbom.spdx.SPDXExternalDocumentRef() + runtime_ref.externalDocumentId = "DocumentRef-%s" % runtime_doc.name + runtime_ref.spdxDocument = runtime_doc.documentNamespace + runtime_ref.checksum.algorithm = "SHA1" + runtime_ref.checksum.checksumValue = runtime_doc_sha1 + + # "OTHER" isn't ideal here, but I can't find a relationship that makes sense + doc.externalDocumentRefs.append(runtime_ref) + doc.add_relationship( + image, + "OTHER", + "%s:%s" % (runtime_ref.externalDocumentId, runtime_doc.SPDXID), + comment="Runtime dependencies for %s" % name + ) + bb.utils.mkdirhier(str(spdx_workdir)) + image_spdx_path = spdx_workdir / (rootfs_name + ".spdx.json") + + with image_spdx_path.open("wb") as f: + doc.to_json(f, sort_keys=True, indent=get_json_indent(d)) + + visited_docs = set() + + index = {"documents": []} + + spdx_tar_path = rootfs_deploydir / (rootfs_name + ".spdx.tar.gz") + with gzip.open(str(spdx_tar_path), "w", compresslevel=3) as f: + with tarfile.open(fileobj=f, mode="w|") as tar: + def collect_spdx_document(path): + nonlocal tar + nonlocal deploy_dir_spdx + nonlocal source_date_epoch + nonlocal index + + if path in visited_docs: + return + + visited_docs.add(path) + + with path.open("rb") as f: + doc, sha1 = oe_sbom.sbom.read_doc(f) + f.seek(0) + + if doc.documentNamespace in visited_docs: + return + + bb.note("Adding SPDX document %s" % path) + 
visited_docs.add(doc.documentNamespace) + info = tar.gettarinfo(fileobj=f) + + info.name = doc.name + ".spdx.json" + info.uid = 0 + info.gid = 0 + info.uname = "root" + info.gname = "root" + + if source_date_epoch is not None and info.mtime > int(source_date_epoch): + info.mtime = int(source_date_epoch) + + tar.addfile(info, f) + + index["documents"].append({ + "filename": info.name, + "documentNamespace": doc.documentNamespace, + "sha1": sha1, + }) + + for ref in doc.externalDocumentRefs: + ref_path = oe_sbom.sbom.doc_find_by_namespace(deploy_dir_spdx, package_archs, ref.spdxDocument) + if not ref_path: + bb.fatal("Cannot find any SPDX file for document %s" % ref.spdxDocument) + collect_spdx_document(ref_path) + + collect_spdx_document(image_spdx_path) + + index["documents"].sort(key=lambda x: x["filename"]) + + index_str = io.BytesIO(json.dumps( + index, + sort_keys=True, + indent=get_json_indent(d), + ).encode("utf-8")) + + info = tarfile.TarInfo() + info.name = "index.json" + info.size = len(index_str.getvalue()) + info.uid = 0 + info.gid = 0 + info.uname = "root" + info.gname = "root" + + tar.addfile(info, fileobj=index_str) + +combine_spdx[vardepsexclude] += "BB_NUMBER_THREADS SPDX_MULTILIB_SSTATE_ARCHS" diff --git a/classes/sls-create-spdx.bbclass b/classes/sls-create-spdx.bbclass deleted file mode 100644 index db06cfc..0000000 --- a/classes/sls-create-spdx.bbclass +++ /dev/null @@ -1,1234 +0,0 @@ -# -# Copyright OpenEmbedded Contributors -# -# SPDX-License-Identifier: GPL-2.0-only -# - -DEPLOY_DIR_SPDX ??= "${DEPLOY_DIR}/spdx/${MACHINE}" - -# The product name that the CVE database uses. Defaults to BPN, but may need to -# be overriden per recipe (for example tiff.bb sets CVE_PRODUCT=libtiff). -CVE_PRODUCT ??= "${BPN}" -CVE_VERSION ??= "${PV}" - -SPDXDIR ??= "${WORKDIR}/spdx" -SPDXDEPLOY = "${SPDXDIR}/deploy" -SPDXWORK = "${SPDXDIR}/work" - -SPDX_TOOL_NAME ??= "oe-spdx-creator" -SPDX_TOOL_VERSION ??= "1.0" - -SPDXRUNTIMEDEPLOY = "${SPDXDIR}/runtime-deploy" - -SPDX_INCLUDE_SOURCES ??= "0" -SPDX_INCLUDE_PACKAGED ??= "0" -SPDX_ARCHIVE_SOURCES ??= "0" -SPDX_ARCHIVE_PACKAGED ??= "0" - -SPDX_UUID_NAMESPACE ??= "sbom.openembedded.org" -SPDX_NAMESPACE_PREFIX ??= "http://spdx.org/spdxdoc" - -SPDX_LICENSES ??= "${WRSBOM_LAYER}/meta/files/spdx-licenses.json" - -BB_HASH_IGNORE_MISMATCH = '1' - -SPDX_BLACKLIST ??= "external-arm-toolchain" - -do_image_complete[depends] = "virtual/kernel:do_create_spdx" - -def get_doc_namespace(d, doc): - import uuid - namespace_uuid = uuid.uuid5(uuid.NAMESPACE_DNS, d.getVar("SPDX_UUID_NAMESPACE", True)) - return "%s/%s-%s" % (d.getVar("SPDX_NAMESPACE_PREFIX", True), doc.name, str(uuid.uuid5(namespace_uuid, doc.name))) - -def create_annotation(d, comment): - from datetime import datetime, timezone - import oe_sbom.spdx - - creation_time = datetime.now(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") - annotation = oe_sbom.spdx.SPDXAnnotation() - annotation.annotationDate = creation_time - annotation.annotationType = "OTHER" - annotation.annotator = "Tool: %s - %s" % (d.getVar("SPDX_TOOL_NAME", True), d.getVar("SPDX_TOOL_VERSION", True)) - annotation.comment = comment - return annotation - -def recipe_spdx_is_native(d, recipe): - return any(a.annotationType == "OTHER" and - a.annotator == "Tool: %s - %s" % (d.getVar("SPDX_TOOL_NAME", True), d.getVar("SPDX_TOOL_VERSION", True)) and - a.comment == "isNative" for a in recipe.annotations) - -def get_spdxdir_from_annotation(d, recipe): - for a in recipe.annotations: - if (a.annotationType == "OTHER" and - a.annotator == 
"Tool: %s - %s" % (d.getVar("SPDX_TOOL_NAME", True), d.getVar("SPDX_TOOL_VERSION", True)) and - a.comment.startswith("SPDXDIR:")): - return a.comment.replace('SPDXDIR:', '') - -def is_work_shared_spdx(d): - return bb.data.inherits_class('kernel', d) or ('work-shared' in d.getVar('WORKDIR', True)) - -python() { - import json - if d.getVar("SPDX_LICENSE_DATA", True): - return - - with open(d.getVar("SPDX_LICENSES", True), "r") as f: - data = json.load(f) - # Transform the license array to a dictionary - data["licenses"] = {l["licenseId"]: l for l in data["licenses"]} - d.setVar("SPDX_LICENSE_DATA", data) -} - -# json.load() may not load ${SPDX_LICENSES} *deterministically*, so ignoring its value when calculating signature for SPDX_LICENSE_DATA. -SPDX_LICENSE_DATA[vardepvalue] = "" - -# idstring shall only contain letters, numbers, . and/or -. -# replace other character with "-" -def clean_idstring(id): - import re - return re.sub(r'[^a-zA-Z0-9.-]', '-', id) - -def convert_license_to_spdx(lic, document, d, existing={}): - from pathlib import Path - import oe_sbom.spdx - - available_licenses = (d.getVar("AVAILABLE_LICENSES", True) or '').split() - license_data = d.getVar("SPDX_LICENSE_DATA", True) - extracted = {} - - def add_extracted_license(ident, name): - nonlocal document - - if name in extracted: - return - - extracted_info = oe_sbom.spdx.SPDXExtractedLicensingInfo() - extracted_info.name = name - extracted_info.licenseId = clean_idstring(ident) - extracted_info.extractedText = "None" - - if name == "PD": - # Special-case this. - extracted_info.extractedText = "Software released to the public domain" - elif name in available_licenses: - # This license can be found in COMMON_LICENSE_DIR or LICENSE_PATH - for directory in [d.getVar('COMMON_LICENSE_DIR', True)] + d.getVar('LICENSE_PATH', True).split(): - try: - with (Path(directory) / name).open(errors="replace") as f: - extracted_info.extractedText = f.read() - break - except FileNotFoundError: - pass - if extracted_info.extractedText == "None": - # Error out, as the license was in available_licenses so should - # be on disk somewhere. 
- bb.warn("Cannot find text for license %s" % name) - else: - # If it's not SPDX, or PD, or in available licenses, then NO_GENERIC_LICENSE must be set - filename = d.getVarFlag('NO_GENERIC_LICENSE', name, True) - if filename: - filename = d.expand("${S}/" + filename) - with open(filename, errors="replace") as f: - extracted_info.extractedText = f.read() - else: - bb.warn("Cannot find any text for license %s" % name) - - extracted[name] = extracted_info - document.hasExtractedLicensingInfos.append(extracted_info) - - def convert(l): - from oe_sbom.spdx_license_map import spdx_license_map - - if l == "(" or l == ")": - return l - - if l == "&": - return "AND" - - if l == "|": - return "OR" - - if l == "CLOSED": - return "NONE" - - if l in spdx_license_map.keys(): - spdx_license = spdx_license_map[l] - else: - spdx_license = l - - if spdx_license in license_data["licenses"]: - return spdx_license - - try: - spdx_license = existing[l] - except KeyError: - spdx_license = "LicenseRef-" + l - add_extracted_license(spdx_license, l) - - return spdx_license - - lic_split = lic.replace("(", " ( ").replace(")", " ) ").split() - - return ' '.join(convert(l) for l in lic_split) - -def get_distro_type(d): - if 'Yocto' in d.getVar("DISTRO_NAME", True): - return "yocto" - elif 'Wind River' in d.getVar("DISTRO_NAME", True): - return "wrlinux" - else: - wr_version = d.getVar("WRLINUX_VERSION", True) - if wr_version: - return "wrlinux" - else: - return "yocto" - -def get_final_pkg_name(d, package): - distro_ver = d.getVar("DISTRO_VERSION", True) - if 'Wind River' in d.getVar("DISTRO_NAME", True): - if (distro_ver.split('.')[0] == '10') and (int(distro_ver.split('.')[1]) > 21): - pkg_name = d.getVar("PKG:%s" % package, True) or package - elif (distro_ver.split('.')[0] == '10') and (distro_ver.split('.')[1] == '21') and (int(distro_ver.split('.')[3]) >= 5): - pkg_name = d.getVar("PKG:%s" % package, True) or package - else: - pkg_name = d.getVar("PKG_%s" % package, True) or package - else: - if d.getVar("BB_VERSION", True) > '1.50.0': - pkg_name = d.getVar("PKG:%s" % package, True) or package - else: - pkg_name = d.getVar("PKG_%s" % package, True) or package - return pkg_name - -def process_sources(d): - pn = d.getVar('PN', True) - assume_provided = (d.getVar("ASSUME_PROVIDED", True) or "").split() - if pn in assume_provided: - for p in d.getVar("PROVIDES", True).split(): - if p != pn: - pn = p - break - - # glibc-locale: do_fetch, do_unpack and do_patch tasks have been deleted, - # so avoid archiving source here. 
- if pn.startswith('glibc-locale'): - return False - if d.getVar('PN', True) == "libtool-cross": - return False - if d.getVar('PN', True) == "libgcc-initial": - return False - if d.getVar('PN', True) == "shadow-sysroot": - return False - - # We just archive gcc-source for all the gcc related recipes - if d.getVar('BPN', True) in ['gcc', 'libgcc']: - bb.debug(1, 'spdx: There is bug in scan of %s is, do nothing' % pn) - return False - - return True - - -def add_package_files(d, doc, spdx_pkg, topdir, get_spdxid, get_types, *, archive=None, ignore_dirs=[], ignore_top_level_dirs=[]): - from pathlib import Path - import oe_sbom.spdx - import hashlib - - source_date_epoch = d.getVar("SOURCE_DATE_EPOCH", True) - if source_date_epoch: - source_date_epoch = int(source_date_epoch) - - sha1s = [] - spdx_files = [] - - file_counter = 1 - for subdir, dirs, files in os.walk(str(topdir)): - dirs[:] = [d for d in dirs if d not in ignore_dirs] - if subdir == str(topdir): - dirs[:] = [d for d in dirs if d not in ignore_top_level_dirs] - - for file in files: - filepath = Path(subdir) / file - filename = str(filepath.relative_to(topdir)) - - if filepath.is_file() and not filepath.is_symlink(): - spdx_file = oe_sbom.spdx.SPDXFile() - spdx_file.SPDXID = get_spdxid(file_counter) - for t in get_types(filepath): - spdx_file.fileTypes.append(t) - spdx_file.fileName = filename - - if archive is not None: - with filepath.open("rb") as f: - info = archive.gettarinfo(fileobj=f) - info.name = filename - info.uid = 0 - info.gid = 0 - info.uname = "root" - info.gname = "root" - - if source_date_epoch is not None and info.mtime > source_date_epoch: - info.mtime = source_date_epoch - - archive.addfile(info, f) - - sha1 = bb.utils.sha1_file(str(filepath)) - sha1s.append(sha1) - spdx_file.checksums.append(oe_sbom.spdx.SPDXChecksum( - algorithm="SHA1", - checksumValue=sha1, - )) - spdx_file.checksums.append(oe_sbom.spdx.SPDXChecksum( - algorithm="SHA256", - checksumValue=bb.utils.sha256_file(str(filepath)), - )) - - doc.files.append(spdx_file) - doc.add_relationship(spdx_pkg, "CONTAINS", spdx_file) - spdx_pkg.hasFiles.append(spdx_file.SPDXID) - - spdx_files.append(spdx_file) - - file_counter += 1 - - sha1s.sort() - verifier = hashlib.sha1() - for v in sha1s: - verifier.update(v.encode("utf-8")) - spdx_pkg.packageVerificationCode.packageVerificationCodeValue = verifier.hexdigest() - - return spdx_files - -def add_package_sources_from_debug(d, package_doc, spdx_package, package, package_files, sources, search_paths): - from pathlib import Path - import hashlib - import oe_sbom.packagedata - import oe_sbom.spdx - - debug_search_paths = [ - Path(d.getVar('PKGD', True)), - Path(d.getVar('STAGING_DIR_TARGET', True)), - Path(d.getVar('STAGING_DIR_NATIVE', True)), - Path(d.getVar('STAGING_KERNEL_DIR', True)), - ] - topdir = d.getVar('TOPDIR', True) - for path in search_paths: - debug_search_paths.append(Path(topdir + '/' + path)) - - pkg_data = oe_sbom.packagedata.read_subpkgdata_extended(package, d) - - if pkg_data is None: - return - - for file_path, file_data in pkg_data["files_info"].items(): - if not "debugsrc" in file_data: - continue - - for pkg_file in package_files: - if file_path.lstrip("/") == pkg_file.fileName.lstrip("/"): - break - else: - bb.warn("No package file found for %s" % str(file_path)) - continue - - for debugsrc in file_data["debugsrc"]: - ref_id = "NOASSERTION" - for search in debug_search_paths: - if debugsrc.startswith("/usr/src/kernel"): - debugsrc_path = search / debugsrc.replace('/usr/src/kernel/', '') - 
else: - debugsrc_path = search / debugsrc.lstrip("/") - if not debugsrc_path.exists(): - continue - - file_sha256 = bb.utils.sha256_file(debugsrc_path) - - if file_sha256 in sources: - source_file = sources[file_sha256] - - doc_ref = package_doc.find_external_document_ref(source_file.doc.documentNamespace) - if doc_ref is None: - doc_ref = oe_sbom.spdx.SPDXExternalDocumentRef() - doc_ref.externalDocumentId = "DocumentRef-dependency-" + source_file.doc.name - doc_ref.spdxDocument = source_file.doc.documentNamespace - doc_ref.checksum.algorithm = "SHA1" - doc_ref.checksum.checksumValue = source_file.doc_sha1 - package_doc.externalDocumentRefs.append(doc_ref) - - ref_id = "%s:%s" % (doc_ref.externalDocumentId, source_file.file.SPDXID) - else: - bb.debug(1, "Debug source %s with SHA256 %s not found in any dependency" % (str(debugsrc_path), file_sha256)) - break - else: - bb.debug(1, "Debug source %s not found in sources at all" % (debugsrc)) - - package_doc.add_relationship(pkg_file, "GENERATED_FROM", ref_id, comment=debugsrc) - -def spdx_deploy_path(d, subdir, name): - import os.path - import glob - - multiconfig = d.getVar('BBMULTICONFIG', True) - deploy_dir_spdx = d.getVar('DEPLOY_DIR_SPDX', True) - - if multiconfig == '': - return os.path.join(deploy_dir_spdx, subdir, name) - - try: - deploy_path = glob.glob(os.path.join(deploy_dir_spdx, "..", "*", subdir, name))[0] - except IndexError: - # FIXME: This should not happen. - deploy_path = "" - - return deploy_path - -def collect_dep_recipes(d, doc, spdx_recipe): - from pathlib import Path - import oe_sbom.sbom - import oe_sbom.spdx - - dep_recipes = [] - taskdepdata = d.getVar("BB_TASKDEPDATA", False) - deps = sorted(set( - dep[0] for dep in taskdepdata.values() if - dep[1] == "do_create_spdx" and dep[0] != d.getVar("PN", True) - )) - for dep_pn in deps: - dep_recipe_path = spdx_deploy_path(d, "recipes", ("recipe-%s.spdx.json" % dep_pn)) - if dep_recipe_path == '': - # FIXME: This should not happen. - continue - dep_recipe_path = Path(dep_recipe_path) - - spdx_dep_doc, spdx_dep_sha1 = oe_sbom.sbom.read_doc(dep_recipe_path) - - for pkg in spdx_dep_doc.packages: - if pkg.name == dep_pn: - spdx_dep_recipe = pkg - break - else: - continue - - dep_recipes.append(oe_sbom.sbom.DepRecipe(spdx_dep_doc, spdx_dep_sha1, spdx_dep_recipe)) - - dep_recipe_ref = oe_sbom.spdx.SPDXExternalDocumentRef() - dep_recipe_ref.externalDocumentId = "DocumentRef-dependency-" + spdx_dep_doc.name - dep_recipe_ref.spdxDocument = spdx_dep_doc.documentNamespace - dep_recipe_ref.checksum.algorithm = "SHA1" - dep_recipe_ref.checksum.checksumValue = spdx_dep_sha1 - - doc.externalDocumentRefs.append(dep_recipe_ref) - - doc.add_relationship( - "%s:%s" % (dep_recipe_ref.externalDocumentId, spdx_dep_recipe.SPDXID), - "BUILD_DEPENDENCY_OF", - spdx_recipe - ) - - return dep_recipes - -collect_dep_recipes[vardepsexclude] += "BB_TASKDEPDATA" - - -def collect_dep_sources(d, dep_recipes): - import oe_sbom.sbom - - search_paths = [] - sources = {} - for dep in dep_recipes: - # Don't collect sources from native recipes as they - # match non-native sources also. 
- if recipe_spdx_is_native(d, dep.recipe): - continue - recipe_files = set(dep.recipe.hasFiles) - - for spdx_file in dep.doc.files: - if spdx_file.SPDXID not in recipe_files: - continue - - if "SOURCE" in spdx_file.fileTypes: - for checksum in spdx_file.checksums: - if checksum.algorithm == "SHA256": - sources[checksum.checksumValue] = oe_sbom.sbom.DepSource(dep.doc, dep.doc_sha1, dep.recipe, spdx_file) - break - search_paths.append(get_spdxdir_from_annotation(d, dep.recipe)) - - return sources, search_paths - - -python do_create_spdx() { - from datetime import datetime, timezone - import oe_sbom.sbom - import oe_sbom.spdx - import oe_sbom.packagedata - import uuid - from pathlib import Path - from contextlib import contextmanager - import oe_sbom.cve_check - - @contextmanager - def optional_tarfile(name, guard, mode="w:xz"): - import tarfile - - if guard: - name.parent.mkdir(parents=True, exist_ok=True) - with tarfile.open(name=name, mode=mode) as f: - yield f - else: - yield None - - def get_version_from_PV(PV_str): - if '+git' in PV_str: - return PV_str.split('+git')[0] - else: - return PV_str - - def get_packagegroup(): - package_bb = d.getVar("FILE", True) - if 'recipes-' in package_bb: - packagegroup = package_bb.split('recipes-')[1].split('/')[0] - return packagegroup - else: - return 'None' - - deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX", True)) - top_dir = Path(d.getVar("TOPDIR", True)) - spdx_workdir = Path(d.getVar("SPDXWORK", True)) - include_packaged = d.getVar("SPDX_INCLUDE_PACKAGED", True) == "1" - include_sources = d.getVar("SPDX_INCLUDE_SOURCES", True) == "1" - archive_sources = d.getVar("SPDX_ARCHIVE_SOURCES", True) == "1" - archive_packaged = d.getVar("SPDX_ARCHIVE_PACKAGED", True) == "1" - - creation_time = datetime.now(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") - - doc = oe_sbom.spdx.SPDXDocument() - - doc.name = "recipe-" + d.getVar("PN", True) - doc.documentNamespace = get_doc_namespace(d, doc) - doc.creationInfo.created = creation_time - doc.creationInfo.comment = "This document was created by analyzing recipe files during the build." 
- doc.creationInfo.licenseListVersion = d.getVar("SPDX_LICENSE_DATA", True)["licenseListVersion"] - doc.creationInfo.creators.append("Tool: meta-wr-sbom") - doc.creationInfo.creators.append("Organization: Wind River Systems, Inc.") - - recipe = oe_sbom.spdx.SPDXPackage() - recipe.name = d.getVar("PN", True) - if d.getVar("SLS_EXTEND_VERSION", True): - recipe.versionInfo = d.getVar("PV", True) + "-" + d.getVar("SLS_EXTEND_VERSION", True) - else: - recipe.versionInfo = d.getVar("PV", True) - recipe.SPDXID = oe_sbom.sbom.get_recipe_spdxid(d) - recipe.comment = " PackageGroup: " + get_packagegroup() - if bb.data.inherits_class("native", d) or bb.data.inherits_class("cross", d): - recipe.annotations.append(create_annotation(d, "isNative")) - recipe.annotations.append(create_annotation(d, "SPDXDIR:%s" % d.getVar("PKGD", True).replace(str(top_dir) +'/', ''))) - - - for s in d.getVar('SRC_URI', True).split(): - if not s.startswith("file://"): - recipe.downloadLocation = s - break - else: - recipe.downloadLocation = "NOASSERTION" - - homepage = d.getVar("HOMEPAGE", True) - if homepage: - recipe.homepage = homepage - - license = d.getVar("LICENSE", True) - if license: - recipe.licenseDeclared = convert_license_to_spdx(license, doc, d) - - summary = d.getVar("SUMMARY", True) - if summary: - recipe.summary = summary - - description = d.getVar("DESCRIPTION", True) - if description: - recipe.description = description - - # Some CVEs may be patched during the build process without incrementing the version number, - # so querying for CVEs based on the CPE id can lead to false positives. To account for this, - # save the CVEs fixed by patches to source information field in the SPDX. - patched_cves = oe_sbom.cve_check.get_patched_cves(d) - patched_cves = list(patched_cves) - patched_cves = ' '.join(patched_cves) - if patched_cves: - recipe.sourceInfo = "CVEs fixed: " + patched_cves - - cpe_ids = oe_sbom.cve_check.get_cpe_ids(d.getVar("CVE_PRODUCT", True), d.getVar("CVE_VERSION", True)) - if cpe_ids: - for cpe_id in cpe_ids: - cpe = oe_sbom.spdx.SPDXExternalReference() - cpe.referenceCategory = "SECURITY" - cpe.referenceType = "cpe23Type" - cpe.referenceLocator = cpe_id - recipe.externalRefs.append(cpe) - - doc.packages.append(recipe) - doc.add_relationship(doc, "DESCRIBES", recipe) - - if process_sources(d) and include_sources: - recipe_archive = deploy_dir_spdx / "recipes" / (doc.name + ".tar.xz") - with optional_tarfile(recipe_archive, archive_sources) as archive: - spdx_get_src(d) - - add_package_files( - d, - doc, - recipe, - spdx_workdir, - lambda file_counter: "SPDXRef-SourceFile-%s-%d" % (d.getVar("PN", True), file_counter), - lambda filepath: ["SOURCE"], - ignore_dirs=[".git"], - ignore_top_level_dirs=["temp"], - archive=archive, - ) - - if archive is not None: - recipe.packageFileName = str(recipe_archive.name) - - dep_recipes = collect_dep_recipes(d, doc, recipe) - - doc_sha1 = oe_sbom.sbom.write_doc(d, doc, "recipes") - dep_recipes.append(oe_sbom.sbom.DepRecipe(doc, doc_sha1, recipe)) - - recipe_ref = oe_sbom.spdx.SPDXExternalDocumentRef() - recipe_ref.externalDocumentId = "DocumentRef-recipe-" + recipe.name - recipe_ref.spdxDocument = doc.documentNamespace - recipe_ref.checksum.algorithm = "SHA1" - recipe_ref.checksum.checksumValue = doc_sha1 - - sources, search_paths = collect_dep_sources(d, dep_recipes) - found_licenses = {license.name:recipe_ref.externalDocumentId + ":" + license.licenseId for license in doc.hasExtractedLicensingInfos} - - if not recipe_spdx_is_native(d, recipe): - 
bb.build.exec_func("read_subpackage_metadata", d) - - pkgdest = Path(d.getVar("PKGDEST", True)) - for package in d.getVar("PACKAGES", True).split(): - if not oe_sbom.packagedata.packaged(package, d): - continue - - package_doc = oe_sbom.spdx.SPDXDocument() - - pkg_name = get_final_pkg_name(d, package) - - package_doc.name = pkg_name - package_doc.documentNamespace = get_doc_namespace(d, package_doc) - package_doc.creationInfo.created = creation_time - package_doc.creationInfo.comment = "This document was created by analyzing packages created during the build." - package_doc.creationInfo.licenseListVersion = d.getVar("SPDX_LICENSE_DATA", True)["licenseListVersion"] - package_doc.creationInfo.creators.append("Tool: meta-wr-sbom") - package_doc.creationInfo.creators.append("Organization: Wind River Systems, Inc.") - package_doc.externalDocumentRefs.append(recipe_ref) - - package_license = d.getVar("LICENSE:%s" % package, True) or d.getVar("LICENSE", True) - - spdx_package = oe_sbom.spdx.SPDXPackage() - - spdx_package.SPDXID = oe_sbom.sbom.get_package_spdxid(pkg_name) - spdx_package.name = pkg_name - spdx_package.versionInfo = d.getVar("PV", True) - spdx_package.licenseDeclared = convert_license_to_spdx(package_license, package_doc, d, found_licenses) - - package_doc.packages.append(spdx_package) - - package_doc.add_relationship(spdx_package, "GENERATED_FROM", "%s:%s" % (recipe_ref.externalDocumentId, recipe.SPDXID)) - package_doc.add_relationship(package_doc, "DESCRIBES", spdx_package) - - package_archive = deploy_dir_spdx / "packages" / (package_doc.name + ".tar.xz") - with optional_tarfile(package_archive, archive_packaged) as archive: - package_files = add_package_files( - d, - package_doc, - spdx_package, - str(pkgdest / package), - lambda file_counter: oe_sbom.sbom.get_packaged_file_spdxid(pkg_name, file_counter), - lambda filepath: ["BINARY"], - ignore_dirs=['CONTROL', 'DEBIAN'], - archive=archive, - ) - - if archive is not None: - spdx_package.packageFileName = str(package_archive.name) - - add_package_sources_from_debug(d, package_doc, spdx_package, package, package_files, sources, search_paths) - - oe_sbom.sbom.write_doc(d, package_doc, "packages") -} -# NOTE: depending on do_unpack is a hack that is necessary to get it's dependencies for archive the source -addtask do_create_spdx after do_package do_packagedata do_unpack before do_build do_rm_work - -SSTATETASKS += "do_create_spdx" -do_create_spdx[sstate-inputdirs] = "${SPDXDEPLOY}" -do_create_spdx[sstate-outputdirs] = "${DEPLOY_DIR_SPDX}" -do_create_spdx[sstate-lockfile] = "${WORKDIR}/create_spdx_sstate.lock" - -python do_create_spdx_setscene () { - sstate_setscene(d) -} -addtask do_create_spdx_setscene - -do_create_spdx[dirs] = "${SPDXWORK}" -do_create_spdx[cleandirs] = "${SPDXDEPLOY} ${SPDXWORK}" -do_create_spdx[depends] += "${PATCHDEPENDENCY}" -do_create_spdx[deptask] = "do_create_spdx" -do_create_spdx[lockfiles] = "${SPDXWORK}/create_spdx.lock" - -def spdx_disable_task(d, task): - pn = d.getVar('PN', True) - is_native = bb.data.inherits_class('native', d) or pn.endswith('-native') - is_blocked = pn in d.getVar('SPDX_BLACKLIST', True).split() - current_mc = d.getVar('BB_CURRENT_MC', True) - - if (is_native and current_mc != '') or is_blocked: - d.setVarFlag(task, 'noexec', '1') - -python () { - spdx_disable_task(d, 'do_create_spdx') -} - -def collect_package_providers(d): - from pathlib import Path - import oe_sbom.sbom - import oe_sbom.spdx - import json - - deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX", True)) - - 
providers = {} - - taskdepdata = d.getVar("BB_TASKDEPDATA", False) - deps = sorted(set( - dep[0] for dep in taskdepdata.values() if dep[0] != d.getVar("PN", True) - )) - deps.append(d.getVar("PN", True)) - - for dep_pn in deps: - recipe_data = oe_sbom.packagedata.read_pkgdata(dep_pn, d) - - for pkg in recipe_data.get("PACKAGES", "").split(): - - pkg_data = oe_sbom.packagedata.read_subpkgdata_dict(pkg, d) - rprovides = set(n for n, _ in bb.utils.explode_dep_versions2(pkg_data.get("RPROVIDES", "")).items()) - rprovides.add(pkg) - - for r in rprovides: - providers[r] = pkg - - return providers - -collect_package_providers[vardepsexclude] += "BB_TASKDEPDATA" - -python do_create_runtime_spdx() { - from datetime import datetime, timezone - import oe_sbom.sbom - import oe_sbom.spdx - import oe_sbom.packagedata - from pathlib import Path - - deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX", True)) - spdx_deploy = Path(d.getVar("SPDXRUNTIMEDEPLOY", True)) - is_native = bb.data.inherits_class("native", d) or bb.data.inherits_class("cross", d) - - creation_time = datetime.now(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") - - providers = collect_package_providers(d) - - if not is_native: - bb.build.exec_func("read_subpackage_metadata", d) - - dep_package_cache = {} - - pkgdest = Path(d.getVar("PKGDEST", True)) - for package in d.getVar("PACKAGES", True).split(): - localdata = bb.data.createCopy(d) - - pkg_name = get_final_pkg_name(d, package) - - localdata.setVar("PKG", pkg_name) - localdata.setVar('OVERRIDES', d.getVar("OVERRIDES", False) + ":" + package) - - if not oe_sbom.packagedata.packaged(package, localdata): - continue - - pkg_spdx_path = deploy_dir_spdx / "packages" / (pkg_name + ".spdx.json") - - package_doc, package_doc_sha1 = oe_sbom.sbom.read_doc(pkg_spdx_path) - - for p in package_doc.packages: - if p.name == pkg_name: - spdx_package = p - break - else: - bb.warn("Package '%s' not found in %s" % (pkg_name, pkg_spdx_path)) - spdx_package = oe_sbom.spdx.SPDXPackage() - spdx_package.SPDXID = ' ' - - runtime_doc = oe_sbom.spdx.SPDXDocument() - runtime_doc.name = "runtime-" + pkg_name - runtime_doc.documentNamespace = get_doc_namespace(localdata, runtime_doc) - runtime_doc.creationInfo.created = creation_time - runtime_doc.creationInfo.comment = "This document was created by analyzing package runtime dependencies." 
- runtime_doc.creationInfo.licenseListVersion = d.getVar("SPDX_LICENSE_DATA", True)["licenseListVersion"] - runtime_doc.creationInfo.creators.append("Tool: meta-wr-sbom") - runtime_doc.creationInfo.creators.append("Organization: Wind River Systems, Inc.") - - package_ref = oe_sbom.spdx.SPDXExternalDocumentRef() - package_ref.externalDocumentId = "DocumentRef-package-" + package - package_ref.spdxDocument = package_doc.documentNamespace - package_ref.checksum.algorithm = "SHA1" - package_ref.checksum.checksumValue = package_doc_sha1 - - runtime_doc.externalDocumentRefs.append(package_ref) - - runtime_doc.add_relationship( - runtime_doc.SPDXID, - "AMENDS", - "%s:%s" % (package_ref.externalDocumentId, package_doc.SPDXID) - ) - - deps = bb.utils.explode_dep_versions2(localdata.getVar("RDEPENDS", True) or "") - seen_deps = set() - for dep, _ in deps.items(): - if dep in seen_deps: - continue - - if dep not in providers.keys(): - continue - - dep = providers[dep] - - if not oe_sbom.packagedata.packaged(dep, localdata): - continue - - dep_pkg_data = oe_sbom.packagedata.read_subpkgdata_dict(dep, d) - dep_pkg = dep_pkg_data["PKG"] - - if dep in dep_package_cache: - (dep_spdx_package, dep_package_ref) = dep_package_cache[dep] - else: - dep_path = deploy_dir_spdx / "packages" / ("%s.spdx.json" % dep_pkg) - - if not os.path.exists(str(dep_path)): - continue - - spdx_dep_doc, spdx_dep_sha1 = oe_sbom.sbom.read_doc(dep_path) - - for pkg in spdx_dep_doc.packages: - if pkg.name == dep_pkg: - dep_spdx_package = pkg - break - else: - bb.warn("Package '%s' not found in %s" % (dep_pkg, dep_path)) - dep_spdx_package = oe_sbom.spdx.SPDXPackage() - dep_spdx_package.SPDXID = ' ' - - dep_package_ref = oe_sbom.spdx.SPDXExternalDocumentRef() - dep_package_ref.externalDocumentId = "DocumentRef-runtime-dependency-" + spdx_dep_doc.name - dep_package_ref.spdxDocument = spdx_dep_doc.documentNamespace - dep_package_ref.checksum.algorithm = "SHA1" - dep_package_ref.checksum.checksumValue = spdx_dep_sha1 - - dep_package_cache[dep] = (dep_spdx_package, dep_package_ref) - - runtime_doc.externalDocumentRefs.append(dep_package_ref) - - runtime_doc.add_relationship( - "%s:%s" % (dep_package_ref.externalDocumentId, dep_spdx_package.SPDXID), - "RUNTIME_DEPENDENCY_OF", - "%s:%s" % (package_ref.externalDocumentId, spdx_package.SPDXID) - ) - seen_deps.add(dep) - - oe_sbom.sbom.write_doc(d, runtime_doc, "runtime", spdx_deploy) -} - -addtask do_create_runtime_spdx after do_create_spdx before do_build do_rm_work -SSTATETASKS += "do_create_runtime_spdx" -do_create_runtime_spdx[sstate-inputdirs] = "${SPDXRUNTIMEDEPLOY}" -do_create_runtime_spdx[sstate-outputdirs] = "${DEPLOY_DIR_SPDX}" -do_create_runtime_spdx[sstate-lockfile] = "${WORKDIR}/create_runtime_spdx_sstate.lock" - -python do_create_runtime_spdx_setscene () { - sstate_setscene(d) -} -addtask do_create_runtime_spdx_setscene - -do_create_runtime_spdx[dirs] = "${SPDXRUNTIMEDEPLOY}" -do_create_runtime_spdx[cleandirs] = "${SPDXRUNTIMEDEPLOY}" -do_create_runtime_spdx[rdeptask] = "do_create_spdx" -do_create_runtime_spdx[lockfiles] = "${SPDXRUNTIMEDEPLOY}/create_runtime_spdx.lock" - -python () { - spdx_disable_task(d, 'do_create_runtime_spdx') -} - -def spdx_get_src(d): - """ - save patched source of the recipe in SPDX_WORKDIR. 
- """ - import shutil - spdx_workdir = d.getVar('SPDXWORK', True) - spdx_sysroot_native = d.getVar('STAGING_DIR_NATIVE', True) - pn = d.getVar('PN', True) - - workdir = d.getVar("WORKDIR", True) - - try: - # The kernel class functions require it to be on work-shared, so we dont change WORKDIR - if not is_work_shared_spdx(d): - # Change the WORKDIR to make do_unpack do_patch run in another dir. - d.setVar('WORKDIR', spdx_workdir) - # Restore the original path to recipe's native sysroot (it's relative to WORKDIR). - d.setVar('STAGING_DIR_NATIVE', spdx_sysroot_native) - - # The changed 'WORKDIR' also caused 'B' changed, create dir 'B' for the - # possibly requiring of the following tasks (such as some recipes's - # do_patch required 'B' existed). - bb.utils.mkdirhier(d.getVar('B', True)) - - bb.build.exec_func('do_unpack', d) - # Copy source of kernel to spdx_workdir - if is_work_shared_spdx(d): - d.setVar('WORKDIR', spdx_workdir) - d.setVar('STAGING_DIR_NATIVE', spdx_sysroot_native) - src_dir = spdx_workdir + "/" + d.getVar('PN', True)+ "-" + d.getVar('PV', True) + "-" + d.getVar('PR', True) - bb.utils.mkdirhier(src_dir) - if bb.data.inherits_class('kernel',d): - share_src = d.getVar('STAGING_KERNEL_DIR', True) - else: - share_src = d.getVar('S', True) - cmd_copy_share = "cp -rf " + share_src + "/* " + src_dir + "/" - cmd_copy_kernel_result = os.popen(cmd_copy_share).read() - bb.note("cmd_copy_kernel_result = " + cmd_copy_kernel_result) - - git_path = src_dir + "/.git" - if os.path.exists(git_path): - shutils.rmtree(git_path) - - # Make sure gcc and kernel sources are patched only once - if not (d.getVar('SRC_URI', True) == "" or is_work_shared_spdx(d)): - bb.build.exec_func('do_patch', d) - - # Some userland has no source. - if not os.path.exists( spdx_workdir ): - bb.utils.mkdirhier(spdx_workdir) - finally: - d.setVar("WORKDIR", workdir) - -do_rootfs[recrdeptask] += "do_create_spdx do_create_runtime_spdx" - -ROOTFS_POSTUNINSTALL_COMMAND =+ "image_packages_spdx ; " - -def get_yocto_codename(version): - yocto_version_to_codename = {"4.1": "Langdale", "4.0": "Kirkstone", "3.4": "Honister", "3.3": "Hardknott", "3.2": "Gatesgarth", "3.1": "Dunfell", "3.0": "Zeus", "2.7": "Warrior", "2.6": "Thud", "2.5": "Sumo", "2.4": "Rocko", "2.3": "Pyro", "2.2": "Morty", "2.1": "Krogoth", "2.0": "Jethro", "1.8": "Fido", "1.7": "Dizzy", "1.6": "Daisy", "1.5": "Dora", "1.4": "Dylan", "1.3": "Danny", "1.2": "Denzil", "1.1": "Edison", "1.0": "Bernard", "0.9": "Laverne"} - - for ver in yocto_version_to_codename.keys(): - if len(ver) > len(version): - continue - if ver == version[:len(ver)]: - return yocto_version_to_codename[ver] - -def get_yocto_version(bitbake_version): - bb_version_to_yocto_version = {"2.2": "4.1", "2.0": "4.0", "1.52": "3.4", "1.50": "3.3", "1.48": "3.2", "1.46": "3.1", "1.44": "3.0", "1.42": "2.7", "1.40": "2.6", "1.38": "2.5", "1.36": "2.4", "1.34": "2.3", "1.32": "2.2", "1.30": "2.1", "1.28": "2.0", "1.26": "1.8", "1.24": "1.7", "1.22": "1.6", "1.20": "1.5", "1.18": "1.4", "1.18": "1.4", "1.16": "1.3"} - - bb_ver = bitbake_version.split('.') - return bb_version_to_yocto_version[bb_ver[0]+'.'+bb_ver[1]] - -def make_image_link(imgdeploydir, image_link_name, target_path, suffix): - link = imgdeploydir / (image_link_name + suffix) - if link.exists(): - os.remove(str(link)) - link.symlink_to(os.path.relpath(str(target_path), str(link.parent))) - -def replace_name(name, substitutes): - if name in substitutes.keys(): - return substitutes[name] - else: - return name - -def is_CPE_on(d): - return 
d.getVar('SBOM_CPE', True) == "1" - -def is_PURL_on(d): - return d.getVar('SBOM_PURL', True) == "1" - -def is_license_on(d): - return d.getVar('SBOM_license', True) == "1" - -def is_externalDocumentRefs_on(d): - return d.getVar('SBOM_externalDocumentRefs', True) == "1" - -python image_packages_spdx() { - import os - import re - import oe_sbom.spdx - import oe_sbom.sbom - from oe.rootfs import image_list_installed_packages - from datetime import timezone, datetime - from pathlib import Path - - recipe_substitutes = {} - # replace it because CVE datasource use another package name - recipe_substitutes["linux-yocto"] = "linux" - - distro_substitues = {} - for distro in (d.getVar('SBOM_WRLINUX_DISTROS', True) or "").split(): - distro_substitues[distro] = "wrlinux" - - def get_pkgdata(pkg_name): - import oe.packagedata - - pkgdata_path = os.path.join(d.getVar('PKGDATA_DIR', True), 'runtime-reverse', pkg_name) - pkgdata = oe.packagedata.read_pkgdatafile(pkgdata_path) - return pkgdata - - def ltss_version_validate(ltss_version): - ltss_version_restrict = ['WRL.LTS.5.0.1', 'WRL.LTS.6.0', 'WRL.LTS.7.0', 'WRL.LTS.8.0', 'WRL.LTS.9.0', 'WRL.LTS.17', 'WRL.LTS.18', 'WRL.LTS.19'] - if ltss_version in ltss_version_restrict: - return True - else: - return False - - def collect_lics(pattern, lic_string, results): - lic_ids = re.findall(pattern, lic_string) - for lic_id in lic_ids: - if lic_id not in results.keys(): - results[lic_id] = {} - - def clear_spdxid_improper_char(s): - return re.sub('[^a-zA-Z0-9.-:]', '-', s) - - creation_time = datetime.now(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") - image_name = d.getVar("IMAGE_NAME", True) - image_link_name = d.getVar("IMAGE_LINK_NAME", True) - - deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX", True)) - imgdeploydir = Path(d.getVar("IMGDEPLOYDIR", True)) - source_date_epoch = d.getVar("SOURCE_DATE_EPOCH", True) - - doc = oe_sbom.spdx.SPDXDocument() - doc.name = image_name - doc.documentNamespace = get_doc_namespace(d, doc) - doc.creationInfo.created = creation_time - doc.creationInfo.comment = "This document was created by collecting packages built into image." - doc.creationInfo.licenseListVersion = d.getVar("SPDX_LICENSE_DATA", True)["licenseListVersion"] - doc.creationInfo.creators.append("Tool: meta-wr-sbom") - doc.creationInfo.creators.append("Organization: Wind River Systems, Inc.") - if 'Yocto' in d.getVar("DISTRO_NAME", True): - doc.comment = "DISTRO: " + "Yocto-" + get_yocto_codename(d.getVar("DISTRO_VERSION", True)) + "-" + d.getVar("DISTRO_VERSION", True) - image_supplier = "Organization: OpenEmbedded ()" - elif 'Wind River' in d.getVar("DISTRO_NAME", True): - doc.comment = "DISTRO: " + "WRLinux-" + d.getVar("DISTRO_VERSION", True) - image_supplier = "Organization: Wind River Systems, Inc." - else: - wr_version = d.getVar("WRLINUX_VERSION", True) - if wr_version: - doc.comment = "DISTRO: " + "WRLinux-" + wr_version - image_supplier = "Organization: Wind River Systems, Inc." 
- else: - bb_version = d.getVar("BB_VERSION", True) - yocto_version = get_yocto_version(bb_version) - doc.comment = "DISTRO: " + "Yocto-" + get_yocto_codename(yocto_version) + "-" + yocto_version - image_supplier = "Organization: OpenEmbedded ()" - - D_name = d.getVar("DISTRO_NAME", True).strip().replace(" ", "_") - if D_name: - doc.comment += " CUSTOMIZED_DISTRO: " + D_name + '-' + d.getVar("DISTRO_VERSION", True) - else: - doc.comment += " CUSTOMIZED_DISTRO: Unknown-" + d.getVar("DISTRO_VERSION", True) - doc.comment += " ARCH: " + d.getVar("MACHINE_ARCH", True) - doc.comment += " PROJECT_LABELS: " + str(d.getVar("PROJECT_LABELS", True)) - doc.comment += " PROJECT_RELEASETIME: " + str(d.getVar("PROJECT_RELEASETIME", True)) - - ltss_version = d.getVar("LTSS_VERSION", True) - if ltss_version: - if ltss_version_validate(ltss_version): - doc.comment += " LTSS_VERSION: " + str(d.getVar("LTSS_VERSION", True)) - else: - doc.comment += " LTSS_VERSION: mismatch" - bb.warn("LTSS_VERSION value is not in the regular list.") - doc.documentDescribes.append("SPDXRef-Image-" + d.getVar("IMAGE_NAME", True)) - - image = oe_sbom.spdx.SPDXPackage() - image.name = d.getVar("PN", True) - image.versionInfo = d.getVar("EXTENDPKGV", True) - image.SPDXID = clear_spdxid_improper_char(oe_sbom.sbom.get_image_spdxid(image_name)) - image.supplier = image_supplier - - doc.packages.append(image) - - os_package = oe_sbom.spdx.SPDXPackage() - os_package.name = get_distro_type(d) - os_package.versionInfo = d.getVar("DISTRO_VERSION", True) - os_package.SPDXID = clear_spdxid_improper_char(oe_sbom.sbom.get_os_spdxid(image_name)) - os_package.supplier = image_supplier - - doc.packages.append(os_package) - - doc.add_relationship(doc, "DESCRIBES", "%s" % image.SPDXID) - doc.add_relationship(image, "CONTAINS", "%s" % os_package.SPDXID) - - spdx_package = oe_sbom.spdx.SPDXPackage() - - packages = image_list_installed_packages(d) - recipes = {} - externaldocrefs = set() - user_defined_licenses = {} - user_defined_licenses_extracted = {} - pattern_docref_recipe = r'DocumentRef-recipe-.*\:' - pattern_licref = r'LicenseRef-[a-zA-Z0-9.-]+' - - def collect_dep_relationships(spdx_file_path, relationship_type): - spdx_doc, spdx_doc_sha1 = oe_sbom.sbom.read_doc(spdx_file_path) - - for r in spdx_doc.relationships: - if r.relationshipType == relationship_type: - if relationship_type == "RUNTIME_DEPENDENCY_OF": - r.relatedSpdxElement = clear_spdxid_improper_char(r.relatedSpdxElement.split(":")[1]) - # the runtime depend packages Ref must exists in local doc - r.spdxElementId = clear_spdxid_improper_char(r.spdxElementId.split(":")[1]) - doc.relationships.append(r) - - continue - - elif relationship_type == "BUILD_DEPENDENCY_OF": - r.relatedSpdxElement = clear_spdxid_improper_char("%s:%s" % (r.relatedSpdxElement.replace('SPDXRef-Recipe', 'DocumentRef-recipe'), r.relatedSpdxElement)) - related_doc_ref = r.spdxElementId.split(":")[0] - r.spdxElementId = clear_spdxid_improper_char(r.spdxElementId.replace("dependency-", "")) - for chk_dup in doc.relationships: - if chk_dup.spdxElementId == r.spdxElementId and chk_dup.relatedSpdxElement == r.relatedSpdxElement: - break - else: - doc.relationships.append(r) - elif relationship_type == "GENERATED_FROM": - if r.spdxElementId.startswith("SPDXRef-Package-"): - related_doc_ref = r.relatedSpdxElement.split(":")[0] - r.relatedSpdxElement = clear_spdxid_improper_char(r.relatedSpdxElement) - r.spdxElementId = clear_spdxid_improper_char(r.spdxElementId) - doc.relationships.append(r) - - for ed in 
spdx_doc.externalDocumentRefs: - if ed.externalDocumentId == related_doc_ref: - ed.externalDocumentId = clear_spdxid_improper_char(ed.externalDocumentId.replace("dependency-", "")) - if ed.externalDocumentId not in externaldocrefs: - doc.externalDocumentRefs.append(ed) - externaldocrefs.add(ed.externalDocumentId) - break - - kernel_recipe = d.getVar("PREFERRED_PROVIDER_virtual/kernel", True) - - for name in sorted(packages.keys()): - # Keep only one kernel package, filter out module packages. - pkgdata = get_pkgdata(name) - if pkgdata["PN"] == kernel_recipe: - if kernel_recipe in recipes.keys(): - continue - - pkg_spdx_path = deploy_dir_spdx / "packages" / (name + ".spdx.json") - rcp_spdx_path = deploy_dir_spdx / "recipes" / ("recipe-" + pkgdata["PN"] + ".spdx.json") - runtime_pkg_spdx_path = deploy_dir_spdx / "runtime" / ("runtime-" + name + ".spdx.json") - if not os.path.exists(str(pkg_spdx_path)): - bb.warn("Unable to find package SPDX file %s" % pkg_spdx_path) - continue - pkg_doc, pkg_doc_sha1 = oe_sbom.sbom.read_doc(pkg_spdx_path) - - for p in pkg_doc.packages: - if p.name == name: - pkg_ref = oe_sbom.spdx.SPDXExternalDocumentRef() - pkg_ref.externalDocumentId = clear_spdxid_improper_char("DocumentRef-package-%s" % pkg_doc.name) - pkg_ref.spdxDocument = pkg_doc.documentNamespace - pkg_ref.checksum.algorithm = "SHA1" - pkg_ref.checksum.checksumValue = pkg_doc_sha1 - - if is_externalDocumentRefs_on(d): - doc.externalDocumentRefs.append(pkg_ref) - - doc.add_relationship("%s" % os_package.SPDXID, "CONTAINS", "%s" % clear_spdxid_improper_char(p.SPDXID)) - collect_dep_relationships(pkg_spdx_path, "GENERATED_FROM") - collect_dep_relationships(rcp_spdx_path, "BUILD_DEPENDENCY_OF") - collect_dep_relationships(runtime_pkg_spdx_path, "RUNTIME_DEPENDENCY_OF") - - component_package = oe_sbom.spdx.SPDXPackage() - component_package.name = p.name - if pkgdata["PN"] == kernel_recipe: - component_package.name = "kernel" - component_package.SPDXID = clear_spdxid_improper_char(p.SPDXID) - - if (not "PR" in pkgdata.keys()) or (not pkgdata["PR"]): - component_package.versionInfo = pkgdata["PV"] - else: - component_package.versionInfo = pkgdata["PV"] + "-" + pkgdata["PR"] - - # Use downloadLocation from package spdx file, because downloadLocation - # from recipe spdx file may refers to local path - component_package.downloadLocation = p.downloadLocation - - if is_license_on(d): - # Not use license from recipe spdx file because it combine multiple - # package licenses into one, it is wrong for package. 
- # Use license from package spdx file may bring "DocumentRef" - # into licenseDeclared and licenseConcluded, it violates the spdx - # standard, so remove "DocumentRef" - component_package.licenseConcluded = re.sub(pattern_docref_recipe, "", p.licenseConcluded) - collect_lics(pattern_licref, component_package.licenseConcluded, user_defined_licenses) - component_package.licenseDeclared = re.sub(pattern_docref_recipe, "", p.licenseDeclared) - collect_lics(pattern_licref, component_package.licenseDeclared, user_defined_licenses) - - component_package.copyrightText = p.copyrightText - component_package.supplier = "Organization: OpenEmbedded ()" - source_name = replace_name(pkgdata["PN"], recipe_substitutes) - component_package.sourceInfo = "built package from: " + source_name + " " + component_package.versionInfo - - if pkgdata["PN"] not in recipes.keys(): - recipes[pkgdata["PN"]] = [] - recipes[pkgdata["PN"]].append(p.SPDXID) - - if is_PURL_on(d): - purl = oe_sbom.spdx.SPDXExternalReference() - purl.referenceCategory = "PACKAGE-MANAGER" - purl.referenceType = "purl" - purl.referenceLocator = ("pkg:rpm/" + os_package.name + "/" + - component_package.name + "@" + component_package.versionInfo + - "?arch=" + d.getVar("MACHINE_ARCH", True) + "&distro=" + os_package.name + "-" + os_package.versionInfo) - if d.getVar("PROJECT_LABELS", True): - purl.referenceLocator += "&labels=" + str(d.getVar("PROJECT_LABELS", True)) - if d.getVar("LTSS_VERSION", True): - purl.referenceLocator += "<ssVersion=" + str(d.getVar("LTSS_VERSION", True)) - component_package.externalRefs.append(purl) - - doc.packages.append(component_package) - break - else: - bb.warn("Unable to find package with name '%s' in SPDX file %s" % (name, pkg_spdx_path)) - - if is_license_on(d) or is_CPE_on(d): - for name in recipes.keys(): - recipe_spdx_path = deploy_dir_spdx / "recipes" / ("recipe-" + name + ".spdx.json") - if os.path.exists(str(recipe_spdx_path)): - recipe_doc, recipe_doc_sha1 = oe_sbom.sbom.read_doc(recipe_spdx_path) - if is_license_on(d): - # append other licensing information detected section - for licensingInfo in recipe_doc.hasExtractedLicensingInfos: - if (licensingInfo.licenseId in user_defined_licenses.keys() and - licensingInfo.licenseId not in user_defined_licenses_extracted.keys()): - doc.hasExtractedLicensingInfos.append(licensingInfo) - user_defined_licenses_extracted[licensingInfo.licenseId] = {} - - if is_CPE_on(d): - # append CPEs - for package_r in recipe_doc.packages: - for externalRef in package_r.externalRefs: - if externalRef.referenceCategory == "SECURITY": - for package in doc.packages: - if package.SPDXID in recipes[name]: - package.externalRefs.append(externalRef) - - image_spdx_path = imgdeploydir / (image_name + ".spdx.json") - - with image_spdx_path.open("wb") as f: - doc.to_json(f, sort_keys=True) - - make_image_link(imgdeploydir, image_link_name, image_spdx_path, ".spdx.json") -} diff --git a/classes/spdx-common.bbclass b/classes/spdx-common.bbclass new file mode 100644 index 0000000..b31e549 --- /dev/null +++ b/classes/spdx-common.bbclass @@ -0,0 +1,107 @@ +# +# Copyright OpenEmbedded Contributors +# +# SPDX-License-Identifier: GPL-2.0-only +# + +SPDX_VERSION ??= "" +DEPLOY_DIR_SPDX ??= "${DEPLOY_DIR}/spdx/${SPDX_VERSION}" + +# The product name that the CVE database uses. Defaults to BPN, but may need to +# be overriden per recipe (for example tiff.bb sets CVE_PRODUCT=libtiff). 
+CVE_PRODUCT ??= "${BPN}" +CVE_VERSION ??= "${PV}" + +SPDXDIR ??= "${WORKDIR}/spdx/${SPDX_VERSION}" +SPDXDEPLOY = "${SPDXDIR}/deploy" +SPDXWORK = "${SPDXDIR}/work" +SPDXIMAGEWORK = "${SPDXDIR}/image-work" +SPDXSDKWORK = "${SPDXDIR}/sdk-work" +SPDXSDKEXTWORK = "${SPDXDIR}/sdk-ext-work" +SPDXDEPS = "${SPDXDIR}/deps.json" + +SPDX_TOOL_NAME ??= "oe-spdx-creator" +SPDX_TOOL_VERSION ??= "1.0" + +SPDXRUNTIMEDEPLOY = "${SPDXDIR}/runtime-deploy" + +SPDX_INCLUDE_SOURCES ??= "0" +SPDX_INCLUDE_COMPILED_SOURCES ??= "0" + +SPDX_UUID_NAMESPACE ??= "sbom.openembedded.org" +SPDX_NAMESPACE_PREFIX ??= "http://spdx.org/spdxdocs" +SPDX_PRETTY ??= "0" + +SPDX_LICENSES ??= "${WRSBOM_LAYER}/meta/files/spdx-licenses.json" + +SPDX_CUSTOM_ANNOTATION_VARS ??= "" + +SPDX_MULTILIB_SSTATE_ARCHS ??= "${SSTATE_ARCHS}" + +python () { + from oe_sbom.cve_check import extend_cve_status + extend_cve_status(d) + if d.getVar("SPDX_INCLUDE_COMPILED_SOURCES") == "1": + d.setVar("SPDX_INCLUDE_SOURCES", "1") +} + +def create_spdx_source_deps(d): + import oe_sbom.spdx_common + + deps = [] + if d.getVar("SPDX_INCLUDE_SOURCES") == "1": + pn = d.getVar('PN') + # do_unpack is a hack for now; we only need it to get the + # dependencies do_unpack already has so we can extract the source + # ourselves + if oe_sbom.spdx_common.has_task(d, "do_unpack"): + deps.append("%s:do_unpack" % pn) + + if oe_sbom.spdx_common.is_work_shared_spdx(d) and \ + oe_sbom.spdx_common.process_sources(d): + # For kernel source code + if oe_sbom.spdx_common.has_task(d, "do_shared_workdir"): + deps.append("%s:do_shared_workdir" % pn) + elif d.getVar('S') == d.getVar('STAGING_KERNEL_DIR'): + deps.append("virtual/kernel:do_shared_workdir") + + # For gcc-source-${PV} source code + if oe_sbom.spdx_common.has_task(d, "do_preconfigure"): + deps.append("%s:do_preconfigure" % pn) + elif oe_sbom.spdx_common.has_task(d, "do_patch"): + deps.append("%s:do_patch" % pn) + # For gcc-cross-x86_64 source code + elif oe_sbom.spdx_common.has_task(d, "do_configure"): + deps.append("%s:do_configure" % pn) + + return " ".join(deps) + + +python do_collect_spdx_deps() { + # This task calculates the build time dependencies of the recipe, and is + # required because while a task can deptask on itself, those dependencies + # do not show up in BB_TASKDEPDATA. 
To work around that, this task does the
+    # deptask on do_create_spdx and writes out the dependencies it finds, then
+    # do_create_spdx reads in the found dependencies when writing the actual
+    # SPDX document.
+    import json
+    import oe_sbom.spdx_common
+    from pathlib import Path
+
+    spdx_deps_file = Path(d.getVar("SPDXDEPS"))
+
+    deps = oe_sbom.spdx_common.collect_direct_deps(d, "do_create_spdx")
+
+    with spdx_deps_file.open("w") as f:
+        json.dump(deps, f)
+}
+# NOTE: depending on do_unpack is a hack that is necessary to get its dependencies for archiving the source
+addtask do_collect_spdx_deps after do_unpack
+do_collect_spdx_deps[depends] += "${PATCHDEPENDENCY}"
+do_collect_spdx_deps[deptask] = "do_create_spdx"
+do_collect_spdx_deps[dirs] = "${SPDXDIR}"
+
+oe_sbom.spdx_common.collect_direct_deps[vardepsexclude] += "BB_TASKDEPDATA"
+oe_sbom.spdx_common.collect_direct_deps[vardeps] += "DEPENDS"
+oe_sbom.spdx_common.collect_package_providers[vardepsexclude] += "BB_TASKDEPDATA"
+oe_sbom.spdx_common.get_patched_src[vardepsexclude] += "STAGING_KERNEL_DIR"
\ No newline at end of file
diff --git a/conf/layer.conf b/conf/layer.conf
index 8393cb4..2d44b27 100644
--- a/conf/layer.conf
+++ b/conf/layer.conf
@@ -31,4 +31,4 @@ SBOM_externalDocumentRefs ?= "0"
 # DISTRO names in this variable will be changed to "wrlinux"
 SBOM_WRLINUX_DISTROS ?= "wrlinux wrlinux-tiny wrlinux-installer wrlinux-graphics wrlinux-ovp wrlinux-cgl"
 
-INHERIT += 'sls-create-spdx'
+INHERIT += 'create-spdx-2.2'
diff --git a/lib/oe_sbom/cve_check.py b/lib/oe_sbom/cve_check.py
index d82519e..96be3c0 100644
--- a/lib/oe_sbom/cve_check.py
+++ b/lib/oe_sbom/cve_check.py
@@ -5,9 +5,11 @@
 #
 
 import collections
-import re
-import itertools
 import functools
+import itertools
+import os.path
+import re
+import oe.patch
 
 _Version = collections.namedtuple(
     "_Version", ["release", "patch_l", "pre_l", "pre_v"]
 )
@@ -71,63 +73,134 @@ def _cmpkey(release, patch_l, pre_l, pre_v):
     return _release, _patch, _pre
 
 
-def get_patched_cves(d):
+def parse_cve_from_filename(patch_filename):
     """
-    Get patches that solve CVEs using the "CVE: " tag.
+    Parses CVE ID from the filename
+
+    Matches the last "CVE-YYYY-ID" in the file name, also if written
+    in lowercase. Possible to have multiple CVE IDs in a single
+    file name, but only the last one will be detected from the file name.
+
+    Returns the last CVE ID found in the filename. If no CVE ID is found,
+    an empty string is returned.
     """
+    cve_file_name_match = re.compile(r".*(CVE-\d{4}-\d{4,})", re.IGNORECASE)
 
-    import re
-    import oe_sbom.patch
+    # Check patch file name for CVE ID
+    fname_match = cve_file_name_match.search(patch_filename)
+    return fname_match.group(1).upper() if fname_match else ""
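Both helpers are plain regular-expression scans, so their behaviour is easy to exercise in isolation. A quick sketch reusing the two patterns shown above (file name and patch text are made-up examples):

    import re

    fname_re = re.compile(r".*(CVE-\d{4}-\d{4,})", re.IGNORECASE)
    line_re = re.compile(r"CVE-\d{4}-\d{4,}")

    m = fname_re.search("0001-cve-2023-1234-fix.patch")
    print(m.group(1).upper())          # CVE-2023-1234

    found = set()
    for line in "CVE: CVE-2020-1111 & CVE-2020-2222\n".split("\n"):
        if line.startswith("CVE:"):
            found.update(line_re.findall(line))
    print(sorted(found))               # ['CVE-2020-1111', 'CVE-2020-2222']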
-    pn = d.getVar("PN", True)
-    cve_match = re.compile("CVE:( CVE\-\d{4}\-\d+)+")
-
-    # Matches the last "CVE-YYYY-ID" in the file name, also if written
-    # in lowercase. Possible to have multiple CVE IDs in a single
-    # file name, but only the last one will be detected from the file name.
-    # However, patch files contents addressing multiple CVE IDs are supported
-    # (cve_match regular expression)
+def parse_cves_from_patch_contents(patch_contents):
+    """
+    Parses CVE IDs from patch contents
-
-    cve_file_name_match = re.compile(".*([Cc][Vv][Ee]\-\d{4}\-\d+)")
+    Matches all CVE IDs contained on a line that starts with "CVE: ". Any
+    delimiter (',', '&', "and", etc.) can be used without any issues. Multiple
+    "CVE:" lines can also exist.
-    patched_cves = set()
-    bb.debug(2, "Looking for patches that solves CVEs for %s" % pn)
-    for url in oe_sbom.patch.src_patches(d):
-        patch_file = bb.fetch.decodeurl(url)[2]
+
+    Returns a set of all CVE IDs found in the patch contents.
+    """
+    cve_ids = set()
+    cve_match = re.compile(r"CVE-\d{4}-\d{4,}")
+    # Search for one or more "CVE: " lines
+    for line in patch_contents.split("\n"):
+        if not line.startswith("CVE:"):
+            continue
+        cve_ids.update(cve_match.findall(line))
+    return cve_ids
+
-        if not os.path.isfile(patch_file):
-            bb.error("File Not found: %s" % patch_file)
-            raise FileNotFoundError
-
-        # Check patch file name for CVE ID
-        fname_match = cve_file_name_match.search(patch_file)
-        if fname_match:
-            cve = fname_match.group(1).upper()
-            patched_cves.add(cve)
-            bb.debug(2, "Found CVE %s from patch file name %s" % (cve, patch_file))
+def parse_cves_from_patch_file(patch_file):
+    """
+    Parses CVE IDs associated with a particular patch file, using both the filename
+    and patch contents.
-
-        with open(patch_file, "r", encoding="utf-8") as f:
-            try:
+    Returns a set of all CVE IDs found in the patch filename and contents.
+    """
+    cve_ids = set()
+    filename_cve = parse_cve_from_filename(patch_file)
+    if filename_cve:
+        bb.debug(2, "Found %s from patch file name %s" % (filename_cve, patch_file))
+        cve_ids.add(parse_cve_from_filename(patch_file))
+
+    # Remote patches won't be present and compressed patches won't be
+    # unpacked, so say we're not scanning them
+    if not os.path.isfile(patch_file):
+        bb.note("%s is remote or compressed, not scanning content" % patch_file)
+        return cve_ids
+
+    with open(patch_file, "r", encoding="utf-8") as f:
+        try:
+            patch_text = f.read()
+        except UnicodeDecodeError:
+            bb.debug(
+                1,
+                "Failed to read patch %s using UTF-8 encoding"
+                " trying with iso8859-1" % patch_file,
+            )
+            f.close()
+            with open(patch_file, "r", encoding="iso8859-1") as f:
                 patch_text = f.read()
-            except UnicodeDecodeError:
-                bb.debug(1, "Failed to read patch %s using UTF-8 encoding"
-                " trying with iso8859-1" % patch_file)
-                f.close()
-                with open(patch_file, "r", encoding="iso8859-1") as f:
-                    patch_text = f.read()
-
-            # Search for one or more "CVE: " lines
-            text_match = False
-            for match in cve_match.finditer(patch_text):
-                # Get only the CVEs without the "CVE: " tag
-                cves = patch_text[match.start()+5:match.end()]
-                for cve in cves.split():
-                    bb.debug(2, "Patch %s solves %s" % (patch_file, cve))
-                    patched_cves.add(cve)
-                    text_match = True
-
-            if not fname_match and not text_match:
-                bb.debug(2, "Patch %s doesn't solve CVEs" % patch_file)
+
+    cve_ids.update(parse_cves_from_patch_contents(patch_text))
+
+    if not cve_ids:
+        bb.debug(2, "Patch %s doesn't solve CVEs" % patch_file)
+    else:
+        bb.debug(2, "Patch %s solves %s" % (patch_file, ", ".join(sorted(cve_ids))))
+
+    return cve_ids
+
+
+@bb.parse.vardeps("CVE_STATUS")
+def get_patched_cves(d):
+    """
+    Determines the CVE IDs that have been solved by either patches included within
+    SRC_URI or by setting CVE_STATUS.
+
+    Returns a dictionary with the CVE IDs as keys and an associated dictionary of
+    relevant metadata as the value.
+ """ + patched_cves = {} + patches = oe.patch.src_patches(d) + bb.debug(2, "Scanning %d patches for CVEs" % len(patches)) + + # Check each patch file + for url in patches: + patch_file = bb.fetch.decodeurl(url)[2] + for cve_id in parse_cves_from_patch_file(patch_file): + if cve_id not in patched_cves: + patched_cves[cve_id] = { + "abbrev-status": "Patched", + "status": "fix-file-included", + "resource": [patch_file], + } + else: + patched_cves[cve_id]["resource"].append(patch_file) + + # Search for additional patched CVEs + for cve_id in d.getVarFlags("CVE_STATUS") or {}: + decoded_status = decode_cve_status(d, cve_id) + products = d.getVar("CVE_PRODUCT") + if has_cve_product_match(decoded_status, products): + if cve_id in patched_cves: + bb.warn( + 'CVE_STATUS[%s] = "%s" is overwriting previous status of "%s: %s"' + % ( + cve_id, + d.getVarFlag("CVE_STATUS", cve_id), + patched_cves[cve_id]["abbrev-status"], + patched_cves[cve_id]["status"], + ) + ) + patched_cves[cve_id] = { + "abbrev-status": decoded_status["mapping"], + "status": decoded_status["detail"], + "justification": decoded_status["description"], + "affected-vendor": decoded_status["vendor"], + "affected-product": decoded_status["product"], + } return patched_cves @@ -148,7 +221,158 @@ def get_cpe_ids(cve_product, version): else: vendor = "*" - cpe_id = 'cpe:2.3:a:%s:%s:%s:*:*:*:*:*:*:*' % (vendor, product, version) + cpe_id = 'cpe:2.3:*:{}:{}:{}:*:*:*:*:*:*:*'.format(vendor, product, version) cpe_ids.append(cpe_id) return cpe_ids + +def cve_check_merge_jsons(output, data): + """ + Merge the data in the "package" property to the main data file + output + """ + if output["version"] != data["version"]: + bb.error("Version mismatch when merging JSON outputs") + return + + for product in output["package"]: + if product["name"] == data["package"][0]["name"]: + bb.error("Error adding the same package %s twice" % product["name"]) + return + + output["package"].append(data["package"][0]) + +def update_symlinks(target_path, link_path): + """ + Update a symbolic link link_path to point to target_path. + Remove the link and recreate it if exist and is different. + """ + if link_path != target_path and os.path.exists(target_path): + if os.path.exists(os.path.realpath(link_path)): + os.remove(link_path) + os.symlink(os.path.basename(target_path), link_path) + + +def convert_cve_version(version): + """ + This function converts from CVE format to Yocto version format. + eg 8.3_p1 -> 8.3p1, 6.2_rc1 -> 6.2-rc1 + + Unless it is redefined using CVE_VERSION in the recipe, + cve_check uses the version in the name of the recipe (${PV}) + to check vulnerabilities against a CVE in the database downloaded from NVD. + + When the version has an update, i.e. + "p1" in OpenSSH 8.3p1, + "-rc1" in linux kernel 6.2-rc1, + the database stores the version as version_update (8.3_p1, 6.2_rc1). + Therefore, we must transform this version before comparing to the + recipe version. + + In this case, the parameter of the function is 8.3_p1. + If the version uses the Release Candidate format, "rc", + this function replaces the '_' by '-'. + If the version uses the Update format, "p", + this function removes the '_' completely. 
+ """ + import re + + matches = re.match('^([0-9.]+)_((p|rc)[0-9]+)$', version) + + if not matches: + return version + + version = matches.group(1) + update = matches.group(2) + + if matches.group(3) == "rc": + return version + '-' + update + + return version + update + +@bb.parse.vardeps("CVE_STATUS", "CVE_CHECK_STATUSMAP") +def decode_cve_status(d, cve): + """ + Convert CVE_STATUS into status, vendor, product, detail and description. + """ + status = d.getVarFlag("CVE_STATUS", cve) + if not status: + return {} + + status_split = status.split(':', 4) + status_out = {} + status_out["detail"] = status_split[0] + product = "*" + vendor = "*" + description = "" + if len(status_split) >= 4 and status_split[1].strip() == "cpe": + # Both vendor and product are mandatory if cpe: present, the syntax is then: + # detail: cpe:vendor:product:description + vendor = status_split[2].strip() + product = status_split[3].strip() + description = status_split[4].strip() + elif len(status_split) >= 2 and status_split[1].strip() == "cpe": + # Malformed CPE + bb.warn( + 'Invalid CPE information for CVE_STATUS[%s] = "%s", not setting CPE' + % (cve, status) + ) + else: + # Other case: no CPE, the syntax is then: + # detail: description + description = status.split(':', 1)[1].strip() if (len(status_split) > 1) else "" + + status_out["vendor"] = vendor + status_out["product"] = product + status_out["description"] = description + + detail = status_out["detail"] + status_mapping = d.getVarFlag("CVE_CHECK_STATUSMAP", detail) + if status_mapping is None: + bb.warn( + 'Invalid detail "%s" for CVE_STATUS[%s] = "%s", fallback to Unpatched' + % (detail, cve, status) + ) + status_mapping = "Unpatched" + status_out["mapping"] = status_mapping + + return status_out + +def has_cve_product_match(detailed_status, products): + """ + Check product/vendor match between detailed_status from decode_cve_status and a string of + products (like from CVE_PRODUCT) + """ + for product in products.split(): + vendor = "*" + if ":" in product: + vendor, product = product.split(":", 1) + + if (vendor == detailed_status["vendor"] or detailed_status["vendor"] == "*") and \ + (product == detailed_status["product"] or detailed_status["product"] == "*"): + return True + + #if no match, return False + return False + +def extend_cve_status(d): + # do this only once in case multiple classes use this + if d.getVar("CVE_STATUS_EXTENDED"): + return + d.setVar("CVE_STATUS_EXTENDED", "1") + + # Fallback all CVEs from CVE_CHECK_IGNORE to CVE_STATUS + cve_check_ignore = d.getVar("CVE_CHECK_IGNORE") + if cve_check_ignore: + bb.warn("CVE_CHECK_IGNORE is deprecated in favor of CVE_STATUS") + for cve in (d.getVar("CVE_CHECK_IGNORE") or "").split(): + d.setVarFlag("CVE_STATUS", cve, "ignored") + + # Process CVE_STATUS_GROUPS to set multiple statuses and optional detail or description at once + for cve_status_group in (d.getVar("CVE_STATUS_GROUPS") or "").split(): + cve_group = d.getVar(cve_status_group) + if cve_group is not None: + for cve in cve_group.split(): + d.setVarFlag("CVE_STATUS", cve, d.getVarFlag(cve_status_group, "status")) + else: + bb.warn("CVE_STATUS_GROUPS contains undefined variable %s" % cve_status_group) \ No newline at end of file diff --git a/lib/oe_sbom/packagedata.py b/lib/oe_sbom/packagedata.py deleted file mode 100644 index 10a07f8..0000000 --- a/lib/oe_sbom/packagedata.py +++ /dev/null @@ -1,139 +0,0 @@ -# -# Copyright OpenEmbedded Contributors -# -# SPDX-License-Identifier: GPL-2.0-only -# - -import codecs -import os - -def 
packaged(pkg, d): - return os.access(get_subpkgedata_fn(pkg, d) + '.packaged', os.R_OK) - -def read_pkgdatafile(fn, d): - pkgdata = {} - - def decode(str): - c = codecs.getdecoder("unicode_escape") - return c(str)[0] - - if os.access(fn, os.R_OK): - import re - with open(fn, 'r') as f: - lines = f.readlines() - - distro_ver = d.getVar("DISTRO_VERSION", True) - if 'Wind River' in d.getVar("DISTRO_NAME", True): - if (distro_ver.split('.')[0] == '10') and (distro_ver.split('.')[1] > '21'): - r = re.compile(r"(^.+?):\s+(.*)") - else: - r = re.compile("([^:]+):\s*(.*)") - else: - if d.getVar("BB_VERSION", True) > '1.50.0': - r = re.compile(r"(^.+?):\s+(.*)") - else: - r = re.compile("([^:]+):\s*(.*)") - - for l in lines: - m = r.match(l) - if m: - pkgdata[m.group(1)] = decode(m.group(2)) - - return pkgdata - -def get_subpkgedata_fn(pkg, d): - return d.expand('${PKGDATA_DIR}/runtime/%s' % pkg) - -def has_subpkgdata(pkg, d): - return os.access(get_subpkgedata_fn(pkg, d), os.R_OK) - -def read_subpkgdata(pkg, d): - return read_pkgdatafile(get_subpkgedata_fn(pkg, d), d) - -def has_pkgdata(pn, d): - fn = d.expand('${PKGDATA_DIR}/%s' % pn) - return os.access(fn, os.R_OK) - -def read_pkgdata(pn, d): - fn = d.expand('${PKGDATA_DIR}/%s' % pn) - return read_pkgdatafile(fn, d) - -# -# Collapse FOO_pkg variables into FOO -# -def read_subpkgdata_dict(pkg, d): - ret = {} - subd = read_pkgdatafile(get_subpkgedata_fn(pkg, d), d) - for var in subd: - distro_ver = d.getVar("DISTRO_VERSION", True) - if 'Wind River' in d.getVar("DISTRO_NAME", True): - if (distro_ver.split('.')[0] == '10') and (distro_ver.split('.')[1] > '21'): - newvar = var.replace(":" + pkg, "") - if newvar == var and var + ":" + pkg in subd: - continue - else: - newvar = var.replace("_" + pkg, "") - if newvar == var and var + "_" + pkg in subd: - continue - else: - if d.getVar("BB_VERSION", True) > '1.50.0': - newvar = var.replace(":" + pkg, "") - if newvar == var and var + "_" + pkg in subd: - continue - else: - newvar = var.replace("_" + pkg, "") - if newvar == var and var + "_" + pkg in subd: - continue - - ret[newvar] = subd[var] - return ret - -def read_subpkgdata_extended(pkg, d): - import json - - fn = d.expand("${PKGDATA_DIR}/extended/%s.json" % pkg) - try: - with open(fn, "rt", encoding="utf-8") as f: - return json.load(f) - except FileNotFoundError: - return None - -def _pkgmap(d): - """Return a dictionary mapping package to recipe name.""" - - pkgdatadir = d.getVar("PKGDATA_DIR") - - pkgmap = {} - try: - files = os.listdir(pkgdatadir) - except OSError: - bb.warn("No files in %s?" % pkgdatadir) - files = [] - - for pn in [f for f in files if not os.path.isdir(os.path.join(pkgdatadir, f))]: - try: - pkgdata = read_pkgdatafile(os.path.join(pkgdatadir, pn), d) - except OSError: - continue - - packages = pkgdata.get("PACKAGES") or "" - for pkg in packages.split(): - pkgmap[pkg] = pn - - return pkgmap - -def pkgmap(d): - """Return a dictionary mapping package to recipe name. 
- Cache the mapping in the metadata""" - - pkgmap_data = d.getVar("__pkgmap_data", False) - if pkgmap_data is None: - pkgmap_data = _pkgmap(d) - d.setVar("__pkgmap_data", pkgmap_data) - - return pkgmap_data - -def recipename(pkg, d): - """Return the recipe name for the given binary package name.""" - - return pkgmap(d).get(pkg) diff --git a/lib/oe_sbom/patch.py b/lib/oe_sbom/patch.py index 9b721f3..7f8b6c1 100644 --- a/lib/oe_sbom/patch.py +++ b/lib/oe_sbom/patch.py @@ -4,9 +4,11 @@ # SPDX-License-Identifier: GPL-2.0-only # +import os +import shlex +import subprocess import oe.path import oe.types -import subprocess class NotFoundError(bb.BBHandledException): def __init__(self, path): @@ -27,8 +29,6 @@ def __str__(self): def runcmd(args, dir = None): - import pipes - if dir: olddir = os.path.abspath(os.curdir) if not os.path.exists(dir): @@ -37,7 +37,7 @@ def runcmd(args, dir = None): # print("cwd: %s -> %s" % (olddir, dir)) try: - args = [ pipes.quote(str(arg)) for arg in args ] + args = [ shlex.quote(str(arg)) for arg in args ] cmd = " ".join(args) # print("cmd: %s" % cmd) proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) @@ -217,7 +217,7 @@ def _removePatchFile(self, all = False): with open(self.seriespath, 'w') as f: for p in patches: f.write(p) - + def Import(self, patch, force = None): """""" PatchSet.Import(self, patch, force) @@ -294,26 +294,32 @@ def Clean(self): self.Pop(all=True) class GitApplyTree(PatchTree): - patch_line_prefix = '%% original patch' - ignore_commit_prefix = '%% ignore' + notes_ref = "refs/notes/devtool" + original_patch = 'original patch' + ignore_commit = 'ignore' def __init__(self, dir, d): PatchTree.__init__(self, dir, d) - self.commituser = d.getVar('PATCH_GIT_USER_NAME', True) - self.commitemail = d.getVar('PATCH_GIT_USER_EMAIL', True) - if not self._isInitialized(): + self.commituser = d.getVar('PATCH_GIT_USER_NAME') + self.commitemail = d.getVar('PATCH_GIT_USER_EMAIL') + if not self._isInitialized(d): self._initRepo() - def _isInitialized(self): + def _isInitialized(self, d): cmd = "git rev-parse --show-toplevel" - (status, output) = subprocess.getstatusoutput(cmd.split()) - ## Make sure repo is in builddir to not break top-level git repos - return status == 0 and os.path.samedir(output, self.dir) + try: + output = runcmd(cmd.split(), self.dir).strip() + except CmdError as err: + ## runcmd returned non-zero which most likely means 128 + ## Not a git directory + return False + ## Make sure repo is in builddir to not break top-level git repos, or under workdir + return os.path.samefile(output, self.dir) or oe.path.is_path_parent(d.getVar('WORKDIR'), output) def _initRepo(self): runcmd("git init".split(), self.dir) runcmd("git add .".split(), self.dir) - runcmd("git commit -a --allow-empty -m Patching_started".split(), self.dir) + runcmd("git commit -a --allow-empty -m bitbake_patching_started".split(), self.dir) @staticmethod def extractPatchHeader(patchfile): @@ -402,8 +408,8 @@ def interpretPatchHeader(headerlines): @staticmethod def gitCommandUserOptions(cmd, commituser=None, commitemail=None, d=None): if d: - commituser = d.getVar('PATCH_GIT_USER_NAME', True) - commitemail = d.getVar('PATCH_GIT_USER_EMAIL', True) + commituser = d.getVar('PATCH_GIT_USER_NAME') + commitemail = d.getVar('PATCH_GIT_USER_EMAIL') if commituser: cmd += ['-c', 'user.name="%s"' % commituser] if commitemail: @@ -447,7 +453,7 @@ def prepareCommit(patchfile, commituser=None, commitemail=None): # Prepare git command cmd = ["git"] 
GitApplyTree.gitCommandUserOptions(cmd, commituser, commitemail) - cmd += ["commit", "-F", tmpfile] + cmd += ["commit", "-F", tmpfile, "--no-verify"] # git doesn't like plain email addresses as authors if author and '<' in author: cmd.append('--author="%s"' % author) @@ -456,44 +462,133 @@ def prepareCommit(patchfile, commituser=None, commitemail=None): return (tmpfile, cmd) @staticmethod - def extractPatches(tree, startcommit, outdir, paths=None): + def addNote(repo, ref, key, value=None, commituser=None, commitemail=None): + note = key + (": %s" % value if value else "") + notes_ref = GitApplyTree.notes_ref + runcmd(["git", "config", "notes.rewriteMode", "ignore"], repo) + runcmd(["git", "config", "notes.displayRef", notes_ref, notes_ref], repo) + runcmd(["git", "config", "notes.rewriteRef", notes_ref, notes_ref], repo) + cmd = ["git"] + GitApplyTree.gitCommandUserOptions(cmd, commituser, commitemail) + runcmd(cmd + ["notes", "--ref", notes_ref, "append", "-m", note, ref], repo) + + @staticmethod + def removeNote(repo, ref, key, commituser=None, commitemail=None): + notes = GitApplyTree.getNotes(repo, ref) + notes = {k: v for k, v in notes.items() if k != key and not k.startswith(key + ":")} + runcmd(["git", "notes", "--ref", GitApplyTree.notes_ref, "remove", "--ignore-missing", ref], repo) + for note, value in notes.items(): + GitApplyTree.addNote(repo, ref, note, value, commituser, commitemail) + + @staticmethod + def getNotes(repo, ref): + import re + + note = None + try: + note = runcmd(["git", "notes", "--ref", GitApplyTree.notes_ref, "show", ref], repo) + prefix = "" + except CmdError: + note = runcmd(['git', 'show', '-s', '--format=%B', ref], repo) + prefix = "%% " + + note_re = re.compile(r'^%s(.*?)(?::\s*(.*))?$' % prefix) + notes = dict() + for line in note.splitlines(): + m = note_re.match(line) + if m: + notes[m.group(1)] = m.group(2) + + return notes + + @staticmethod + def commitIgnored(subject, dir=None, files=None, d=None): + if files: + runcmd(['git', 'add'] + files, dir) + cmd = ["git"] + GitApplyTree.gitCommandUserOptions(cmd, d=d) + cmd += ["commit", "-m", subject, "--no-verify"] + runcmd(cmd, dir) + GitApplyTree.addNote(dir, "HEAD", GitApplyTree.ignore_commit, d.getVar('PATCH_GIT_USER_NAME'), d.getVar('PATCH_GIT_USER_EMAIL')) + + @staticmethod + def extractPatches(tree, startcommits, outdir, paths=None): import tempfile import shutil tempdir = tempfile.mkdtemp(prefix='oepatch') try: - shellcmd = ["git", "format-patch", "--no-signature", "--no-numbered", startcommit, "-o", tempdir] - if paths: - shellcmd.append('--') - shellcmd.extend(paths) - out = runcmd(["sh", "-c", " ".join(shellcmd)], tree) - if out: - for srcfile in out.split(): - for encoding in ['utf-8', 'latin-1']: - patchlines = [] - outfile = None - try: - with open(srcfile, 'r', encoding=encoding) as f: - for line in f: - if line.startswith(GitApplyTree.patch_line_prefix): - outfile = line.split()[-1].strip() - continue - if line.startswith(GitApplyTree.ignore_commit_prefix): - continue - patchlines.append(line) - except UnicodeDecodeError: + for name, rev in startcommits.items(): + shellcmd = ["git", "format-patch", "--no-signature", "--no-numbered", rev, "-o", tempdir] + if paths: + shellcmd.append('--') + shellcmd.extend(paths) + out = runcmd(["sh", "-c", " ".join(shellcmd)], os.path.join(tree, name)) + if out: + for srcfile in out.split(): + # This loop, which is used to remove any line that + # starts with "%% original patch", is kept for backwards + # compatibility. 
If/when that compatibility is dropped, + # it can be replaced with code to just read the first + # line of the patch file to get the SHA-1, and the code + # below that writes the modified patch file can be + # replaced with a simple file move. + for encoding in ['utf-8', 'latin-1']: + patchlines = [] + try: + with open(srcfile, 'r', encoding=encoding, newline='') as f: + for line in f: + if line.startswith("%% " + GitApplyTree.original_patch): + continue + patchlines.append(line) + except UnicodeDecodeError: + continue + break + else: + raise PatchError('Unable to find a character encoding to decode %s' % srcfile) + + sha1 = patchlines[0].split()[1] + notes = GitApplyTree.getNotes(os.path.join(tree, name), sha1) + if GitApplyTree.ignore_commit in notes: continue - break - else: - raise PatchError('Unable to find a character encoding to decode %s' % srcfile) - - if not outfile: - outfile = os.path.basename(srcfile) - with open(os.path.join(outdir, outfile), 'w') as of: - for line in patchlines: - of.write(line) + outfile = notes.get(GitApplyTree.original_patch, os.path.basename(srcfile)) + + bb.utils.mkdirhier(os.path.join(outdir, name)) + with open(os.path.join(outdir, name, outfile), 'w') as of: + for line in patchlines: + of.write(line) finally: shutil.rmtree(tempdir) + def _need_dirty_check(self): + fetch = bb.fetch2.Fetch([], self.d) + check_dirtyness = False + for url in fetch.urls: + url_data = fetch.ud[url] + parm = url_data.parm + # a git url with subpath param will surely be dirty + # since the git tree from which we clone will be emptied + # from all files that are not in the subpath + if url_data.type == 'git' and parm.get('subpath'): + check_dirtyness = True + return check_dirtyness + + def _commitpatch(self, patch, patchfilevar): + output = "" + # Add all files + shellcmd = ["git", "add", "-f", "-A", "."] + output += runcmd(["sh", "-c", " ".join(shellcmd)], self.dir) + # Exclude the patches directory + shellcmd = ["git", "reset", "HEAD", self.patchdir] + output += runcmd(["sh", "-c", " ".join(shellcmd)], self.dir) + # Commit the result + (tmpfile, shellcmd) = self.prepareCommit(patch['file'], self.commituser, self.commitemail) + try: + shellcmd.insert(0, patchfilevar) + output += runcmd(["sh", "-c", " ".join(shellcmd)], self.dir) + finally: + os.remove(tmpfile) + return output + def _applypatch(self, patch, force = False, reverse = False, run = True): import shutil @@ -508,27 +603,26 @@ def _applypatchhelper(shellcmd, patch, force = False, reverse = False, run = Tru return runcmd(["sh", "-c", " ".join(shellcmd)], self.dir) - # Add hooks which add a pointer to the original patch file name in the commit message reporoot = (runcmd("git rev-parse --show-toplevel".split(), self.dir) or '').strip() if not reporoot: raise Exception("Cannot get repository root for directory %s" % self.dir) - hooks_dir = os.path.join(reporoot, '.git', 'hooks') - hooks_dir_backup = hooks_dir + '.devtool-orig' - if os.path.lexists(hooks_dir_backup): - raise Exception("Git hooks backup directory already exists: %s" % hooks_dir_backup) - if os.path.lexists(hooks_dir): - shutil.move(hooks_dir, hooks_dir_backup) - os.mkdir(hooks_dir) - commithook = os.path.join(hooks_dir, 'commit-msg') - applyhook = os.path.join(hooks_dir, 'applypatch-msg') - with open(commithook, 'w') as f: - # NOTE: the formatting here is significant; if you change it you'll also need to - # change other places which read it back - f.write('echo "\n%s: $PATCHFILE" >> $1' % GitApplyTree.patch_line_prefix) - os.chmod(commithook, 0o755) - 
shutil.copy2(commithook, applyhook) + + patch_applied = True try: patchfilevar = 'PATCHFILE="%s"' % os.path.basename(patch['file']) + if self._need_dirty_check(): + # Check dirtyness of the tree + try: + output = runcmd(["git", "--work-tree=%s" % reporoot, "status", "--short"]) + except CmdError: + pass + else: + if output: + # The tree is dirty, no need to try to apply patches with git anymore + # since they fail, fallback directly to patch + output = PatchTree._applypatch(self, patch, force, reverse, run) + output += self._commitpatch(patch, patchfilevar) + return output try: shellcmd = [patchfilevar, "git", "--work-tree=%s" % reporoot] self.gitCommandUserOptions(shellcmd, self.commituser, self.commitemail) @@ -555,29 +649,19 @@ def _applypatchhelper(shellcmd, patch, force = False, reverse = False, run = Tru except CmdError: # Fall back to patch output = PatchTree._applypatch(self, patch, force, reverse, run) - # Add all files - shellcmd = ["git", "add", "-f", "-A", "."] - output += runcmd(["sh", "-c", " ".join(shellcmd)], self.dir) - # Exclude the patches directory - shellcmd = ["git", "reset", "HEAD", self.patchdir] - output += runcmd(["sh", "-c", " ".join(shellcmd)], self.dir) - # Commit the result - (tmpfile, shellcmd) = self.prepareCommit(patch['file'], self.commituser, self.commitemail) - try: - shellcmd.insert(0, patchfilevar) - output += runcmd(["sh", "-c", " ".join(shellcmd)], self.dir) - finally: - os.remove(tmpfile) + output += self._commitpatch(patch, patchfilevar) return output + except: + patch_applied = False + raise finally: - shutil.rmtree(hooks_dir) - if os.path.lexists(hooks_dir_backup): - shutil.move(hooks_dir_backup, hooks_dir) + if patch_applied: + GitApplyTree.addNote(self.dir, "HEAD", GitApplyTree.original_patch, os.path.basename(patch['file']), self.commituser, self.commitemail) class QuiltTree(PatchSet): def _runcmd(self, args, run = True): - quiltrc = self.d.getVar('QUILTRCFILE', True) + quiltrc = self.d.getVar('QUILTRCFILE') if not run: return ["quilt"] + ["--quiltrc"] + [quiltrc] + args runcmd(["quilt"] + ["--quiltrc"] + [quiltrc] + args, self.dir) @@ -595,6 +679,8 @@ def __init__(self, dir, d): def Clean(self): try: + # make sure that patches/series file exists before quilt pop to keep quilt-0.67 happy + open(os.path.join(self.dir, "patches","series"), 'a').close() self._runcmd(["pop", "-a", "-f"]) oe.path.remove(os.path.join(self.dir, "patches","series")) except Exception: @@ -731,8 +817,9 @@ def Resolve(self): self.patchset.Push() except Exception: import sys - os.chdir(olddir) raise + finally: + os.chdir(olddir) # Patch resolver which relies on the user doing all the work involved in the # resolution, with the exception of refreshing the remote copy of the patch @@ -753,7 +840,7 @@ def Resolve(self): # Patch application failed patchcmd = self.patchset.Push(True, False, False) - t = self.patchset.d.getVar('T', True) + t = self.patchset.d.getVar('T') if not t: bb.msg.fatal("Build", "T not set") bb.utils.mkdirhier(t) @@ -792,12 +879,12 @@ def Resolve(self): # User did not fix the problem. Abort. 
raise PatchError("Patch application failed, and user did not fix and refresh the patch.") except Exception: - os.chdir(olddir) raise - os.chdir(olddir) + finally: + os.chdir(olddir) -def patch_path(url, fetch, workdir, expand=True): +def patch_path(url, fetch, unpackdir, expand=True): """Return the local path of a patch, or return nothing if this isn't a patch""" local = fetch.localpath(url) @@ -806,7 +893,7 @@ def patch_path(url, fetch, workdir, expand=True): base, ext = os.path.splitext(os.path.basename(local)) if ext in ('.gz', '.bz2', '.xz', '.Z'): if expand: - local = os.path.join(workdir, base) + local = os.path.join(unpackdir, base) ext = os.path.splitext(base)[1] urldata = fetch.ud[url] @@ -820,12 +907,12 @@ def patch_path(url, fetch, workdir, expand=True): return local def src_patches(d, all=False, expand=True): - workdir = d.getVar('WORKDIR', True) + unpackdir = d.getVar('UNPACKDIR') fetch = bb.fetch2.Fetch([], d) patches = [] sources = [] for url in fetch.urls: - local = patch_path(url, fetch, workdir, expand) + local = patch_path(url, fetch, unpackdir, expand) if not local: if all: local = fetch.localpath(url) @@ -868,13 +955,13 @@ def src_patches(d, all=False, expand=True): def should_apply(parm, d): import bb.utils if "mindate" in parm or "maxdate" in parm: - pn = d.getVar('PN', True) - srcdate = d.getVar('SRCDATE_%s' % pn, True) + pn = d.getVar('PN') + srcdate = d.getVar('SRCDATE_%s' % pn) if not srcdate: - srcdate = d.getVar('SRCDATE', True) + srcdate = d.getVar('SRCDATE') if srcdate == "now": - srcdate = d.getVar('DATE', True) + srcdate = d.getVar('DATE') if "maxdate" in parm and parm["maxdate"] < srcdate: return False, 'is outdated' @@ -884,34 +971,33 @@ def should_apply(parm, d): if "minrev" in parm: - srcrev = d.getVar('SRCREV', True) + srcrev = d.getVar('SRCREV') if srcrev and srcrev < parm["minrev"]: return False, 'applies to later revisions' if "maxrev" in parm: - srcrev = d.getVar('SRCREV', True) + srcrev = d.getVar('SRCREV') if srcrev and srcrev > parm["maxrev"]: return False, 'applies to earlier revisions' if "rev" in parm: - srcrev = d.getVar('SRCREV', True) + srcrev = d.getVar('SRCREV') if srcrev and parm["rev"] not in srcrev: return False, "doesn't apply to revision" if "notrev" in parm: - srcrev = d.getVar('SRCREV', True) + srcrev = d.getVar('SRCREV') if srcrev and parm["notrev"] in srcrev: return False, "doesn't apply to revision" if "maxver" in parm: - pv = d.getVar('PV', True) + pv = d.getVar('PV') if bb.utils.vercmp_string_op(pv, parm["maxver"], ">"): return False, "applies to earlier version" if "minver" in parm: - pv = d.getVar('PV', True) + pv = d.getVar('PV') if bb.utils.vercmp_string_op(pv, parm["minver"], "<"): return False, "applies to later version" - return True, None - + return True, None \ No newline at end of file diff --git a/lib/oe_sbom/sbom.py b/lib/oe_sbom/sbom.py index d768c1a..aeadda4 100644 --- a/lib/oe_sbom/sbom.py +++ b/lib/oe_sbom/sbom.py @@ -5,45 +5,98 @@ # import collections +import os DepRecipe = collections.namedtuple("DepRecipe", ("doc", "doc_sha1", "recipe")) DepSource = collections.namedtuple("DepSource", ("doc", "doc_sha1", "recipe", "file")) +def sanitize_spdx_id(spdx_id): + """ + Sanitize SPDX ID to comply with SPDX 2.2 specification. + Replace underscores and other invalid characters with hyphens. 
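SPDX 2.2 idstrings may only contain letters, digits, '.' and '-', which is why the sanitizer being defined here funnels everything else to a hyphen. A self-contained sketch of the same substitution (the input ID is a made-up example):

    import re

    def sanitize(spdx_id):
        # Anything outside [a-zA-Z0-9.-] is replaced with a hyphen.
        return re.sub(r"[^a-zA-Z0-9.-]", "-", spdx_id)

    assert sanitize("SPDXRef-Package-glibc_2.31+git") == "SPDXRef-Package-glibc-2.31-git"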
+ """ + import re + # Replace underscores and other invalid characters with hyphens + sanitized = re.sub(r'[^a-zA-Z0-9.-]', '-', spdx_id) + + return sanitized def get_recipe_spdxid(d): - return "SPDXRef-%s-%s" % ("Recipe", d.getVar("PN", True)) + return "SPDXRef-%s-%s" % ("Recipe", d.getVar("PN")) + + +def get_download_spdxid(d, idx): + return "SPDXRef-Download-%s-%d" % (d.getVar("PN"), idx) def get_package_spdxid(pkg): - return "SPDXRef-Package-%s" % pkg + return sanitize_spdx_id("SPDXRef-Package-%s" % pkg) def get_source_file_spdxid(d, idx): - return "SPDXRef-SourceFile-%s-%d" % (d.getVar("PN", True), idx) + return "SPDXRef-SourceFile-%s-%d" % (d.getVar("PN"), idx) def get_packaged_file_spdxid(pkg, idx): - return "SPDXRef-PackagedFile-%s-%d" % (pkg, idx) + return sanitize_spdx_id("SPDXRef-PackagedFile-%s-%d" % (pkg, idx)) def get_image_spdxid(img): return "SPDXRef-Image-%s" % img -def get_os_spdxid(img): - return "SPDXRef-OperatingSystem-%s" % img -def write_doc(d, spdx_doc, subdir, spdx_deploy=None): +def get_sdk_spdxid(sdk): + return "SPDXRef-SDK-%s" % sdk + + +def _doc_path_by_namespace(spdx_deploy, arch, doc_namespace): + return spdx_deploy / "by-namespace" / arch / doc_namespace.replace("/", "_") + + +def doc_find_by_namespace(spdx_deploy, search_arches, doc_namespace): + for pkgarch in search_arches: + p = _doc_path_by_namespace(spdx_deploy, pkgarch, doc_namespace) + if os.path.exists(str(p)): + return p + return None + + +def _doc_path_by_hashfn(spdx_deploy, arch, doc_name, hashfn): + return ( + spdx_deploy / "by-hash" / arch / hashfn.split()[1] / (doc_name + ".spdx.json") + ) + + +def doc_find_by_hashfn(spdx_deploy, search_arches, doc_name, hashfn): + for pkgarch in search_arches: + p = _doc_path_by_hashfn(spdx_deploy, pkgarch, doc_name, hashfn) + if os.path.exists(str(p)): + return p + return None + + +def doc_path(spdx_deploy, doc_name, arch, subdir): + return spdx_deploy / arch / subdir / (doc_name + ".spdx.json") + + +def write_doc(d, spdx_doc, arch, subdir, spdx_deploy=None, indent=None): from pathlib import Path if spdx_deploy is None: - spdx_deploy = Path(d.getVar("SPDXDEPLOY", True)) + spdx_deploy = Path(d.getVar("SPDXDEPLOY")) - dest = spdx_deploy / subdir / (spdx_doc.name + ".spdx.json") + dest = doc_path(spdx_deploy, spdx_doc.name, arch, subdir) dest.parent.mkdir(exist_ok=True, parents=True) with dest.open("wb") as f: - doc_sha1 = spdx_doc.to_json(f, sort_keys=True) + doc_sha1 = spdx_doc.to_json(f, sort_keys=True, indent=indent) + + l = _doc_path_by_namespace(spdx_deploy, arch, spdx_doc.documentNamespace) + l.parent.mkdir(exist_ok=True, parents=True) + l.symlink_to(os.path.relpath(str(dest), str(l.parent))) - l = spdx_deploy / "by-namespace" / spdx_doc.documentNamespace.replace("/", "_") + l = _doc_path_by_hashfn( + spdx_deploy, arch, spdx_doc.name, d.getVar("BB_HASHFILENAME") + ) l.parent.mkdir(exist_ok=True, parents=True) l.symlink_to(os.path.relpath(str(dest), str(l.parent))) @@ -75,4 +128,4 @@ def get_file(): f.seek(0) doc = oe_sbom.spdx.SPDXDocument.from_json(f) - return (doc, sha1.hexdigest()) + return (doc, sha1.hexdigest()) \ No newline at end of file diff --git a/lib/oe_sbom/spdx.py b/lib/oe_sbom/spdx.py index 6dbc34e..df44b9b 100644 --- a/lib/oe_sbom/spdx.py +++ b/lib/oe_sbom/spdx.py @@ -107,10 +107,13 @@ def get_helper(obj): obj._spdx[name] = [] return obj._spdx[name] + def set_helper(obj, value): + obj._spdx[name] = list(value) + def del_helper(obj): del obj._spdx[name] - attrs[name] = property(get_helper, None, del_helper) + attrs[name] = 
diff --git a/lib/oe_sbom/spdx.py b/lib/oe_sbom/spdx.py
index 6dbc34e..df44b9b 100644
--- a/lib/oe_sbom/spdx.py
+++ b/lib/oe_sbom/spdx.py
@@ -107,10 +107,13 @@ def get_helper(obj):
                 obj._spdx[name] = []
             return obj._spdx[name]
 
+        def set_helper(obj, value):
+            obj._spdx[name] = list(value)
+
         def del_helper(obj):
             del obj._spdx[name]
 
-        attrs[name] = property(get_helper, None, del_helper)
+        attrs[name] = property(get_helper, set_helper, del_helper)
 
     def init(self, source):
         return [self.prop.init(o) for o in source]
@@ -213,6 +216,18 @@ class SPDXPackageVerificationCode(SPDXObject):
 
 
 class SPDXPackage(SPDXObject):
+    ALLOWED_CHECKSUMS = [
+        "SHA1",
+        "SHA224",
+        "SHA256",
+        "SHA384",
+        "SHA512",
+        "MD2",
+        "MD4",
+        "MD5",
+        "MD6",
+    ]
+
     name = _String()
     SPDXID = _String()
     versionInfo = _String()
@@ -231,7 +246,7 @@ class SPDXPackage(SPDXObject):
     hasFiles = _StringList()
     packageFileName = _String()
     annotations = _ObjectList(SPDXAnnotation)
-    comment = _String()
+    checksums = _ObjectList(SPDXChecksum)
 
 
 class SPDXFile(SPDXObject):
@@ -271,18 +286,16 @@ class SPDXDocument(SPDXObject):
     name = _String()
     documentNamespace = _String()
     creationInfo = _Object(SPDXCreationInfo)
-    comment = _String()
     packages = _ObjectList(SPDXPackage)
     files = _ObjectList(SPDXFile)
     relationships = _ObjectList(SPDXRelationship)
-    documentDescribes = _StringList()
     externalDocumentRefs = _ObjectList(SPDXExternalDocumentRef)
     hasExtractedLicensingInfos = _ObjectList(SPDXExtractedLicensingInfo)
 
     def __init__(self, **d):
         super().__init__(**d)
 
-    def to_json(self, f, *, sort_keys=False, indent=2, separators=None):
+    def to_json(self, f, *, sort_keys=False, indent=None, separators=None):
         class Encoder(json.JSONEncoder):
             def default(self, o):
                 if isinstance(o, SPDXObject):
@@ -304,7 +317,8 @@ def default(self, o):
 
     @classmethod
     def from_json(cls, f):
-        return cls(**json.loads(f.read().decode('utf-8', 'replace')))
+        content = f.read().decode("utf-8")
+        return cls(**json.loads(content))
 
     def add_relationship(self, _from, relationship, _to, *, comment=None, annotation=None):
         if isinstance(_from, SPDXObject):
@@ -341,4 +355,4 @@ def find_external_document_ref(self, namespace):
         for r in self.externalDocumentRefs:
             if r.spdxDocument == namespace:
                 return r
-        return None
+        return None
\ No newline at end of file
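Note: the new `set_helper` above is what allows whole-list assignment such as
`doc.packages = [...]` on SPDX objects; previously list properties were read-only.
A minimal standalone sketch of the same property pattern (the `Doc` class here is a
toy stand-in, not the oe_sbom class hierarchy):

    def make_list_property(name):
        def get_helper(obj):
            if name not in obj._spdx:
                obj._spdx[name] = []
            return obj._spdx[name]

        def set_helper(obj, value):
            obj._spdx[name] = list(value)  # copies any iterable into a list

        def del_helper(obj):
            del obj._spdx[name]

        return property(get_helper, set_helper, del_helper)

    class Doc:
        packages = make_list_property("packages")

        def __init__(self):
            self._spdx = {}

    doc = Doc()
    doc.packages = ("pkg-a", "pkg-b")  # setter accepts any iterable
    print(doc.packages)  # ['pkg-a', 'pkg-b']

Also note that `to_json()` now defaults to `indent=None` (compact output); callers
pass an indent explicitly, e.g. via `get_json_indent()`, when SPDX_PRETTY is set.
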
diff --git a/lib/oe_sbom/spdx_common.py b/lib/oe_sbom/spdx_common.py
new file mode 100644
index 0000000..e718bb8
--- /dev/null
+++ b/lib/oe_sbom/spdx_common.py
@@ -0,0 +1,288 @@
+#
+# Copyright OpenEmbedded Contributors
+#
+# SPDX-License-Identifier: GPL-2.0-only
+#
+
+import bb
+import collections
+import json
+import oe.packagedata
+import os
+import re
+import shutil
+
+from pathlib import Path
+
+LIC_REGEX = re.compile(
+    rb"^\W*SPDX-License-Identifier:\s*([ \w\d.()+-]+?)(?:\s+\W*)?$",
+    re.MULTILINE,
+)
+
+
+def extract_licenses(filename):
+    """
+    Extract SPDX License identifiers from a file
+    """
+    try:
+        with open(filename, "rb") as f:
+            size = min(15000, os.stat(filename).st_size)
+            txt = f.read(size)
+            licenses = re.findall(LIC_REGEX, txt)
+            if licenses:
+                ascii_licenses = [lic.decode("ascii") for lic in licenses]
+                return ascii_licenses
+    except Exception as e:
+        bb.warn("Exception reading {}: {}".format(filename, e))
+    return []
+
+
+def is_work_shared_spdx(d):
+    return '/work-shared/' in d.getVar('S')
+
+
+def load_spdx_license_data(d):
+    with open(d.getVar("SPDX_LICENSES"), "r") as f:
+        data = json.load(f)
+        # Transform the license array to a dictionary
+        data["licenses"] = {l["licenseId"]: l for l in data["licenses"]}
+
+    return data
+
+
+def process_sources(d):
+    """
+    Returns True if the sources for this recipe should be included in the SPDX
+    or False if not
+    """
+    pn = d.getVar("PN")
+    assume_provided = (d.getVar("ASSUME_PROVIDED") or "").split()
+    if pn in assume_provided:
+        for p in d.getVar("PROVIDES").split():
+            if p != pn:
+                pn = p
+                break
+
+    # glibc-locale: do_fetch, do_unpack and do_patch tasks have been deleted,
+    # so avoid archiving source here.
+    if pn.startswith("glibc-locale"):
+        return False
+    if d.getVar("PN") == "libtool-cross":
+        return False
+    if d.getVar("PN") == "libgcc-initial":
+        return False
+    if d.getVar("PN") == "shadow-sysroot":
+        return False
+
+    return True
+
+
+class Dep(object):
+    pn = None
+    hashfn = None
+    in_taskhash = None
+
+    def __init__(self, pn, hashfn, in_taskhash):
+        self.pn = pn
+        self.hashfn = hashfn
+        self.in_taskhash = in_taskhash
+
+
+def collect_direct_deps(d, dep_task):
+    """
+    Find direct dependencies of current task
+
+    Returns the list of recipes that have a dep_task that the current task
+    depends on
+    """
+    current_task = "do_" + d.getVar("BB_CURRENTTASK")
+    pn = d.getVar("PN")
+
+    taskdepdata = d.getVar("BB_TASKDEPDATA", False)
+
+    for this_dep in taskdepdata.values():
+        if this_dep[0] == pn and this_dep[1] == current_task:
+            break
+    else:
+        bb.fatal("Unable to find this {}:{} in taskdepdata".format(pn, current_task))
+
+    deps = set()
+
+    for dep_name in this_dep[3]:
+        dep_data = taskdepdata[dep_name]
+        if dep_data[1] == dep_task and dep_data[0] != pn:
+            deps.add((dep_data[0], dep_data[6], dep_name in this_dep[8]))
+
+    return sorted(deps)
+
+
+def get_spdx_deps(d):
+    """
+    Reads the SPDX dependencies JSON file and returns the data
+    """
+    spdx_deps_file = Path(d.getVar("SPDXDEPS"))
+
+    deps = []
+    with spdx_deps_file.open("r") as f:
+        for dep in json.load(f):
+            deps.append(Dep(*dep))
+    return deps
+
+
+def collect_package_providers(d):
+    """
+    Returns a dictionary where each RPROVIDES is mapped to the package that
+    provides it
+    """
+    deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX"))
+
+    providers = {}
+
+    deps = collect_direct_deps(d, "do_create_spdx")
+    deps.append((d.getVar("PN"), d.getVar("BB_HASHFILENAME"), True))
+
+    for dep_pn, dep_hashfn, _ in deps:
+        localdata = d
+        recipe_data = oe.packagedata.read_pkgdata(dep_pn, localdata)
+        if not recipe_data:
+            localdata = bb.data.createCopy(d)
+            localdata.setVar("PKGDATA_DIR", "${PKGDATA_DIR_SDK}")
+            recipe_data = oe.packagedata.read_pkgdata(dep_pn, localdata)
+
+        for pkg in recipe_data.get("PACKAGES", "").split():
+            pkg_data = oe.packagedata.read_subpkgdata_dict(pkg, localdata)
+            rprovides = set(
+                n
+                for n, _ in bb.utils.explode_dep_versions2(
+                    pkg_data.get("RPROVIDES", "")
+                ).items()
+            )
+            rprovides.add(pkg)
+
+            if "PKG" in pkg_data:
+                pkg = pkg_data["PKG"]
+                rprovides.add(pkg)
+
+            for r in rprovides:
+                providers[r] = (pkg, dep_hashfn)
+
+    return providers
+
+
+def get_patched_src(d):
+    """
+    Save the patched source of the recipe in SPDX_WORKDIR.
+    """
+    spdx_workdir = d.getVar("SPDXWORK")
+    spdx_sysroot_native = d.getVar("STAGING_DIR_NATIVE")
+    pn = d.getVar("PN")
+
+    workdir = d.getVar("WORKDIR")
+
+    try:
+        # The kernel class functions require the source to be on work-shared,
+        # so we don't change WORKDIR
+        if not is_work_shared_spdx(d):
+            # Change WORKDIR so that do_unpack and do_patch run in another dir.
+            d.setVar("WORKDIR", spdx_workdir)
+            # Restore the original path to the recipe's native sysroot
+            # (it's relative to WORKDIR).
+            d.setVar("STAGING_DIR_NATIVE", spdx_sysroot_native)
+
+            # Changing 'WORKDIR' also changes 'B'; create dir 'B' because some
+            # of the following tasks (e.g. some recipes' do_patch) require it
+            # to exist.
+            bb.utils.mkdirhier(d.getVar("B"))
+
+            bb.build.exec_func("do_unpack", d)
+
+        if d.getVar("SRC_URI") != "":
+            if bb.data.inherits_class('dos2unix', d):
+                bb.build.exec_func('do_convert_crlf_to_lf', d)
+            bb.build.exec_func("do_patch", d)
+
+        # Copy the source from work-shared to spdx_workdir
+        if is_work_shared_spdx(d):
+            share_src = d.getVar('S')
+            d.setVar("WORKDIR", spdx_workdir)
+            d.setVar("STAGING_DIR_NATIVE", spdx_sysroot_native)
+            # Copy the source to ${SPDXWORK}, under the same basename dir as ${S}
+            src_dir = (
+                spdx_workdir
+                + "/"
+                + os.path.basename(share_src)
+            )
+            # For kernel source, rename the suffix dir 'kernel-source'
+            # to ${BP} (${BPN}-${PV})
+            if bb.data.inherits_class("kernel", d):
+                src_dir = spdx_workdir + "/" + d.getVar('BP')
+
+            bb.note("copyhardlinktree {} to {}".format(share_src, src_dir))
+            oe.path.copyhardlinktree(share_src, src_dir)
+
+        # Some userland recipes have no source.
+        if not os.path.exists(spdx_workdir):
+            bb.utils.mkdirhier(spdx_workdir)
+    finally:
+        d.setVar("WORKDIR", workdir)
+
+
+def has_task(d, task):
+    return bool(d.getVarFlag(task, "task", False)) and not bool(d.getVarFlag(task, "noexec", False))
+
+
+def fetch_data_to_uri(fd, name):
+    """
+    Translates a bitbake FetchData to a string URI
+    """
+    uri = fd.type
+    # Map gitsm to git, since gitsm:// is not a valid URI protocol
+    if uri == "gitsm":
+        uri = "git"
+    proto = getattr(fd, "proto", None)
+    if proto is not None:
+        uri = uri + "+" + proto
+    uri = uri + "://" + fd.host + fd.path
+
+    if fd.method.supports_srcrev():
+        uri = uri + "@" + fd.revision
+
+    return uri
+
+
+def is_compiled_source(filename, compiled_sources, types):
+    """
+    Check if the file is a compiled file
+    """
+    # If we don't have compiled sources, we assume all are compiled.
+    if not compiled_sources:
+        return True
+
+    # We always return True if the file type is not in the list of compiled
+    # file types. Some files in the source directory are not compiled, for
+    # example Makefiles, but also Python .py files; we need to include them
+    # in the SPDX.
+    basename = os.path.basename(filename)
+    ext = basename.partition(".")[2]
+    if ext not in types:
+        return True
+    # Check that the file is in the list
+    return filename in compiled_sources
+
+
+def get_compiled_sources(d):
+    """
+    Get the list of compiled sources from debug information and normalize the paths
+    """
+    import itertools
+
+    source_info = oe.package.read_debugsources_info(d)
+    if not source_info:
+        bb.debug(1, "Do not have debugsources.list. Skipping")
+        return [], []
+
+    # Sources are not split in SPDX for now, so we aggregate them
+    sources = set(itertools.chain.from_iterable(source_info.values()))
+    # Collect the extensions of the files
+    types = set()
+    for src in sources:
+        basename = os.path.basename(src)
+        ext = basename.partition(".")[2]
+        if ext not in types and ext:
+            types.add(ext)
+    bb.debug(1, "Num of sources: {} and types: {} {}".format(len(sources), len(types), str(types)))
+    return sources, types
\ No newline at end of file
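Note: `is_compiled_source()` above is the filter applied when
SPDX_INCLUDE_COMPILED_SOURCES is enabled: files whose extension never appears in the
debug-sources list (Makefiles, .py files, ...) are always kept, while a file with a
compiled extension is kept only if the debug info actually names it. A standalone
sketch with invented file names:

    import os

    def is_compiled_source(filename, compiled_sources, types):
        if not compiled_sources:
            return True  # no debug info: assume everything is compiled
        ext = os.path.basename(filename).partition(".")[2]
        if ext not in types:
            return True  # extension never compiled (e.g. Makefile, .py)
        return filename in compiled_sources

    compiled = {"src/main.c"}  # made-up debug-sources list
    types = {"c"}
    print(is_compiled_source("src/main.c", compiled, types))    # True: listed in debug info
    print(is_compiled_source("src/unused.c", compiled, types))  # False: .c but never compiled
    print(is_compiled_source("setup.py", compiled, types))      # True: .py is not a compiled type
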
diff --git a/templates/feature/sbom/template.conf b/templates/feature/sbom/template.conf
index 752d8bc..4959df2 100644
--- a/templates/feature/sbom/template.conf
+++ b/templates/feature/sbom/template.conf
@@ -6,7 +6,7 @@ require conf/distro/include/license_config.inc
 SPDX_INCLUDE_SOURCES = "1"
 SPDX_ARCHIVE_SOURCES = "1"
 
-WRTEMPLATE_CLASSES += "sls-create-spdx"
+WRTEMPLATE_CLASSES += "create-spdx-2.2"
 
 KERNEL_FEATURES_append = " cfg/debug/debug-info/debug-info.scc"