|
7 | 7 | # See https://aboutcode.org for more information about nexB OSS projects. |
8 | 8 | # |
9 | 9 |
|
| 10 | +import io |
| 11 | +import os |
| 12 | +import fnmatch |
10 | 13 | import logging |
11 | 14 | import sys |
12 | 15 | from collections import namedtuple |
13 | 16 | from pathlib import Path |
14 | 17 |
|
15 | 18 | from packagedcode import models |
16 | 19 | from packagedcode import nevra |
| 20 | +from packagedcode.licensing import RESOURCE_TO_PACKAGE_LICENSE_FIELDS |
17 | 21 | from packagedcode.pyrpm import RPM |
18 | 22 | from packagedcode.rpm_installed import collect_installed_rpmdb_xmlish_from_rpmdb_loc |
19 | 23 | from packagedcode.rpm_installed import parse_rpm_xmlish |
20 | 24 | from packagedcode.utils import build_description |
21 | 25 | from packagedcode.utils import get_ancestor |
| 26 | +from scancode.api import get_licenses |
22 | 27 |
|
23 | | -TRACE = False |
| 28 | +TRACE = os.environ.get('SCANCODE_DEBUG_PACKAGE_API', False) |
24 | 29 |
|
25 | 30 |
|
26 | 31 | def logger_debug(*args): |
@@ -374,6 +379,166 @@ def parse(cls, location, package_only=False): |
374 | 379 | yield models.PackageData.from_data(package_data, package_only) |
375 | 380 |
|
376 | 381 |
|
class RpmMarinerContainerManifestHandler(models.DatafileHandler):
    """
    Handle the RPM package manifest of a Mariner distroless container image.

    The manifest is a text file with one package per line, each line being a
    tab-separated record whose fields are listed, in order, in
    ``manifest_attributes``.
    """
    datasource_id = 'rpm_mariner_manifest'
    # container-manifest-1 is more minimal and has the same data
    path_patterns = ('*var/lib/rpmmanifest/container-manifest-2',)
    default_package_type = 'rpm'
    default_package_namespace = 'mariner'
    description = 'RPM mariner distroless package manifest'
    documentation_url = 'https://github.com/microsoft/marinara/'

    # Field names for each tab-separated column of a manifest line, in order.
    # The "n1".."n4" and "checksum_algo" columns are not mapped to package
    # data and are dropped by clean_mariner_manifest_data().
    manifest_attributes = [
        "name",
        "version",
        "n1",
        "n2",
        "party",
        "n3",
        "n4",
        "arch",
        "checksum_algo",
        "filename"
    ]

    @classmethod
    def parse(cls, location, package_only=False):
        """
        Yield one PackageData for each non-blank line of the manifest file at
        ``location``. ``package_only`` is passed through to
        ``PackageData.from_data``.
        """
        # Read everything up front so the file is closed before we start
        # yielding to the caller.
        with io.open(location, encoding='utf-8') as data:
            lines = data.readlines()

        for line in lines:
            line = line.rstrip("\n")
            if not line.strip():
                # A blank line (for instance a trailing one) is not a package.
                continue

            package_data = {
                "type": cls.default_package_type,
                "namespace": cls.default_package_namespace,
                "datasource_id": cls.datasource_id,
            }
            # zip() stops at the shortest input: a short line simply leaves
            # the trailing attributes unset.
            package_data.update(zip(cls.manifest_attributes, line.split("\t")))

            package_data = cls.clean_mariner_manifest_data(package_data)
            yield models.PackageData.from_data(package_data, package_only)

    @classmethod
    def assemble(cls, package_data, resource, codebase, package_adder):
        """
        Yield a Package built from ``package_data`` followed by the license
        file resources found for this package name under the image root.

        Package data attached to each matching license file is merged in the
        Package, and each matching resource is assigned to the package with
        ``package_adder``.
        """
        # Walk up from the manifest file to the root of the image filesystem.
        levels_up = len('var/lib/rpmmanifest/container-manifest-2'.split('/'))
        root_resource = get_ancestor(
            levels_up=levels_up,
            resource=resource,
            codebase=codebase,
        )
        package_name = package_data.name

        package = models.Package.from_package_data(
            package_data=package_data,
            datafile_path=resource.path,
        )
        package_uid = package.package_uid

        # License files are installed in a directory named after the package.
        assemblable_paths = (
            f'*usr/share/licenses/{package_name}/COPYING*',
            f'*usr/share/licenses/{package_name}/LICENSE*',
        )

        resources = []
        for res in root_resource.walk(codebase):
            if TRACE:
                logger_debug(f' rpm: mariner assemble: root_walk: res: {res}')
            if not any(
                fnmatch.fnmatch(name=res.location, pat=pattern)
                for pattern in assemblable_paths
            ):
                continue

            if TRACE:
                logger_debug(f' rpm: mariner assemble: pattern matched for: res: {res}')

            for pkgdt in res.package_data:
                # Use a distinct name: do not rebind the ``package_data``
                # argument of this method.
                license_package_data = models.PackageData.from_dict(pkgdt)
                if TRACE:
                    logger_debug(f' rpm: mariner assemble: package_data: {license_package_data.declared_license_expression}')

                package.update(
                    package_data=license_package_data,
                    datafile_path=res.path,
                    check_compatible=False,
                    replace=False,
                    include_version=False,
                    include_qualifiers=False,
                    include_subpath=False,
                )

            package_adder(package_uid, res, codebase)
            resources.append(res)

        yield package
        yield from resources

    @staticmethod
    def clean_mariner_manifest_data(package_data):
        """
        Return the ``package_data`` mapping of raw manifest fields, modified in
        place to use package data attribute names:

        - the "n1", "n2", "n3", "n4" and "checksum_algo" fields are removed,
        - "arch" becomes an "arch" entry in "qualifiers",
        - "filename" becomes a "filename" entry in "extra_data",
        - "party" becomes a single organization Party with the "owner" role
          in "parties".

        Missing or empty fields are ignored.
        """
        ignore_attributes = ["n1", "n2", "n3", "n4", "checksum_algo"]
        for attribute in ignore_attributes:
            # Use a default: a short manifest line may not have all fields.
            package_data.pop(attribute, None)

        if arch := package_data.pop("arch", None):
            package_data["qualifiers"] = {"arch": arch}

        if filename := package_data.pop("filename", None):
            package_data["extra_data"] = {"filename": filename}

        if party := package_data.pop("party", None):
            party_obj = models.Party(
                type=models.party_org,
                role="owner",
                name=party,
            )
            package_data["parties"] = [party_obj.to_dict()]

        return package_data
| 501 | + |
| 502 | + |
class RpmLicenseFilesHandler(models.NonAssemblableDatafileHandler):
    """
    Handle the COPYING and LICENSE files installed under
    ``usr/share/licenses/<package name>/`` and report the licenses detected
    in each file as package data for that package name.
    """
    datasource_id = 'rpm_package_licenses'
    path_patterns = (
        '*usr/share/licenses/*/COPYING*',
        '*usr/share/licenses/*/LICENSE*',
    )
    default_package_type = 'rpm'
    default_package_namespace = 'mariner'
    description = 'RPM mariner distroless package license files'
    documentation_url = 'https://github.com/microsoft/marinara/'

    @classmethod
    def parse(cls, location, package_only=False):
        """
        Yield a single PackageData for the license file at ``location``.

        With ``package_only``, the PackageData only has its type, namespace,
        name and datasource_id set and no license detection is run.
        Otherwise the license fields are populated from the results of
        ``get_licenses`` for this file.
        """
        # The license files are in a directory which is the package name,
        # for example: "/usr/share/licenses/openssl/LICENSE"
        name = location.split('/usr/share/licenses/').pop().split('/')[0]
        package_data = models.PackageData(
            type=cls.default_package_type,
            namespace=cls.default_package_namespace,
            name=name,
            datasource_id=cls.datasource_id,
        )

        if package_only:
            yield package_data
            # Stop here: without this return, the license detection below
            # would still run and the same object would be yielded twice.
            return

        resource_license_attributes = get_licenses(
            location=location,
            include_text=True,
            license_diagnostics=True,
            license_text_diagnostics=True,
        )
        # Copy each resource-level license result to the package attribute
        # it is mapped to.
        for key, key_pkg in RESOURCE_TO_PACKAGE_LICENSE_FIELDS.items():
            setattr(package_data, key_pkg, resource_license_attributes.get(key))

        yield package_data
| 540 | + |
| 541 | + |
377 | 542 | ALGO_BY_ID = { |
378 | 543 | None: 'md5', |
379 | 544 | 0: 'md5', |
|
0 commit comments