Skip to content

Commit ea4cf7d

Browse files
committed
Add unified tags mapping across environment builders
Implement consistent support for the CycloneDX component 'tags' field (https://cyclonedx.org/docs/1.6/json/#components_items_tags) by mapping the Python packaging metadata 'keywords' across build backends.
- EnvironmentBB: parse 'Keywords' from package metadata
- PipenvBB: normalize the 'keywords' field from Pipfile.lock entries
- PoetryBB: handle 'keywords' from pyproject and lock data
- Introduce a shared _to_tags() helper for safe normalization
This ensures all SBOM builders populate CycloneDX tags consistently, aligning with PEP 621 and other Python packaging metadata standards.
Signed-off-by: Aryan Thakur <[email protected]>
Signed-off-by: aryan thakur <[email protected]>
Signed-off-by: rn23thakur <[email protected]>
1 parent e749cd8 commit ea4cf7d

File tree

3 files changed

+57
-1
lines changed

3 files changed

+57
-1
lines changed

cyclonedx_py/_internal/environment.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
# Copyright (c) OWASP Foundation. All Rights Reserved.
1717

1818

19+
import re
1920
from argparse import OPTIONAL, ArgumentParser
2021
from collections.abc import Iterable
2122
from importlib.metadata import distributions
@@ -41,6 +42,19 @@
4142
from .utils.pep639 import dist2licenses_from_files as pep639_dist2licenses_from_files
4243
from .utils.pyproject import pyproject2component, pyproject2dependencies, pyproject_load
4344

45+
_TAG_SPLIT = re.compile(r'[;,]\s*|\s+')
46+
47+
48+
def _to_tags(raw):
49+
if raw is None:
50+
return []
51+
if isinstance(raw, str):
52+
return [t for t in (s.strip() for s in _TAG_SPLIT.split(raw)) if t]
53+
if isinstance(raw, (list, tuple, set)):
54+
return [t for t in (str(s).strip() for s in raw) if t]
55+
return []
56+
57+
4458
if TYPE_CHECKING: # pragma: no cover
4559
from logging import Logger
4660

@@ -185,6 +199,10 @@ def __add_components(self, bom: 'Bom',
185199
# path of dist-package on disc? naaa... a package may have multiple files/folders on disc
186200
)
187201

202+
raw = dist.metadata.get('Keywords')
203+
if hasattr(component, "tags"):
204+
component.tags = _to_tags(raw)
205+
188206
# region licenses
189207
component.licenses.update(metadata2licenses(dist_meta, LicenseFactory(),
190208
gather_texts=self._gather_license_texts))

cyclonedx_py/_internal/pipenv.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
# Copyright (c) OWASP Foundation. All Rights Reserved.
1717

1818

19+
import re
1920
from argparse import OPTIONAL, ArgumentParser
2021
from collections.abc import Generator
2122
from json import loads as json_loads
@@ -37,6 +38,19 @@
3738
from .utils.pyproject import pyproject_file2component
3839
from .utils.secret import redact_auth_from_url
3940

41+
_TAG_SPLIT = re.compile(r'[;,]\s*|\s+')
42+
43+
44+
def _to_tags(raw):
45+
if raw is None:
46+
return []
47+
if isinstance(raw, str):
48+
return [t for t in (s.strip() for s in _TAG_SPLIT.split(raw)) if t]
49+
if isinstance(raw, (list, tuple, set)):
50+
return [t for t in (str(s).strip() for s in raw) if t]
51+
return []
52+
53+
4054
if TYPE_CHECKING: # pragma: no cover
4155
from logging import Logger
4256

@@ -175,6 +189,10 @@ def _make_bom(self, root_c: Optional['Component'],
175189
version=package_data['version'][2:] if 'version' in package_data else None,
176190
external_references=self.__make_extrefs(package_name, package_data, source_urls),
177191
)
192+
raw_keywords = package_data.get('keywords')
193+
if hasattr(component, "tags"):
194+
component.tags = _to_tags(raw_keywords)
195+
178196
component.purl = PackageURL(
179197
type=PurlTypePypi,
180198
name=component.name,

cyclonedx_py/_internal/poetry.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
# Copyright (c) OWASP Foundation. All Rights Reserved.
1717

1818

19+
import re
1920
from argparse import OPTIONAL, ArgumentParser
2021
from collections.abc import Generator, Iterable
2122
from dataclasses import dataclass
@@ -39,6 +40,19 @@
3940
from .utils.secret import redact_auth_from_url
4041
from .utils.toml import toml_loads
4142

43+
_TAG_SPLIT = re.compile(r'[;,]\s*|\s+')
44+
45+
46+
def _to_tags(raw):
47+
if raw is None:
48+
return []
49+
if isinstance(raw, str):
50+
return [t for t in (s.strip() for s in _TAG_SPLIT.split(raw)) if t]
51+
if isinstance(raw, (list, tuple, set)):
52+
return [t for t in (str(s).strip() for s in raw) if t]
53+
return []
54+
55+
4256
if TYPE_CHECKING: # pragma: no cover
4357
from logging import Logger
4458

@@ -404,7 +418,7 @@ def __make_component4lock(self, package: 'T_NameDict') -> 'Component':
404418
is_vcs = source.get('type') in self.__PACKAGE_SRC_VCS
405419
is_local = source.get('type') in self.__PACKAGE_SRC_LOCAL
406420

407-
return Component(
421+
component = Component(
408422
bom_ref=f'{package["name"]}@{package["version"]}',
409423
name=package['name'],
410424
version=package.get('version'),
@@ -433,6 +447,12 @@ def __make_component4lock(self, package: 'T_NameDict') -> 'Component':
433447
) if not is_local else None
434448
)
435449

450+
raw_keywords = package.get('keywords')
451+
if hasattr(component, "tags"):
452+
component.tags = _to_tags(raw_keywords)
453+
454+
return component
455+
436456
def __purl_qualifiers4lock(self, package: 'T_NameDict') -> 'T_NameDict':
437457
# see https://github.com/package-url/purl-spec/blob/master/PURL-SPECIFICATION.rst
438458
qs = {}

0 commit comments

Comments
 (0)