Skip to content

Commit a648775

Browse files
authored
feat: guarantee unique BomRefs in serialization result (#479)
Incorporate `output.BomRefDiscriminator` on serialization.

Signed-off-by: Jan Kowalleck <[email protected]>
1 parent f61a730 commit a648775

File tree

8 files changed

+129
-34
lines changed

8 files changed

+129
-34
lines changed

cyclonedx/model/bom.py

Lines changed: 10 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
import warnings
2020
from datetime import datetime
2121
from itertools import chain
22-
from typing import TYPE_CHECKING, Iterable, Optional, Set, Union
22+
from typing import TYPE_CHECKING, Generator, Iterable, Optional, Union
2323
from uuid import UUID, uuid4
2424

2525
import serializable
@@ -427,16 +427,11 @@ def external_references(self) -> 'SortedSet[ExternalReference]':
427427
def external_references(self, external_references: Iterable[ExternalReference]) -> None:
428428
self._external_references = SortedSet(external_references)
429429

430-
def _get_all_components(self) -> Set[Component]:
431-
components: Set[Component] = set()
430+
def _get_all_components(self) -> Generator[Component, None, None]:
432431
if self.metadata.component:
433-
components.update(self.metadata.component.get_all_nested_components(include_self=True))
434-
435-
# Add Components and sub Components
432+
yield from self.metadata.component.get_all_nested_components(include_self=True)
436433
for c in self.components:
437-
components.update(c.get_all_nested_components(include_self=True))
438-
439-
return components
434+
yield from c.get_all_nested_components(include_self=True)
440435

441436
def get_vulnerabilities_for_bom_ref(self, bom_ref: BomRef) -> 'SortedSet[Vulnerability]':
442437
"""
@@ -543,13 +538,13 @@ def validate(self) -> bool:
543538
self.register_dependency(target=_s)
544539

545540
# 1. Make sure dependencies are all in this Bom.
546-
all_bom_refs = set(map(lambda c: c.bom_ref, self._get_all_components())) | set(
541+
component_bom_refs = set(map(lambda c: c.bom_ref, self._get_all_components())) | set(
547542
map(lambda s: s.bom_ref, self.services))
548-
all_dependency_bom_refs = set(chain((d.ref for d in self.dependencies),
549-
chain.from_iterable(
550-
d.dependencies_as_bom_refs() for d in self.dependencies)))
551-
552-
dependency_diff = all_dependency_bom_refs - all_bom_refs
543+
dependency_bom_refs = set(chain(
544+
(d.ref for d in self.dependencies),
545+
chain.from_iterable(d.dependencies_as_bom_refs() for d in self.dependencies)
546+
))
547+
dependency_diff = dependency_bom_refs - component_bom_refs
553548
if len(dependency_diff) > 0:
554549
raise UnknownComponentDependencyException(
555550
f'One or more Components have Dependency references to Components/Services that are not known in this '

cyclonedx/output/__init__.py

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,15 @@
2222
import os
2323
import warnings
2424
from abc import ABC, abstractmethod
25-
from typing import TYPE_CHECKING, Any, Literal, Mapping, Optional, Type, Union, overload
25+
from itertools import chain
26+
from random import random
27+
from typing import TYPE_CHECKING, Any, Iterable, Literal, Mapping, Optional, Type, Union, overload
2628

2729
from ..schema import OutputFormat, SchemaVersion
2830

2931
if TYPE_CHECKING: # pragma: no cover
3032
from ..model.bom import Bom
33+
from ..model.bom_ref import BomRef
3134
from .json import Json as JsonOutputter
3235
from .xml import Xml as XmlOutputter
3336

@@ -144,3 +147,41 @@ def get_instance(bom: 'Bom', output_format: OutputFormat = OutputFormat.XML,
144147
category=DeprecationWarning, stacklevel=1
145148
)
146149
return make_outputter(bom, output_format, schema_version)
150+
151+
152+
class BomRefDiscriminator:
153+
154+
def __init__(self, bomrefs: Iterable['BomRef'], prefix: str = 'BomRef') -> None:
155+
# do not use dict/set here: different BomRefs with the same value have the same hash and would shadow each other
156+
self._bomrefs = tuple((bomref, bomref.value) for bomref in bomrefs)
157+
self._prefix = prefix
158+
159+
def __enter__(self) -> None:
160+
self.discriminate()
161+
162+
def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
163+
self.reset()
164+
165+
def discriminate(self) -> None:
166+
known_values = set()
167+
for bomref, _ in self._bomrefs:
168+
value = bomref.value
169+
if value in known_values:
170+
value = self._make_unique()
171+
bomref.value = value
172+
known_values.add(value)
173+
174+
def reset(self) -> None:
175+
for bomref, original_value in self._bomrefs:
176+
bomref.value = original_value
177+
178+
def _make_unique(self) -> str:
179+
return f'{self._prefix}{str(random())[1:]}{str(random())[1:]}' # nosec B311
180+
181+
@classmethod
182+
def from_bom(cls, bom: 'Bom', prefix: str = 'BomRef') -> 'BomRefDiscriminator':
183+
return cls(chain(
184+
map(lambda c: c.bom_ref, bom._get_all_components()),
185+
map(lambda s: s.bom_ref, bom.services),
186+
map(lambda v: v.bom_ref, bom.vulnerabilities)
187+
), prefix)

cyclonedx/output/json.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
SchemaVersion1Dot3,
3131
SchemaVersion1Dot4,
3232
)
33-
from . import BaseOutput
33+
from . import BaseOutput, BomRefDiscriminator
3434

3535
if TYPE_CHECKING: # pragma: no cover
3636
from ..model.bom import Bom
@@ -67,9 +67,10 @@ def generate(self, force_regeneration: bool = False) -> None:
6767
_view = SCHEMA_VERSIONS.get(self.schema_version_enum)
6868
bom = self.get_bom()
6969
bom.validate()
70-
bom_json: Dict[str, Any] = json_loads(
71-
bom.as_json( # type:ignore[attr-defined]
72-
view_=_view))
70+
with BomRefDiscriminator.from_bom(bom):
71+
bom_json: Dict[str, Any] = json_loads(
72+
bom.as_json( # type:ignore[attr-defined]
73+
view_=_view))
7374
bom_json.update(_json_core)
7475
self._bom_json = bom_json
7576
self.generated = True

cyclonedx/output/xml.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
SchemaVersion1Dot3,
3131
SchemaVersion1Dot4,
3232
)
33-
from . import BaseOutput
33+
from . import BaseOutput, BomRefDiscriminator
3434

3535
if TYPE_CHECKING: # pragma: no cover
3636
from ..model.bom import Bom
@@ -57,14 +57,16 @@ def generate(self, force_regeneration: bool = False) -> None:
5757
bom = self.get_bom()
5858
bom.validate()
5959
xmlns = self.get_target_namespace()
60-
self._bom_xml = '<?xml version="1.0" ?>\n' + xml_dumps(
61-
bom.as_xml( # type:ignore[attr-defined]
62-
_view, as_string=False, xmlns=xmlns),
63-
method='xml', default_namespace=xmlns, encoding='unicode',
64-
# `xml-declaration` is inconsistent/bugged in py38, especially on Windows it will print a non-UTF8 codepage.
65-
# Furthermore, it might add an encoding of "utf-8" which is redundant default value of XML.
66-
# -> so we write the declaration manually, as long as py38 is supported.
67-
xml_declaration=False)
60+
with BomRefDiscriminator.from_bom(bom):
61+
self._bom_xml = '<?xml version="1.0" ?>\n' + xml_dumps(
62+
bom.as_xml( # type:ignore[attr-defined]
63+
_view, as_string=False, xmlns=xmlns),
64+
method='xml', default_namespace=xmlns, encoding='unicode',
65+
# `xml-declaration` is inconsistent/bugged in py38,
66+
# especially on Windows it will print a non-UTF8 codepage.
67+
# Furthermore, it might add an encoding of "utf-8" which is redundant default value of XML.
68+
# -> so we write the declaration manually, as long as py38 is supported.
69+
xml_declaration=False)
6870

6971
self.generated = True
7072

tests/_data/models.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
from datetime import datetime, timezone
2121
from decimal import Decimal
2222
from inspect import getmembers, isfunction
23-
from typing import Any, List, Optional
23+
from typing import Any, List, Optional, Tuple
2424
from uuid import UUID
2525

2626
# See https://github.com/package-url/packageurl-python/issues/65
@@ -754,6 +754,18 @@ def get_bom_with_multiple_licenses() -> Bom:
754754
)
755755

756756

757+
def bom_all_same_bomref() -> Tuple[Bom, int]:
758+
bom = Bom()
759+
bom.metadata.component = Component(name='root', bom_ref='foo', components=[
760+
Component(name='root.sub', bom_ref='foo')])
761+
bom.components.add(Component(name='comp', bom_ref='foo', components=[
762+
Component(name='comp.sub', bom_ref='foo')]))
763+
bom.services.add(Service(name='serv', bom_ref='foo'))
764+
bom.vulnerabilities.add(Vulnerability(id='vuln', bom_ref='foo'))
765+
nr_bomrefs = 6 # number of bom-refs used
766+
return bom, nr_bomrefs
767+
768+
757769
# ---
758770

759771

tests/test_output.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@
2424
from ddt import data, ddt, named_data, unpack
2525

2626
from cyclonedx.model.bom import Bom
27-
from cyclonedx.output import make_outputter
27+
from cyclonedx.model.bom_ref import BomRef
28+
from cyclonedx.output import BomRefDiscriminator, make_outputter
2829
from cyclonedx.schema import OutputFormat, SchemaVersion
2930

3031

@@ -49,3 +50,30 @@ def test_fails_on_wrong_args(self, of: OutputFormat, sv: SchemaVersion, raises_r
4950
bom = Mock(spec=Bom)
5051
with self.assertRaisesRegex(*raises_regex):
5152
make_outputter(bom, of, sv)
53+
54+
55+
class TestBomRefDiscriminator(TestCase):
56+
57+
def test_discriminate_and_reset_with(self) -> None:
58+
bomref1 = BomRef('djdlkfjdslkf')
59+
bomref2 = BomRef('djdlkfjdslkf')
60+
self.assertEqual(bomref1.value, bomref2.value, 'blank')
61+
discr = BomRefDiscriminator([bomref1, bomref2])
62+
self.assertEqual(bomref1.value, bomref2.value, 'init')
63+
discr.discriminate()
64+
self.assertNotEqual(bomref1.value, bomref2.value, 'should be discriminated')
65+
discr.reset()
66+
self.assertEqual('djdlkfjdslkf', bomref1.value)
67+
self.assertEqual('djdlkfjdslkf', bomref2.value)
68+
69+
def test_discriminate_and_reset_manually(self) -> None:
70+
bomref1 = BomRef('djdlkfjdslkf')
71+
bomref2 = BomRef('djdlkfjdslkf')
72+
self.assertEqual(bomref1.value, bomref2.value, 'blank')
73+
discr = BomRefDiscriminator([bomref1, bomref2])
74+
self.assertEqual(bomref1.value, bomref2.value, 'init')
75+
with discr:
76+
self.assertNotEqual(bomref1.value, bomref2.value, 'should be discriminated')
77+
discr.reset()
78+
self.assertEqual('djdlkfjdslkf', bomref1.value)
79+
self.assertEqual('djdlkfjdslkf', bomref2.value)

tests/test_output_json.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
# SPDX-License-Identifier: Apache-2.0
1616
# Copyright (c) OWASP Foundation. All Rights Reserved.
1717

18+
19+
import re
1820
from typing import Any, Callable
1921
from unittest import TestCase
2022
from unittest.mock import Mock, patch
@@ -29,7 +31,7 @@
2931
from cyclonedx.schema import OutputFormat, SchemaVersion
3032
from cyclonedx.validation.json import JsonStrictValidator
3133
from tests import SnapshotMixin, mksname, uuid_generator
32-
from tests._data.models import all_get_bom_funct_invalid, all_get_bom_funct_valid
34+
from tests._data.models import all_get_bom_funct_invalid, all_get_bom_funct_valid, bom_all_same_bomref
3335

3436
UNSUPPORTED_SV = frozenset((SchemaVersion.V1_1, SchemaVersion.V1_0,))
3537

@@ -80,6 +82,13 @@ def test_invalid(self, get_bom: Callable[[], Bom], sv: SchemaVersion) -> None:
8082
return None # expected
8183
raise error.exception
8284

85+
def test_bomref_not_duplicate(self) -> None:
86+
bom, nr_bomrefs = bom_all_same_bomref()
87+
output = BY_SCHEMA_VERSION[SchemaVersion.V1_4](bom).output_as_string()
88+
found = re.findall(r'"bom-ref":\s*"(.*?)"', output)
89+
self.assertEqual(nr_bomrefs, len(found))
90+
self.assertCountEqual(set(found), found, 'expected unique items')
91+
8392

8493
@ddt
8594
class TestFunctionalBySchemaVersion(TestCase):

tests/test_output_xml.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
# SPDX-License-Identifier: Apache-2.0
1616
# Copyright (c) OWASP Foundation. All Rights Reserved.
1717

18-
18+
import re
1919
from typing import Any, Callable
2020
from unittest import TestCase
2121
from unittest.mock import Mock, patch
@@ -29,7 +29,7 @@
2929
from cyclonedx.schema import OutputFormat, SchemaVersion
3030
from cyclonedx.validation.xml import XmlValidator
3131
from tests import SnapshotMixin, mksname, uuid_generator
32-
from tests._data.models import all_get_bom_funct_invalid, all_get_bom_funct_valid
32+
from tests._data.models import all_get_bom_funct_invalid, all_get_bom_funct_valid, bom_all_same_bomref
3333

3434

3535
@ddt
@@ -68,6 +68,13 @@ def test_invalid(self, get_bom: Callable[[], Bom], sv: SchemaVersion) -> None:
6868
return None # expected
6969
raise error.exception
7070

71+
def test_bomref_not_duplicate(self) -> None:
72+
bom, nr_bomrefs = bom_all_same_bomref()
73+
output = BY_SCHEMA_VERSION[SchemaVersion.V1_4](bom).output_as_string()
74+
found = re.findall(r'bom-ref="(.*?)"', output)
75+
self.assertEqual(nr_bomrefs, len(found))
76+
self.assertCountEqual(set(found), found, 'expected unique items')
77+
7178

7279
@ddt
7380
class TestFunctionalBySchemaVersion(TestCase):

0 commit comments

Comments
 (0)