Skip to content

Commit 072c8f1

Browse files
authored
feat: Update purl to match specification when ingesting packages from Conda - thanks to @RodneyRichardson
2 parents b028c2b + 2999022 commit 072c8f1

File tree

4 files changed

+69
-23
lines changed

4 files changed

+69
-23
lines changed

cyclonedx_py/parser/conda.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,12 @@
2525
from cyclonedx.model.component import Component
2626
from cyclonedx.parser import BaseParser
2727

28-
# See https://github.com/package-url/packageurl-python/issues/65
29-
from packageurl import PackageURL # type: ignore
30-
31-
from ..utils.conda import CondaPackage, parse_conda_json_to_conda_package, parse_conda_list_str_to_conda_package
28+
from ..utils.conda import (
29+
CondaPackage,
30+
conda_package_to_purl,
31+
parse_conda_json_to_conda_package,
32+
parse_conda_list_str_to_conda_package,
33+
)
3234

3335

3436
class _BaseCondaParser(BaseParser, metaclass=ABCMeta):
@@ -60,11 +62,10 @@ def _conda_packages_to_components(self) -> None:
6062
6163
"""
6264
for conda_package in self._conda_packages:
65+
purl = conda_package_to_purl(conda_package)
6366
c = Component(
64-
name=conda_package['name'], version=str(conda_package['version']),
65-
purl=PackageURL(
66-
type='pypi', name=conda_package['name'], version=str(conda_package['version'])
67-
)
67+
name=conda_package['name'], version=conda_package['version'],
68+
purl=purl
6869
)
6970
c.external_references.add(ExternalReference(
7071
reference_type=ExternalReferenceType.DISTRIBUTION,

cyclonedx_py/utils/conda.py

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@
2323
from typing import Optional, Tuple
2424
from urllib.parse import urlparse
2525

26+
# See https://github.com/package-url/packageurl-python/issues/65
27+
from packageurl import PackageURL # type: ignore
28+
2629
if sys.version_info >= (3, 8):
2730
from typing import TypedDict
2831
else:
@@ -41,9 +44,29 @@ class CondaPackage(TypedDict):
4144
name: str
4245
platform: str
4346
version: str
47+
package_format: Optional[str]
4448
md5_hash: Optional[str]
4549

4650

51+
def conda_package_to_purl(pkg: CondaPackage) -> PackageURL:
52+
"""
53+
Return the purl for the specified package.
54+
See https://github.com/package-url/purl-spec/blob/master/PURL-TYPES.rst#conda
55+
"""
56+
qualifiers = {
57+
'build': pkg['build_string'],
58+
'channel': pkg['channel'],
59+
'subdir': pkg['platform'],
60+
}
61+
if pkg['package_format'] is not None:
62+
qualifiers['type'] = str(pkg['package_format'])
63+
64+
purl = PackageURL(
65+
type='conda', name=pkg['name'], version=pkg['version'], qualifiers=qualifiers
66+
)
67+
return purl
68+
69+
4770
def parse_conda_json_to_conda_package(conda_json_str: str) -> Optional[CondaPackage]:
4871
try:
4972
package_data = json.loads(conda_json_str)
@@ -53,6 +76,7 @@ def parse_conda_json_to_conda_package(conda_json_str: str) -> Optional[CondaPack
5376
if not isinstance(package_data, dict):
5477
return None
5578

79+
package_data.setdefault('package_format', None)
5680
package_data.setdefault('md5_hash', None)
5781
return CondaPackage(package_data) # type: ignore # @FIXME write proper type safe dict at this point
5882

@@ -87,17 +111,18 @@ def parse_conda_list_str_to_conda_package(conda_list_str: str) -> Optional[Conda
87111
*_package_url_parts, package_arch, package_name_version_build_string = package_parts
88112
package_url = urlparse('/'.join(_package_url_parts))
89113

90-
package_name, build_version, build_string = split_package_string(package_name_version_build_string)
114+
package_name, build_version, build_string, package_format = split_package_string(package_name_version_build_string)
91115
build_string, build_number = split_package_build_string(build_string)
92116

93117
return CondaPackage(
94118
base_url=package_url.geturl(), build_number=build_number, build_string=build_string,
95119
channel=package_url.path[1:], dist_name=f'{package_name}-{build_version}-{build_string}',
96-
name=package_name, platform=package_arch, version=build_version, md5_hash=package_hash
120+
name=package_name, platform=package_arch, version=build_version, package_format=package_format,
121+
md5_hash=package_hash
97122
)
98123

99124

100-
def split_package_string(package_name_version_build_string: str) -> Tuple[str, str, str]:
125+
def split_package_string(package_name_version_build_string: str) -> Tuple[str, str, str, str]:
101126
"""Helper method for parsing package_name_version_build_string.
102127
103128
Returns:
@@ -110,12 +135,12 @@ def split_package_string(package_name_version_build_string: str) -> Tuple[str, s
110135
*_package_name_parts, build_version, build_string = package_nvbs_parts
111136
package_name = '-'.join(_package_name_parts)
112137

138+
# Split package_format (.conda or .tar.bz2) at the end; assumes build_string contains a '.'
113139
_pos = build_string.find('.')
114-
if _pos >= 0:
115-
# Remove any .conda at the end if present or other package type eg .tar.gz
116-
build_string = build_string[0:_pos]
140+
package_format = build_string[_pos + 1:]
141+
build_string = build_string[0:_pos]
117142

118-
return package_name, build_version, build_string
143+
return package_name, build_version, build_string, package_format
119144

120145

121146
def split_package_build_string(build_string: str) -> Tuple[str, Optional[int]]:

tests/test_parser_conda.py

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,10 @@
2929
class TestCondaParser(TestCase):
3030

3131
def test_conda_list_json(self) -> None:
32-
conda_list_ouptut_file = os.path.join(os.path.dirname(__file__),
32+
conda_list_output_file = os.path.join(os.path.dirname(__file__),
3333
'fixtures/conda-list-output.json')
3434

35-
with (open(conda_list_ouptut_file, 'r')) as conda_list_output_fh:
35+
with (open(conda_list_output_file, 'r')) as conda_list_output_fh:
3636
parser = CondaListJsonParser(conda_data=conda_list_output_fh.read())
3737

3838
self.assertEqual(34, parser.component_count())
@@ -42,15 +42,17 @@ def test_conda_list_json(self) -> None:
4242
self.assertIsNotNone(c_idna)
4343
self.assertEqual('idna', c_idna.name)
4444
self.assertEqual('2.10', c_idna.version)
45+
self.assertEqual('pkg:conda/idna@2.10?build=pyhd3eb1b0_0&channel=pkgs/main&subdir=noarch',
46+
c_idna.purl.to_string())
4547
self.assertEqual(1, len(c_idna.external_references), f'{c_idna.external_references}')
4648
self.assertEqual(0, len(c_idna.external_references.pop().hashes))
4749
self.assertEqual(0, len(c_idna.hashes), f'{c_idna.hashes}')
4850

4951
def test_conda_list_explicit_md5(self) -> None:
50-
conda_list_ouptut_file = os.path.join(os.path.dirname(__file__),
52+
conda_list_output_file = os.path.join(os.path.dirname(__file__),
5153
'fixtures/conda-list-explicit-md5.txt')
5254

53-
with (open(conda_list_ouptut_file, 'r')) as conda_list_output_fh:
55+
with (open(conda_list_output_file, 'r')) as conda_list_output_fh:
5456
parser = CondaListExplicitParser(conda_data=conda_list_output_fh.read())
5557

5658
self.assertEqual(34, parser.component_count())
@@ -60,6 +62,8 @@ def test_conda_list_explicit_md5(self) -> None:
6062
self.assertIsNotNone(c_idna)
6163
self.assertEqual('idna', c_idna.name)
6264
self.assertEqual('2.10', c_idna.version)
65+
self.assertEqual('pkg:conda/idna@2.10?build=pyhd3eb1b0_0&channel=pkgs/main&subdir=noarch&type=tar.bz2',
66+
c_idna.purl.to_string())
6367
self.assertEqual(1, len(c_idna.external_references), f'{c_idna.external_references}')
6468
self.assertEqual(0, len(c_idna.external_references.pop().hashes))
6569
self.assertEqual(1, len(c_idna.hashes), f'{c_idna.hashes}')
@@ -70,8 +74,8 @@ def test_conda_list_explicit_md5(self) -> None:
7074
def test_conda_list_build_number_text(self) -> None:
7175
conda_list_output_file = os.path.join(os.path.dirname(__file__), 'fixtures/conda-list-build-number-text.txt')
7276

73-
with (open(conda_list_output_file, 'r')) as conda_list_ouptut_fh:
74-
parser = CondaListExplicitParser(conda_data=conda_list_ouptut_fh.read())
77+
with (open(conda_list_output_file, 'r')) as conda_list_output_fh:
78+
parser = CondaListExplicitParser(conda_data=conda_list_output_fh.read())
7579

7680
self.assertEqual(39, parser.component_count())
7781
components = parser.get_components()
@@ -80,21 +84,29 @@ def test_conda_list_build_number_text(self) -> None:
8084
self.assertIsNotNone(c_libgcc_mutex)
8185
self.assertEqual('_libgcc_mutex', c_libgcc_mutex.name)
8286
self.assertEqual('0.1', c_libgcc_mutex.version)
87+
self.assertEqual('pkg:conda/_libgcc_mutex@0.1?build=main&channel=pkgs/main&subdir=linux-64&type=conda',
88+
c_libgcc_mutex.purl.to_string())
8389
self.assertEqual(0, len(c_libgcc_mutex.hashes), f'{c_libgcc_mutex.hashes}')
90+
8491
c_pycparser = next(filter(lambda c: c.name == 'pycparser', components), None)
8592
self.assertIsNotNone(c_pycparser)
8693
self.assertEqual('pycparser', c_pycparser.name)
8794
self.assertEqual('2.21', c_pycparser.version)
95+
self.assertEqual('pkg:conda/pycparser@2.21?build=pyhd3eb1b0_0&channel=pkgs/main&subdir=noarch&type=conda',
96+
c_pycparser.purl.to_string())
8897
self.assertEqual(0, len(c_pycparser.hashes), f'{c_pycparser.hashes}')
98+
8999
c_openmp_mutex = next(filter(lambda c: c.name == '_openmp_mutex', components), None)
90100
self.assertIsNotNone(c_openmp_mutex)
91101
self.assertEqual('_openmp_mutex', c_openmp_mutex.name)
92102
self.assertEqual('4.5', c_openmp_mutex.version)
103+
self.assertEqual('pkg:conda/_openmp_mutex@4.5?build=1_gnu&channel=pkgs/main&subdir=linux-64&type=tar.bz2',
104+
c_openmp_mutex.purl.to_string())
93105
self.assertEqual(0, len(c_openmp_mutex.hashes), f'{c_openmp_mutex.hashes}')
94106

95107
def test_conda_list_malformed(self) -> None:
96108
conda_list_output_file = os.path.join(os.path.dirname(__file__), 'fixtures/conda-list-broken.txt')
97109

98-
with (open(conda_list_output_file, 'r')) as conda_list_ouptut_fh:
110+
with (open(conda_list_output_file, 'r')) as conda_list_output_fh:
99111
with self.assertRaisesRegex(ValueError, re.compile(r'^unexpected format', re.IGNORECASE)):
100-
CondaListExplicitParser(conda_data=conda_list_ouptut_fh.read())
112+
CondaListExplicitParser(conda_data=conda_list_output_fh.read())

tests/test_utils_conda.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ def test_parse_conda_list_str_no_hash(self) -> None:
6060
self.assertEqual('chardet', cp['name'])
6161
self.assertEqual('osx-64', cp['platform'])
6262
self.assertEqual('4.0.0', cp['version'])
63+
self.assertEqual('conda', cp['package_format'])
6364
self.assertIsNone(cp['md5_hash'])
6465

6566
def test_parse_conda_list_str_with_hash_1(self) -> None:
@@ -77,6 +78,7 @@ def test_parse_conda_list_str_with_hash_1(self) -> None:
7778
self.assertEqual('tzdata', cp['name'])
7879
self.assertEqual('noarch', cp['platform'])
7980
self.assertEqual('2021a', cp['version'], )
81+
self.assertEqual('conda', cp['package_format'])
8082
self.assertEqual('d42e4db918af84a470286e4c300604a3', cp['md5_hash'])
8183

8284
def test_parse_conda_list_str_with_hash_2(self) -> None:
@@ -94,6 +96,7 @@ def test_parse_conda_list_str_with_hash_2(self) -> None:
9496
self.assertEqual('ca-certificates', cp['name'])
9597
self.assertEqual('osx-64', cp['platform'])
9698
self.assertEqual('2021.7.5', cp['version'], )
99+
self.assertEqual('conda', cp['package_format'])
97100
self.assertEqual('c2d0ae65c08dacdcf86770b7b5bbb187', cp['md5_hash'])
98101

99102
def test_parse_conda_list_str_with_hash_3(self) -> None:
@@ -111,6 +114,7 @@ def test_parse_conda_list_str_with_hash_3(self) -> None:
111114
self.assertEqual('idna', cp['name'])
112115
self.assertEqual('noarch', cp['platform'])
113116
self.assertEqual('2.10', cp['version'], )
117+
self.assertEqual('tar.bz2', cp['package_format'])
114118
self.assertEqual('153ff132f593ea80aae2eea61a629c92', cp['md5_hash'])
115119

116120
def test_parse_conda_list_str_with_hash_4(self) -> None:
@@ -128,6 +132,7 @@ def test_parse_conda_list_str_with_hash_4(self) -> None:
128132
self.assertEqual('_libgcc_mutex', cp['name'])
129133
self.assertEqual('linux-64', cp['platform'])
130134
self.assertEqual('0.1', cp['version'])
135+
self.assertEqual('tar.bz2', cp['package_format'])
131136
self.assertEqual('d7c89558ba9fa0495403155b64376d81', cp['md5_hash'])
132137

133138
def test_parse_conda_list_build_number(self) -> None:
@@ -144,6 +149,7 @@ def test_parse_conda_list_build_number(self) -> None:
144149
self.assertEqual('chardet', cp['name'])
145150
self.assertEqual('osx-64', cp['platform'])
146151
self.assertEqual('4.0.0', cp['version'])
152+
self.assertEqual('conda', cp['package_format'])
147153
self.assertIsNone(cp['md5_hash'])
148154

149155
def test_parse_conda_list_no_build_number(self) -> None:
@@ -160,6 +166,7 @@ def test_parse_conda_list_no_build_number(self) -> None:
160166
self.assertEqual('_libgcc_mutex', cp['name'])
161167
self.assertEqual('linux-64', cp['platform'])
162168
self.assertEqual('0.1', cp['version'])
169+
self.assertEqual('conda', cp['package_format'])
163170
self.assertIsNone(cp['md5_hash'])
164171

165172
def test_parse_conda_list_no_build_number2(self) -> None:
@@ -176,4 +183,5 @@ def test_parse_conda_list_no_build_number2(self) -> None:
176183
self.assertEqual('_openmp_mutex', cp['name'])
177184
self.assertEqual('linux-64', cp['platform'])
178185
self.assertEqual('4.5', cp['version'])
186+
self.assertEqual('tar.bz2', cp['package_format'])
179187
self.assertIsNone(cp['md5_hash'])

0 commit comments

Comments
 (0)