Skip to content

Commit 65246dd

Browse files
authored
fix(conda-parser): version recognition for strings (#332)
conda package string parser no longer raises unexpected errors if the build-number is non-numeric. fixes #331 Signed-off-by: Jan Kowalleck <jan.kowalleck@gmail.com>
1 parent 5e344e2 commit 65246dd

File tree

5 files changed

+181
-41
lines changed

5 files changed

+181
-41
lines changed

cyclonedx_py/utils/conda.py

Lines changed: 56 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,14 @@
2020
import json
2121
import sys
2222
from json import JSONDecodeError
23-
from typing import Optional
23+
from typing import Optional, Tuple
24+
from urllib.parse import urlparse
2425

2526
if sys.version_info >= (3, 8):
2627
from typing import TypedDict
2728
else:
2829
from typing_extensions import TypedDict
2930

30-
from urllib.parse import urlparse
31-
3231

3332
class CondaPackage(TypedDict):
3433
"""
@@ -72,56 +71,72 @@ def parse_conda_list_str_to_conda_package(conda_list_str: str) -> Optional[Conda
7271

7372
line = conda_list_str.strip()
7473

75-
if line[0:1] == '#' or line[0:1] == '@' or len(line) == 0:
74+
if '' == line or line[0] in ['#', '@']:
7675
# Skip comments, @EXPLICIT or empty lines
7776
return None
7877

7978
# Remove any hash
8079
package_hash = None
8180
if '#' in line:
82-
hash_parts = line.split('#')
83-
if len(hash_parts) > 1:
84-
package_hash = hash_parts.pop()
85-
line = ''.join(hash_parts)
81+
*_line_parts, package_hash = line.split('#')
82+
line = ''.join(*_line_parts)
8683

8784
package_parts = line.split('/')
88-
package_name_version_build_string = package_parts.pop()
89-
package_arch = package_parts.pop()
90-
package_url = urlparse('/'.join(package_parts))
85+
if len(package_parts) < 2:
86+
raise ValueError(f'Unexpected format in {package_parts}')
87+
*_package_url_parts, package_arch, package_name_version_build_string = package_parts
88+
package_url = urlparse('/'.join(_package_url_parts))
9189

92-
try:
93-
package_nvbs_parts = package_name_version_build_string.split('-')
94-
build_number_with_opt_string = package_nvbs_parts.pop()
95-
if '.' in build_number_with_opt_string:
96-
# Remove any .conda at the end if present or other package type eg .tar.gz
97-
pos = build_number_with_opt_string.find('.')
98-
build_number_with_opt_string = build_number_with_opt_string[0:pos]
99-
100-
build_string: str
101-
build_number: Optional[int]
102-
103-
if '_' in build_number_with_opt_string:
104-
bnbs_parts = build_number_with_opt_string.split('_')
105-
# Build number will be the last part - check if it's an integer
106-
# Updated logic given https://github.com/CycloneDX/cyclonedx-python-lib/issues/65
107-
candidate_build_number: str = bnbs_parts.pop()
108-
if candidate_build_number.isdigit():
109-
build_number = int(candidate_build_number)
110-
build_string = build_number_with_opt_string
111-
else:
112-
build_number = None
113-
build_string = build_number_with_opt_string
114-
else:
115-
build_string = ''
116-
build_number = int(build_number_with_opt_string)
117-
118-
build_version = package_nvbs_parts.pop()
119-
package_name = '-'.join(package_nvbs_parts)
120-
except IndexError as e:
121-
raise ValueError(f'Error parsing {package_nvbs_parts} from {conda_list_str}') from e
90+
package_name, build_version, build_string = split_package_string(package_name_version_build_string)
91+
build_string, build_number = split_package_build_string(build_string)
12292

12393
return CondaPackage(
12494
base_url=package_url.geturl(), build_number=build_number, build_string=build_string,
12595
channel=package_url.path[1:], dist_name=f'{package_name}-{build_version}-{build_string}',
12696
name=package_name, platform=package_arch, version=build_version, md5_hash=package_hash
12797
)
98+
99+
100+
def split_package_string(package_name_version_build_string: str) -> Tuple[str, str, str]:
    """Split a ``<name>-<version>-<build>[.<ext>]`` string into its components.

    The last two ``-`` separated fields are taken as the version and the
    build string; everything in front of them (inner dashes included) is the
    package name. Whatever follows the first ``.`` of the build field is
    dropped.

    Returns:
        Tuple (package_name, build_version, build_string)

    Raises:
        ValueError: if the input has fewer than three ``-`` separated fields.
    """
    # Splitting from the right keeps any dashes inside the package name intact.
    fields = package_name_version_build_string.rsplit('-', 2)
    if len(fields) != 3:
        raise ValueError(f'Unexpected format in {fields}')

    package_name, build_version, build_with_extension = fields

    # Remove any .conda at the end if present or other package type eg .tar.gz
    build_string, _, _ = build_with_extension.partition('.')

    return package_name, build_version, build_string
119+
120+
121+
def split_package_build_string(build_string: str) -> Tuple[str, Optional[int]]:
    """Helper method for parsing build_string.

    A purely numeric input is treated as a bare build number and yields an
    empty build string. Otherwise the build string is returned unchanged and
    the build number is taken from the text after the last ``_``, provided
    that text is a decimal number.

    Args:
        build_string: The build string to analyse.

    Returns:
        Tuple (build_string, build_number); build_number is ``None`` when no
        numeric build number could be recognised.
    """
    if '' == build_string:
        return '', None

    # ``str.isdecimal`` rather than ``str.isdigit``: the latter also accepts
    # characters such as superscript digits, which ``int()`` rejects with a
    # ValueError.
    if build_string.isdecimal():
        return '', int(build_string)

    # ``rfind`` yields -1 when there is no underscore. An underscore at
    # index 0 is not treated as a separator either.
    _pos = build_string.rfind('_')
    if _pos >= 1:
        # Build number will be the last part - check if it's an integer
        # Updated logic given https://github.com/CycloneDX/cyclonedx-python-lib/issues/65
        build_number = build_string[_pos + 1:]
        if build_number.isdecimal():
            return build_string, int(build_number)

    return build_string, None
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# This package list is malformed.
2+
https://repo.anaconda.com/pkgs/main/linux-64/malformed_source.conda
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# This file is part of https://github.com/CycloneDX/cyclonedx-python/issues/331
2+
3+
# This file may be used to create an environment using:
4+
# $ conda create --name <env> --file <this file>
5+
# platform: linux-64
6+
@EXPLICIT
7+
https://repo.anaconda.com/pkgs/main/linux-64/_libgcc_mutex-0.1-main.conda
8+
https://repo.anaconda.com/pkgs/main/linux-64/ca-certificates-2022.2.1-h06a4308_0.conda
9+
https://repo.anaconda.com/pkgs/main/linux-64/ld_impl_linux-64-2.35.1-h7274673_9.conda
10+
https://repo.anaconda.com/pkgs/main/linux-64/libstdcxx-ng-9.3.0-hd4cf53a_17.conda
11+
https://repo.anaconda.com/pkgs/main/noarch/tzdata-2021e-hda174b7_0.conda
12+
https://repo.anaconda.com/pkgs/main/linux-64/libgomp-9.3.0-h5101ec6_17.conda
13+
https://repo.anaconda.com/pkgs/main/linux-64/_openmp_mutex-4.5-1_gnu.tar.bz2
14+
https://repo.anaconda.com/pkgs/main/linux-64/libgcc-ng-9.3.0-h5101ec6_17.conda
15+
https://repo.anaconda.com/pkgs/main/linux-64/libffi-3.3-he6710b0_2.conda
16+
https://repo.anaconda.com/pkgs/main/linux-64/ncurses-6.3-h7f8727e_2.conda
17+
https://repo.anaconda.com/pkgs/main/linux-64/openssl-1.1.1m-h7f8727e_0.conda
18+
https://repo.anaconda.com/pkgs/main/linux-64/xz-5.2.5-h7b6447c_0.conda
19+
https://repo.anaconda.com/pkgs/main/linux-64/yaml-0.2.5-h7b6447c_0.conda
20+
https://repo.anaconda.com/pkgs/main/linux-64/zlib-1.2.11-h7f8727e_4.conda
21+
https://repo.anaconda.com/pkgs/main/linux-64/readline-8.1.2-h7f8727e_1.conda
22+
https://repo.anaconda.com/pkgs/main/linux-64/tk-8.6.11-h1ccaba5_0.conda
23+
https://repo.anaconda.com/pkgs/main/linux-64/sqlite-3.38.0-hc218d9a_0.conda
24+
https://repo.anaconda.com/pkgs/main/linux-64/python-3.9.7-h12debd9_1.conda
25+
https://repo.anaconda.com/pkgs/main/linux-64/certifi-2021.10.8-py39h06a4308_2.conda
26+
https://repo.anaconda.com/pkgs/main/noarch/charset-normalizer-2.0.4-pyhd3eb1b0_0.conda
27+
https://repo.anaconda.com/pkgs/main/noarch/colorama-0.4.4-pyhd3eb1b0_0.conda
28+
https://repo.anaconda.com/pkgs/main/noarch/idna-3.3-pyhd3eb1b0_0.conda
29+
https://repo.anaconda.com/pkgs/main/linux-64/pycosat-0.6.3-py39h27cfd23_0.conda
30+
https://repo.anaconda.com/pkgs/main/noarch/pycparser-2.21-pyhd3eb1b0_0.conda
31+
https://repo.anaconda.com/pkgs/main/linux-64/pysocks-1.7.1-py39h06a4308_0.conda
32+
https://repo.anaconda.com/pkgs/main/linux-64/ruamel_yaml-0.15.100-py39h27cfd23_0.conda
33+
https://repo.anaconda.com/pkgs/main/noarch/six-1.16.0-pyhd3eb1b0_1.conda
34+
https://repo.anaconda.com/pkgs/main/noarch/wheel-0.37.1-pyhd3eb1b0_0.conda
35+
https://repo.anaconda.com/pkgs/main/linux-64/cffi-1.15.0-py39hd667e15_1.conda
36+
https://repo.anaconda.com/pkgs/main/linux-64/setuptools-58.0.4-py39h06a4308_0.conda
37+
https://repo.anaconda.com/pkgs/main/noarch/tqdm-4.63.0-pyhd3eb1b0_0.conda
38+
https://repo.anaconda.com/pkgs/main/linux-64/brotlipy-0.7.0-py39h27cfd23_1003.conda
39+
https://repo.anaconda.com/pkgs/main/linux-64/conda-package-handling-1.7.3-py39h27cfd23_1.conda
40+
https://repo.anaconda.com/pkgs/main/linux-64/cryptography-36.0.0-py39h9ce1e76_0.conda
41+
https://repo.anaconda.com/pkgs/main/linux-64/pip-21.2.4-py39h06a4308_0.conda
42+
https://repo.anaconda.com/pkgs/main/noarch/pyopenssl-22.0.0-pyhd3eb1b0_0.conda
43+
https://repo.anaconda.com/pkgs/main/noarch/urllib3-1.26.8-pyhd3eb1b0_0.conda
44+
https://repo.anaconda.com/pkgs/main/noarch/requests-2.27.1-pyhd3eb1b0_0.conda
45+
https://repo.anaconda.com/pkgs/main/linux-64/conda-4.12.0-py39h06a4308_0.conda

tests/test_parser_conda.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
# Copyright (c) OWASP Foundation. All Rights Reserved.
1919

2020
import os
21+
import re
2122
from unittest import TestCase
2223

2324
from cyclonedx_py.parser.conda import CondaListExplicitParser, CondaListJsonParser
@@ -54,3 +55,32 @@ def test_conda_list_explicit_md5(self) -> None:
5455
self.assertEqual('2.10', c_noarch.version)
5556
self.assertEqual(1, len(c_noarch.external_references))
5657
self.assertEqual(0, len(c_noarch.external_references.pop().hashes))
58+
59+
def test_conda_list_build_number_text(self) -> None:
    """Parse the ``conda-list-build-number-text.txt`` fixture.

    Checks the total component count and, for three selected packages,
    that a component with the expected name and version is present.
    """
    fixture_path = os.path.join(os.path.dirname(__file__), 'fixtures/conda-list-build-number-text.txt')

    with open(fixture_path, 'r') as fixture_fh:
        parser = CondaListExplicitParser(conda_data=fixture_fh.read())

    self.assertEqual(39, parser.component_count())
    components = parser.get_components()

    expected_versions = (
        ('_libgcc_mutex', '0.1'),
        ('pycparser', '2.21'),
        ('_openmp_mutex', '4.5'),
    )
    for expected_name, expected_version in expected_versions:
        # First component carrying the expected name, or None if absent.
        component = next((c for c in components if c.name == expected_name), None)
        self.assertIsNotNone(component)
        self.assertEqual(expected_name, component.name)
        self.assertEqual(expected_version, component.version)
80+
81+
def test_conda_list_malformed(self) -> None:
    """Building the parser from the broken fixture must raise ``ValueError``.

    The error message is expected to start with "unexpected format"
    (matched case-insensitively).
    """
    broken_fixture = os.path.join(os.path.dirname(__file__), 'fixtures/conda-list-broken.txt')
    expected_message = re.compile(r'^unexpected format', re.IGNORECASE)

    with open(broken_fixture, 'r') as broken_fh, self.assertRaisesRegex(ValueError, expected_message):
        CondaListExplicitParser(conda_data=broken_fh.read())

tests/test_utils_conda.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,3 +129,51 @@ def test_parse_conda_list_str_with_hash_4(self) -> None:
129129
self.assertEqual(cp['platform'], 'linux-64')
130130
self.assertEqual(cp['version'], '0.1')
131131
self.assertEqual(cp['md5_hash'], 'd7c89558ba9fa0495403155b64376d81')
132+
133+
def test_parse_conda_list_build_number(self) -> None:
    """A build string ending in ``_1003`` yields build number 1003."""
    cp: CondaPackage = parse_conda_list_str_to_conda_package(
        conda_list_str='https://repo.anaconda.com/pkgs/main/osx-64/chardet-4.0.0-py39hecd8cb5_1003.conda'
    )

    self.assertIsInstance(cp, dict)

    expected_fields = {
        'base_url': 'https://repo.anaconda.com/pkgs/main',
        'build_number': 1003,
        'build_string': 'py39hecd8cb5_1003',
        'channel': 'pkgs/main',
        'dist_name': 'chardet-4.0.0-py39hecd8cb5_1003',
        'name': 'chardet',
        'platform': 'osx-64',
        'version': '4.0.0',
    }
    for field_name, expected_value in expected_fields.items():
        self.assertEqual(expected_value, cp[field_name])
    # The input line carries no '#<hash>' suffix.
    self.assertIsNone(cp['md5_hash'])
148+
149+
def test_parse_conda_list_no_build_number(self) -> None:
    """A purely textual build string (``main``) yields no build number."""
    cp: CondaPackage = parse_conda_list_str_to_conda_package(
        conda_list_str='https://repo.anaconda.com/pkgs/main/linux-64/_libgcc_mutex-0.1-main.conda'
    )

    self.assertIsInstance(cp, dict)

    expected_fields = {
        'base_url': 'https://repo.anaconda.com/pkgs/main',
        'build_number': None,
        'build_string': 'main',
        'channel': 'pkgs/main',
        'dist_name': '_libgcc_mutex-0.1-main',
        'name': '_libgcc_mutex',
        'platform': 'linux-64',
        'version': '0.1',
    }
    for field_name, expected_value in expected_fields.items():
        self.assertEqual(expected_value, cp[field_name])
    # The input line carries no '#<hash>' suffix.
    self.assertIsNone(cp['md5_hash'])
164+
165+
def test_parse_conda_list_no_build_number2(self) -> None:
    """A build string with a non-numeric tail (``1_gnu``) yields no build number."""
    cp: CondaPackage = parse_conda_list_str_to_conda_package(
        conda_list_str='https://repo.anaconda.com/pkgs/main/linux-64/_openmp_mutex-4.5-1_gnu.tar.bz2'
    )

    self.assertIsInstance(cp, dict)

    expected_fields = {
        'base_url': 'https://repo.anaconda.com/pkgs/main',
        'build_number': None,
        'build_string': '1_gnu',
        'channel': 'pkgs/main',
        'dist_name': '_openmp_mutex-4.5-1_gnu',
        'name': '_openmp_mutex',
        'platform': 'linux-64',
        'version': '4.5',
    }
    for field_name, expected_value in expected_fields.items():
        self.assertEqual(expected_value, cp[field_name])
    # The input line carries no '#<hash>' suffix.
    self.assertIsNone(cp['md5_hash'])

0 commit comments

Comments
 (0)