Skip to content

Commit 5fce14d

Browse files
Improve conda packages and dependencies parsing
Signed-off-by: Ayan Sinha Mahapatra <[email protected]>
1 parent 93ca65c commit 5fce14d

19 files changed

+2932
-156
lines changed

src/packagedcode/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,8 @@
7373
cocoapods.PodfileLockHandler,
7474
cocoapods.PodfileHandler,
7575

76-
conda.CondaYamlHandler,
7776
conda.CondaMetaYamlHandler,
77+
conda.CondaYamlHandler,
7878

7979
conan.ConanFileHandler,
8080
conan.ConanDataHandler,

src/packagedcode/conda.py

Lines changed: 163 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
from packagedcode import models
1616
from packagedcode.pypi import BaseDependencyFileHandler
17+
from dparse2.parser import parse_requirement_line
1718

1819
"""
1920
Handle Conda manifests and metadata, see https://docs.conda.io/en/latest/
@@ -23,18 +24,37 @@
2324
"""
2425

2526
# TODO: there are likely other package data files for Conda
26-
# TODO: report platform
27-
2827

2928
class CondaYamlHandler(BaseDependencyFileHandler):
30-
# TODO: there are several other manifests worth adding
3129
datasource_id = 'conda_yaml'
32-
path_patterns = ('*conda.yaml', '*conda.yml',)
33-
default_package_type = 'pypi'
30+
path_patterns = ('*conda*.yaml', '*env*.yaml', '*environment*.yaml')
31+
default_package_type = 'conda'
3432
default_primary_language = 'Python'
3533
description = 'Conda yaml manifest'
3634
documentation_url = 'https://docs.conda.io/'
3735

36+
@classmethod
37+
def parse(cls, location, package_only=False):
38+
with open(location) as fi:
39+
conda_data = saneyaml.load(fi.read())
40+
dependencies = get_conda_yaml_dependencies(conda_data=conda_data)
41+
name = conda_data.get('name')
42+
extra_data = {}
43+
channels = conda_data.get('channels')
44+
if channels:
45+
extra_data['channels'] = channels
46+
if name or dependencies:
47+
package_data = dict(
48+
datasource_id=cls.datasource_id,
49+
type=cls.default_package_type,
50+
name=name,
51+
primary_language=cls.default_primary_language,
52+
dependencies=dependencies,
53+
extra_data=extra_data,
54+
is_private=True,
55+
)
56+
yield models.PackageData.from_data(package_data, package_only)
57+
3858

3959
class CondaMetaYamlHandler(models.DatafileHandler):
4060
datasource_id = 'conda_meta_yaml'
@@ -83,9 +103,7 @@ def parse(cls, location, package_only=False):
83103
metayaml = get_meta_yaml_data(location)
84104
package_element = metayaml.get('package') or {}
85105
package_name = package_element.get('name')
86-
if not package_name:
87-
return
88-
version = package_element.get('version')
106+
package_version = package_element.get('version')
89107

90108
# FIXME: source is source, not download
91109
source = metayaml.get('source') or {}
@@ -99,6 +117,7 @@ def parse(cls, location, package_only=False):
99117
vcs_url = about.get('dev_url')
100118

101119
dependencies = []
120+
extra_data = {}
102121
requirements = metayaml.get('requirements') or {}
103122
for scope, reqs in requirements.items():
104123
# requirements format is like:
@@ -107,33 +126,152 @@ def parse(cls, location, package_only=False):
107126
# u'progressbar2', u'python >=3.6'])])
108127
for req in reqs:
109128
name, _, requirement = req.partition(" ")
110-
purl = PackageURL(type=cls.default_package_type, name=name)
129+
version = None
130+
if requirement.startswith("=="):
131+
_, version = requirement.split("==")
132+
133+
# requirements may have namespace, version too
134+
# - conda-forge::numpy=1.15.4
135+
namespace = None
136+
if "::" in name:
137+
namespace, name = name.split("::")
138+
139+
is_pinned = False
140+
if "=" in name:
141+
name, version = name.split("=")
142+
is_pinned = True
143+
requirement = f"={version}"
144+
145+
if name in ('pip', 'python'):
146+
if not scope in extra_data:
147+
extra_data[scope] = [req]
148+
else:
149+
extra_data[scope].append(req)
150+
continue
151+
152+
purl = PackageURL(
153+
type=cls.default_package_type,
154+
name=name,
155+
namespace=namespace,
156+
version=version,
157+
)
158+
if "run" in scope:
159+
is_runtime = True
160+
is_optional = False
161+
else:
162+
is_runtime = False
163+
is_optional = True
164+
111165
dependencies.append(
112166
models.DependentPackage(
113167
purl=purl.to_string(),
114168
extracted_requirement=requirement,
115169
scope=scope,
116-
is_runtime=True,
117-
is_optional=False,
170+
is_runtime=is_runtime,
171+
is_optional=is_optional,
172+
is_pinned=is_pinned,
173+
is_direct=True,
118174
)
119175
)
120176

121177
package_data = dict(
122178
datasource_id=cls.datasource_id,
123179
type=cls.default_package_type,
124180
name=package_name,
125-
version=version,
181+
version=package_version,
126182
download_url=download_url,
127183
homepage_url=homepage_url,
128184
vcs_url=vcs_url,
129185
description=description,
130186
sha256=sha256,
131187
extracted_license_statement=extracted_license_statement,
132188
dependencies=dependencies,
189+
extra_data=extra_data,
133190
)
134191
yield models.PackageData.from_data(package_data, package_only)
135192

136193

194+
def get_conda_yaml_dependencies(conda_data):
195+
"""
196+
Return a list of DependentPackage mappings from conda and pypi
197+
dependencies present in a `conda_data` mapping.
198+
"""
199+
dependencies = conda_data.get('dependencies') or []
200+
deps = []
201+
for dep in dependencies:
202+
if isinstance(dep, str):
203+
namespace = None
204+
specs = None
205+
is_pinned = False
206+
207+
if "::" in dep:
208+
namespace, dep = dep.split("::")
209+
210+
req = parse_requirement_line(dep)
211+
if req:
212+
name = req.name
213+
version = None
214+
215+
specs = str(req.specs)
216+
if '==' in specs:
217+
version = specs.replace('==','')
218+
is_pinned = True
219+
purl = PackageURL(type='pypi', name=name, version=version)
220+
else:
221+
if "=" in dep:
222+
dep, version = dep.split("=")
223+
is_pinned = True
224+
specs = f"={version}"
225+
226+
purl = PackageURL(
227+
type='conda',
228+
namespace=namespace,
229+
name=dep,
230+
version=version,
231+
)
232+
233+
if purl.name in ('pip', 'python'):
234+
continue
235+
236+
deps.append(
237+
models.DependentPackage(
238+
purl=purl.to_string(),
239+
extracted_requirement=specs,
240+
scope='dependencies',
241+
is_runtime=True,
242+
is_optional=False,
243+
is_pinned=is_pinned,
244+
is_direct=True,
245+
).to_dict()
246+
)
247+
248+
elif isinstance(dep, dict):
249+
for line in dep.get('pip', []):
250+
req = parse_requirement_line(line)
251+
if req:
252+
name = req.name
253+
version = None
254+
is_pinned = False
255+
specs = str(req.specs)
256+
if '==' in specs:
257+
version = specs.replace('==','')
258+
is_pinned = True
259+
purl = PackageURL(type='pypi', name=name, version=version)
260+
deps.append(
261+
models.DependentPackage(
262+
purl=purl.to_string(),
263+
extracted_requirement=specs,
264+
scope='dependencies',
265+
is_runtime=True,
266+
is_optional=False,
267+
is_pinned=is_pinned,
268+
is_direct=True,
269+
).to_dict()
270+
)
271+
272+
return deps
273+
274+
137275
def get_meta_yaml_data(location):
138276
"""
139277
Return a mapping of conda metadata loaded from a meta.yaml file. The format
@@ -158,10 +296,21 @@ def get_meta_yaml_data(location):
158296
# Replace the variable with the value
159297
if '{{' in line and '}}' in line:
160298
for variable, value in variables.items():
161-
line = line.replace('{{ ' + variable + ' }}', value)
299+
if "|lower" in line:
300+
line = line.replace('{{ ' + variable + '|lower' + ' }}', value.lower())
301+
else:
302+
line = line.replace('{{ ' + variable + ' }}', value)
162303
yaml_lines.append(line)
163304

164-
return saneyaml.load('\n'.join(yaml_lines))
305+
# Cleanup any remaining complex jinja template lines
306+
# as the yaml load fails otherwise for unresolved jinja
307+
cleaned_yaml_lines = [
308+
line
309+
for line in yaml_lines
310+
if not "{{" in line
311+
]
312+
313+
return saneyaml.load(''.join(cleaned_yaml_lines))
165314

166315

167316
def get_variables(location):

src/packagedcode/models.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1100,7 +1100,16 @@ def is_datafile(cls, location, filetypes=tuple(), _bare_filename=False):
11001100
"""
11011101
if filetype.is_file(location) or _bare_filename:
11021102
loc = as_posixpath(location)
1103-
if any(fnmatchcase(loc, pat) for pat in cls.path_patterns):
1103+
1104+
# Some extension strings are used interchangeably
1105+
extension_aliases = {"yaml": "yml"}
1106+
path_patterns = list(cls.path_patterns)
1107+
for pattern in cls.path_patterns:
1108+
for extension, extension_alias in extension_aliases.items():
1109+
new_pattern = pattern.replace(extension, extension_alias)
1110+
path_patterns.append(new_pattern)
1111+
1112+
if any(fnmatchcase(loc, pat) for pat in path_patterns):
11041113
filetypes = filetypes or cls.filetypes
11051114
if not filetypes:
11061115
return True
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# from https://raw.githubusercontent.com/bayer-science-for-a-better-life/phc-gnn/refs/heads/master/environment_gpu.yml
2+
3+
name: phc-gnn
4+
5+
channels:
6+
- anaconda
7+
- pytorch
8+
- conda-forge
9+
- defaults
10+
11+
dependencies:
12+
- pip=20.2.4
13+
- anaconda::python=3.8.5
14+
- anaconda::python-dateutil=2.8.1
15+
- cudatoolkit=10.1
16+
- magma-cuda101
17+
- cudnn=7.6.5
18+
- pytorch=1.7.1
19+
- torchvision=0.8.2
20+
- torchaudio=0.7.2
21+
- conda-forge::numpy=1.19.2
22+
- anaconda::scipy=1.5.2
23+
- conda-forge::matplotlib=3.3.2
24+
- anaconda::networkx=2.5
25+
- anaconda::scikit-learn=0.23.2
26+
- anaconda::notebook=6.1.4
27+
- anaconda::jupyter_client=6.1.7
28+
- anaconda::jupyter_core=4.6.3
29+
- anaconda::h5py=2.10.0
30+
- conda-forge::tqdm=4.50.0
31+
- conda-forge::tensorboard=2.4.0
32+
- pip:
33+
- ogb==1.2.4
34+
- pytest==6.2.1
35+
- bottleneck==1.3.2
36+
# - torch-cluster==1.5.8
37+
# - torch-scatter==2.0.5
38+
# - torch-sparse==0.6.8
39+
# - torch-spline-conv==1.2.0
40+
# - torch-geometric==1.6.1

0 commit comments

Comments
 (0)