Skip to content

Commit 4428112

Browse files
Improve conda packages and dependencies parsing
Signed-off-by: Ayan Sinha Mahapatra <[email protected]>
1 parent 93ca65c commit 4428112

File tree

17 files changed

+2871
-63
lines changed

17 files changed

+2871
-63
lines changed

src/packagedcode/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,8 @@
7373
cocoapods.PodfileLockHandler,
7474
cocoapods.PodfileHandler,
7575

76-
conda.CondaYamlHandler,
7776
conda.CondaMetaYamlHandler,
77+
conda.CondaYamlHandler,
7878

7979
conan.ConanFileHandler,
8080
conan.ConanDataHandler,

src/packagedcode/conda.py

Lines changed: 167 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
from packagedcode import models
1616
from packagedcode.pypi import BaseDependencyFileHandler
17+
from dparse2.parser import parse_requirement_line
1718

1819
"""
1920
Handle Conda manifests and metadata, see https://docs.conda.io/en/latest/
@@ -23,18 +24,37 @@
2324
"""
2425

2526
# TODO: there are likely other package data files for Conda
26-
# TODO: report platform
27-
2827

2928
class CondaYamlHandler(BaseDependencyFileHandler):
    """
    Handle Conda environment YAML manifests (such as ``environment.yaml``
    or ``conda.yaml``) listing a ``name``, ``channels`` and ``dependencies``.
    """
    datasource_id = 'conda_yaml'
    path_patterns = ('*conda*.yaml', '*env*.yaml', '*environment*.yaml')
    default_package_type = 'conda'
    default_primary_language = 'Python'
    description = 'Conda yaml manifest'
    documentation_url = 'https://docs.conda.io/'

    @classmethod
    def parse(cls, location, package_only=False):
        """
        Yield one PackageData built from the Conda environment YAML file at
        ``location``. Yield nothing when the file does not hold a YAML
        mapping, or when it has neither a name nor any dependency.
        """
        with open(location) as fi:
            conda_data = saneyaml.load(fi.read())

        # The path patterns are broad enough to match unrelated YAML files.
        # An empty document loads as None and a top-level list or scalar is
        # not a mapping either: none of these is a Conda environment, and
        # calling .get() on them would raise.
        if not isinstance(conda_data, dict):
            return

        dependencies = get_conda_yaml_dependencies(conda_data=conda_data)
        name = conda_data.get('name')

        # Channels are not part of the package identity: keep them as extra
        # data, and only when the manifest declares some.
        extra_data = {}
        channels = conda_data.get('channels')
        if channels:
            extra_data['channels'] = channels

        if name or dependencies:
            package_data = dict(
                datasource_id=cls.datasource_id,
                type=cls.default_package_type,
                name=name,
                primary_language=cls.default_primary_language,
                dependencies=dependencies,
                extra_data=extra_data,
                is_private=True,
            )
            yield models.PackageData.from_data(package_data, package_only)
57+
3858

3959
class CondaMetaYamlHandler(models.DatafileHandler):
4060
datasource_id = 'conda_meta_yaml'
@@ -83,9 +103,7 @@ def parse(cls, location, package_only=False):
83103
metayaml = get_meta_yaml_data(location)
84104
package_element = metayaml.get('package') or {}
85105
package_name = package_element.get('name')
86-
if not package_name:
87-
return
88-
version = package_element.get('version')
106+
package_version = package_element.get('version')
89107

90108
# FIXME: source is source, not download
91109
source = metayaml.get('source') or {}
@@ -99,41 +117,165 @@ def parse(cls, location, package_only=False):
99117
vcs_url = about.get('dev_url')
100118

101119
dependencies = []
120+
extra_data = {}
102121
requirements = metayaml.get('requirements') or {}
103122
for scope, reqs in requirements.items():
123+
if scope == "build":
124+
extra_data[scope] = reqs
125+
continue
126+
104127
# requirements format is like:
105128
# (u'run', [u'mccortex ==1.0', u'nextflow ==19.01.0', u'cortexpy
106129
# ==0.45.7', u'kallisto ==0.44.0', u'bwa', u'pandas',
107130
# u'progressbar2', u'python >=3.6'])])
108131
for req in reqs:
109132
name, _, requirement = req.partition(" ")
110-
purl = PackageURL(type=cls.default_package_type, name=name)
133+
version = None
134+
if requirement.startswith("=="):
135+
_, version = requirement.split("==")
136+
137+
# requirements may have namespace, version too
138+
# - conda-forge::numpy=1.15.4
139+
namespace = None
140+
if "::" in name:
141+
namespace, name = name.split("::")
142+
143+
is_pinned = False
144+
if "=" in name:
145+
name, version = name.split("=")
146+
is_pinned = True
147+
requirement = f"={version}"
148+
149+
if name in ('pip', 'python'):
150+
if not scope in extra_data:
151+
extra_data[scope] = [req]
152+
else:
153+
extra_data[scope].append(req)
154+
continue
155+
156+
purl = PackageURL(
157+
type=cls.default_package_type,
158+
name=name,
159+
namespace=namespace,
160+
version=version,
161+
)
162+
if "run" in scope:
163+
is_runtime = True
164+
is_optional = False
165+
else:
166+
is_runtime = False
167+
is_optional = True
168+
111169
dependencies.append(
112170
models.DependentPackage(
113171
purl=purl.to_string(),
114172
extracted_requirement=requirement,
115173
scope=scope,
116-
is_runtime=True,
117-
is_optional=False,
174+
is_runtime=is_runtime,
175+
is_optional=is_optional,
176+
is_pinned=is_pinned,
177+
is_direct=True,
118178
)
119179
)
120180

121181
package_data = dict(
122182
datasource_id=cls.datasource_id,
123183
type=cls.default_package_type,
124184
name=package_name,
125-
version=version,
185+
version=package_version,
126186
download_url=download_url,
127187
homepage_url=homepage_url,
128188
vcs_url=vcs_url,
129189
description=description,
130190
sha256=sha256,
131191
extracted_license_statement=extracted_license_statement,
132192
dependencies=dependencies,
193+
extra_data=extra_data,
133194
)
134195
yield models.PackageData.from_data(package_data, package_only)
135196

136197

198+
def get_conda_yaml_dependencies(conda_data):
    """
    Return a list of DependentPackage mappings from conda and pypi
    dependencies present in a `conda_data` mapping.

    Each item of the ``dependencies`` list is either:
    - a string such as ``numpy``, ``numpy=1.19.2``, ``numpy=1.19.2=py38_0``
      or ``conda-forge::numpy=1.19.2``, or
    - a mapping with a ``pip`` key holding a list of pip requirement lines.

    Items of any other type are ignored. The ``pip`` and ``python`` entries
    are skipped. Return an empty list if `conda_data` is not a mapping.
    """
    if not isinstance(conda_data, dict):
        return []

    deps = []
    for dep in conda_data.get('dependencies') or []:
        if isinstance(dep, str):
            # An optional "channel::" prefix becomes the purl namespace.
            # partition() splits on the first "::" only and never raises.
            namespace = None
            if "::" in dep:
                namespace, _, dep = dep.partition("::")

            req = parse_requirement_line(dep)
            if req:
                # The entry parses as a pip-style requirement (bare name or
                # PEP 440 specifiers): it is reported with a pypi purl.
                purl, specs, is_pinned = _get_pypi_purl_and_specs(req)
            else:
                # Conda-style "name=version" or "name=version=build".
                # Reset these for every entry so that no value leaks from
                # a previously processed dependency.
                version = None
                specs = None
                is_pinned = False
                if "=" in dep:
                    dep, _, constraint = dep.partition("=")
                    # Drop an optional trailing "=build" from the version
                    # and keep the full constraint as the requirement.
                    version = constraint.partition("=")[0]
                    specs = f"={constraint}"
                    is_pinned = True

                purl = PackageURL(
                    type='conda',
                    namespace=namespace,
                    name=dep,
                    version=version,
                )

            if purl.name in ('pip', 'python'):
                continue

            deps.append(_get_dependency_mapping(purl, specs, is_pinned))

        elif isinstance(dep, dict):
            # "or []" also covers a "pip:" key present with an empty value,
            # which loads as None.
            for line in dep.get('pip') or []:
                req = parse_requirement_line(line)
                if not req:
                    continue
                purl, specs, is_pinned = _get_pypi_purl_and_specs(req)
                deps.append(_get_dependency_mapping(purl, specs, is_pinned))

    return deps


def _get_pypi_purl_and_specs(req):
    """
    Return a (purl, specs, is_pinned) tuple for a parsed requirement `req`.
    """
    version = None
    is_pinned = False
    specs = str(req.specs)
    if '==' in specs:
        version = specs.replace('==', '')
        is_pinned = True
    purl = PackageURL(type='pypi', name=req.name, version=version)
    return purl, specs, is_pinned


def _get_dependency_mapping(purl, specs, is_pinned):
    """
    Return a direct, runtime DependentPackage mapping for a `purl`.
    """
    return models.DependentPackage(
        purl=purl.to_string(),
        extracted_requirement=specs,
        scope='dependencies',
        is_runtime=True,
        is_optional=False,
        is_pinned=is_pinned,
        is_direct=True,
    ).to_dict()
277+
278+
137279
def get_meta_yaml_data(location):
138280
"""
139281
Return a mapping of conda metadata loaded from a meta.yaml files. The format
@@ -158,10 +300,21 @@ def get_meta_yaml_data(location):
158300
# Replace the variable with the value
159301
if '{{' in line and '}}' in line:
160302
for variable, value in variables.items():
161-
line = line.replace('{{ ' + variable + ' }}', value)
303+
if "|lower" in line:
304+
line = line.replace('{{ ' + variable + '|lower' + ' }}', value.lower())
305+
else:
306+
line = line.replace('{{ ' + variable + ' }}', value)
162307
yaml_lines.append(line)
163308

164-
return saneyaml.load('\n'.join(yaml_lines))
309+
# Cleanup any remaining complex jinja template lines
310+
# as the yaml load fails otherwise for unresolved jinja
311+
cleaned_yaml_lines = [
312+
line
313+
for line in yaml_lines
314+
if not "{{" in line
315+
]
316+
317+
return saneyaml.load(''.join(cleaned_yaml_lines))
165318

166319

167320
def get_variables(location):

src/packagedcode/models.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1100,7 +1100,16 @@ def is_datafile(cls, location, filetypes=tuple(), _bare_filename=False):
11001100
"""
11011101
if filetype.is_file(location) or _bare_filename:
11021102
loc = as_posixpath(location)
1103-
if any(fnmatchcase(loc, pat) for pat in cls.path_patterns):
1103+
1104+
# Some extension strings are used interchangeably
1105+
extension_aliases = {"yaml": "yml"}
1106+
path_patterns = list(cls.path_patterns)
1107+
for pattern in cls.path_patterns:
1108+
for extension, extension_alias in extension_aliases.items():
1109+
new_pattern = pattern.replace(extension, extension_alias)
1110+
path_patterns.append(new_pattern)
1111+
1112+
if any(fnmatchcase(loc, pat) for pat in path_patterns):
11041113
filetypes = filetypes or cls.filetypes
11051114
if not filetypes:
11061115
return True
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# from https://raw.githubusercontent.com/bayer-science-for-a-better-life/phc-gnn/refs/heads/master/environment_gpu.yml
2+
3+
name: phc-gnn
4+
5+
channels:
6+
- anaconda
7+
- pytorch
8+
- conda-forge
9+
- defaults
10+
11+
dependencies:
12+
- pip=20.2.4
13+
- anaconda::python=3.8.5
14+
- anaconda::python-dateutil=2.8.1
15+
- cudatoolkit=10.1
16+
- magma-cuda101
17+
- cudnn=7.6.5
18+
- pytorch=1.7.1
19+
- torchvision=0.8.2
20+
- torchaudio=0.7.2
21+
- conda-forge::numpy=1.19.2
22+
- anaconda::scipy=1.5.2
23+
- conda-forge::matplotlib=3.3.2
24+
- anaconda::networkx=2.5
25+
- anaconda::scikit-learn=0.23.2
26+
- anaconda::notebook=6.1.4
27+
- anaconda::jupyter_client=6.1.7
28+
- anaconda::jupyter_core=4.6.3
29+
- anaconda::h5py=2.10.0
30+
- conda-forge::tqdm=4.50.0
31+
- conda-forge::tensorboard=2.4.0
32+
- pip:
33+
- ogb==1.2.4
34+
- pytest==6.2.1
35+
- bottleneck==1.3.2
36+
# - torch-cluster==1.5.8
37+
# - torch-scatter==2.0.5
38+
# - torch-sparse==0.6.8
39+
# - torch-spline-conv==1.2.0
40+
# - torch-geometric==1.6.1

0 commit comments

Comments
 (0)