Skip to content

Commit d2c4e0a

Browse files
Improve conda packages and dependencies parsing
Signed-off-by: Ayan Sinha Mahapatra <[email protected]>
1 parent 93ca65c commit d2c4e0a

File tree

16 files changed

+2913
-59
lines changed

16 files changed

+2913
-59
lines changed

src/packagedcode/conda.py

Lines changed: 166 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
from packagedcode import models
1616
from packagedcode.pypi import BaseDependencyFileHandler
17+
from dparse2.parser import parse_requirement_line
1718

1819
"""
1920
Handle Conda manifests and metadata, see https://docs.conda.io/en/latest/
@@ -29,12 +30,33 @@
2930
class CondaYamlHandler(BaseDependencyFileHandler):
    """
    Handle conda environment YAML manifests, reporting the environment
    name, its channels and its conda and pip dependencies.
    """
    # TODO: there are several other manifests worth adding
    datasource_id = 'conda_yaml'
    path_patterns = ('*conda*.yaml', '*env*.yaml', '*environment*.yaml')
    default_package_type = 'conda'
    default_primary_language = 'Python'
    description = 'Conda yaml manifest'
    documentation_url = 'https://docs.conda.io/'

    @classmethod
    def parse(cls, location, package_only=False):
        """
        Yield one PackageData built from the conda environment YAML file at
        `location`. `package_only` is passed through unchanged to
        `models.PackageData.from_data`.

        Yield nothing when the file does not contain a YAML mapping.
        """
        with open(location, encoding='utf-8') as fi:
            conda_data = saneyaml.load(fi.read())

        # The path patterns above are broad and also match YAML files that
        # are not conda environments; an empty document or a top-level
        # list/scalar has no `.get()` and would otherwise crash the scan.
        if not isinstance(conda_data, dict):
            return

        dependencies = get_conda_yaml_dependencies(conda_data=conda_data)
        name = conda_data.get('name')

        # Channels are kept as extra data only when the manifest lists some.
        extra_data = {}
        channels = conda_data.get('channels')
        if channels:
            extra_data['channels'] = channels

        package_data = dict(
            datasource_id=cls.datasource_id,
            type=cls.default_package_type,
            name=name,
            primary_language=cls.default_primary_language,
            dependencies=dependencies,
            extra_data=extra_data,
            is_private=True,
        )
        yield models.PackageData.from_data(package_data, package_only)
59+
3860

3961
class CondaMetaYamlHandler(models.DatafileHandler):
4062
datasource_id = 'conda_meta_yaml'
@@ -83,9 +105,7 @@ def parse(cls, location, package_only=False):
83105
metayaml = get_meta_yaml_data(location)
84106
package_element = metayaml.get('package') or {}
85107
package_name = package_element.get('name')
86-
if not package_name:
87-
return
88-
version = package_element.get('version')
108+
package_version = package_element.get('version')
89109

90110
# FIXME: source is source, not download
91111
source = metayaml.get('source') or {}
@@ -99,41 +119,165 @@ def parse(cls, location, package_only=False):
99119
vcs_url = about.get('dev_url')
100120

101121
dependencies = []
122+
extra_data = {}
102123
requirements = metayaml.get('requirements') or {}
103124
for scope, reqs in requirements.items():
125+
if scope == "build":
126+
extra_data[scope] = reqs
127+
continue
128+
104129
# requirements format is like:
105130
# (u'run', [u'mccortex ==1.0', u'nextflow ==19.01.0', u'cortexpy
106131
# ==0.45.7', u'kallisto ==0.44.0', u'bwa', u'pandas',
107132
# u'progressbar2', u'python >=3.6'])])
108133
for req in reqs:
109134
name, _, requirement = req.partition(" ")
110-
purl = PackageURL(type=cls.default_package_type, name=name)
135+
version = None
136+
if requirement.startswith("=="):
137+
_, version = requirement.split("==")
138+
139+
# requirements may have namespace, version too
140+
# - conda-forge::numpy=1.15.4
141+
namespace = None
142+
if "::" in name:
143+
namespace, name = name.split("::")
144+
145+
is_pinned = False
146+
if "=" in name:
147+
name, version = name.split("=")
148+
is_pinned = True
149+
requirement = f"={version}"
150+
151+
if name in ('pip', 'python'):
152+
if not scope in extra_data:
153+
extra_data[scope] = [req]
154+
else:
155+
extra_data[scope].append(req)
156+
continue
157+
158+
purl = PackageURL(
159+
type=cls.default_package_type,
160+
name=name,
161+
namespace=namespace,
162+
version=version,
163+
)
164+
if "run" in scope:
165+
is_runtime = True
166+
is_optional = False
167+
else:
168+
is_runtime = False
169+
is_optional = True
170+
111171
dependencies.append(
112172
models.DependentPackage(
113173
purl=purl.to_string(),
114174
extracted_requirement=requirement,
115175
scope=scope,
116-
is_runtime=True,
117-
is_optional=False,
176+
is_runtime=is_runtime,
177+
is_optional=is_optional,
178+
is_pinned=is_pinned,
179+
is_direct=True,
118180
)
119181
)
120182

121183
package_data = dict(
122184
datasource_id=cls.datasource_id,
123185
type=cls.default_package_type,
124186
name=package_name,
125-
version=version,
187+
version=package_version,
126188
download_url=download_url,
127189
homepage_url=homepage_url,
128190
vcs_url=vcs_url,
129191
description=description,
130192
sha256=sha256,
131193
extracted_license_statement=extracted_license_statement,
132194
dependencies=dependencies,
195+
extra_data=extra_data,
133196
)
134197
yield models.PackageData.from_data(package_data, package_only)
135198

136199

200+
def get_conda_yaml_dependencies(conda_data):
    """
    Return a list of DependentPackage mappings from conda and pypi
    dependencies present in a `conda_data` mapping.

    Each entry of the ``dependencies`` list is either a string naming one
    dependency, optionally prefixed with a ``channel::`` namespace, or a
    mapping whose ``pip`` key lists pip requirement lines. String entries
    named ``pip`` or ``python`` are not reported as dependencies.
    """
    dependencies = conda_data.get('dependencies') or []
    deps = []
    for dep in dependencies:
        if isinstance(dep, str):
            namespace = None
            if "::" in dep:
                # requirements may have a namespace: conda-forge::numpy=1.15.4
                # partition() never raises, even with more than one "::"
                namespace, _, dep = dep.partition("::")

            req = parse_requirement_line(dep)
            if req:
                # Entries that parse as a pip requirement line are reported
                # with a pypi purl, without the namespace.
                purl, specs, is_pinned = _get_pypi_purl_and_specs(req)
            else:
                # Reset for every entry: an unpinned entry must not reuse
                # the version of a previous one (or hit an unbound name).
                version = None
                specs = None
                is_pinned = False
                if "=" in dep:
                    # conda specs are `name=version` and may carry a build
                    # string as in `name=version=build`; only the version
                    # goes in the purl, the full pin stays in the
                    # extracted requirement.
                    dep, _, pin = dep.partition("=")
                    version = pin.partition("=")[0]
                    is_pinned = True
                    specs = f"={pin}"

                purl = PackageURL(
                    type='conda',
                    namespace=namespace,
                    name=dep,
                    version=version,
                )

            if purl.name in ('pip', 'python'):
                continue

            deps.append(_get_dependency_mapping(purl, specs, is_pinned))

        elif isinstance(dep, dict):
            # `or []` covers a `pip:` key that is present but has no value
            for line in dep.get('pip') or []:
                req = parse_requirement_line(line)
                if req:
                    purl, specs, is_pinned = _get_pypi_purl_and_specs(req)
                    deps.append(_get_dependency_mapping(purl, specs, is_pinned))

    return deps


def _get_pypi_purl_and_specs(req):
    """
    Return a (purl, specs, is_pinned) tuple for a parsed pip requirement
    `req`, where the purl carries a version only for a `==` requirement.
    """
    specs = str(req.specs)
    version = None
    is_pinned = False
    if '==' in specs:
        version = specs.replace('==', '')
        is_pinned = True
    purl = PackageURL(type='pypi', name=req.name, version=version)
    return purl, specs, is_pinned


def _get_dependency_mapping(purl, specs, is_pinned):
    """
    Return a DependentPackage mapping for `purl` as a direct, required,
    runtime dependency in the 'dependencies' scope.
    """
    return models.DependentPackage(
        purl=purl.to_string(),
        extracted_requirement=specs,
        scope='dependencies',
        is_runtime=True,
        is_optional=False,
        is_pinned=is_pinned,
        is_direct=True,
    ).to_dict()
279+
280+
137281
def get_meta_yaml_data(location):
138282
"""
139283
Return a mapping of conda metadata loaded from a meta.yaml files. The format
@@ -158,10 +302,21 @@ def get_meta_yaml_data(location):
158302
# Replace the variable with the value
159303
if '{{' in line and '}}' in line:
160304
for variable, value in variables.items():
161-
line = line.replace('{{ ' + variable + ' }}', value)
305+
if "|lower" in line:
306+
line = line.replace('{{ ' + variable + '|lower' + ' }}', value.lower())
307+
else:
308+
line = line.replace('{{ ' + variable + ' }}', value)
162309
yaml_lines.append(line)
163310

164-
return saneyaml.load('\n'.join(yaml_lines))
311+
# Cleanup any remaining complex jinja template lines
312+
# as the yaml load fails otherwise for unresolved jinja
313+
cleaned_yaml_lines = [
314+
line
315+
for line in yaml_lines
316+
if not "{{" in line
317+
]
318+
319+
return saneyaml.load(''.join(cleaned_yaml_lines))
165320

166321

167322
def get_variables(location):

src/packagedcode/models.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1100,7 +1100,16 @@ def is_datafile(cls, location, filetypes=tuple(), _bare_filename=False):
11001100
"""
11011101
if filetype.is_file(location) or _bare_filename:
11021102
loc = as_posixpath(location)
1103-
if any(fnmatchcase(loc, pat) for pat in cls.path_patterns):
1103+
1104+
# Some extension strings are used interchangeably
1105+
extension_aliases = {"yaml": "yml"}
1106+
path_patterns = list(cls.path_patterns)
1107+
for pattern in cls.path_patterns:
1108+
for extension, extension_alias in extension_aliases.items():
1109+
new_pattern = pattern.replace(extension, extension_alias)
1110+
path_patterns.append(new_pattern)
1111+
1112+
if any(fnmatchcase(loc, pat) for pat in path_patterns):
11041113
filetypes = filetypes or cls.filetypes
11051114
if not filetypes:
11061115
return True
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# from https://raw.githubusercontent.com/bayer-science-for-a-better-life/phc-gnn/refs/heads/master/environment_gpu.yml
2+
3+
name: phc-gnn
4+
5+
channels:
6+
- anaconda
7+
- pytorch
8+
- conda-forge
9+
- defaults
10+
11+
dependencies:
12+
- pip=20.2.4
13+
- anaconda::python=3.8.5
14+
- anaconda::python-dateutil=2.8.1
15+
- cudatoolkit=10.1
16+
- magma-cuda101
17+
- cudnn=7.6.5
18+
- pytorch=1.7.1
19+
- torchvision=0.8.2
20+
- torchaudio=0.7.2
21+
- conda-forge::numpy=1.19.2
22+
- anaconda::scipy=1.5.2
23+
- conda-forge::matplotlib=3.3.2
24+
- anaconda::networkx=2.5
25+
- anaconda::scikit-learn=0.23.2
26+
- anaconda::notebook=6.1.4
27+
- anaconda::jupyter_client=6.1.7
28+
- anaconda::jupyter_core=4.6.3
29+
- anaconda::h5py=2.10.0
30+
- conda-forge::tqdm=4.50.0
31+
- conda-forge::tensorboard=2.4.0
32+
- pip:
33+
- ogb==1.2.4
34+
- pytest==6.2.1
35+
- bottleneck==1.3.2
36+
# - torch-cluster==1.5.8
37+
# - torch-scatter==2.0.5
38+
# - torch-sparse==0.6.8
39+
# - torch-spline-conv==1.2.0
40+
# - torch-geometric==1.6.1

0 commit comments

Comments
 (0)