Skip to content

Commit 7433611

Browse files
authored
Merge pull request nf-core#3591 from nf-core/env-sorting
Update conda env sorting
2 parents 5081480 + 186ca11 commit 7433611

File tree

5 files changed

+437
-134
lines changed

5 files changed

+437
-134
lines changed

nf_core/modules/lint/environment_yml.py

Lines changed: 108 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,18 @@
22
import logging
33
from pathlib import Path
44

5-
import yaml
5+
import ruamel.yaml
66
from jsonschema import exceptions, validators
77

8-
from nf_core.components.lint import ComponentLint, LintExceptionError
8+
from nf_core.components.lint import ComponentLint, LintExceptionError, LintResult
99
from nf_core.components.nfcore_component import NFCoreComponent
10-
from nf_core.utils import custom_yaml_dumper
1110

1211
log = logging.getLogger(__name__)
1312

13+
# Configure ruamel.yaml for proper formatting
14+
yaml = ruamel.yaml.YAML()
15+
yaml.indent(mapping=2, sequence=2, offset=2)
16+
1417

1518
def environment_yml(module_lint_object: ComponentLint, module: NFCoreComponent, allow_missing: bool = False) -> None:
1619
"""
@@ -21,6 +24,15 @@ def environment_yml(module_lint_object: ComponentLint, module: NFCoreComponent,
2124
is sorted alphabetically.
2225
"""
2326
env_yml = None
27+
has_schema_header = False
28+
lines = []
29+
30+
# Define the schema lines to be added if missing
31+
schema_lines = [
32+
"---\n",
33+
"# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json\n",
34+
]
35+
2436
# load the environment.yml file
2537
if module.environment_yml is None:
2638
if allow_missing:
@@ -34,8 +46,23 @@ def environment_yml(module_lint_object: ComponentLint, module: NFCoreComponent,
3446
return
3547
raise LintExceptionError("Module does not have an `environment.yml` file")
3648
try:
49+
# Read the entire file content to handle headers properly
3750
with open(module.environment_yml) as fh:
38-
env_yml = yaml.safe_load(fh)
51+
lines = fh.readlines()
52+
53+
# Check if the first two lines contain schema configuration
54+
content_start = 0
55+
56+
if len(lines) >= 2 and lines[0] == "---\n" and lines[1].startswith("# yaml-language-server: $schema="):
57+
has_schema_header = True
58+
content_start = 2
59+
60+
content = "".join(lines[content_start:]) # Skip schema lines when reading content
61+
62+
# Parse the YAML content
63+
env_yml = yaml.load(content)
64+
if env_yml is None:
65+
raise ruamel.yaml.scanner.ScannerError("Empty YAML file")
3966

4067
module.passed.append(("environment_yml_exists", "Module's `environment.yml` exists", module.environment_yml))
4168

@@ -82,41 +109,91 @@ def environment_yml(module_lint_object: ComponentLint, module: NFCoreComponent,
82109
)
83110

84111
if valid_env_yml:
85-
# Check that the dependencies section is sorted alphabetically
86-
def sort_recursively(obj):
87-
"""Simple recursive sort for nested structures."""
88-
if isinstance(obj, list):
89-
90-
def get_key(x):
91-
if isinstance(x, dict):
92-
# For dicts like {"pip": [...]}, use the key "pip"
93-
return (list(x.keys())[0], 1)
94-
else:
95-
# For strings like "pip=23.3.1", use "pip" and for bioconda::samtools=1.15.1, use "bioconda::samtools"
96-
return (str(x).split("=")[0], 0)
112+
# Sort dependencies if they exist
113+
if "dependencies" in env_yml:
114+
dicts = []
115+
others = []
97116

98-
return sorted([sort_recursively(item) for item in obj], key=get_key)
99-
elif isinstance(obj, dict):
100-
return {k: sort_recursively(v) for k, v in obj.items()}
101-
else:
102-
return obj
117+
for term in env_yml["dependencies"]:
118+
if isinstance(term, dict):
119+
dicts.append(term)
120+
else:
121+
others.append(term)
103122

104-
sorted_dependencies = sort_recursively(env_yml["dependencies"])
123+
# Sort non-dict dependencies with special handling for pip
124+
def sort_key(x):
125+
# Convert to string for comparison
126+
str_x = str(x)
127+
# If it's pip itself (bare `pip` or a pinned `pip=<version>`), put it after other conda packages
128+
if str_x.startswith("pip=") or str_x == "pip":
129+
return (1, str_x) # pip comes after other conda packages
130+
else:
131+
return (0, str_x) # regular conda packages come first
105132

106-
# Direct comparison of sorted vs original dependencies
107-
if sorted_dependencies == env_yml["dependencies"]:
108-
module.passed.append(
109-
(
133+
others.sort(key=sort_key)
134+
135+
# Sort any lists within dict dependencies
136+
for dict_term in dicts:
137+
for value in dict_term.values():
138+
if isinstance(value, list):
139+
value.sort(key=str)
140+
141+
# Sort dict dependencies alphabetically
142+
dicts.sort(key=str)
143+
144+
# Combine sorted dependencies
145+
sorted_deps = others + dicts
146+
147+
# Check if dependencies are already sorted
148+
is_sorted = env_yml["dependencies"] == sorted_deps and all(
149+
not isinstance(term, dict)
150+
or all(not isinstance(value, list) or value == sorted(value, key=str) for value in term.values())
151+
for term in env_yml["dependencies"]
152+
)
153+
else:
154+
sorted_deps = None
155+
is_sorted = True
156+
157+
if is_sorted:
158+
module_lint_object.passed.append(
159+
LintResult(
160+
module,
110161
"environment_yml_sorted",
111-
"The dependencies in the module's `environment.yml` are sorted alphabetically",
162+
"The dependencies in the module's `environment.yml` are sorted correctly",
112163
module.environment_yml,
113164
)
114165
)
115166
else:
116-
# sort it and write it back to the file
117167
log.info(
118-
f"Dependencies in {module.component_name}'s environment.yml were not sorted alphabetically. Sorting them now."
168+
f"Dependencies in {module.component_name}'s environment.yml were not sorted. Sorting them now."
119169
)
120-
env_yml["dependencies"] = sorted_dependencies
170+
171+
# Update dependencies if they need sorting
172+
if sorted_deps is not None:
173+
env_yml["dependencies"] = sorted_deps
174+
175+
# Write back to file with headers
121176
with open(Path(module.component_dir, "environment.yml"), "w") as fh:
122-
yaml.dump(env_yml, fh, Dumper=custom_yaml_dumper())
177+
# If file had a schema header, check if it's pointing to a different URL
178+
if has_schema_header and len(lines) >= 2:
179+
existing_schema_line = lines[1]
180+
# If the existing schema URL is different, update it
181+
if not existing_schema_line.endswith("/modules/master/modules/environment-schema.json\n"):
182+
fh.writelines(schema_lines)
183+
else:
184+
# Keep the existing schema lines
185+
fh.writelines(lines[:2])
186+
else:
187+
# No schema header present, add the default one
188+
fh.writelines(schema_lines)
189+
# Then dump the sorted YAML
190+
yaml.dump(env_yml, fh)
191+
192+
module_lint_object.passed.append(
193+
LintResult(
194+
module,
195+
"environment_yml_sorted",
196+
"The dependencies in the module's `environment.yml` have been sorted",
197+
module.environment_yml,
198+
)
199+
)

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ build-backend = "setuptools.build_meta"
33
requires = ["setuptools>=40.6.0", "wheel"]
44

55
[tool.pytest.ini_options]
6-
markers = ["datafiles: load datafiles"]
6+
markers = ["datafiles: load datafiles", "integration"]
77
testpaths = ["tests"]
88
python_files = ["test_*.py"]
99
asyncio_mode = "auto"

tests/modules/lint/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)