22import logging
33from pathlib import Path
44
5- import yaml
5+ import ruamel . yaml
66from jsonschema import exceptions , validators
77
8- from nf_core .components .lint import ComponentLint , LintExceptionError
8+ from nf_core .components .lint import ComponentLint , LintExceptionError , LintResult
99from nf_core .components .nfcore_component import NFCoreComponent
10- from nf_core .utils import custom_yaml_dumper
1110
1211log = logging .getLogger (__name__ )
1312
13+ # Shared ruamel.yaml instance used to load and dump environment.yml (indent: mapping=2, sequence=2, offset=2)
14+ yaml = ruamel .yaml .YAML ()
15+ yaml .indent (mapping = 2 , sequence = 2 , offset = 2 )
16+
1417
1518def environment_yml (module_lint_object : ComponentLint , module : NFCoreComponent , allow_missing : bool = False ) -> None :
1619 """
@@ -21,6 +24,15 @@ def environment_yml(module_lint_object: ComponentLint, module: NFCoreComponent,
2124 is sorted alphabetically.
2225 """
2326 env_yml = None
27+ has_schema_header = False
28+ lines = []
29+
30+ # Define the schema lines to be added if missing
31+ schema_lines = [
32+ "---\n " ,
33+ "# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json\n " ,
34+ ]
35+
2436 # load the environment.yml file
2537 if module .environment_yml is None :
2638 if allow_missing :
@@ -34,8 +46,23 @@ def environment_yml(module_lint_object: ComponentLint, module: NFCoreComponent,
3446 return
3547 raise LintExceptionError ("Module does not have an `environment.yml` file" )
3648 try :
49+ # Read the entire file content to handle headers properly
3750 with open (module .environment_yml ) as fh :
38- env_yml = yaml .safe_load (fh )
51+ lines = fh .readlines ()
52+
53+ # Check if the first two lines contain schema configuration
54+ content_start = 0
55+
56+ if len (lines ) >= 2 and lines [0 ] == "---\n " and lines [1 ].startswith ("# yaml-language-server: $schema=" ):
57+ has_schema_header = True
58+ content_start = 2
59+
60+ content = "" .join (lines [content_start :]) # Skip schema lines when reading content
61+
62+ # Parse the YAML content
63+ env_yml = yaml .load (content )
64+ if env_yml is None :
65+ raise ruamel .yaml .scanner .ScannerError ("Empty YAML file" )
3966
4067 module .passed .append (("environment_yml_exists" , "Module's `environment.yml` exists" , module .environment_yml ))
4168
@@ -82,41 +109,91 @@ def environment_yml(module_lint_object: ComponentLint, module: NFCoreComponent,
82109 )
83110
84111 if valid_env_yml :
85- # Check that the dependencies section is sorted alphabetically
86- def sort_recursively (obj ):
87- """Simple recursive sort for nested structures."""
88- if isinstance (obj , list ):
89-
90- def get_key (x ):
91- if isinstance (x , dict ):
92- # For dicts like {"pip": [...]}, use the key "pip"
93- return (list (x .keys ())[0 ], 1 )
94- else :
95- # For strings like "pip=23.3.1", use "pip" and for bioconda::samtools=1.15.1, use "bioconda::samtools"
96- return (str (x ).split ("=" )[0 ], 0 )
112+ # Sort dependencies if they exist
113+ if "dependencies" in env_yml :
114+ dicts = []
115+ others = []
97116
98- return sorted ([ sort_recursively ( item ) for item in obj ], key = get_key )
99- elif isinstance (obj , dict ):
100- return { k : sort_recursively ( v ) for k , v in obj . items ()}
101- else :
102- return obj
117+ for term in env_yml [ "dependencies" ]:
118+ if isinstance (term , dict ):
119+ dicts . append ( term )
120+ else :
121+ others . append ( term )
103122
104- sorted_dependencies = sort_recursively (env_yml ["dependencies" ])
123+ # Sort non-dict dependencies with special handling for pip
124+ def sort_key (x ):
125+ # Convert to string for comparison
126+ str_x = str (x )
127+ # If it's pip itself (bare "pip" or a pinned "pip=<version>"), put it after the other conda packages
128+ if str_x .startswith ("pip=" ) or str_x == "pip" :
129+ return (1 , str_x ) # pip comes after other conda packages
130+ else :
131+ return (0 , str_x ) # regular conda packages come first
105132
106- # Direct comparison of sorted vs original dependencies
107- if sorted_dependencies == env_yml ["dependencies" ]:
108- module .passed .append (
109- (
133+ others .sort (key = sort_key )
134+
135+ # Sort any lists within dict dependencies in place (these are the same list objects held by env_yml)
136+ for dict_term in dicts :
137+ for value in dict_term .values ():
138+ if isinstance (value , list ):
139+ value .sort (key = str )
140+
141+ # Sort dict dependencies alphabetically
142+ dicts .sort (key = str )
143+
144+ # Combine sorted dependencies
145+ sorted_deps = others + dicts
146+
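147+ # Check if dependencies are already sorted. NOTE(review): nested lists were already sorted in place by this point, so only top-level ordering differences can be detected here - confirm this is intended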
148+ is_sorted = env_yml ["dependencies" ] == sorted_deps and all (
149+ not isinstance (term , dict )
150+ or all (not isinstance (value , list ) or value == sorted (value , key = str ) for value in term .values ())
151+ for term in env_yml ["dependencies" ]
152+ )
153+ else :
154+ sorted_deps = None
155+ is_sorted = True
156+
157+ if is_sorted :
158+ module_lint_object .passed .append (
159+ LintResult (
160+ module ,
110161 "environment_yml_sorted" ,
111- "The dependencies in the module's `environment.yml` are sorted alphabetically " ,
162+ "The dependencies in the module's `environment.yml` are sorted correctly " ,
112163 module .environment_yml ,
113164 )
114165 )
115166 else :
116- # sort it and write it back to the file
117167 log .info (
118- f"Dependencies in { module .component_name } 's environment.yml were not sorted alphabetically . Sorting them now."
168+ f"Dependencies in { module .component_name } 's environment.yml were not sorted. Sorting them now."
119169 )
120- env_yml ["dependencies" ] = sorted_dependencies
170+
171+ # Update dependencies if they need sorting
172+ if sorted_deps is not None :
173+ env_yml ["dependencies" ] = sorted_deps
174+
175+ # Write back to file with headers
121176 with open (Path (module .component_dir , "environment.yml" ), "w" ) as fh :
122- yaml .dump (env_yml , fh , Dumper = custom_yaml_dumper ())
177+ # If file had a schema header, check if it's pointing to a different URL
178+ if has_schema_header and len (lines ) >= 2 :
179+ existing_schema_line = lines [1 ]
180+ # If the existing schema URL is different, update it
181+ if not existing_schema_line .endswith ("/modules/master/modules/environment-schema.json\n " ):
182+ fh .writelines (schema_lines )
183+ else :
184+ # Keep the existing schema lines
185+ fh .writelines (lines [:2 ])
186+ else :
187+ # No schema header present, add the default one
188+ fh .writelines (schema_lines )
189+ # Then dump the sorted YAML
190+ yaml .dump (env_yml , fh )
191+
192+ module_lint_object .passed .append (
193+ LintResult (
194+ module ,
195+ "environment_yml_sorted" ,
196+ "The dependencies in the module's `environment.yml` have been sorted" ,
197+ module .environment_yml ,
198+ )
199+ )
0 commit comments