diff --git a/nf_core/components/nfcore_component.py b/nf_core/components/nfcore_component.py index 3720a283d9..43e8d5d822 100644 --- a/nf_core/components/nfcore_component.py +++ b/nf_core/components/nfcore_component.py @@ -7,6 +7,8 @@ from pathlib import Path from typing import Any +from reftrace import Module + log = logging.getLogger(__name__) @@ -179,12 +181,11 @@ def _get_included_components_in_chained_tests(self, main_nf_test: Path | str): return included_components def _get_process_name(self): - with open(self.main_nf) as fh: - for line in fh: - if re.search(r"^\s*process\s*\w*\s*{", line): - return re.search(r"^\s*process\s*(\w*)\s*{.*", line).group(1) or "" - return "" - + try: + return Module.from_file(str(self.main_nf)).processes[0].name + except IndexError: + return "" + def get_inputs_from_main_nf(self) -> None: """Collect all inputs from the main.nf file.""" inputs: Any = [] # Can be 'list[list[dict[str, dict[str, str]]]]' or 'list[str]' diff --git a/nf_core/modules/lint/main_nf.py b/nf_core/modules/lint/main_nf.py index 60bda6590a..8e5b923f7b 100644 --- a/nf_core/modules/lint/main_nf.py +++ b/nf_core/modules/lint/main_nf.py @@ -10,6 +10,7 @@ import requests import yaml +from reftrace import Module, ParseError from rich.progress import Progress import nf_core @@ -308,8 +309,13 @@ def check_process_section(self, lines, registry, fix_version, progress_bar): else: self.failed.append(("main_nf", "process_capitals", "Process name is not in capital letters", self.main_nf)) - # Check that process labels are correct - check_process_labels(self, lines) + reftrace_mod = Module.from_file(str(self.main_nf)) + if not isinstance(reftrace_mod, ParseError): + check_process_labels(self, reftrace_mod) + else: + self.failed.append( + ("process_standard_label", f"Failed to parse module with Reftrace: {reftrace_mod.error}", self.main_nf) + ) # Deprecated enable_conda for i, raw_line in enumerate(lines): @@ -524,7 +530,17 @@ def check_process_section(self, lines, registry, fix_version, progress_bar): return docker_tag == singularity_tag -def check_process_labels(self, lines): +def check_process_labels(self, mod:Module): + """ + Check process labels using Reftrace parsing. + + This function validates that process labels conform to nf-core standards using + structured parsing via the Reftrace library. + + Args: + self: ModuleLint object with passed/warned/failed lists and main_nf path + mod: Reftrace Module object containing parsed Nextflow processes + """ correct_process_labels = [ "process_single", "process_low", @@ -533,14 +549,30 @@ def check_process_labels(self, lines): "process_long", "process_high_memory", ] - all_labels = [line.strip() for line in lines if line.lstrip().startswith("label ")] + + # Defensive checks for Reftrace module structure + if not mod.processes: + self.warned.append(("process_standard_label", "No processes found in module", self.main_nf)) + return + + process = mod.processes[0] + if not process.labels: + self.warned.append(("process_standard_label", "No label found for process", self.main_nf)) + return + + # Extract label values from all label directives + all_labels = [] + for label_directive in process.labels: + # Get the label value from Reftrace Label objects + if hasattr(label_directive, "value") and label_directive.value: + label_value = label_directive.value + all_labels.append(label_value) bad_labels = [] good_labels = [] + invalid_labels_count = 0 if len(all_labels) > 0: for label in all_labels: - try: - label = re.match(r"^label\s+'?\"?([a-zA-Z0-9_-]+)'?\"?$", label).group(1) - except AttributeError: + if not label.replace("_", "").isalnum(): self.warned.append( ( "main_nf", @@ -549,11 +581,13 @@ def check_process_labels(self, lines): self.main_nf, ) ) + invalid_labels_count += 1 continue if label not in correct_process_labels: bad_labels.append(label) else: good_labels.append(label) + if len(good_labels) > 1: self.warned.append( ( diff --git a/requirements.txt b/requirements.txt index 6e1102d936..4af22b2294 100644 --- a/requirements.txt +++ b/requirements.txt @@ -24,3 +24,4 @@ textual==6.2.1 trogon pdiff ruamel.yaml +reftrace \ No newline at end of file diff --git a/tests/modules/lint/test_lint_utils.py b/tests/modules/lint/test_lint_utils.py index 006bec978a..3cab90d33b 100644 --- a/tests/modules/lint/test_lint_utils.py +++ b/tests/modules/lint/test_lint_utils.py @@ -1,3 +1,6 @@ +import os +import tempfile + import nf_core.modules.lint from ...test_modules import TestModules @@ -12,7 +15,32 @@ def __init__(self): self.warned = [] self.failed = [] - self.main_nf = "main_nf" + # Create a temporary file with basic Nextflow process structure + # that Reftrace can parse + self._temp_file = tempfile.NamedTemporaryFile(mode="w", suffix=".nf", delete=False) + basic_process = """process TEST_PROCESS { + label 'process_high' + + input: + path input_file + + output: + path "output.txt" + + script: + ''' + echo "test" > output.txt + ''' +} +""" + self._temp_file.write(basic_process) + self._temp_file.close() + self.main_nf = self._temp_file.name + + def cleanup(self): + """Clean up the temporary file""" + if hasattr(self, "_temp_file") and os.path.exists(self._temp_file.name): + os.unlink(self._temp_file.name) class TestModulesLint(TestModules): @@ -32,4 +60,4 @@ def test_mock_module_lint(self): assert isinstance(mock_lint.passed, list) assert isinstance(mock_lint.warned, list) assert isinstance(mock_lint.failed, list) - assert mock_lint.main_nf == "main_nf" + assert mock_lint.main_nf == mock_lint._temp_file.name diff --git a/tests/modules/lint/test_main_nf.py b/tests/modules/lint/test_main_nf.py index 227a32511c..da0569b95b 100644 --- a/tests/modules/lint/test_main_nf.py +++ b/tests/modules/lint/test_main_nf.py @@ -1,4 +1,7 @@ +from pathlib import Path + import pytest +from reftrace import Module, ParseError import nf_core.modules.lint import nf_core.modules.patch @@ -7,36 +10,103 @@ from ...test_modules import TestModules from .test_lint_utils import MockModuleLint +# @pytest.mark.parametrize( +# "content,passed,warned,failed", +# [ +# # Valid process label +# ("label 'process_high'\ncpus 12", 1, 0, 0), +# # Non-alphanumeric characters in label +# ("label 'a:label:with:colons'\ncpus 12", 0, 2, 0), +# # Conflicting labels +# ("label 'process_high'\nlabel 'process_low'\ncpus 12", 0, 1, 0), +# # Duplicate labels +# ("label 'process_high'\nlabel 'process_high'\ncpus 12", 0, 2, 0), +# # Valid and non-standard labels +# ("label 'process_high'\nlabel 'process_extra_label'\ncpus 12", 1, 1, 0), +# # Non-standard label only +# ("label 'process_extra_label'\ncpus 12", 0, 2, 0), +# # Non-standard duplicates without quotes +# ("label process_extra_label\nlabel process_extra_label\ncpus 12", 0, 3, 0), +# # No label found +# ("cpus 12", 0, 1, 0), +# ], +# ) + @pytest.mark.parametrize( - "content,passed,warned,failed", + "label_content,passed,warned,failed", [ # Valid process label - ("label 'process_high'\ncpus 12", 1, 0, 0), + ("label 'process_high'", 1, 0, 0), # Non-alphanumeric characters in label - ("label 'a:label:with:colons'\ncpus 12", 0, 2, 0), - # Conflicting labels - ("label 'process_high'\nlabel 'process_low'\ncpus 12", 0, 1, 0), + ("label 'a:label:with:colons'", 0, 2, 0), + # Conflicting labels (multiple label lines) + ("label 'process_low'\nlabel 'process_high'", 0, 1, 0), # Duplicate labels - ("label 'process_high'\nlabel 'process_high'\ncpus 12", 0, 2, 0), + ("label 'process_high'\nlabel 'process_high'", 0, 2, 0), # Valid and non-standard labels - ("label 'process_high'\nlabel 'process_extra_label'\ncpus 12", 1, 1, 0), + ("label 'process_high'\nlabel 'process_extra_label'", 1, 1, 0), # Non-standard label only - ("label 'process_extra_label'\ncpus 12", 0, 2, 0), - # Non-standard duplicates without quotes - ("label process_extra_label\nlabel process_extra_label\ncpus 12", 0, 3, 0), + ("label 'process_extra_label'", 0, 2, 0), + # Duplicate non-standard labels + ("label 'process_extra_label'\nlabel 'process_extra_label'", 0, 3, 0), # No label found - ("cpus 12", 0, 1, 0), + ("cpus 2", 0, 1, 0), ], ) -def test_process_labels(content, passed, warned, failed): +def test_process_labels(label_content, passed, warned, failed): """Test process label validation""" - mock_lint = MockModuleLint() - check_process_labels(mock_lint, content.splitlines()) + # Create a temporary file with the specific label content + import tempfile + + # Create proper Nextflow content with the label + process_content = f"""process TEST_PROCESS {{ + {label_content} + + input: + path input_file + + output: + path "output.txt" + + script: + ''' + echo "test" > output.txt + ''' +}} +""" + + temp_file = tempfile.NamedTemporaryFile(mode="w", suffix=".nf", delete=False) + temp_file.write(process_content) + temp_file.close() + + try: + # Create MockModuleLint but override with our specific test file + mock_lint = MockModuleLint() + mock_lint.cleanup() # Clean up the default temp file + mock_lint.main_nf = temp_file.name + + # Parse with Reftrace + module = Module.from_file(temp_file.name) + assert not isinstance(module, ParseError), f"Failed to parse test file: {module}" + # Run the check_process_labels function + check_process_labels(mock_lint, module) + + # Verify results + assert len(mock_lint.passed) == passed, ( + f"Expected {passed} passed tests, got {len(mock_lint.passed)}: {mock_lint.passed}" + ) + assert len(mock_lint.warned) == warned, ( + f"Expected {warned} warned tests, got {len(mock_lint.warned)}: {mock_lint.warned}" + ) + assert len(mock_lint.failed) == failed, ( + f"Expected {failed} failed tests, got {len(mock_lint.failed)}: {mock_lint.failed}" + ) - assert len(mock_lint.passed) == passed - assert len(mock_lint.warned) == warned - assert len(mock_lint.failed) == failed + finally: + # Clean up the temporary file + if Path(temp_file.name).exists(): + Path(temp_file.name).unlink() @pytest.mark.parametrize(