Skip to content

Commit da4cb3f

Browse files
authored
Merge pull request #3779 from nvnieuwk/feat/topics
Add `topics` to the template + update linting
2 parents 24d2916 + 5e39aa4 commit da4cb3f

File tree

11 files changed

+299
-118
lines changed

11 files changed

+299
-118
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
- ignore files in gitignore also for pipeline_if_empty_null lint test ([#3722](https://github.com/nf-core/tools/pull/3722))
5151
- do not check pytest_modules.yml file, deprecating ([#3748](https://github.com/nf-core/tools/pull/3748))
5252
- Use the org from the .nf-core.yml when linting manifest name and homePage. ([#3767](https://github.com/nf-core/tools/pull/3767))
53+
- Add `topics` to the template + update linting ([#3779](https://github.com/nf-core/tools/pull/3779))
5354
- Use the org from .nf-core.yml when linting multiqc_config report_comment ([#3800](https://github.com/nf-core/tools/pull/3800))
5455
- Linting of patched subworkflows ([#3755](https://github.com/nf-core/tools/pull/3755))
5556
- Add link to modules and subworkflows linting error docs ([#3818](https://github.com/nf-core/tools/pull/3818))

nf_core/components/create.py

Lines changed: 30 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -528,21 +528,31 @@ def generate_meta_yml_file(self) -> None:
528528
with open(self.file_paths["meta.yml"]) as fh:
529529
meta_yml: ruamel.yaml.comments.CommentedMap = yaml.load(fh)
530530

531-
versions: dict[str, list[dict[str, dict]]] = {
531+
versions: dict[str, list | dict] = {
532+
f"versions_{self.component}": [
533+
[
534+
{"${task.process}": {"type": "string", "description": "The name of the process"}},
535+
{f"{self.component}": {"type": "string", "description": "The name of the tool"}},
536+
{
537+
f"{self.component} --version": {"type": "string", "description": "The version of the tool"},
538+
},
539+
]
540+
]
541+
}
542+
543+
versions_topic: dict[str, list | dict] = {
532544
"versions": [
533-
{
534-
"versions.yml": {
535-
"type": "file",
536-
"description": "File containing software versions",
537-
"pattern": "versions.yml",
538-
"ontologies": [
539-
ruamel.yaml.comments.CommentedMap({"edam": "http://edamontology.org/format_3750"})
540-
],
541-
}
542-
}
545+
[
546+
{"process": {"type": "string", "description": "The process the versions were collected from"}},
547+
{
548+
"tool": {"type": "string", "description": "The tool name the version was collected for"},
549+
},
550+
{
551+
"version": {"type": "string", "description": "The version of the tool"},
552+
},
553+
]
543554
]
544555
}
545-
versions["versions"][0]["versions.yml"]["ontologies"][0].yaml_add_eol_comment("YAML", "edam")
546556

547557
if self.not_empty_template:
548558
meta_yml.yaml_set_comment_before_after_key(
@@ -557,8 +567,11 @@ def generate_meta_yml_file(self) -> None:
557567
meta_yml["output"].yaml_set_start_comment(
558568
"### TODO nf-core: Add a description of all of the variables used as output", indent=2
559569
)
570+
meta_yml["topics"].yaml_set_start_comment(
571+
"### TODO nf-core: Add a description of all of the variables used as topics", indent=2
572+
)
560573

561-
if hasattr(self, "inputs"):
574+
if hasattr(self, "inputs") and len(self.inputs) > 0:
562575
inputs_array: list[dict | list[dict]] = []
563576
for i, (input_name, ontologies) in enumerate(self.inputs.items()):
564577
channel_entry: dict[str, dict] = {
@@ -607,7 +620,7 @@ def generate_meta_yml_file(self) -> None:
607620
meta_yml["input"][0]["bam"]["ontologies"][1].yaml_add_eol_comment("CRAM", "edam")
608621
meta_yml["input"][0]["bam"]["ontologies"][2].yaml_add_eol_comment("SAM", "edam")
609622

610-
if hasattr(self, "outputs"):
623+
if hasattr(self, "outputs") and len(self.outputs) > 0:
611624
outputs_dict: dict[str, list | dict] = {}
612625
for i, (output_name, ontologies) in enumerate(self.outputs.items()):
613626
channel_contents: list[list[dict] | dict] = []
@@ -668,6 +681,8 @@ def generate_meta_yml_file(self) -> None:
668681
meta_yml["output"]["bam"][0]["*.bam"]["ontologies"][2].yaml_add_eol_comment("SAM", "edam")
669682
meta_yml["output"].update(versions)
670683

684+
meta_yml["topics"] = versions_topic
685+
671686
else:
672687
input_entry: list[dict] = [
673688
{"input": {"type": "file", "description": "", "pattern": "", "ontologies": [{"edam": ""}]}}
@@ -690,6 +705,7 @@ def generate_meta_yml_file(self) -> None:
690705
meta_yml["input"] = input_entry
691706
meta_yml["output"] = {"output": output_entry}
692707
meta_yml["output"].update(versions)
708+
meta_yml["topics"] = versions_topic
693709

694710
with open(self.file_paths["meta.yml"], "w") as fh:
695711
yaml.dump(meta_yml, fh)

nf_core/components/nfcore_component.py

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,7 @@ def get_outputs_from_main_nf(self):
253253
return outputs
254254
output_data = data.split("output:")[1].split("when:")[0]
255255
regex_emit = r"emit:\s*([^)\s,]+)"
256-
regex_elements = r"\b(val|path|env|stdout)\s*(\(([^)]+)\)|\s*([^)\s,]+))"
256+
regex_elements = r"\b(val|path|env|stdout|eval)\s*(\(([^)]+)\)|\s*([^)\s,]+))"
257257
for line in output_data.split("\n"):
258258
match_emit = re.search(regex_emit, line)
259259
matches_elements = re.finditer(regex_elements, line)
@@ -294,3 +294,41 @@ def get_outputs_from_main_nf(self):
294294
pass
295295
log.debug(f"Found {len(outputs)} outputs in {self.main_nf}")
296296
self.outputs = outputs
297+
298+
def get_topics_from_main_nf(self) -> None:
    """Collect the topics declared in the ``output:`` section of ``main.nf``.

    The file at ``self.main_nf`` is always read. For components whose
    ``component_type`` is ``"modules"``, ``self.topics`` is set to a mapping
    from each topic name (the token following ``topic:``) to a list holding
    the channel elements of the first output line that declares that topic.
    Elements are stored as placeholder entries keyed ``value1``, ``value2``,
    ... in order of appearance: a single element is stored directly, several
    elements are stored as one nested list. For any other component type
    ``self.topics`` is not assigned.
    """
    with open(self.main_nf) as handle:
        contents = handle.read()

    if self.component_type != "modules":
        return

    found: dict[str, list[dict[str, dict] | list[dict[str, dict[str, str]]]]] = {}

    # Topics can only be declared inside the process "output:" block.
    if "output:" not in contents:
        log.debug(f"Could not find any outputs in {self.main_nf}")
        self.topics = found
        return

    topic_pattern = r"topic:\s*([^)\s,]+)"
    element_pattern = r"\b(val|path|env|stdout|eval)\s*(\(([^)]+)\)|\s*([^)\s,]+))"

    # Only the text between "output:" and the following "when:" is inspected.
    output_section = contents.split("output:")[1].split("when:")[0]
    for raw_line in output_section.split("\n"):
        topic_hit = re.search(topic_pattern, raw_line)
        if not topic_hit:
            continue
        name = topic_hit.group(1)
        # Only the first line declaring a given topic describes its structure.
        if name in found:
            continue
        found[name] = []

        # One placeholder per channel element; numbering follows match order.
        elements: list[dict[str, dict]] = [
            {f"value{position}": {}}
            for position, element in enumerate(re.finditer(element_pattern, raw_line), start=1)
            if element.group(3) or element.group(4)
        ]
        if len(elements) == 1:
            found[name].append(elements[0])
        elif len(elements) > 1:
            found[name].append(elements)

    log.debug(f"Found {len(found)} topics in {self.main_nf}")
    self.topics = found

nf_core/module-template/main.nf

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,12 @@ process {{ component_name_underscore|upper }} {
6565
{{ 'tuple val(meta), path("*")' if has_meta else 'path "*"' }}, emit: output
6666
{%- endif %}
6767
{%- endif %}
68-
path "versions.yml" , emit: versions
68+
{% if not_empty_template -%}
69+
// TODO nf-core: Update the command here to obtain the version number of the software used in this module
70+
// TODO nf-core: If multiple software packages are used in this module, all MUST be added here
71+
// by copying the line below and replacing the current tool with the extra tool(s)
72+
{%- endif %}
73+
tuple val("${task.process}"), val('{{ component }}'), eval("{{ component }} --version"), topic: versions, emit: versions_{{ component }}
6974

7075
when:
7176
task.ext.when == null || task.ext.when
@@ -111,11 +116,6 @@ process {{ component_name_underscore|upper }} {
111116
$bam
112117
{%- endif %}
113118
{%- endif %}
114-
115-
cat <<-END_VERSIONS > versions.yml
116-
"${task.process}":
117-
{{ component }}: \$({{ component }} --version)
118-
END_VERSIONS
119119
"""
120120

121121
stub:
@@ -146,10 +146,5 @@ process {{ component_name_underscore|upper }} {
146146
touch ${prefix}.bam
147147
{%- endif %}
148148
{%- endif %}
149-
150-
cat <<-END_VERSIONS > versions.yml
151-
"${task.process}":
152-
{{ component }}: \$({{ component }} --version)
153-
END_VERSIONS
154149
"""
155150
}

nf_core/module-template/meta.yml

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -46,13 +46,28 @@ output:
4646
- edam: "http://edamontology.org/format_2572" # BAM
4747
- edam: "http://edamontology.org/format_2573" # CRAM
4848
- edam: "http://edamontology.org/format_3462" # SAM
49+
versions_{{ component }}:
50+
- - "${task.process}":
51+
type: string
52+
description: The name of the process
53+
- "{{ component }}":
54+
type: string
55+
description: The name of the tool
56+
- "{{ component }} --version":
57+
type: string
58+
description: The version of the tool
59+
60+
topics:
4961
versions:
50-
- "versions.yml":
51-
type: file
52-
description: File containing software versions
53-
pattern: "versions.yml"
54-
ontologies:
55-
- edam: "http://edamontology.org/format_3750" # YAML
62+
- - process:
63+
type: string
64+
description: The process the versions were collected from
65+
- tool:
66+
type: string
67+
description: The tool name the version was collected for
68+
- version:
69+
type: string
70+
description: The version of the tool
5671

5772
authors:
5873
- "{{ author }}"

nf_core/modules/lint/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,7 @@ def lint_module(
234234
if local:
235235
mod.get_inputs_from_main_nf()
236236
mod.get_outputs_from_main_nf()
237+
mod.get_topics_from_main_nf()
237238
# Update meta.yml file if requested
238239
if self.fix and mod.meta_yml is not None:
239240
self.update_meta_yml_file(mod)
@@ -260,6 +261,7 @@ def lint_module(
260261
else:
261262
mod.get_inputs_from_main_nf()
262263
mod.get_outputs_from_main_nf()
264+
mod.get_topics_from_main_nf()
263265
# Update meta.yml file if requested
264266
if self.fix:
265267
self.update_meta_yml_file(mod)

nf_core/modules/lint/main_nf.py

Lines changed: 68 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -36,14 +36,15 @@ def main_nf(
3636
* The module has a process label and it is among
3737
the standard ones.
3838
* If a ``meta`` map is defined as one of the modules
39-
inputs it should be defined as one of the outputs,
39+
inputs it should be defined as one of the emits,
4040
and be correctly configured in the ``saveAs`` function.
4141
* The module script section should contain definitions
4242
of ``software`` and ``prefix``
4343
"""
4444

4545
inputs: list[str] = []
46-
outputs: list[str] = []
46+
emits: list[str] = []
47+
topics: list[str] = []
4748

4849
# Check if we have a patch file affecting the 'main.nf' file
4950
# otherwise read the lines directly from the module
@@ -132,8 +133,9 @@ def main_nf(
132133
line = joint_tuple
133134
inputs.extend(_parse_input(module, line))
134135
if state == "output" and not _is_empty(line):
135-
outputs += _parse_output(module, line)
136-
outputs = list(set(outputs)) # remove duplicate 'meta's
136+
emits += _parse_output_emits(module, line)
137+
emits = list(set(emits)) # remove duplicate 'meta's
138+
topics += _parse_output_topics(module, line)
137139
if state == "when" and not _is_empty(line):
138140
when_lines.append(line)
139141
if state == "script" and not _is_empty(line):
@@ -144,7 +146,7 @@ def main_nf(
144146
exec_lines.append(line)
145147

146148
# Check that we have required sections
147-
if not len(outputs):
149+
if not len(emits):
148150
module.failed.append(("main_nf", "main_nf_script_outputs", "No process 'output' block found", module.main_nf))
149151
else:
150152
module.passed.append(("main_nf", "main_nf_script_outputs", "Process 'output' block found", module.main_nf))
@@ -192,8 +194,8 @@ def main_nf(
192194
if inputs:
193195
if "meta" in inputs:
194196
module.has_meta = True
195-
if outputs:
196-
if "meta" in outputs:
197+
if emits:
198+
if "meta" in emits:
197199
module.passed.append(
198200
(
199201
"main_nf",
@@ -213,22 +215,43 @@ def main_nf(
213215
)
214216

215217
# Check that a software version is emitted
216-
if outputs:
217-
if "versions" in outputs:
218+
if topics:
219+
if "versions" in topics:
218220
module.passed.append(
219-
("main_nf", "main_nf_version_emitted", "Module emits software version", module.main_nf)
221+
("main_nf", "main_nf_version_topic", "Module emits software versions as topic", module.main_nf)
220222
)
221223
else:
224+
module.warned.append(
225+
("main_nf", "main_nf_version_topic", "Module does not emit software versions as topic", module.main_nf)
226+
)
227+
228+
if emits:
229+
topic_versions_amount = sum(1 for t in topics if t == "versions")
230+
emit_versions_amount = sum(1 for e in emits if e.startswith("versions"))
231+
if topic_versions_amount == emit_versions_amount:
232+
module.passed.append(
233+
("main_nf", "main_nf_version_emit", "Module emits each software version", module.main_nf)
234+
)
235+
elif "versions" in emits:
222236
module.warned.append(
223237
(
224238
"main_nf",
225-
"main_nf_version_emitted",
226-
"Module does not emit software version",
239+
"main_nf_version_emit",
240+
"Module emits software versions YAML, please update this to topics output",
241+
module.main_nf,
242+
)
243+
)
244+
else:
245+
module.failed.append(
246+
(
247+
"main_nf",
248+
"main_nf_version_emit",
249+
"Module does not have an `emit:` and `topic:` for each software version",
227250
module.main_nf,
228251
)
229252
)
230253

231-
return inputs, outputs
254+
return inputs, emits
232255

233256

234257
def check_script_section(self, lines):
@@ -238,14 +261,6 @@ def check_script_section(self, lines):
238261
"""
239262
script = "".join(lines)
240263

241-
# check that process name is used for `versions.yml`
242-
if re.search(r"\$\{\s*task\.process\s*\}", script):
243-
self.passed.append(("main_nf", "main_nf_version_script", "Process name used for versions.yml", self.main_nf))
244-
else:
245-
self.warned.append(
246-
("main_nf", "main_nf_version_script", "Process name not used for versions.yml", self.main_nf)
247-
)
248-
249264
# check for prefix (only if module has a meta map as input)
250265
if self.has_meta:
251266
if re.search(r"\s*prefix\s*=\s*task.ext.prefix", script):
@@ -705,16 +720,43 @@ def _parse_input(self, line_raw):
705720
return inputs
706721

707722

708-
def _parse_output(self, line):
723+
def _parse_output_emits(self, line: str) -> list[str]:
709724
output = []
710725
if "meta" in line:
711726
output.append("meta")
712-
if "emit:" not in line:
713-
self.failed.append(("main_nf", "missing_emit", f"Missing emit statement: {line.strip()}", self.main_nf))
727+
emit_regex = re.search(r"^.*emit:\s*([^,\s]*)", line)
728+
if not emit_regex:
729+
self.failed.append(("missing_emit", f"Missing emit statement: {line.strip()}", self.main_nf))
714730
else:
715-
output.append(line.split("emit:")[1].strip())
716-
self.passed.append(("main_nf", "missing_emit", f"Emit statement found: {line.strip()}", self.main_nf))
731+
output.append(emit_regex.group(1).strip())
732+
return output
733+
717734

735+
def _parse_output_topics(self, line: str) -> list[str]:
736+
output = []
737+
if "meta" in line:
738+
output.append("meta")
739+
topic_regex = re.search(r"^.*topic:\s*([^,\s]*)", line)
740+
if topic_regex:
741+
topic_name = topic_regex.group(1).strip()
742+
output.append(topic_name)
743+
if topic_name == "versions":
744+
if not re.search(r'tuple\s+val\("\${\s*task\.process\s*}"\),\s*val\(.*\),\s*eval\(.*\)', line):
745+
self.failed.append(
746+
(
747+
"wrong_version_output",
748+
'Versions topic output is not correctly formatted, expected `tuple val("${task.process}"), val(\'<tool>\'), eval("<version_command>")`',
749+
self.main_nf,
750+
)
751+
)
752+
if not re.search(r"emit:\s*versions_[\d\w]+", line):
753+
self.failed.append(
754+
(
755+
"wrong_version_emit",
756+
"Version emit should follow the format `versions_<tool_or_package>`, e.g.: `versions_samtools`, `versions_gatk4`",
757+
self.main_nf,
758+
)
759+
)
718760
return output
719761

720762

0 commit comments

Comments
 (0)