Skip to content

Commit f6ee0f5

Browse files
committed
hot fix for incorrect resolution of references
1 parent 413a873 commit f6ee0f5

File tree

5 files changed

+801
-663
lines changed

5 files changed

+801
-663
lines changed

modules/reference.nf

Lines changed: 86 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -2,88 +2,140 @@
22
* Reference sequence resolution processes
33
*
44
* These processes resolve reference inputs that can be either:
5-
* - Local file paths (validated and normalized)
6-
* - NCBI accessions (fetched via Entrez API)
5+
* - Local file paths (validated and normalized via *_LOCAL processes)
6+
* - NCBI accessions (fetched via Entrez API via *_ACCESSION processes)
77
*
88
* Sequences are normalized to contain only valid IUPAC nucleotide characters,
99
* with invalid characters replaced by 'N'.
1010
*
11-
* Usage in main.nf:
12-
* RESOLVE_REFSEQ(Channel.value(params.refseq))
13-
* RESOLVE_REF_GBK(Channel.value(params.ref_gbk))
11+
* The workflow must determine which process to call based on whether the input
12+
* is a local file path or an NCBI accession. This separation ensures proper
13+
* Nextflow file staging for local files.
14+
*
15+
* Usage in workflows:
16+
* if (file(params.refseq).exists()) {
17+
* RESOLVE_REFSEQ_LOCAL(Channel.fromPath(params.refseq))
18+
* ch_refseq = RESOLVE_REFSEQ_LOCAL.out.refseq
19+
* } else {
20+
* RESOLVE_REFSEQ_ACCESSION(Channel.value(params.refseq))
21+
* ch_refseq = RESOLVE_REFSEQ_ACCESSION.out.refseq
22+
* }
1423
*/
1524

1625

1726
/*
18-
* Resolve a FASTA reference from a local path or NCBI accession.
27+
* Resolve a FASTA reference from a local file path.
1928
*
29+
* The file is properly staged by Nextflow, validated, and normalized.
2030
* This process is REQUIRED - if it fails, the pipeline should stop.
21-
* Uses 'finish' error strategy to allow other running processes to complete
22-
* before terminating.
2331
*/
24-
process RESOLVE_REFSEQ {
32+
process RESOLVE_REFSEQ_LOCAL {
2533

26-
tag "${ref_input}"
34+
tag "${local_ref.name}"
2735

28-
// Use standard Nextflow caching (respects -resume) rather than storeDir
29-
// to avoid cross-run cache conflicts when switching references
3036
publishDir "${params.results}/reference_assets", mode: 'copy', overwrite: true
3137

3238
errorStrategy { task.attempt < 3 ? 'retry' : 'finish' }
3339
maxRetries 2
3440

3541
input:
36-
val ref_input // Local path string or NCBI accession
42+
path local_ref
3743

3844
output:
3945
path "*.fasta", emit: refseq
4046

4147
script:
42-
// Determine output filename based on input
43-
// For accessions: NC_045512.2.fasta
44-
// For local files: preserve original basename
45-
def output_name = ref_input.contains('/') || ref_input.contains('\\')
46-
? file(ref_input).baseName + '.fasta'
47-
: ref_input.replaceAll(/[^A-Za-z0-9._-]/, '_') + '.fasta'
48+
def output_name = local_ref.baseName + '.resolved.fasta'
49+
"""
50+
fetch_reference.py fasta "${local_ref}" --output "${output_name}"
51+
"""
52+
}
53+
54+
55+
/*
56+
* Resolve a FASTA reference from an NCBI accession.
57+
*
58+
* The accession is fetched via Entrez API, validated, and normalized.
59+
* This process is REQUIRED - if it fails, the pipeline should stop.
60+
*/
61+
process RESOLVE_REFSEQ_ACCESSION {
62+
63+
tag "${accession}"
4864

65+
publishDir "${params.results}/reference_assets", mode: 'copy', overwrite: true
66+
67+
errorStrategy { task.attempt < 3 ? 'retry' : 'finish' }
68+
maxRetries 2
69+
70+
input:
71+
val accession
72+
73+
output:
74+
path "*.fasta", emit: refseq
75+
76+
script:
77+
def output_name = accession.replaceAll(/[^A-Za-z0-9._-]/, '_') + '.fasta'
4978
"""
50-
fetch_reference.py fasta "${ref_input}" --output "${output_name}"
79+
fetch_reference.py fasta "${accession}" --output "${output_name}"
5180
"""
5281
}
5382

5483

5584
/*
56-
* Resolve a GenBank reference from a local path or NCBI accession.
85+
* Resolve a GenBank reference from a local file path.
5786
*
87+
* The file is properly staged by Nextflow, validated, and normalized.
5888
* This process is OPTIONAL - if it fails, the pipeline continues without
59-
* variant annotation. Uses 'ignore' error strategy as final fallback.
89+
* variant annotation.
6090
*/
61-
process RESOLVE_REF_GBK {
91+
process RESOLVE_REF_GBK_LOCAL {
6292

63-
tag "${ref_input}"
93+
tag "${local_ref.name}"
6494

65-
// Use standard Nextflow caching (respects -resume) rather than storeDir
66-
// to avoid cross-run cache conflicts when switching references
6795
publishDir "${params.results}/reference_assets", mode: 'copy', overwrite: true
6896

6997
errorStrategy { task.attempt < 3 ? 'retry' : 'ignore' }
7098
maxRetries 2
7199

72100
input:
73-
val ref_input // Local path string or NCBI accession
101+
path local_ref
74102

75103
output:
76104
path "*.gbk", emit: ref_gbk, optional: true
77105

78106
script:
79-
// Determine output filename based on input
80-
// For accessions: NC_045512.2.gbk
81-
// For local files: preserve original basename
82-
def output_name = ref_input.contains('/') || ref_input.contains('\\')
83-
? file(ref_input).baseName + '.gbk'
84-
: ref_input.replaceAll(/[^A-Za-z0-9._-]/, '_') + '.gbk'
107+
def output_name = local_ref.baseName + '.resolved.gbk'
108+
"""
109+
fetch_reference.py genbank "${local_ref}" --output "${output_name}"
110+
"""
111+
}
85112

113+
114+
/*
115+
* Resolve a GenBank reference from an NCBI accession.
116+
*
117+
* The accession is fetched via Entrez API, validated, and normalized.
118+
* This process is OPTIONAL - if it fails, the pipeline continues without
119+
* variant annotation.
120+
*/
121+
process RESOLVE_REF_GBK_ACCESSION {
122+
123+
tag "${accession}"
124+
125+
publishDir "${params.results}/reference_assets", mode: 'copy', overwrite: true
126+
127+
errorStrategy { task.attempt < 3 ? 'retry' : 'ignore' }
128+
maxRetries 2
129+
130+
input:
131+
val accession
132+
133+
output:
134+
path "*.gbk", emit: ref_gbk, optional: true
135+
136+
script:
137+
def output_name = accession.replaceAll(/[^A-Za-z0-9._-]/, '_') + '.gbk'
86138
"""
87-
fetch_reference.py genbank "${ref_input}" --output "${output_name}"
139+
fetch_reference.py genbank "${accession}" --output "${output_name}"
88140
"""
89141
}

0 commit comments

Comments
 (0)