|
2 | 2 | * Reference sequence resolution processes |
3 | 3 | * |
4 | 4 | * These processes resolve reference inputs that can be either: |
5 | | - * - Local file paths (validated and normalized) |
6 | | - * - NCBI accessions (fetched via Entrez API) |
| 5 | + * - Local file paths (validated and normalized via *_LOCAL processes) |
| 6 | + * - NCBI accessions (fetched via Entrez API via *_ACCESSION processes) |
7 | 7 | * |
8 | 8 | * Sequences are normalized to contain only valid IUPAC nucleotide characters, |
9 | 9 | * with invalid characters replaced by 'N'. |
10 | 10 | * |
11 | | - * Usage in main.nf: |
12 | | - * RESOLVE_REFSEQ(Channel.value(params.refseq)) |
13 | | - * RESOLVE_REF_GBK(Channel.value(params.ref_gbk)) |
| 11 | + * The workflow must determine which process to call based on whether the input |
| 12 | + * is a local file path or an NCBI accession. This separation ensures proper |
| 13 | + * Nextflow file staging for local files. |
| 14 | + * |
| 15 | + * Usage in workflows: |
| 16 | + * if (file(params.refseq).exists()) { |
| 17 | + * RESOLVE_REFSEQ_LOCAL(Channel.fromPath(params.refseq)) |
| 18 | + * ch_refseq = RESOLVE_REFSEQ_LOCAL.out.refseq |
| 19 | + * } else { |
| 20 | + * RESOLVE_REFSEQ_ACCESSION(Channel.value(params.refseq)) |
| 21 | + * ch_refseq = RESOLVE_REFSEQ_ACCESSION.out.refseq |
| 22 | + * } |
14 | 23 | */ |
15 | 24 |
|
16 | 25 |
|
17 | 26 | /* |
18 | | - * Resolve a FASTA reference from a local path or NCBI accession. |
| 27 | + * Resolve a FASTA reference from a local file path. |
19 | 28 | * |
| 29 | + * The file is properly staged by Nextflow, validated, and normalized. |
20 | 30 | * This process is REQUIRED - if it fails, the pipeline should stop. |
21 | | - * Uses 'finish' error strategy to allow other running processes to complete |
22 | | - * before terminating. |
23 | 31 | */ |
24 | | -process RESOLVE_REFSEQ { |
| 32 | +process RESOLVE_REFSEQ_LOCAL { |
25 | 33 |
|
26 | | - tag "${ref_input}" |
| 34 | + tag "${local_ref.name}" |
27 | 35 |
|
28 | | - // Use standard Nextflow caching (respects -resume) rather than storeDir |
29 | | - // to avoid cross-run cache conflicts when switching references |
30 | 36 | publishDir "${params.results}/reference_assets", mode: 'copy', overwrite: true |
31 | 37 |
|
32 | 38 | errorStrategy { task.attempt < 3 ? 'retry' : 'finish' } |
33 | 39 | maxRetries 2 |
34 | 40 |
|
35 | 41 | input: |
36 | | - val ref_input // Local path string or NCBI accession |
| 42 | + path local_ref |
37 | 43 |
|
38 | 44 | output: |
39 | 45 | path "*.fasta", emit: refseq |
40 | 46 |
|
41 | 47 | script: |
42 | | - // Determine output filename based on input |
43 | | - // For accessions: NC_045512.2.fasta |
44 | | - // For local files: preserve original basename |
45 | | - def output_name = ref_input.contains('/') || ref_input.contains('\\') |
46 | | - ? file(ref_input).baseName + '.fasta' |
47 | | - : ref_input.replaceAll(/[^A-Za-z0-9._-]/, '_') + '.fasta' |
| 48 | + def output_name = local_ref.baseName + '.resolved.fasta' |
| 49 | + """ |
| 50 | + fetch_reference.py fasta "${local_ref}" --output "${output_name}" |
| 51 | + """ |
| 52 | +} |
| 53 | + |
| 54 | + |
| 55 | +/* |
| 56 | + * Resolve a FASTA reference from an NCBI accession. |
| 57 | + * |
| 58 | + * The accession is fetched via Entrez API, validated, and normalized. |
| 59 | + * This process is REQUIRED - if it fails, the pipeline should stop. |
| 60 | + */ |
| 61 | +process RESOLVE_REFSEQ_ACCESSION { |
| 62 | + |
| 63 | + tag "${accession}" |
48 | 64 |
|
| 65 | + publishDir "${params.results}/reference_assets", mode: 'copy', overwrite: true |
| 66 | + |
| 67 | + errorStrategy { task.attempt < 3 ? 'retry' : 'finish' } |
| 68 | + maxRetries 2 |
| 69 | + |
| 70 | + input: |
| 71 | + val accession |
| 72 | + |
| 73 | + output: |
| 74 | + path "*.fasta", emit: refseq |
| 75 | + |
| 76 | + script: |
| 77 | + def output_name = accession.replaceAll(/[^A-Za-z0-9._-]/, '_') + '.fasta' |
49 | 78 | """ |
50 | | - fetch_reference.py fasta "${ref_input}" --output "${output_name}" |
| 79 | + fetch_reference.py fasta "${accession}" --output "${output_name}" |
51 | 80 | """ |
52 | 81 | } |
53 | 82 |
|
54 | 83 |
|
55 | 84 | /* |
56 | | - * Resolve a GenBank reference from a local path or NCBI accession. |
| 85 | + * Resolve a GenBank reference from a local file path. |
57 | 86 | * |
| 87 | + * The file is properly staged by Nextflow, validated, and normalized. |
58 | 88 | * This process is OPTIONAL - if it fails, the pipeline continues without |
59 | | - * variant annotation. Uses 'ignore' error strategy as final fallback. |
| 89 | + * variant annotation. |
60 | 90 | */ |
61 | | -process RESOLVE_REF_GBK { |
| 91 | +process RESOLVE_REF_GBK_LOCAL { |
62 | 92 |
|
63 | | - tag "${ref_input}" |
| 93 | + tag "${local_ref.name}" |
64 | 94 |
|
65 | | - // Use standard Nextflow caching (respects -resume) rather than storeDir |
66 | | - // to avoid cross-run cache conflicts when switching references |
67 | 95 | publishDir "${params.results}/reference_assets", mode: 'copy', overwrite: true |
68 | 96 |
|
69 | 97 | errorStrategy { task.attempt < 3 ? 'retry' : 'ignore' } |
70 | 98 | maxRetries 2 |
71 | 99 |
|
72 | 100 | input: |
73 | | - val ref_input // Local path string or NCBI accession |
| 101 | + path local_ref |
74 | 102 |
|
75 | 103 | output: |
76 | 104 | path "*.gbk", emit: ref_gbk, optional: true |
77 | 105 |
|
78 | 106 | script: |
79 | | - // Determine output filename based on input |
80 | | - // For accessions: NC_045512.2.gbk |
81 | | - // For local files: preserve original basename |
82 | | - def output_name = ref_input.contains('/') || ref_input.contains('\\') |
83 | | - ? file(ref_input).baseName + '.gbk' |
84 | | - : ref_input.replaceAll(/[^A-Za-z0-9._-]/, '_') + '.gbk' |
| 107 | + def output_name = local_ref.baseName + '.resolved.gbk' |
| 108 | + """ |
| 109 | + fetch_reference.py genbank "${local_ref}" --output "${output_name}" |
| 110 | + """ |
| 111 | +} |
85 | 112 |
|
| 113 | + |
| 114 | +/* |
| 115 | + * Resolve a GenBank reference from an NCBI accession. |
| 116 | + * |
| 117 | + * The accession is fetched via Entrez API, validated, and normalized. |
| 118 | + * This process is OPTIONAL - if it fails, the pipeline continues without |
| 119 | + * variant annotation. |
| 120 | + */ |
| 121 | +process RESOLVE_REF_GBK_ACCESSION { |
| 122 | + |
| 123 | + tag "${accession}" |
| 124 | + |
| 125 | + publishDir "${params.results}/reference_assets", mode: 'copy', overwrite: true |
| 126 | + |
| 127 | + errorStrategy { task.attempt < 3 ? 'retry' : 'ignore' } |
| 128 | + maxRetries 2 |
| 129 | + |
| 130 | + input: |
| 131 | + val accession |
| 132 | + |
| 133 | + output: |
| 134 | + path "*.gbk", emit: ref_gbk, optional: true |
| 135 | + |
| 136 | + script: |
| 137 | + def output_name = accession.replaceAll(/[^A-Za-z0-9._-]/, '_') + '.gbk' |
86 | 138 | """ |
87 | | - fetch_reference.py genbank "${ref_input}" --output "${output_name}" |
| 139 | + fetch_reference.py genbank "${accession}" --output "${output_name}" |
88 | 140 | """ |
89 | 141 | } |
0 commit comments