Skip to content

Commit bdc1cf2

Browse files
committed
add method to pull inputs for each Bactopia Tool
1 parent b992539 commit bdc1cf2

File tree

4 files changed

+307
-10
lines changed

4 files changed

+307
-10
lines changed

plugins/nf-bactopia/src/main/nextflow/bactopia/BactopiaExtension.groovy

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ import nextflow.bactopia.BactopiaConfig
3333
import nextflow.bactopia.BactopiaSchema
3434
import nextflow.bactopia.nfschema.SummaryCreator
3535

36+
import static nextflow.bactopia.inputs.BactopiaTools.collectInputs
3637
import static nextflow.bactopia.BactopiaTemplate.getLogColors
3738
import static nextflow.bactopia.BactopiaTemplate.getLogo
3839
import static nextflow.bactopia.nfschema.Common.getLongestKeyLength
@@ -60,6 +61,21 @@ class BactopiaExtension extends PluginExtensionPoint {
6061
config = new BactopiaConfig(session?.config?.navigate('bactopia') as Map, params)
6162
}
6263

64+
65+
//
66+
// Collect Bactopia Tool inputs
67+
//
68+
@Function
public List bactopiaToolInputs(
    String bactopiaDir,
    String extension,
    String includeFile,
    String excludeFile
) {
    // Exposed to pipelines through @Function; all of the work is delegated to
    // the statically imported BactopiaTools.collectInputs, arguments passed
    // through unchanged and in the same order.
    List toolInputs = collectInputs(bactopiaDir, extension, includeFile, excludeFile)
    return toolInputs
}
77+
78+
6379
//
6480
// Groovy Map summarising parameters/workflow options used by the pipeline
6581
//

plugins/nf-bactopia/src/main/nextflow/bactopia/BactopiaUtils.groovy

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,19 +4,22 @@
44
// Modified from NF-Core's template: https://github.com/nf-core/tools
55
package nextflow.bactopia
66

7-
import org.json.JSONArray
8-
import org.json.JSONObject
9-
7+
import groovy.util.logging.Slf4j
108
import java.io.RandomAccessFile
119
import java.util.stream.IntStream
1210
import java.util.zip.GZIPInputStream
11+
import org.json.JSONArray
12+
import org.json.JSONObject
1313
import org.yaml.snakeyaml.Yaml
1414

15-
class Utils {
15+
@Slf4j
16+
class BactopiaUtils {
17+
18+
1619
//
1720
// When running with -profile conda, warn if channels have not been set-up appropriately
1821
//
19-
public static void checkCondaChannels(log) {
22+
public static void checkCondaChannels() {
2023
Yaml parser = new Yaml()
2124
def channels = []
2225
try {
@@ -45,17 +48,19 @@ class Utils {
4548
}
4649
}
4750

51+
4852
//
4953
// Join module args with appropriate spacing
5054
//
5155
public static String joinModuleArgs(args_list) {
    // Space-separate the arguments, then prefix a single space so the result
    // can be appended directly after another token.
    def String joined = args_list.join(' ')
    return " ${joined}"
}
5458

59+
5560
//
5661
// Verify input is a positive integer
5762
//
58-
public static Integer isPositiveInteger(value, name, log) {
63+
public static Integer isPositiveInteger(value, name) {
5964
def error = 0
6065
if (value.getClass() == Integer) {
6166
if (value < 0) {
@@ -74,13 +79,19 @@ class Utils {
7479
return error
7580
}
7681

82+
7783
//
7884
// Verify input file exists
7985
//
8086
public static Boolean fileExists(filename) {
81-
return new File(filename).exists()
87+
if (isLocal(filename)) {
88+
return new File(filename).exists()
89+
}
90+
// For remote files, we assume they exist
91+
return true
8292
}
8393

94+
8495
//
8596
// Check if file is remote (e.g. AWS, Azure, GCP)
8697
//
@@ -90,11 +101,13 @@ class Utils {
90101
}
91102
return true
92103
}
104+
105+
93106
//
94-
// Verify input file exists
107+
// Check if a file is not found
95108
//
96109
public static Integer fileNotFound(filename, parameter, log) {
97-
if (!new File(filename).exists()) {
110+
if (!fileExists(filename)) {
98111
log.error '* --'+ parameter +': Unable to find "' + filename + '", please verify it exists.'.trim()
99112
return 1
100113
}
@@ -105,7 +118,7 @@ class Utils {
105118
//
106119
// Verify input file is GZipped
107120
//
108-
public static Integer fileNotGzipped(filename, parameter, log) {
121+
public static Integer fileNotGzipped(filename, parameter, log) {
109122
// https://github.com/ConnectedPlacesCatapult/TomboloDigitalConnector/blob/master/src/main/java/uk/org/tombolo/importer/ZipUtils.java
110123

111124
if (fileNotFound(filename, parameter, log)) {

plugins/nf-bactopia/src/main/nextflow/bactopia/inputs/Bactopia.groovy

Whitespace-only changes.
Lines changed: 268 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,268 @@
1+
package nextflow.bactopia.inputs
2+
3+
import groovy.util.logging.Slf4j
4+
import java.nio.file.Path
5+
6+
import static nextflow.bactopia.BactopiaUtils.fileExists
7+
8+
@Slf4j
9+
class BactopiaTools {
10+
11+
12+
//
13+
// Collect the input samples from the Bactopia directory to be used by a given Bactopia Tool
14+
//
15+
// Parameters:
//   bactopiaDir - directory whose sub-directories are candidate samples (--bactopia)
//   extension   - input type requested by the Bactopia Tool, passed on to _collectInputs
//   includeFile - optional file of sample names to restrict the analysis to
//   excludeFile - optional file of sample names to leave out of the analysis
// Returns:
//   One entry per usable sample, each being the List produced by _collectInputs.
//   Samples with missing files are reported in the log and left out of the result.
public static List collectInputs(String bactopiaDir, String extension, String includeFile, String excludeFile) {
    def List inclusions = processFOFN(includeFile, true)
    def List exclusions = processFOFN(excludeFile, false)
    // Only restrict the analysis when the include list actually named samples,
    // otherwise every sample directory is a candidate
    def Boolean includeAll = inclusions.isEmpty()
    def List ignoreList = ['.nextflow', 'bactopia-info', 'bactopia-tools', 'work', 'bactopia-runs', 'pipeline_info']

    // Check if bactopiaDir exists, and if so loop through it
    def List samples = []
    def List missing = []
    if (bactopiaDir) {
        Path bactopiaPath = Path.of(bactopiaDir)
        if (bactopiaPath.exists()) {
            // Inside the closure, 'return' skips to the next directory entry
            bactopiaPath.eachFile { item ->
                if (!item.isDirectory()) {
                    return
                }

                def String sample = item.getName()
                if (ignoreList.contains(sample)) {
                    return
                }
                if (!includeAll && !inclusions.contains(sample)) {
                    return
                }
                if (exclusions.contains(sample)) {
                    return
                }
                if (!_isSampleDir(sample, bactopiaDir)) {
                    log.info("${sample} does not appear to be a Bactopia sample, skipping...")
                    return
                }

                // _collectInputs returns [sample] (a String first element) when
                // required files are missing, otherwise a meta Map comes first
                def List inputs = _collectInputs(sample, bactopiaDir, extension)
                if (inputs[0] instanceof String) {
                    missing << sample
                } else {
                    samples << inputs
                }
            }
        } else {
            // NOTE(review): unlike the unset case below, this only logs and then
            // continues with zero samples - confirm that is intended
            log.error("The Bactopia directory ${bactopiaDir} (--bactopia) does not exist.")
        }
    } else {
        log.error("--bactopia is not set.")
        System.exit(1)
    }

    log.info("Found ${samples.size()} samples to process")
    if (missing.size() > 0) {
        log.warn("${missing.size()} samples were excluded due to missing files. They are:")
        for (sample in missing) {
            log.warn(" ${sample}")
        }
    }
    // Give the user a short window to abort before the samples are handed back
    log.info("\nIf this looks wrong, now's your chance to back out (CTRL+C 3 times).")
    log.info("Sleeping for 5 seconds...")
    log.info("--------------------------------------------------------------------")
    sleep(5000)
    return samples
}
71+
72+
73+
//
74+
// Process the include/exclude FOFN files
75+
//
76+
// Parameters:
//   fofn      - path to a text file with one sample per line; only the first
//               tab-separated column of each line is used. May be null/empty.
//   isInclude - true when the file is the include list, false for the exclude
//               list (only affects the wording of the log messages)
// Returns:
//   The sample names read from the file, or an empty List when no file was
//   given or it could not be read.
private static List processFOFN(String fofn, Boolean isInclude) {
    def List samples = []
    if (!fofn) {
        return samples
    }

    // The file is read with java.io.File, so test it the same way. The shared
    // fileExists() helper reports true for non-local paths, which would let
    // such a path through only for eachLine to fail on it.
    def File listing = new File(fofn)
    if (listing.isFile()) {
        listing.eachLine { line ->
            def sample = line.trim().split('\t')[0].trim()
            if (sample) {
                samples << sample
            }
        }
    } else {
        // Say so, rather than silently behaving as if no list had been given
        log.warn "Unable to read the ${isInclude ? 'include' : 'exclude'} file \"${fofn}\", it will be ignored."
    }

    // If samples were found, log the number of samples
    if (samples.size() > 0) {
        if (isInclude) {
            log.info "Including ${samples.size()} samples for analysis"
        } else {
            log.info "Excluding ${samples.size()} samples from the analysis"
        }
    }
    return samples
}
101+
102+
103+
//
104+
// Test if the sample directory is likely to contain Bactopia results
105+
//
106+
private static Boolean _isSampleDir(String sample, String dir) {
    // A directory is treated as a sample when its gather step left a
    // <sample>-meta.tsv behind
    def String metaTsv = "${dir}/${sample}/main/gather/${sample}-meta.tsv"
    return fileExists(metaTsv)
}
109+
110+
111+
//
112+
// Navigate the Bactopia output directory and collect the inputs for a given Bactopia Tool
113+
//
114+
// Parameters:
//   sample    - name of the sample directory inside 'dir'
//   dir       - the Bactopia output directory
//   extension - which inputs to collect: 'illumina_fastq', 'fastq', 'fna_fastq',
//               'fna_faa_gff', 'fna_faa', 'fna_meta', 'blastdb', or a plain
//               extension looked up in PATHS (e.g. 'fna', 'gbk', 'gff', 'faa')
// Returns:
//   [ meta map, [input files], [extra files], [extra files] ] when every
//   required file is found, otherwise the single-element List [sample].
private static List _collectInputs(String sample, String dir, String extension) {
    // Sub-directory of <sample>/main/ in which each kind of file is looked for
    def Map PATHS = [
        blastdb: "annotator",
        fastq: "qc",
        fna: "assembler",
        faa: "annotator",
        gbk: "annotator",
        gff: "annotator",
        meta: "gather"
    ]

    // Set up the paths for each extension
    def String baseDir = "${dir}/${sample}/main/"
    def String se = "${baseDir}/${PATHS['fastq']}/${sample}.fastq.gz"
    def String pe1 = "${baseDir}/${PATHS['fastq']}/${sample}_R1.fastq.gz"
    def String pe2 = "${baseDir}/${PATHS['fastq']}/${sample}_R2.fastq.gz"
    def String fna = "${baseDir}/${PATHS['fna']}/${sample}.fna"
    def String meta = "${baseDir}/${PATHS['meta']}/${sample}-meta.tsv"

    // Check if the SE reads are ONT or Illumina: a NanoPlot report in the QC
    // summary marks the single-end reads as ONT data
    def Boolean ont = fileExists("${baseDir}/${PATHS['fastq']}/summary/${sample}-final_NanoPlot-report.html")

    // Determine the input files required for the given extension
    // NOTE: Remote files will be assumed to exist
    //
    // Return List looks like:
    // [ [id:sample, single_end:true/false, runtype:'illumina'/'ont'], [file1], [file2], ... ]
    // 0 - meta map
    // 1 - input files
    // 2 - extra files
    // 3 - extra files
    if (extension == "illumina_fastq") {
        // Prioritize PE reads first
        if (fileExists(pe1) && fileExists(pe2)) {
            return [[id:sample, single_end:false, runtype:'illumina'], [pe1, pe2], [], []]
        } else if (fileExists(se) && !ont) {
            return [[id:sample, single_end:true, runtype:'illumina'], [se], [], []]
        }
    } else if (extension == 'fastq') {
        if (fileExists(se)) {
            // Both run types return the same four-element shape
            return [[id:sample, single_end:true, runtype:(ont ? 'ont' : 'illumina')], [se], [], []]
        } else if (fileExists(pe1) && fileExists(pe2)) {
            return [[id:sample, single_end:false, runtype:'illumina'], [pe1, pe2], [], []]
        }
    } else if (extension == 'fna_fastq') {
        if (fileExists(se)) {
            def String runtype = ont ? "ont" : "illumina"

            if (fileExists("${fna}.gz")) {
                return [[id:sample, single_end:true, is_compressed:true, runtype:runtype], ["${fna}.gz"], [se], []]
            } else if (fileExists(fna)) {
                return [[id:sample, single_end:true, is_compressed:false, runtype:runtype], [fna], [se], []]
            }
        } else if (fileExists(pe1) && fileExists(pe2)) {
            if (fileExists("${fna}.gz")) {
                return [[id:sample, single_end:false, is_compressed:true, runtype:'illumina'], ["${fna}.gz"], [pe1, pe2], []]
            } else if (fileExists(fna)) {
                return [[id:sample, single_end:false, is_compressed:false, runtype:'illumina'], [fna], [pe1, pe2], []]
            }
        }
    } else if (extension == 'fna_faa_gff') {
        // Default to Bakta; here the fna comes from the annotator as well
        fna = "${baseDir}/${PATHS['faa']}/bakta/${sample}.fna"
        def String faa = "${baseDir}/${PATHS['faa']}/bakta/${sample}.faa"
        def String gff = "${baseDir}/${PATHS['faa']}/bakta/${sample}.gff3"
        if (!fileExists(faa) && !fileExists("${faa}.gz")) {
            // Fall back on Prokka
            fna = "${baseDir}/${PATHS['faa']}/prokka/${sample}.fna"
            faa = "${baseDir}/${PATHS['faa']}/prokka/${sample}.faa"
            gff = "${baseDir}/${PATHS['faa']}/prokka/${sample}.gff"
        }

        if (fileExists("${fna}.gz") && fileExists("${faa}.gz") && fileExists("${gff}.gz")) {
            return [[id:sample, is_compressed:true], ["${fna}.gz"], ["${faa}.gz"], ["${gff}.gz"]]
        } else if (fileExists(fna) && fileExists(faa) && fileExists(gff)) {
            return [[id:sample, is_compressed:false], [fna], [faa], [gff]]
        }
    } else if (extension == 'fna_faa') {
        // Default to Bakta faa
        def String faa = "${baseDir}/${PATHS['faa']}/bakta/${sample}.faa"
        if (!fileExists(faa) && !fileExists("${faa}.gz")) {
            // Fall back on Prokka
            faa = "${baseDir}/${PATHS['faa']}/prokka/${sample}.faa"
        }

        if (fileExists("${fna}.gz") && fileExists("${faa}.gz")) {
            return [[id:sample, is_compressed:true], ["${fna}.gz"], ["${faa}.gz"], []]
        } else if (fileExists(fna) && fileExists(faa)) {
            return [[id:sample, is_compressed:false], [fna], [faa], []]
        }
    } else if (extension == 'fna_meta') {
        // include the meta file
        if (fileExists("${fna}.gz") && fileExists(meta)) {
            return [[id:sample, is_compressed:true], ["${fna}.gz"], [meta], []]
        } else if (fileExists(fna) && fileExists(meta)) {
            return [[id:sample, is_compressed:false], [fna], [meta], []]
        }
    } else if (extension == 'blastdb') {
        // Default to Bakta blastdb
        def String input = "${baseDir}/${PATHS[extension]}/bakta/${sample}-${extension}.tar.gz"
        if (!fileExists(input)) {
            // Fall back on Prokka
            input = "${baseDir}/${PATHS[extension]}/prokka/${sample}-${extension}.tar.gz"
        }

        if (fileExists(input)) {
            return [[id:sample], [input], [], []]
        }
    } else {
        // The remaining are generic 1 to 1 mappings
        def String input = "${baseDir}/${PATHS[extension]}/${sample}.${extension}"
        if (extension == "gbk") {
            // Default to Bakta (gbff)
            input = "${baseDir}/${PATHS[extension]}/bakta/${sample}.gbff"
            if (!fileExists(input) && !fileExists("${input}.gz")) {
                // Fall back on Prokka (gbk)
                input = "${baseDir}/${PATHS[extension]}/prokka/${sample}.${extension}"
            }
        } else if (extension == "gff") {
            // Default to Bakta (gff3)
            input = "${baseDir}/${PATHS[extension]}/bakta/${sample}.gff3"
            if (!fileExists(input) && !fileExists("${input}.gz")) {
                // Fall back on Prokka (gff)
                input = "${baseDir}/${PATHS[extension]}/prokka/${sample}.${extension}"
            }
        } else if (extension == "faa") {
            // Default to Bakta faa
            input = "${baseDir}/${PATHS[extension]}/bakta/${sample}.${extension}"
            if (!fileExists(input) && !fileExists("${input}.gz")) {
                // Fall back on Prokka
                input = "${baseDir}/${PATHS[extension]}/prokka/${sample}.${extension}"
            }
        }

        // Prefer the gzipped copy when both forms are present
        if (fileExists("${input}.gz")) {
            return [[id:sample, is_compressed:true], ["${input}.gz"], [], []]
        } else if (fileExists(input)) {
            return [[id:sample, is_compressed:false], [input], [], []]
        }
    }

    // If we get here, the sample is missing the required files
    return [sample]
}
268+
}

0 commit comments

Comments
 (0)