Skip to content

Commit a1e5119

Browse files
Merge branch 'parabricks_rnafq2bam_rnaseq_updates' of github.com:gburnett-nvidia/modules into parabricks_rnafq2bam_rnaseq_updates
2 parents 9d8bf81 + a824e46 commit a1e5119

File tree

14 files changed

+453
-227
lines changed

14 files changed

+453
-227
lines changed

modules/nf-core/quilt/quilt/main.nf

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ process QUILT_QUILT {
1616
tuple val(meta), path("*.vcf.gz.tbi") , emit: tbi , optional: true
1717
tuple val(meta), path("RData", type: "dir"), emit: rdata, optional: true
1818
tuple val(meta), path("plots", type: "dir"), emit: plots, optional: true
19-
path "versions.yml" , emit: versions
19+
tuple val("${task.process}"), val('r-quilt'), eval('Rscript -e "cat(as.character(packageVersion(\'QUILT\')))"'), topic: versions, emit: versions_r_quilt
20+
tuple val("${task.process}"), val('r-base'), eval('R --version | sed "1!d; s/.*version //; s/ .*//"'), topic: versions, emit: versions_r_base
2021

2122
when:
2223
task.ext.when == null || task.ext.when
@@ -70,13 +71,6 @@ process QUILT_QUILT {
7071
--reference_legend_file=${reference_legend_file} \\
7172
--output_filename=${prefix}.${suffix} \\
7273
${args}
73-
74-
75-
cat <<-END_VERSIONS > versions.yml
76-
"${task.process}":
77-
r-base: \$(Rscript -e "cat(strsplit(R.version[['version.string']], ' ')[[1]][3])")
78-
r-quilt: \$(Rscript -e "cat(as.character(utils::packageVersion(\\"QUILT\\")))")
79-
END_VERSIONS
8074
"""
8175

8276
stub:
@@ -109,11 +103,5 @@ process QUILT_QUILT {
109103
done
110104
done
111105
fi
112-
113-
cat <<-END_VERSIONS > versions.yml
114-
"${task.process}":
115-
r-base: \$(Rscript -e "cat(strsplit(R.version[['version.string']], ' ')[[1]][3])")
116-
r-quilt: \$(Rscript -e "cat(as.character(utils::packageVersion(\\"QUILT\\")))")
117-
END_VERSIONS
118106
"""
119107
}

modules/nf-core/quilt/quilt/meta.yml

Lines changed: 90 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
name: "quilt_quilt"
2-
description: QUILT is an R and C++ program for rapid genotype imputation from low-coverage
3-
sequence using a large reference panel.
2+
description: QUILT is an R and C++ program for rapid genotype imputation from
3+
low-coverage sequence using a large reference panel.
44
keywords:
55
- imputation
66
- low-coverage
@@ -15,7 +15,8 @@ tools:
1515
documentation: "https://github.com/rwdavies/quilt"
1616
tool_dev_url: "https://github.com/rwdavies/quilt"
1717
doi: "10.1038/s41588-021-00877-0"
18-
licence: ["GPL v3"]
18+
licence:
19+
- "GPL v3"
1920
identifier: ""
2021
input:
2122
- - meta:
@@ -35,86 +36,89 @@ input:
3536
ontologies: []
3637
- bamlist:
3738
type: file
38-
description: (Optional) File with list of BAM/CRAM files to impute. One file
39-
per line.
39+
description: (Optional) File with list of BAM/CRAM files to impute. One
40+
file per line.
4041
pattern: "*.{txt}"
4142
ontologies: []
4243
- samplename:
4344
type: file
44-
description: (Optional) File with list of samples names in the same order as
45-
in bamlist to impute. One file per line.
45+
description: (Optional) File with list of sample names in the same order
46+
as in bamlist to impute. One file per line.
4647
pattern: "*.{txt}"
4748
ontologies: []
4849
- reference_haplotype_file:
4950
type: file
50-
description: (Mandatory) Reference haplotype file in IMPUTE format (file with
51-
no header and no rownames, one row per SNP, one column per reference haplotype,
52-
space separated, values must be 0 or 1)
51+
description: (Mandatory) Reference haplotype file in IMPUTE format (file
52+
with no header and no rownames, one row per SNP, one column per
53+
reference haplotype, space separated, values must be 0 or 1)
5354
pattern: "*.{hap.gz}"
5455
ontologies: []
5556
- reference_legend_file:
5657
type: file
57-
description: (Mandatory) Reference haplotype legend file in IMPUTE format (file
58-
with one row per SNP, and a header including position for the physical position
59-
in 1 based coordinates, a0 for the reference allele, and a1 for the alternate
60-
allele).
58+
description: (Mandatory) Reference haplotype legend file in IMPUTE format
59+
(file with one row per SNP, and a header including position for the
60+
physical position in 1 based coordinates, a0 for the reference allele,
61+
and a1 for the alternate allele).
6162
pattern: "*.{legend.gz}"
6263
ontologies: []
6364
- posfile:
6465
type: file
65-
description: (Optional) File with positions of where to impute, lining up one-to-one
66-
with genfile. File is tab separated with no header, one row per SNP, with
67-
col 1 = chromosome, col 2 = physical position (sorted from smallest to largest),
68-
col 3 = reference base, col 4 = alternate base. Bases are capitalized.
66+
description: (Optional) File with positions of where to impute, lining up
67+
one-to-one with genfile. File is tab separated with no header, one row
68+
per SNP, with col 1 = chromosome, col 2 = physical position (sorted from
69+
smallest to largest), col 3 = reference base, col 4 = alternate base.
70+
Bases are capitalized.
6971
pattern: "*.{txt}"
7072
ontologies: []
7173
- phasefile:
7274
type: file
73-
description: (Optional) File with truth phasing results. Supersedes genfile
74-
if both options given. File has a header row with a name for each sample,
75-
matching what is found in the bam file. Each subject is then a tab separated
76-
column, with 0 = ref and 1 = alt, separated by a vertical bar |, e.g. 0|0
77-
or 0|1. Note therefore this file has one more row than posfile which has no
78-
header.
75+
description: (Optional) File with truth phasing results. Supersedes
76+
genfile if both options given. File has a header row with a name for
77+
each sample, matching what is found in the bam file. Each subject is
78+
then a tab separated column, with 0 = ref and 1 = alt, separated by a
79+
vertical bar |, e.g. 0|0 or 0|1. Note therefore this file has one more
80+
row than posfile which has no header.
7981
pattern: "*.{txt}"
8082
ontologies: []
8183
- genfile:
8284
type: file
83-
description: (Optional) Path to gen file with high coverage results. Empty for
84-
no genfile. If both genfile and phasefile are given, only phasefile is used,
85-
as genfile (unphased genotypes) is derivative to phasefile (phased genotypes).
86-
File has a header row with a name for each sample, matching what is found
87-
in the bam file. Each subject is then a tab seperated column, with 0 = hom
88-
ref, 1 = het, 2 = hom alt and NA indicating missing genotype, with rows corresponding
89-
to rows of the posfile. Note therefore this file has one more row than posfile
90-
which has no header [default \"\"]
85+
description: (Optional) Path to gen file with high coverage results. Empty
86+
for no genfile. If both genfile and phasefile are given, only phasefile
87+
is used, as genfile (unphased genotypes) is derivative to phasefile
88+
(phased genotypes). File has a header row with a name for each sample,
89+
matching what is found in the bam file. Each subject is then a tab
90+
separated column, with 0 = hom ref, 1 = het, 2 = hom alt and NA
91+
indicating missing genotype, with rows corresponding to rows of the
92+
posfile. Note therefore this file has one more row than posfile which
93+
has no header [default \"\"]
9194
pattern: "*.{txt}"
9295
ontologies: []
9396
- chr:
9497
type: string
9598
description: (Mandatory) What chromosome to run. Should match BAM headers.
9699
- regions_start:
97100
type: integer
98-
description: (Mandatory) When running imputation, where to start from. The 1-based
99-
position x is kept if regionStart <= x <= regionEnd.
101+
description: (Mandatory) When running imputation, where to start from. The
102+
1-based position x is kept if regionStart <= x <= regionEnd.
100103
- regions_end:
101104
type: integer
102105
description: (Mandatory) When running imputation, where to stop.
103106
- ngen:
104107
type: integer
105-
description: Number of generations since founding or mixing. Note that the algorithm
106-
is relatively robust to this. Use nGen = 4 * Ne / K if unsure.
108+
description: Number of generations since founding or mixing. Note that the
109+
algorithm is relatively robust to this. Use nGen = 4 * Ne / K if unsure.
107110
- buffer:
108111
type: integer
109-
description: Buffer of region to perform imputation over. So imputation is run
110-
form regionStart-buffer to regionEnd+buffer, and reported for regionStart
111-
to regionEnd, including the bases of regionStart and regionEnd.
112+
description: Buffer of region to perform imputation over. So imputation is
113+
run from regionStart-buffer to regionEnd+buffer, and reported for
114+
regionStart to regionEnd, including the bases of regionStart and
115+
regionEnd.
112116
- genetic_map:
113117
type: file
114-
description: (Optional) File with genetic map information, a file with 3 white-space
115-
delimited entries giving position (1-based), genetic rate map in cM/Mbp, and
116-
genetic map in cM. If no file included, rate is based on physical distance
117-
and expected rate (expRate).
118+
description: (Optional) File with genetic map information, a file with 3
119+
white-space delimited entries giving position (1-based), genetic rate
120+
map in cM/Mbp, and genetic map in cM. If no file included, rate is based
121+
on physical distance and expected rate (expRate).
118122
pattern: "*.{txt,map}{,gz}"
119123
ontologies: []
120124
- - meta2:
@@ -141,8 +145,8 @@ output:
141145
e.g. [ id:'test', single_end:false ]
142146
- "*.vcf.gz":
143147
type: file
144-
description: VCF file with both SNP annotation information and per-sample
145-
genotype information.
148+
description: VCF file with both SNP annotation information and
149+
per-sample genotype information.
146150
pattern: "*.{vcf.gz}"
147151
ontologies: []
148152
tbi:
@@ -162,7 +166,7 @@ output:
162166
description: |
163167
Groovy Map containing sample information
164168
e.g. [ id:'test', single_end:false ]
165-
- "RData":
169+
- RData:
166170
type: directory
167171
description: |
168172
Folder of RData objects generated during the imputation process.
@@ -173,18 +177,51 @@ output:
173177
description: |
174178
Groovy Map containing sample information
175179
e.g. [ id:'test', single_end:false ]
176-
- "plots":
180+
- plots:
177181
type: directory
178182
description: |
179183
Folder of plots generated during the imputation process.
180184
pattern: "plots"
185+
versions_r_quilt:
186+
- - ${task.process}:
187+
type: string
188+
description: The name of the process
189+
- r-quilt:
190+
type: string
191+
description: The name of the tool
192+
- Rscript -e "cat(as.character(packageVersion('QUILT')))":
193+
type: eval
194+
description: The expression to obtain the version of the tool
195+
versions_r_base:
196+
- - ${task.process}:
197+
type: string
198+
description: The name of the process
199+
- r-base:
200+
type: string
201+
description: The name of the tool
202+
- R --version | sed "1!d; s/.*version //; s/ .*//":
203+
type: eval
204+
description: The expression to obtain the version of the tool
205+
topics:
181206
versions:
182-
- versions.yml:
183-
type: file
184-
description: File containing software versions
185-
pattern: "versions.yml"
186-
ontologies:
187-
- edam: http://edamontology.org/format_3750 # YAML
207+
- - ${task.process}:
208+
type: string
209+
description: The name of the process
210+
- r-quilt:
211+
type: string
212+
description: The name of the tool
213+
- Rscript -e "cat(as.character(packageVersion('QUILT')))":
214+
type: eval
215+
description: The expression to obtain the version of the tool
216+
- - ${task.process}:
217+
type: string
218+
description: The name of the process
219+
- r-base:
220+
type: string
221+
description: The name of the tool
222+
- R --version | sed "1!d; s/.*version //; s/ .*//":
223+
type: eval
224+
description: The expression to obtain the version of the tool
188225
authors:
189226
- "@atrigila"
190227
maintainers:

modules/nf-core/quilt/quilt/tests/main.nf.test

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -77,21 +77,21 @@ nextflow_process {
7777
}
7878

7979
then {
80+
assert process.success
8081
def dir = new File(process.out.plots[0][1])
8182
def list = []
8283
dir.eachFileRecurse { file -> list << file.getName() }
8384
assertAll(
84-
{ assert process.success },
8585
{ assert snapshot(
8686
process.out.tbi,
8787
list.sort(),
8888
process.out.rdata,
89-
process.out.versions,
9089
process.out.vcf.collect{ meta, vcf -> [
9190
meta,
9291
path(vcf).vcf.header.getGenotypeSamples().sort(),
9392
path(vcf).vcf.variantsMD5
94-
]}
93+
]},
94+
process.out.findAll { key, val -> key.startsWith('versions') }
9595
).match() }
9696
)
9797
}
@@ -120,17 +120,17 @@ nextflow_process {
120120
}
121121

122122
then {
123+
assert process.success
123124
def dir = new File(process.out.plots[0][1])
124125
def list = []
125126
dir.eachFileRecurse { file -> list << file.getName() }
126127
assertAll(
127-
{ assert process.success },
128128
{ assert snapshot(
129129
process.out.vcf,
130130
process.out.tbi,
131131
list.sort(),
132132
process.out.rdata,
133-
process.out.versions
133+
process.out.findAll { key, val -> key.startsWith('versions') }
134134
).match() }
135135
)
136136
}
@@ -160,17 +160,17 @@ nextflow_process {
160160
}
161161

162162
then {
163+
assert process.success
163164
assertAll(
164-
{ assert process.success },
165165
{ assert snapshot(
166166
process.out.tbi,
167167
process.out.rdata,
168-
process.out.versions,
169168
process.out.vcf.collect{ meta, vcf -> [
170169
meta,
171170
path(vcf).vcf.header.getGenotypeSamples().sort(),
172171
path(vcf).vcf.variantsMD5
173-
]}
172+
]},
173+
process.out.findAll { key, val -> key.startsWith('versions') }
174174
).match() }
175175
)
176176
}
@@ -213,19 +213,20 @@ nextflow_process {
213213
}
214214

215215
then {
216+
assert process.success
216217
assertAll(
217-
{ assert process.success },
218218
{ assert snapshot(
219219
process.out.tbi,
220220
process.out.rdata,
221-
process.out.versions,
222221
process.out.vcf.collect{ meta, vcf -> [
223222
meta,
224223
path(vcf).vcf.header.getGenotypeSamples().sort(),
225224
path(vcf).vcf.variantsMD5
226-
]}
225+
]},
226+
process.out.findAll { key, val -> key.startsWith('versions') }
227227
).match() }
228228
)
229229
}
230230
}
231+
231232
}

0 commit comments

Comments
 (0)