Skip to content

Commit a79f70c

Browse files
authored
Rename cellranger outputs so that they are unique (#90)
* Make cellranger output names unique. Since the HCA upload service overwrites files that have the same name (even if the file paths are unique), change the names of the files output by cellranger so that they are unique. * Generalize the task for renaming output files. * Fix comment. * Update pipeline_tools docker tag.
1 parent c276d72 commit a79f70c

File tree

3 files changed

+44
-11
lines changed

3 files changed

+44
-11
lines changed

adapter_pipelines/Optimus/adapter.wdl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ workflow AdapterOptimus {
127127
Int max_cromwell_retries = 0
128128
Boolean add_md5s = false
129129

130-
String pipeline_tools_version = "v0.32.0"
130+
String pipeline_tools_version = "v0.35.0"
131131

132132
call GetInputs as prep {
133133
input:

adapter_pipelines/cellranger/adapter.wdl

Lines changed: 42 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ task GetInputs {
4545
}
4646
}
4747

48-
task RenameFiles {
48+
task RenameFastqFiles {
4949
File r1
5050
File r2
5151
File i1
@@ -68,6 +68,31 @@ task RenameFiles {
6868
}
6969
}
7070

71+
# RenameFiles: move each input file to a new name, pairing the two input arrays by index.
#
# Inputs:
#   file_paths             - files to rename.
#   new_file_names         - destination name for each file; entry i is used for file_paths[i].
#   pipeline_tools_version - tag appended to the
#                            quay.io/humancellatlas/secondary-analysis-pipeline-tools image name.
#
# Output:
#   outputs - new_file_names, declared as Array[File].
#
# The command runs an inline Python script that calls `mv <file> <new name>` once per entry of
# file_paths via subprocess.check_output, so a failing `mv` raises and aborts the script.
# NOTE(review): the script indexes file_names by the position in files, so it assumes both arrays
# have the same length. A shorter new_file_names would raise IndexError; extra names would be
# declared as outputs without a corresponding `mv`. Confirm callers always pass matched arrays.
task RenameFiles {
72+
Array[File] file_paths
73+
Array[String] new_file_names
74+
String pipeline_tools_version
75+
76+
command <<<
77+
python -u <<CODE
78+
import subprocess
79+
80+
files=["${sep='","' file_paths}"]
81+
file_names=["${sep='","' new_file_names}"]
82+
83+
for idx, f in enumerate(files):
84+
subprocess.check_output(['mv', f, file_names[idx]])
85+
86+
CODE
87+
>>>
88+
runtime {
89+
docker: "quay.io/humancellatlas/secondary-analysis-pipeline-tools:" + pipeline_tools_version
90+
}
91+
output {
92+
Array[File] outputs = new_file_names
93+
}
94+
}
95+
7196
task InputsForSubmit {
7297
Array[File] fastqs
7398
Array[Object] other_inputs
@@ -150,7 +175,7 @@ workflow Adapter10xCount {
150175
Int max_cromwell_retries = 0
151176
Boolean add_md5s = false
152177

153-
String pipeline_tools_version = "v0.33.0"
178+
String pipeline_tools_version = "v0.35.0"
154179

155180
call GetInputs {
156181
input:
@@ -166,14 +191,14 @@ workflow Adapter10xCount {
166191
}
167192
168193
# Cellranger code in 10x count wdl requires files to be named a certain way.
169-
# To accommodate that, RenameFiles copies the blue box files into the
194+
# To accommodate that, RenameFastqFiles copies the blue box files into the
170195
# cromwell execution bucket but with the names cellranger expects.
171196
# Putting this in its own task lets us take advantage of automatic localizing
172197
# and delocalizing by Cromwell/JES to actually read and write stuff in buckets.
173198
# TODO: Replace scatter with a for-loop inside of the task to avoid creating a
174199
# VM for each set of files that needs to be renamed
175200
scatter(i in range(length(GetInputs.lanes))) {
176-
call RenameFiles as prep {
201+
call RenameFastqFiles as prep {
177202
input:
178203
r1 = GetInputs.r1_fastq[i],
179204
r2 = GetInputs.r2_fastq[i],
@@ -216,12 +241,23 @@ workflow Adapter10xCount {
216241
pipeline_tools_version = pipeline_tools_version
217242
}
218243

244+
# Rename analysis files so that all the file names are unique. For example, rename
245+
# "${sample_id}/outs/raw_gene_bc_matrices/${reference}/barcodes.tsv" to "raw_barcodes.tsv" so that
246+
# it does not overwrite "${sample_id}/outs/filtered_gene_bc_matrices/${reference}/barcodes.tsv"
247+
# when uploading files
248+
call RenameFiles as output_files {
249+
input:
250+
file_paths = [analysis.raw_barcodes, analysis.raw_genes, analysis.raw_matrix],
251+
new_file_names = ["raw_barcodes.tsv", "raw_genes.tsv", "raw_matrix.mtx"],
252+
pipeline_tools_version = pipeline_tools_version
253+
}
254+
219255
Array[Object] inputs = read_objects(InputsForSubmit.inputs)
220256
221257
call submit_wdl.submit {
222258
input:
223259
inputs = inputs,
224-
outputs = [
260+
outputs = flatten([[
225261
analysis.qc,
226262
analysis.sorted_bam,
227263
analysis.sorted_bam_index,
@@ -230,12 +266,9 @@ workflow Adapter10xCount {
230266
analysis.matrix,
231267
analysis.filtered_gene_h5,
232268
analysis.raw_gene_h5,
233-
analysis.raw_barcodes,
234-
analysis.raw_genes,
235-
analysis.raw_matrix,
236269
analysis.mol_info_h5,
237270
analysis.web_summary
238-
],
271+
], output_files.outputs]),
239272
format_map = format_map,
240273
submit_url = submit_url,
241274
cromwell_url = cromwell_url,

adapter_pipelines/ss2_single_sample/adapter.wdl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ workflow AdapterSmartSeq2SingleCell{
8383
Int max_cromwell_retries = 0
8484
Boolean add_md5s = false
8585

86-
String pipeline_tools_version = "v0.33.0"
86+
String pipeline_tools_version = "v0.35.0"
8787

8888
call GetInputs as prep {
8989
input:

0 commit comments

Comments
 (0)