@@ -45,7 +45,7 @@ task GetInputs {
4545 }
4646}
4747
48- task RenameFiles {
48+ task RenameFastqFiles {
4949 File r1
5050 File r2
5151 File i1
@@ -68,6 +68,31 @@ task RenameFiles {
6868 }
6969}
7070
# Renames each file in `file_paths` to the name at the same index in
# `new_file_names`, placing the result in the task's working directory.
#
# Inputs:
#   file_paths             - files to rename.
#   new_file_names         - target names, positionally matched to file_paths.
#                            Must be the same length and contain no duplicates.
#   pipeline_tools_version - tag of the pipeline-tools docker image to run in.
#
# Outputs:
#   outputs - the renamed files, in the order given by new_file_names.
task RenameFiles {
  Array[File] file_paths
  Array[String] new_file_names
  String pipeline_tools_version

  command <<<
    python -u <<'CODE'
    # The heredoc delimiter is quoted so the shell does not expand "$" or
    # backticks that may appear in the interpolated paths below.
    import shutil

    # NOTE(review): paths/names are spliced into Python string literals, so a
    # double quote or backslash in a name would break this script.
    files = ["${sep='","' file_paths}"]
    file_names = ["${sep='","' new_file_names}"]

    # An empty WDL array interpolates to [""]; treat that as nothing to rename
    # instead of attempting to move the empty path.
    if files == [""] and file_names == [""]:
        files, file_names = [], []

    if len(files) != len(file_names):
        raise ValueError(
            "file_paths has {0} entries but new_file_names has {1}".format(
                len(files), len(file_names)))

    # Two sources mapped to one target would silently overwrite each other.
    if len(set(file_names)) != len(file_names):
        raise ValueError("new_file_names contains duplicate names")

    for source_path, target_name in zip(files, file_names):
        shutil.move(source_path, target_name)

    CODE
  >>>
  runtime {
    docker: "quay.io/humancellatlas/secondary-analysis-pipeline-tools:" + pipeline_tools_version
  }
  output {
    Array[File] outputs = new_file_names
  }
}
95+
7196task InputsForSubmit {
7297 Array [File ] fastqs
7398 Array [Object ] other_inputs
@@ -150,7 +175,7 @@ workflow Adapter10xCount {
150175 Int max_cromwell_retries = 0
151176 Boolean add_md5s = false
152177
153- String pipeline_tools_version = "v0.33 .0"
178+ String pipeline_tools_version = "v0.35 .0"
154179
155180 call GetInputs {
156181 input :
@@ -166,14 +191,14 @@ workflow Adapter10xCount {
166191 }
167192
168193 # Cellranger code in 10x count wdl requires files to be named a certain way.
169- # To accommodate that, RenameFiles copies the blue box files into the
194+ # To accommodate that, RenameFastqFiles copies the blue box files into the
170195 # cromwell execution bucket but with the names cellranger expects.
171196 # Putting this in its own task lets us take advantage of automatic localizing
172197 # and delocalizing by Cromwell/JES to actually read and write stuff in buckets.
173198 # TODO: Replace scatter with a for-loop inside of the task to avoid creating a
174199 # VM for each set of files that needs to be renamed
175200 scatter (i in range (length (GetInputs .lanes ))) {
176- call RenameFiles as prep {
201+ call RenameFastqFiles as prep {
177202 input :
178203 r1 = GetInputs .r1_fastq [i ],
179204 r2 = GetInputs .r2_fastq [i ],
@@ -216,12 +241,23 @@ workflow Adapter10xCount {
216241 pipeline_tools_version = pipeline_tools_version
217242 }
218243
244+ # Rename analysis files so that all the file names are unique. For example, rename
245+ # "${sample_id}/outs/raw_gene_bc_matrices/${reference}/barcodes.tsv" to "raw_barcodes.tsv" so that
246+ # it does not overwrite "${sample_id}/outs/filtered_gene_bc_matrices/${reference}/barcodes.tsv"
247+ # when uploading files
248+ call RenameFiles as output_files {
249+ input :
250+ file_paths = [analysis .raw_barcodes , analysis .raw_genes , analysis .raw_matrix ],
251+ new_file_names = ["raw_barcodes.tsv" , "raw_genes.tsv" , "raw_matrix.mtx" ],
252+ pipeline_tools_version = pipeline_tools_version
253+ }
254+
219255 Array [Object ] inputs = read_objects (InputsForSubmit .inputs )
220256
221257 call submit_wdl .submit {
222258 input :
223259 inputs = inputs ,
224- outputs = [
260+ outputs = flatten ([ [
225261 analysis .qc ,
226262 analysis .sorted_bam ,
227263 analysis .sorted_bam_index ,
@@ -230,12 +266,9 @@ workflow Adapter10xCount {
230266 analysis .matrix ,
231267 analysis .filtered_gene_h5 ,
232268 analysis .raw_gene_h5 ,
233- analysis .raw_barcodes ,
234- analysis .raw_genes ,
235- analysis .raw_matrix ,
236269 analysis .mol_info_h5 ,
237270 analysis .web_summary
238- ],
271+ ], output_files . outputs ]),
239272 format_map = format_map ,
240273 submit_url = submit_url ,
241274 cromwell_url = cromwell_url ,
0 commit comments