@@ -42,7 +42,6 @@ def run(self, fileStore):
4242 working_dir = tempDir ,
4343 parameters = [os .path .join (tempDir , "FastQCone_script.sh" )],
4444 entrypoint = "/bin/bash" ,
45- user = 'root' ,
4645 stderr = True ,
4746 demux = True ,
4847 volumes = {tempDir : {"bind" : tempDir }})
@@ -57,7 +56,7 @@ def run(self, fileStore):
5756 fastqc_output_path = os .path .join (os .path .abspath (current_working_dir ), 'readsone_fastqc.html' )
5857 fileStore .exportFile (output_file_id , f'file://{ fastqc_output_path } ' )
5958
60- return {"fastqc_output_path " : fastqc_output_path }
59+ return {"fastqc " : output_file_id }
6160
6261
6362class FastQCtwoCls (Job ):
@@ -89,7 +88,6 @@ def run(self, fileStore):
8988 working_dir = tempDir ,
9089 parameters = [os .path .join (tempDir , "FastQCtwo_script.sh" )],
9190 entrypoint = "/bin/bash" ,
92- user = 'root' ,
9391 stderr = True ,
9492 demux = True ,
9593 volumes = {tempDir : {"bind" : tempDir }})
@@ -104,7 +102,7 @@ def run(self, fileStore):
104102 fastqc_output_path = os .path .join (os .path .abspath (current_working_dir ), 'readstwo_fastqc.html' )
105103 fileStore .exportFile (output_file_id , f'file://{ fastqc_output_path } ' )
106104
107- return {"fastqc_output_path " : fastqc_output_path }
105+ return {"fastqc " : output_file_id }
108106
109107
110108class SalmonIndexCls (Job ):
@@ -141,7 +139,6 @@ def run(self, fileStore):
141139 working_dir = tempDir ,
142140 parameters = [os .path .join (tempDir , "SalmonIndex_script.sh" )],
143141 entrypoint = "/bin/bash" ,
144- user = 'root' ,
145142 stderr = True ,
146143 demux = True ,
147144 volumes = {tempDir : {"bind" : tempDir }})
@@ -157,8 +154,7 @@ def run(self, fileStore):
157154 index_output_path = os .path .join (os .path .abspath (current_working_dir ), 'index.tar.gz' )
158155 fileStore .exportFile (output_file_id , f'file://{ index_output_path } ' )
159156
160-
161- return {"index" : index_output_path }
157+ return {"index" : output_file_id }
162158
163159
164160class SalmonAlignQuantCls (Job ):
@@ -181,11 +177,9 @@ def run(self, fileStore):
181177 except OSError as e :
182178 if e .errno != errno .EEXIST :
183179 raise
184-
185- fpath_reads1 = fileStore .readGlobalFile (self .reads1 , userPath = os .path .join (tempDir , os .path .basename (self .reads1 )))
186180
181+ fpath_reads1 = fileStore .readGlobalFile (self .reads1 , userPath = os .path .join (tempDir , os .path .basename (self .reads1 )))
187182 fpath_reads2 = fileStore .readGlobalFile (self .reads2 , userPath = os .path .join (tempDir , os .path .basename (self .reads2 )))
188-
189183 fpath_index = fileStore .readGlobalFile (self .index , userPath = os .path .join (tempDir , os .path .basename (self .index )))
190184
191185
@@ -201,7 +195,6 @@ def run(self, fileStore):
201195 working_dir = tempDir ,
202196 parameters = [os .path .join (tempDir , "SalmonAlignQuant_script.sh" )],
203197 entrypoint = "/bin/bash" ,
204- user = 'root' ,
205198 stderr = True ,
206199 demux = True ,
207200 volumes = {tempDir : {"bind" : tempDir }})
@@ -215,7 +208,7 @@ def run(self, fileStore):
215208 quant_output_path = os .path .join (os .path .abspath (current_working_dir ), 'quant.tar.gz' )
216209 fileStore .exportFile (output_file_id , f'file://{ quant_output_path } ' )
217210
218- return {"quant" : quant_output_path }
211+ return {"quant" : output_file_id }
219212
220213
221214if __name__ == "__main__" :
@@ -225,23 +218,28 @@ def run(self, fileStore):
225218 pkg_root = os .path .abspath (os .path .join (os .path .dirname (__file__ ), '..' , '..' ))
226219
227220 with Toil (options ) as fileStore :
228- reads1 = fileStore .importFile ('file://' + os .path .join (pkg_root , "test_data/reads_1.fq.gz" ))
229- reads2 = fileStore .importFile ('file://' + os .path .join (pkg_root , "test_data/reads_2.fq.gz" ))
230- ref_txome = fileStore .importFile ('file://' + os .path .join (pkg_root , "test_data/transcriptome.fa" ))
221+ # Import every input file into the job store and keep the returned file IDs.
222+ # Jobs running on remote workers can then fetch their inputs from the central job store without needing a shared filesystem.
223+ reads1_file_id = fileStore .importFile (f'file://{ os .path .join (pkg_root , "test_data/reads_1.fq.gz" )} ' )
224+ reads2_file_id = fileStore .importFile (f'file://{ os .path .join (pkg_root , "test_data/reads_2.fq.gz" )} ' )
225+ ref_transcriptome_file_id = fileStore .importFile (f'file://{ os .path .join (pkg_root , "test_data/transcriptome.fa" )} ' )
231226
232- FastQCone = FastQConeCls (reads = reads1 )
233- fastqc_output_report_path = FastQCone .rv ("fastqc_res" )
227+ fastqc_job_1 = FastQConeCls (reads = reads1_file_id ) # this is our root job, which runs first
228+ fastqc_output_report_file_id_1 = fastqc_job_1 .rv ("fastqc" ) # "rv" == return value
234229
235- FastQCtwo = FastQCtwoCls (reads = reads2 )
236- FastQCtwo_fastqc_res = FastQCtwo .rv ("fastqc_res" )
237- FastQCone .addChild (FastQCtwo )
238-
239- SalmonIndex = FastQCone .addChild (SalmonIndexCls (ref_txome = ref_txome ))
240- SalmonIndex_index = SalmonIndex .rv ("index" )
230+ fastqc_job_2 = FastQCtwoCls (reads = reads2_file_id )
231+ fastqc_output_report_file_id_2 = fastqc_job_2 .rv ("fastqc" )
232+ fastqc_job_1 .addChild (fastqc_job_2 ) # fastqc_job_2 will run after our root job
241233
242- SalmonAlignQuant = FastQCone .addFollowOn (SalmonAlignQuantCls (reads1 = reads1 , reads2 = reads2 , index = (SalmonIndex_index )))
243- SalmonAlignQuant_quant = SalmonAlignQuant .rv ("quant" )
234+ salmon_index_job = SalmonIndexCls (ref_txome = ref_transcriptome_file_id )
235+ index_file_id = salmon_index_job .rv ("index" )
236+ fastqc_job_1 .addChild (salmon_index_job ) # salmon_index_job will run after our root job
237+
238+ salmon_align_quant_job = SalmonAlignQuantCls (reads1 = reads1_file_id , reads2 = reads2_file_id , index = index_file_id )
239+ fastqc_job_1 .addFollowOn (salmon_align_quant_job ) # run after this job and all of its children
240+ # the return value requested below is not used any further in this script, but a later job could consume it
241+ salmon_align_quant_file_id = salmon_align_quant_job .rv ("quant" )
244242
245- fileStore .start (FastQCone )
243+ fileStore .start (fastqc_job_1 )
246244
247245#/home/hexotical/bioinformatics-workflows/toil/python/index.tar.gz
0 commit comments