Skip to content

Commit ac136f7

Browse files
authored
Merge pull request #4 from Hexotical/file-ids
Use file ids instead of local paths.
2 parents a03ee89 + 4dccc43 commit ac136f7

File tree

1 file changed

+24
-26
lines changed

1 file changed

+24
-26
lines changed

toil/python/draft.py

Lines changed: 24 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@ def run(self, fileStore):
4242
working_dir=tempDir,
4343
parameters=[os.path.join(tempDir, "FastQCone_script.sh")],
4444
entrypoint="/bin/bash",
45-
user='root',
4645
stderr=True,
4746
demux=True,
4847
volumes={tempDir: {"bind": tempDir}})
@@ -57,7 +56,7 @@ def run(self, fileStore):
5756
fastqc_output_path = os.path.join(os.path.abspath(current_working_dir), 'readsone_fastqc.html')
5857
fileStore.exportFile(output_file_id, f'file://{fastqc_output_path}')
5958

60-
return {"fastqc_output_path": fastqc_output_path}
59+
return {"fastqc": output_file_id}
6160

6261

6362
class FastQCtwoCls(Job):
@@ -89,7 +88,6 @@ def run(self, fileStore):
8988
working_dir=tempDir,
9089
parameters=[os.path.join(tempDir, "FastQCtwo_script.sh")],
9190
entrypoint="/bin/bash",
92-
user='root',
9391
stderr=True,
9492
demux=True,
9593
volumes={tempDir: {"bind": tempDir}})
@@ -104,7 +102,7 @@ def run(self, fileStore):
104102
fastqc_output_path = os.path.join(os.path.abspath(current_working_dir), 'readstwo_fastqc.html')
105103
fileStore.exportFile(output_file_id, f'file://{fastqc_output_path}')
106104

107-
return {"fastqc_output_path": fastqc_output_path}
105+
return {"fastqc": output_file_id}
108106

109107

110108
class SalmonIndexCls(Job):
@@ -141,7 +139,6 @@ def run(self, fileStore):
141139
working_dir=tempDir,
142140
parameters=[os.path.join(tempDir, "SalmonIndex_script.sh")],
143141
entrypoint="/bin/bash",
144-
user='root',
145142
stderr=True,
146143
demux=True,
147144
volumes={tempDir: {"bind": tempDir}})
@@ -157,8 +154,7 @@ def run(self, fileStore):
157154
index_output_path = os.path.join(os.path.abspath(current_working_dir), 'index.tar.gz')
158155
fileStore.exportFile(output_file_id, f'file://{index_output_path}')
159156

160-
161-
return {"index" : index_output_path}
157+
return {"index": output_file_id}
162158

163159

164160
class SalmonAlignQuantCls(Job):
@@ -181,11 +177,9 @@ def run(self, fileStore):
181177
except OSError as e:
182178
if e.errno != errno.EEXIST:
183179
raise
184-
185-
fpath_reads1 = fileStore.readGlobalFile(self.reads1, userPath=os.path.join(tempDir, os.path.basename(self.reads1)))
186180

181+
fpath_reads1 = fileStore.readGlobalFile(self.reads1, userPath=os.path.join(tempDir, os.path.basename(self.reads1)))
187182
fpath_reads2 = fileStore.readGlobalFile(self.reads2, userPath=os.path.join(tempDir, os.path.basename(self.reads2)))
188-
189183
fpath_index = fileStore.readGlobalFile(self.index, userPath=os.path.join(tempDir, os.path.basename(self.index)))
190184

191185

@@ -201,7 +195,6 @@ def run(self, fileStore):
201195
working_dir=tempDir,
202196
parameters=[os.path.join(tempDir, "SalmonAlignQuant_script.sh")],
203197
entrypoint="/bin/bash",
204-
user='root',
205198
stderr=True,
206199
demux=True,
207200
volumes={tempDir: {"bind": tempDir}})
@@ -215,7 +208,7 @@ def run(self, fileStore):
215208
quant_output_path = os.path.join(os.path.abspath(current_working_dir), 'quant.tar.gz')
216209
fileStore.exportFile(output_file_id, f'file://{quant_output_path}')
217210

218-
return {"quant": quant_output_path}
211+
return {"quant": output_file_id}
219212

220213

221214
if __name__ == "__main__":
@@ -225,23 +218,28 @@ def run(self, fileStore):
225218
pkg_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))
226219

227220
with Toil(options) as fileStore:
228-
reads1 = fileStore.importFile('file://' + os.path.join(pkg_root, "test_data/reads_1.fq.gz"))
229-
reads2 = fileStore.importFile('file://' + os.path.join(pkg_root, "test_data/reads_2.fq.gz"))
230-
ref_txome = fileStore.importFile('file://' + os.path.join(pkg_root, "test_data/transcriptome.fa"))
221+
# import all files into the jobstore and retrieve their file ID references
222+
# that way, jobs running remotely can fetch them from the centralized jobstore without sharing a filesystem
223+
reads1_file_id = fileStore.importFile(f'file://{os.path.join(pkg_root, "test_data/reads_1.fq.gz")}')
224+
reads2_file_id = fileStore.importFile(f'file://{os.path.join(pkg_root, "test_data/reads_2.fq.gz")}')
225+
ref_transcriptome_file_id = fileStore.importFile(f'file://{os.path.join(pkg_root, "test_data/transcriptome.fa")}')
231226

232-
FastQCone = FastQConeCls(reads=reads1)
233-
fastqc_output_report_path = FastQCone.rv("fastqc_res")
227+
fastqc_job_1 = FastQConeCls(reads=reads1_file_id) # this is our root job, which runs first
228+
fastqc_output_report_file_id_1 = fastqc_job_1.rv("fastqc") # "rv" == return value
234229

235-
FastQCtwo = FastQCtwoCls(reads=reads2)
236-
FastQCtwo_fastqc_res = FastQCtwo.rv("fastqc_res")
237-
FastQCone.addChild(FastQCtwo)
238-
239-
SalmonIndex = FastQCone.addChild(SalmonIndexCls(ref_txome=ref_txome))
240-
SalmonIndex_index = SalmonIndex.rv("index")
230+
fastqc_job_2 = FastQCtwoCls(reads=reads2_file_id)
231+
fastqc_output_report_file_id_2 = fastqc_job_2.rv("fastqc")
232+
fastqc_job_1.addChild(fastqc_job_2) # fastqc_job_2 will run after our root job
241233

242-
SalmonAlignQuant = FastQCone.addFollowOn(SalmonAlignQuantCls(reads1=reads1, reads2=reads2, index=(SalmonIndex_index)))
243-
SalmonAlignQuant_quant = SalmonAlignQuant.rv("quant")
234+
salmon_index_job = SalmonIndexCls(ref_txome=ref_transcriptome_file_id)
235+
index_file_id = salmon_index_job.rv("index")
236+
fastqc_job_1.addChild(salmon_index_job) # salmon_index_job will run after our root job
237+
238+
salmon_align_quant_job = SalmonAlignQuantCls(reads1=reads1_file_id, reads2=reads2_file_id, index=index_file_id)
239+
fastqc_job_1.addFollowOn(salmon_align_quant_job) # run after this job and all of its children
240+
# we don't use this return value for anything here, but we could
241+
salmon_align_quant_file_id = salmon_align_quant_job.rv("quant")
244242

245-
fileStore.start(FastQCone)
243+
fileStore.start(fastqc_job_1)
246244

247245
#/home/hexotical/bioinformatics-workflows/toil/python/index.tar.gz

0 commit comments

Comments
 (0)