Skip to content

Commit 8175c05

Browse files
committed
make chunksize configurable
1 parent ecd0fa6 commit 8175c05

File tree

2 files changed

+9
-11
lines changed

2 files changed

+9
-11
lines changed

spladder/merge.py

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -356,9 +356,6 @@ def run_merge(options):
356356
if options.do_prune:
357357
prune_tag = '_pruned'
358358

359-
chunksize = 10
360-
assert chunksize > 0
361-
362359
fn_out = '%s/spladder/genes_graph_conf%i.%s%s.pickle' % (options.outdir , options.confidence, options.merge, prune_tag)
363360
if options.validate_sg:
364361
fn_out_count = '%s/spladder/genes_graph_conf%i.%s%s.validated.count.hdf5' % (options.outdir, options.confidence, options.merge, prune_tag)
@@ -370,25 +367,25 @@ def run_merge(options):
370367
jobinfo = []
371368
PAR = dict()
372369
PAR['options'] = options
373-
levels = int(math.ceil(math.log(len(options.samples), chunksize)))
370+
levels = int(math.ceil(math.log(len(options.samples), options.chunksize)))
374371
for level in range(1, levels + 1):
375372
print('merging files on level %i' % level)
376373
if level == 1:
377374
merge_list = np.array(['%s/spladder/genes_graph_conf%i.%s%s.pickle' % (options.outdir, options.confidence, x, prune_tag) for x in options.samples])
378375
else:
379376
merge_list = np.array(level_files)
380377
level_files = []
381-
for c_idx in range(0, len(merge_list), chunksize):
378+
for c_idx in range(0, len(merge_list), options.chunksize):
382379
if level == levels:
383-
assert(len(merge_list) <= chunksize)
380+
assert len(merge_list) <= options.chunksize, 'chunksize is %i but merge_list has length %i with: %s' % (options.chunksize, len(merge_list), str(merge_list))
384381
fn = fn_out
385382
else:
386-
fn = '%s/spladder/genes_graph_conf%i.%s%s_level%i_chunk%i_%i.pickle' % (options.outdir, options.confidence, options.merge, prune_tag, level, c_idx, min(len(merge_list), c_idx + chunksize))
383+
fn = '%s/spladder/genes_graph_conf%i.%s%s_level%i_chunk%i_%i.pickle' % (options.outdir, options.confidence, options.merge, prune_tag, level, c_idx, min(len(merge_list), c_idx + options.chunksize))
387384
level_files.append(fn)
388385
if os.path.exists(fn):
389386
continue
390-
print('submitting level %i chunk %i to %i' % (level, c_idx, min(len(merge_list), c_idx + chunksize)))
391-
chunk_idx = np.arange(c_idx, min(len(merge_list), c_idx + chunksize))
387+
print('submitting level %i chunk %i to %i' % (level, c_idx, min(len(merge_list), c_idx + options.chunksize)))
388+
chunk_idx = np.arange(c_idx, min(len(merge_list), c_idx + options.chunksize))
392389
PAR['merge_list'] = merge_list[chunk_idx]
393390
PAR['fn_out'] = fn
394391
jobinfo.append(rp.rproc('merge_genes_by_splicegraph', PAR, 20000*level, options.options_rproc, 40*60))
@@ -402,7 +399,7 @@ def run_merge(options):
402399
chunk_end = min(len(merge_list), chunk_end)
403400

404401
if curr_level == max_level:
405-
assert(len(merge_list) <= chunksize)
402+
assert len(merge_list) <= options.chunksize, 'chunksize is %i but merge_list has length %i with: %s' % (options.chunksize, len(merge_list), str(merge_list))
406403
fn = fn_out
407404
else:
408405
fn = '%s/spladder/genes_graph_conf%i.%s%s_level%i_chunk%i_%i.pickle' % (options.outdir, options.confidence, options.merge, prune_tag, curr_level, chunk_start, chunk_end)

spladder/spladder.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,8 @@ def parse_options(argv):
6767
graph.add_argument('-c', '--confidence', dest='confidence', metavar='INT', type=int, help='confidence level (0 lowest to 3 highest) [3]', default=3)
6868
graph.add_argument('-I', '--iterations', dest='insert_intron_iterations', metavar='INT', type=int, help='number of iterations to insert new introns into the graph [5]', default=5)
6969
graph.add_argument('-M', '--merge-strat', dest='merge', metavar='<STRAT>', help='merge strategy, where <STRAT> is one of: single, merge_bams, merge_graphs, merge_all [merge_graphs]', default='merge_graphs')
70-
graph.add_argument('--chunked-merge', dest='chunked_merge', metavar="LEVEL MAX_LEVEL START END", nargs='+', action='append', help='provide infor for external merge with START being 0-based and END non-inclusive', default=[])
70+
graph.add_argument('--chunked-merge', dest='chunked_merge', metavar="LEVEL MAX_LEVEL START END", nargs='+', action='append', help='provide info for external merge with START being 0-based and END non-inclusive', default=[])
71+
graph.add_argument('--chunksize', dest='chunksize', metavar='INT', type=int, help='chunksize for chunked merge [10]', default=10)
7172
graph.add_argument('--insert-ir', dest='insert_ir', action='store_true', help='insert intron retentions [on]', default=True)
7273
graph.add_argument('--no-insert-ir', dest='insert_ir', action='store_false', default=True)
7374
graph.add_argument('--insert-es', dest='insert_es', action='store_true', help='insert cassette exons [on]', default=True)

0 commit comments

Comments
 (0)