Skip to content

Commit b4489b6

Browse files
committed
decrease output time via chunked read from count hdf5
1 parent a3019a6 commit b4489b6

File tree

1 file changed

+20
-2
lines changed

1 file changed

+20
-2
lines changed

spladder/alt_splice/write.py

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,17 +60,35 @@ def write_events_txt(fn_out_txt, strains, events, fn_counts, event_idx=None, ann
6060
### load data from count hdf5
6161
IN = h5py.File(fn_counts, 'r')
6262

63+
### get count and psi chunks
64+
cs1 = IN['event_counts'].chunks[-1]
65+
cs2 = IN['psi'].chunks[-1]
66+
chunk_idx_event = np.array([0, cs1])
67+
chunk_idx_psi = np.array([0, cs2])
68+
69+
event_counts_chunk = IN['event_counts'][:, :, :cs1]
70+
psi_chunk = IN['psi'][:, :cs2]
71+
6372
for ii,i in enumerate(event_idx):
6473
if verbose and ii > 0 and (ii+1) % 1000 == 0:
6574
print('%i/%i' % (ii+1, event_idx.shape[0]))
6675
fd.write('%s\t%c\t%s.%i\t%s' % (events[i].chr, events[i].strand, events[i].event_type, events[i].id, events[i].gene_name[0]))
6776
if anno_fn is not None:
6877
a_idx = anno_names.index(events[i].gene_name[0])
6978
fd.write('\t%i\t%i' % (anno[a_idx].start, anno[a_idx].stop))
79+
80+
### new count chunk needed?
81+
if i >= chunk_idx_event[1]:
82+
chunk_idx_event += cs1
83+
event_counts_chunk = IN['event_counts'][:, :, chunk_idx_event[0]:chunk_idx_event[1]]
84+
### new psi chunk needed?
85+
if i >= chunk_idx_psi[1]:
86+
chunk_idx_psi += cs2
87+
psi_chunk = IN['psi'][:, chunk_idx_psi[0]:chunk_idx_psi[1]]
7088

7189
ev = events[i]
72-
counts = IN['event_counts'][:, :, i]
73-
psi = IN['psi'][:,i]
90+
counts = event_counts_chunk[:, :, i - chunk_idx_event[0]]
91+
psi = psi_chunk[:, i - chunk_idx_psi[0]]
7492
if ev.event_type == 'exon_skip':
7593
fd.write('\t%i\t%i\t%i\t%i\t%i\t%i' % (ev.exons2[0, 0] + 1, ev.exons2[0, 1], ev.exons2[1, 0] + 1, ev.exons2[1, 1], ev.exons2[2, 0] + 1, ev.exons2[2, 1]))
7694
for j in range(len(strains)):

0 commit comments

Comments
 (0)