
Commit f14984c

Merge branch 'development'

2 parents 324d45c + 33108fd

38 files changed: +1284, -1288 lines

docs/source/file_formats.rst

Lines changed: 4 additions & 4 deletions
@@ -1,8 +1,8 @@
 File formats
 ============
 
-Input Formats
--------------
+Input Formats -- ``build`` mode
+-------------------------------
 
 Annotation Files
 ^^^^^^^^^^^^^^^^
@@ -26,8 +26,8 @@ have successfully tested SplAdder with the following aligners:
 - `PALMapper`_
 - `TopHat`_
 
-Output Formats
---------------
+Output Formats -- ``build`` mode
+--------------------------------
 SplAdder produces a variety of different output files. Here we will mainly discuss files that are
 aimed at the user and omit intermediate files that mainly necessary for internal processes of
 SplAdder. Most of the latter will be stored in the ``spladder`` subdirectory in the output

setup.cfg

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 2.4.2
+current_version = 2.4.3
 commit = True
 tag = True
 
setup.py

Lines changed: 1 addition & 1 deletion
@@ -45,6 +45,6 @@
     test_suite='tests',
     tests_require=test_requirements,
     url='https://github.com/ratschlab/spladder',
-    version='2.4.2',
+    version='2.4.3',
     zip_safe=False,
 )

spladder/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -4,4 +4,4 @@
 
 __author__ = """Andre Kahles"""
 __email__ = 'andre.kahles@inf.ethz.ch'
-__version__ = '2.4.2'
+__version__ = '2.4.3'
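
Note: the three 2.4.2 -> 2.4.3 edits above (setup.cfg, setup.py, spladder/__init__.py) follow the usual bumpversion pattern: the [bumpversion] section in setup.cfg holds the canonical version, and per-file sections tell the tool which other files to rewrite on a bump. This commit only shows the section header, so the file sections below are an assumption about how such a setup is typically wired, not a copy of the repository's actual config:

[bumpversion]
current_version = 2.4.3
commit = True
tag = True

# Hypothetical file sections (not visible in this diff) that would let a
# single `bumpversion patch` run rewrite all three version strings at once.
[bumpversion:file:setup.py]

[bumpversion:file:spladder/__init__.py]
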

spladder/alt_splice/analyze.py

Lines changed: 39 additions & 39 deletions
@@ -1,6 +1,6 @@
 import sys
 import os
-import scipy as sp
+import numpy as np
 import pickle
 import h5py
 
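
Note: the bulk of this file's changes swap SciPy's top-level NumPy aliases (sp.array, sp.arange, sp.r_, ...) for NumPy itself. SciPy deprecated these re-exports in its 1.x series, so the rename is behavior-preserving. A quick sanity sketch of the equivalences relied on below (values are illustrative only):

import numpy as np

# Each renamed call is plain NumPy; scipy merely re-exported these names.
a = np.array(['exon_skip', 'mutex_exons'], dtype='str')  # as sp.array did
idx = np.arange(4)                                       # as sp.arange did
stacked = np.r_[idx, idx]                                # as sp.r_ did
hits = np.where(stacked >= 2)[0]                         # as sp.where did
assert hits.tolist() == [2, 3, 6, 7]
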
@@ -34,11 +34,11 @@ def _prepare_count_hdf5(options, OUT, event_features, sample_idx=None):
     OUT.create_dataset(name='strains', data=codeUTF8(options.strains))
     feat = OUT.create_group(name='event_features')
     for f in event_features:
-        feat.create_dataset(name=f, data=codeUTF8(sp.array(event_features[f], dtype='str')))
-    OUT.create_dataset(name='gene_names', data=codeUTF8(sp.array([x.name for x in genes], dtype='str')))
-    OUT.create_dataset(name='gene_chr', data=codeUTF8(sp.array([x.chr for x in genes], dtype='str')))
-    OUT.create_dataset(name='gene_strand', data=codeUTF8(sp.array([x.strand for x in genes], dtype='str')))
-    OUT.create_dataset(name='gene_pos', data=sp.array([[x.start, x.stop] for x in genes], dtype='int'))
+        feat.create_dataset(name=f, data=codeUTF8(np.array(event_features[f], dtype='str')))
+    OUT.create_dataset(name='gene_names', data=codeUTF8(np.array([x.name for x in genes], dtype='str')))
+    OUT.create_dataset(name='gene_chr', data=codeUTF8(np.array([x.chr for x in genes], dtype='str')))
+    OUT.create_dataset(name='gene_strand', data=codeUTF8(np.array([x.strand for x in genes], dtype='str')))
+    OUT.create_dataset(name='gene_pos', data=np.array([[x.start, x.stop] for x in genes], dtype='int'))
 
 
 def analyze_events(options, event_type, sample_idx=None):
@@ -90,25 +90,25 @@ def analyze_events(options, event_type, sample_idx=None):
         events_all_strains = options.strains
 
     ### handle case where we did not find any event of this type
-    if sp.sum([x.event_type == event_type for x in events_all]) == 0:
+    if np.sum([x.event_type == event_type for x in events_all]) == 0:
         OUT = h5py.File(fn_out_count, 'w')
         OUT.create_dataset(name='event_counts', data=[0])
         _prepare_count_hdf5(options, OUT, event_features, sample_idx=sample_idx)
         OUT.close()
-        confirmed_idx = sp.array([], dtype='int')
+        confirmed_idx = np.array([], dtype='int')
     else:
         if not options.pyproc:
             if options.merge == 'single':
                 (events_all, counts) = verify_all_events(events_all, sample_idx, options.bam_fnames, event_type, options)
             else:
-                (events_all, counts) = verify_all_events(events_all, sp.arange(len(options.strains)), options.bam_fnames, event_type, options)
-            verified = sp.array([x.verified for x in events_all], dtype='bool')
+                (events_all, counts) = verify_all_events(events_all, np.arange(len(options.strains)), options.bam_fnames, event_type, options)
+            verified = np.array([x.verified for x in events_all], dtype='bool')
             for ev in events_all:
                 ev.verified = []
 
-            psi = sp.empty((counts.shape[0], counts.shape[2]), dtype='float')
-            iso1 = sp.empty((counts.shape[0], counts.shape[2]), dtype='int32')
-            iso2 = sp.empty((counts.shape[0], counts.shape[2]), dtype='int32')
+            psi = np.empty((counts.shape[0], counts.shape[2]), dtype='float')
+            iso1 = np.empty((counts.shape[0], counts.shape[2]), dtype='int32')
+            iso2 = np.empty((counts.shape[0], counts.shape[2]), dtype='int32')
             for i in range(counts.shape[2]):
                 (psi[:, i], iso1[:, i], iso2[:, i]) = compute_psi(counts[:, :, i], event_type, options)
 
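
Note: compute_psi itself is untouched by this commit; conceptually, PSI (percent spliced in) relates the read counts of an event's two isoforms. A simplified sketch of that ratio, assuming iso1 carries the inclusion counts (SplAdder's real compute_psi also applies per-event-type count normalization):

import numpy as np

def psi_sketch(iso1, iso2):
    # Toy PSI: share of reads supporting isoform 1, NaN where neither
    # isoform has coverage. Illustrative only, not SplAdder's compute_psi.
    iso1 = np.asarray(iso1, dtype='float')
    iso2 = np.asarray(iso2, dtype='float')
    total = iso1 + iso2
    with np.errstate(invalid='ignore'):
        return np.where(total > 0, iso1 / total, np.nan)

print(psi_sketch([10, 0, 5], [5, 0, 0]))  # -> approx. [0.6667, nan, 1.0]
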
@@ -117,7 +117,7 @@ def analyze_events(options, event_type, sample_idx=None):
             OUT.create_dataset(name='psi', data=psi, compression='gzip')
             OUT.create_dataset(name='iso1', data=iso1, compression='gzip')
             OUT.create_dataset(name='iso2', data=iso2, compression='gzip')
-            OUT.create_dataset(name='gene_idx', data=sp.array([x.gene_idx for x in events_all], dtype='int'), compression='gzip')
+            OUT.create_dataset(name='gene_idx', data=np.array([x.gene_idx for x in events_all], dtype='int'), compression='gzip')
             OUT.create_dataset(name='verified', data=verified, compression='gzip')
             _prepare_count_hdf5(options, OUT, event_features, sample_idx=sample_idx)
         else:
@@ -126,9 +126,9 @@ def analyze_events(options, event_type, sample_idx=None):
             chunk_size_events = 5000
             chunk_size_strains = 500
             for i in range(0, events_all.shape[0], chunk_size_events):
-                idx_events = sp.arange(i, min(i + chunk_size_events, events_all.shape[0]))
+                idx_events = np.arange(i, min(i + chunk_size_events, events_all.shape[0]))
                 for j in range(0, len(options.strains), chunk_size_strains):
-                    idx_strains = sp.arange(j, min(j + chunk_size_strains, len(options.strains)))
+                    idx_strains = np.arange(j, min(j + chunk_size_strains, len(options.strains)))
                     PAR['ev'] = events_all[idx_events].copy()
                     PAR['strain_idx'] = idx_strains
                     PAR['list_bam'] = options.bam_fnames
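 
Note: the pyproc branch tiles the work into events x strains chunks of at most 5000 x 500 before farming them out. A self-contained sketch of that chunk grid (chunk sizes copied from the hunk above, counts invented):

import numpy as np

def chunk_grid(n_events, n_strains, chunk_events=5000, chunk_strains=500):
    # Yields the same index tiles the loops above iterate over.
    for i in range(0, n_events, chunk_events):
        idx_events = np.arange(i, min(i + chunk_events, n_events))
        for j in range(0, n_strains, chunk_strains):
            idx_strains = np.arange(j, min(j + chunk_strains, n_strains))
            yield idx_events, idx_strains

tiles = list(chunk_grid(12000, 800))
print(len(tiles))  # 6 tiles: 3 event chunks x 2 strain chunks
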
@@ -151,9 +151,9 @@ def analyze_events(options, event_type, sample_idx=None):
             print('Collecting results from chunks ...')
             OUT = h5py.File(fn_out_count, 'w')
             for i in range(0, events_all.shape[0], chunk_size_events):
-                idx_events = sp.arange(i, min(i + chunk_size_events, events_all.shape[0]))
+                idx_events = np.arange(i, min(i + chunk_size_events, events_all.shape[0]))
                 for j in range(0, len(options.strains), chunk_size_strains):
-                    idx_strains = sp.arange(j, min(j + chunk_size_strains, len(options.strains)))
+                    idx_strains = np.arange(j, min(j + chunk_size_strains, len(options.strains)))
                     print('\r%i (%i), %i (%i)' % (i, events_all.shape[0], j, len(options.strains)))
                     out_fn = '%s/event_count_chunks/%s_%i_%i_C%i.pickle' % (options.outdir, event_type, i, j, options.confidence)
                     if not os.path.exists(out_fn):
@@ -166,22 +166,22 @@ def analyze_events(options, event_type, sample_idx=None):
                         verified_ = [x.verified.astype('bool') for x in ev]
                         collect_ids_ = [x.id for x in ev]
                     else:
-                        counts = sp.r_[counts, counts_]
+                        counts = np.r_[counts, counts_]
                         for jj in range(len(ev_)):
-                            verified_[jj] = sp.r_[verified_[jj], ev_[jj].verified]
+                            verified_[jj] = np.r_[verified_[jj], ev_[jj].verified]
                     del counts_
 
-                psi = sp.empty((counts.shape[0], counts.shape[2]), dtype='float')
-                iso1 = sp.empty((counts.shape[0], counts.shape[2]), dtype='int32')
-                iso2 = sp.empty((counts.shape[0], counts.shape[2]), dtype='int32')
+                psi = np.empty((counts.shape[0], counts.shape[2]), dtype='float')
+                iso1 = np.empty((counts.shape[0], counts.shape[2]), dtype='int32')
+                iso2 = np.empty((counts.shape[0], counts.shape[2]), dtype='int32')
                 for j in range(counts.shape[2]):
                     (psi[:, j], iso1[:, j], iso2[:, j]) = compute_psi(counts[:, :, j], event_type, options)
 
                 if i == 0:
                     OUT.create_dataset(name='event_counts', data=counts, maxshape=(len(options.strains), len(event_features[event_type]), None), compression='gzip')
-                    OUT.create_dataset(name='psi', data=sp.atleast_2d(psi), maxshape=(psi.shape[0], None), compression='gzip')
-                    OUT.create_dataset(name='iso1', data=sp.atleast_2d(iso1), maxshape=(iso1.shape[0], None), compression='gzip')
-                    OUT.create_dataset(name='iso2', data=sp.atleast_2d(iso2), maxshape=(iso2.shape[0], None), compression='gzip')
+                    OUT.create_dataset(name='psi', data=np.atleast_2d(psi), maxshape=(psi.shape[0], None), compression='gzip')
+                    OUT.create_dataset(name='iso1', data=np.atleast_2d(iso1), maxshape=(iso1.shape[0], None), compression='gzip')
+                    OUT.create_dataset(name='iso2', data=np.atleast_2d(iso2), maxshape=(iso2.shape[0], None), compression='gzip')
                 else:
                     tmp = OUT['event_counts'].shape
                     OUT['event_counts'].resize((tmp[0], tmp[1], tmp[2] + len(ev)))
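 
Note: the if i == 0 / else branch above is the standard h5py idiom for growing a dataset: create it with maxshape=None along the axis that will grow, then resize before each append. A minimal runnable sketch of the same pattern (file name and shapes invented):

import h5py
import numpy as np

with h5py.File('counts_demo.hdf5', 'w') as out:
    first_chunk = np.zeros((3, 4, 5))
    # None marks the last (event) axis as unlimited, so it can grow later.
    out.create_dataset(name='event_counts', data=first_chunk,
                       maxshape=(3, 4, None), compression='gzip')
    next_chunk = np.ones((3, 4, 2))
    tmp = out['event_counts'].shape
    out['event_counts'].resize((tmp[0], tmp[1], tmp[2] + next_chunk.shape[2]))
    out['event_counts'][:, :, tmp[2]:] = next_chunk
    assert out['event_counts'].shape == (3, 4, 7)
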
@@ -197,33 +197,33 @@ def analyze_events(options, event_type, sample_idx=None):
                     OUT['iso2'][:, tmp[1]:] = iso2
                 verified.extend(verified_)
                 collect_ids.extend(collect_ids_)
-                gene_idx_ = sp.r_[gene_idx_, [x.gene_idx for x in ev]]
+                gene_idx_ = np.r_[gene_idx_, [x.gene_idx for x in ev]]
                 del iso1, iso2, psi, counts, ev, ev_
 
-            verified = sp.array(verified, dtype='bool')
+            verified = np.array(verified, dtype='bool')
 
             assert(events_all.shape[0] == verified.shape[0])
-            assert(sp.all([events_all[e].id for e in range(events_all.shape[0])] == collect_ids))
+            assert(np.all([events_all[e].id for e in range(events_all.shape[0])] == collect_ids))
 
             OUT.create_dataset(name='verified', data=verified, dtype='bool', compression='gzip')
             OUT.create_dataset(name='gene_idx', data=gene_idx_)
             _prepare_count_hdf5(options, OUT, event_features, sample_idx=sample_idx)
 
         ### write more event infos to hdf5
         if event_type == 'exon_skip':
-            event_pos = sp.array([x.exons2.ravel() for x in events_all])
+            event_pos = np.array([x.exons2.ravel() for x in events_all])
         elif event_type == 'intron_retention':
-            event_pos = sp.array([x.exons1.ravel() for x in events_all])
+            event_pos = np.array([x.exons1.ravel() for x in events_all])
         elif event_type in ['alt_3prime', 'alt_5prime']:
-            event_pos = sp.array([unique_rows(sp.c_[x.exons1, x.exons2]).ravel() for x in events_all])
+            event_pos = np.array([unique_rows(np.c_[x.exons1, x.exons2]).ravel() for x in events_all])
         elif event_type == 'mult_exon_skip':
-            event_pos = sp.array([x.exons2[[0, 1, -2, -1], :].ravel() for x in events_all])
+            event_pos = np.array([x.exons2[[0, 1, -2, -1], :].ravel() for x in events_all])
         elif event_type == 'mutex_exons':
-            event_pos = sp.array([sp.c_[x.exons1[0, :], x.exons1[1, :], x.exons2[1, :], x.exons2[2, :]] for x in events_all])
+            event_pos = np.array([np.c_[x.exons1[0, :], x.exons1[1, :], x.exons2[1, :], x.exons2[2, :]] for x in events_all])
 
         OUT.create_dataset(name='event_pos', data=event_pos)
 
-        num_verified = sp.sum(verified, axis=1)
+        num_verified = np.sum(verified, axis=1)
         confirmed = num_verified.min(axis=1)
         OUT.create_dataset(name='num_verified', data=num_verified)
         OUT.create_dataset(name='confirmed', data=confirmed)
@@ -232,7 +232,7 @@ def analyze_events(options, event_type, sample_idx=None):
         #for min_verified = 1:length(options.strains),
         # verified_count(min_verified) = sum([events_all.confirmed] >= min_verified) ;
 
-        confirmed_idx = sp.where(confirmed >= 1)[0]
+        confirmed_idx = np.where(confirmed >= 1)[0]
         if confirmed_idx.shape[0] > 0:
             OUT.create_dataset(name='conf_idx', data=confirmed_idx)
 
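
Note: the two reductions above suggest that verified is a 3-D boolean array of events x samples x verification checks: summing over samples counts how many samples pass each check, and taking the min over checks confirms an event only if its weakest check passes somewhere. A worked toy example under that shape assumption:

import numpy as np

# events x samples x checks (shape inferred from the reductions above)
verified = np.array([[[True, True], [True, False]],    # event 0
                     [[False, True], [False, True]]])  # event 1
num_verified = np.sum(verified, axis=1)  # samples passing each check
confirmed = num_verified.min(axis=1)     # weakest check per event
confirmed_idx = np.where(confirmed >= 1)[0]
print(num_verified.tolist())  # [[2, 1], [0, 2]]
print(confirmed_idx)          # [0]: event 1's first check never passes
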
@@ -275,7 +275,7 @@ def analyze_events(options, event_type, sample_idx=None):
     if isinstance(sample_idx, int):
         sample_idx = [sample_idx]
     elif sample_idx is None:
-        sample_idx = sp.arange(options.strains.shape[0])
+        sample_idx = np.arange(options.strains.shape[0])
 
     if options.output_gff3:
         if os.path.exists(fn_out_gff3):
@@ -331,13 +331,13 @@ def analyze_events(options, event_type, sample_idx=None):
             print('%s already exists' % fn_out_conf_txt)
         else:
             print('\nWriting filtered events (sample freq 0.05):')
-            cf_idx = sp.where([x.confirmed for x in events_all[confirmed_idx]] >= (0.05 * options.strains.shape[0]))[0]
+            cf_idx = np.where([x.confirmed for x in events_all[confirmed_idx]] >= (0.05 * options.strains.shape[0]))[0]
             write_events_txt(fn_out_conf_txt, options.strains[sample_idx], events_all, fn_out_count, event_idx=confirmed_idx[cf_idx])
 
         fn_out_conf_txt = fn_out_conf.replace('.pickle', '.filt0.1.txt')
         if os.path.exists(fn_out_conf_txt):
             print('%s already exists' % fn_out_conf_txt)
         else:
             print('\nWriting filtered events (sample freq 0.01):')
-            cf_idx = sp.where([x.confirmed for x in events_all[confirmed_idx]] >= (0.01 * options.strains.shape[0]))[0]
+            cf_idx = np.where([x.confirmed for x in events_all[confirmed_idx]] >= (0.01 * options.strains.shape[0]))[0]
             write_events_txt(fn_out_conf_txt, options.strains[sample_idx], events_all, fn_out_count, event_idx=confirmed_idx[cf_idx])
