ratschlab
diff --git a/‎docs/source/file_formats.rst‎
Lines changed: 4 additions & 4 deletions b/‎docs/source/file_formats.rst‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎setup.cfg‎
Lines changed: 1 addition & 1 deletion b/‎setup.cfg‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎setup.py‎
Lines changed: 1 addition & 1 deletion b/‎setup.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎spladder/__init__.py‎
Lines changed: 1 addition & 1 deletion b/‎spladder/__init__.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎spladder/alt_splice/analyze.py‎
Lines changed: 39 additions & 39 deletions b/‎spladder/alt_splice/analyze.py‎
Lines changed: 39 additions & 39 deletions
@@ -1,8 +1,8 @@
 File formats
 ============
 
-Input Formats
--------------
+Input Formats -- ``build`` mode
+-------------------------------
 
 Annotation Files
 ^^^^^^^^^^^^^^^^
@@ -26,8 +26,8 @@ have successfully tested SplAdder with the following aligners:
 - `PALMapper`_
 - `TopHat`_
 
-Output Formats
---------------
+Output Formats -- ``build`` mode
+--------------------------------
 SplAdder produces a variety of different output files. Here we will mainly discuss files that are
 aimed at the user and omit intermediate files that are mainly necessary for internal processes of
 SplAdder. Most of the latter will be stored in the ``spladder`` subdirectory in the output
 
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 2.4.2
+current_version = 2.4.3
 commit = True
 tag = True
 
 
@@ -45,6 +45,6 @@
     test_suite='tests',
     tests_require=test_requirements,
     url='https://github.com/ratschlab/spladder',
-    version='2.4.2',
+    version='2.4.3',
     zip_safe=False,
 )
@@ -4,4 +4,4 @@
 
 __author__ = """Andre Kahles"""
 __email__ = 'andre.kahles@inf.ethz.ch'
-__version__ = '2.4.2'
+__version__ = '2.4.3'
@@ -1,6 +1,6 @@
 import sys
 import os
-import scipy as sp
+import numpy as np
 import pickle
 import h5py
 
@@ -34,11 +34,11 @@ def _prepare_count_hdf5(options, OUT, event_features, sample_idx=None):
     OUT.create_dataset(name='strains', data=codeUTF8(options.strains))
     feat = OUT.create_group(name='event_features')
     for f in event_features:
-        feat.create_dataset(name=f, data=codeUTF8(sp.array(event_features[f], dtype='str')))
-    OUT.create_dataset(name='gene_names', data=codeUTF8(sp.array([x.name for x in genes], dtype='str')))
-    OUT.create_dataset(name='gene_chr', data=codeUTF8(sp.array([x.chr for x in genes], dtype='str')))
-    OUT.create_dataset(name='gene_strand', data=codeUTF8(sp.array([x.strand for x in genes], dtype='str')))
-    OUT.create_dataset(name='gene_pos', data=sp.array([[x.start, x.stop] for x in genes], dtype='int'))
+        feat.create_dataset(name=f, data=codeUTF8(np.array(event_features[f], dtype='str')))
+    OUT.create_dataset(name='gene_names', data=codeUTF8(np.array([x.name for x in genes], dtype='str')))
+    OUT.create_dataset(name='gene_chr', data=codeUTF8(np.array([x.chr for x in genes], dtype='str')))
+    OUT.create_dataset(name='gene_strand', data=codeUTF8(np.array([x.strand for x in genes], dtype='str')))
+    OUT.create_dataset(name='gene_pos', data=np.array([[x.start, x.stop] for x in genes], dtype='int'))
 
 
 def analyze_events(options, event_type, sample_idx=None):
@@ -90,25 +90,25 @@ def analyze_events(options, event_type, sample_idx=None):
         events_all_strains = options.strains
 
         ### handle case where we did not find any event of this type
-        if sp.sum([x.event_type == event_type for x in events_all]) == 0:
+        if np.sum([x.event_type == event_type for x in events_all]) == 0:
             OUT = h5py.File(fn_out_count, 'w')
             OUT.create_dataset(name='event_counts', data=[0])
             _prepare_count_hdf5(options, OUT, event_features, sample_idx=sample_idx)
             OUT.close()
-            confirmed_idx = sp.array([], dtype='int')
+            confirmed_idx = np.array([], dtype='int')
         else:
             if not options.pyproc:
                 if options.merge == 'single':
                     (events_all, counts) = verify_all_events(events_all, sample_idx, options.bam_fnames, event_type, options)
                 else:
-                    (events_all, counts) = verify_all_events(events_all, sp.arange(len(options.strains)), options.bam_fnames, event_type, options)
-                verified = sp.array([x.verified for x in events_all], dtype='bool')
+                    (events_all, counts) = verify_all_events(events_all, np.arange(len(options.strains)), options.bam_fnames, event_type, options)
+                verified = np.array([x.verified for x in events_all], dtype='bool')
                 for ev in events_all:
                     ev.verified = []
 
-                psi = sp.empty((counts.shape[0], counts.shape[2]), dtype='float')
-                iso1 = sp.empty((counts.shape[0], counts.shape[2]), dtype='int32')
-                iso2 = sp.empty((counts.shape[0], counts.shape[2]), dtype='int32')
+                psi = np.empty((counts.shape[0], counts.shape[2]), dtype='float')
+                iso1 = np.empty((counts.shape[0], counts.shape[2]), dtype='int32')
+                iso2 = np.empty((counts.shape[0], counts.shape[2]), dtype='int32')
                 for i in range(counts.shape[2]):
                     (psi[:, i], iso1[:, i], iso2[:, i])  = compute_psi(counts[:, :, i], event_type, options)
 
@@ -117,7 +117,7 @@ def analyze_events(options, event_type, sample_idx=None):
                 OUT.create_dataset(name='psi', data=psi, compression='gzip')
                 OUT.create_dataset(name='iso1', data=iso1, compression='gzip')
                 OUT.create_dataset(name='iso2', data=iso2, compression='gzip')
-                OUT.create_dataset(name='gene_idx', data=sp.array([x.gene_idx for x in events_all], dtype='int'), compression='gzip')
+                OUT.create_dataset(name='gene_idx', data=np.array([x.gene_idx for x in events_all], dtype='int'), compression='gzip')
                 OUT.create_dataset(name='verified', data=verified, compression='gzip')
                 _prepare_count_hdf5(options, OUT, event_features, sample_idx=sample_idx)
             else:
@@ -126,9 +126,9 @@ def analyze_events(options, event_type, sample_idx=None):
                 chunk_size_events = 5000
                 chunk_size_strains = 500
                 for i in range(0, events_all.shape[0], chunk_size_events):
-                    idx_events = sp.arange(i, min(i + chunk_size_events, events_all.shape[0]))
+                    idx_events = np.arange(i, min(i + chunk_size_events, events_all.shape[0]))
                     for j in range(0, len(options.strains), chunk_size_strains):
-                        idx_strains = sp.arange(j, min(j + chunk_size_strains, len(options.strains)))
+                        idx_strains = np.arange(j, min(j + chunk_size_strains, len(options.strains)))
                         PAR['ev'] = events_all[idx_events].copy()
                         PAR['strain_idx'] = idx_strains
                         PAR['list_bam'] = options.bam_fnames
@@ -151,9 +151,9 @@ def analyze_events(options, event_type, sample_idx=None):
                 print('Collecting results from chunks ...')
                 OUT = h5py.File(fn_out_count, 'w')
                 for i in range(0, events_all.shape[0], chunk_size_events):
-                    idx_events = sp.arange(i, min(i + chunk_size_events, events_all.shape[0]))
+                    idx_events = np.arange(i, min(i + chunk_size_events, events_all.shape[0]))
                     for j in range(0, len(options.strains), chunk_size_strains):
-                        idx_strains = sp.arange(j, min(j + chunk_size_strains, len(options.strains)))
+                        idx_strains = np.arange(j, min(j + chunk_size_strains, len(options.strains)))
                         print('\r%i (%i), %i (%i)' % (i, events_all.shape[0], j, len(options.strains)))
                         out_fn = '%s/event_count_chunks/%s_%i_%i_C%i.pickle' % (options.outdir, event_type, i, j, options.confidence)
                         if not os.path.exists(out_fn):
@@ -166,22 +166,22 @@ def analyze_events(options, event_type, sample_idx=None):
                             verified_ = [x.verified.astype('bool') for x in ev]
                             collect_ids_ = [x.id for x in ev]
                         else:
-                            counts = sp.r_[counts, counts_]
+                            counts = np.r_[counts, counts_]
                             for jj in range(len(ev_)):
-                                verified_[jj] = sp.r_[verified_[jj], ev_[jj].verified]
+                                verified_[jj] = np.r_[verified_[jj], ev_[jj].verified]
                             del counts_
 
-                    psi = sp.empty((counts.shape[0], counts.shape[2]), dtype='float')
-                    iso1 = sp.empty((counts.shape[0], counts.shape[2]), dtype='int32')
-                    iso2 = sp.empty((counts.shape[0], counts.shape[2]), dtype='int32')
+                    psi = np.empty((counts.shape[0], counts.shape[2]), dtype='float')
+                    iso1 = np.empty((counts.shape[0], counts.shape[2]), dtype='int32')
+                    iso2 = np.empty((counts.shape[0], counts.shape[2]), dtype='int32')
                     for j in range(counts.shape[2]):
                         (psi[:, j], iso1[:, j], iso2[:, j]) = compute_psi(counts[:, :, j], event_type, options) 
 
                     if i == 0:
                         OUT.create_dataset(name='event_counts', data=counts, maxshape=(len(options.strains), len(event_features[event_type]), None), compression='gzip')
-                        OUT.create_dataset(name='psi', data=sp.atleast_2d(psi), maxshape=(psi.shape[0], None), compression='gzip')
-                        OUT.create_dataset(name='iso1', data=sp.atleast_2d(iso1), maxshape=(iso1.shape[0], None), compression='gzip')
-                        OUT.create_dataset(name='iso2', data=sp.atleast_2d(iso2), maxshape=(iso2.shape[0], None), compression='gzip')
+                        OUT.create_dataset(name='psi', data=np.atleast_2d(psi), maxshape=(psi.shape[0], None), compression='gzip')
+                        OUT.create_dataset(name='iso1', data=np.atleast_2d(iso1), maxshape=(iso1.shape[0], None), compression='gzip')
+                        OUT.create_dataset(name='iso2', data=np.atleast_2d(iso2), maxshape=(iso2.shape[0], None), compression='gzip')
                     else:
                         tmp = OUT['event_counts'].shape
                         OUT['event_counts'].resize((tmp[0], tmp[1], tmp[2] + len(ev)))
@@ -197,33 +197,33 @@ def analyze_events(options, event_type, sample_idx=None):
                         OUT['iso2'][:, tmp[1]:] = iso2
                     verified.extend(verified_)
                     collect_ids.extend(collect_ids_)
-                    gene_idx_ = sp.r_[gene_idx_, [x.gene_idx for x in ev]]
+                    gene_idx_ = np.r_[gene_idx_, [x.gene_idx for x in ev]]
                     del iso1, iso2, psi, counts, ev, ev_
 
-                verified = sp.array(verified, dtype='bool')
+                verified = np.array(verified, dtype='bool')
 
                 assert(events_all.shape[0] == verified.shape[0])
-                assert(sp.all([events_all[e].id for e in range(events_all.shape[0])] == collect_ids))
+                assert(np.all([events_all[e].id for e in range(events_all.shape[0])] == collect_ids))
 
                 OUT.create_dataset(name='verified', data=verified, dtype='bool', compression='gzip')
                 OUT.create_dataset(name='gene_idx', data=gene_idx_)
                 _prepare_count_hdf5(options, OUT, event_features, sample_idx=sample_idx)
 
             ### write more event infos to hdf5
             if event_type == 'exon_skip':
-                event_pos = sp.array([x.exons2.ravel() for x in events_all])
+                event_pos = np.array([x.exons2.ravel() for x in events_all])
             elif event_type == 'intron_retention':
-                event_pos = sp.array([x.exons1.ravel() for x in events_all])
+                event_pos = np.array([x.exons1.ravel() for x in events_all])
             elif event_type in ['alt_3prime', 'alt_5prime']:
-                event_pos = sp.array([unique_rows(sp.c_[x.exons1, x.exons2]).ravel() for x in events_all])
+                event_pos = np.array([unique_rows(np.c_[x.exons1, x.exons2]).ravel() for x in events_all])
             elif event_type == 'mult_exon_skip':
-                event_pos = sp.array([x.exons2[[0, 1, -2, -1], :].ravel() for x in events_all])
+                event_pos = np.array([x.exons2[[0, 1, -2, -1], :].ravel() for x in events_all])
             elif event_type == 'mutex_exons':
-                event_pos = sp.array([sp.c_[x.exons1[0, :], x.exons1[1, :], x.exons2[1, :], x.exons2[2, :]] for x in events_all])
+                event_pos = np.array([np.c_[x.exons1[0, :], x.exons1[1, :], x.exons2[1, :], x.exons2[2, :]] for x in events_all])
 
             OUT.create_dataset(name='event_pos', data=event_pos)
 
-            num_verified = sp.sum(verified, axis=1)
+            num_verified = np.sum(verified, axis=1)
             confirmed = num_verified.min(axis=1)
             OUT.create_dataset(name='num_verified', data=num_verified)
             OUT.create_dataset(name='confirmed', data=confirmed)
@@ -232,7 +232,7 @@ def analyze_events(options, event_type, sample_idx=None):
             #for min_verified = 1:length(options.strains),
             #    verified_count(min_verified) = sum([events_all.confirmed] >= min_verified) ;
 
-            confirmed_idx = sp.where(confirmed >= 1)[0]
+            confirmed_idx = np.where(confirmed >= 1)[0]
             if confirmed_idx.shape[0] > 0:
                 OUT.create_dataset(name='conf_idx', data=confirmed_idx)
 
@@ -275,7 +275,7 @@ def analyze_events(options, event_type, sample_idx=None):
     if isinstance(sample_idx, int):
         sample_idx = [sample_idx]
     elif sample_idx is None:
-        sample_idx = sp.arange(options.strains.shape[0])
+        sample_idx = np.arange(options.strains.shape[0])
 
     if options.output_gff3:
         if os.path.exists(fn_out_gff3):
@@ -331,13 +331,13 @@ def analyze_events(options, event_type, sample_idx=None):
             print('%s already exists' % fn_out_conf_txt)
         else:
             print('\nWriting filtered events (sample freq 0.05):')
-            cf_idx = sp.where([x.confirmed for x in events_all[confirmed_idx]] >= (0.05 * options.strains.shape[0]))[0]
+            cf_idx = np.where([x.confirmed for x in events_all[confirmed_idx]] >= (0.05 * options.strains.shape[0]))[0]
             write_events_txt(fn_out_conf_txt, options.strains[sample_idx], events_all, fn_out_count, event_idx=confirmed_idx[cf_idx])
 
         fn_out_conf_txt = fn_out_conf.replace('.pickle', '.filt0.1.txt')
         if os.path.exists(fn_out_conf_txt):
             print('%s already exists' %  fn_out_conf_txt)
         else:
             print('\nWriting filtered events (sample freq 0.01):')
-            cf_idx = sp.where([x.confirmed for x in events_all[confirmed_idx]] >= (0.01 * options.strains.shape[0]))[0]
+            cf_idx = np.where([x.confirmed for x in events_all[confirmed_idx]] >= (0.01 * options.strains.shape[0]))[0]
             write_events_txt(fn_out_conf_txt, options.strains[sample_idx], events_all, fn_out_count, event_idx=confirmed_idx[cf_idx])
Original file line number	Diff line number	Diff line change
`@@ -45,6 +45,6 @@`
`45`	`45`	`test_suite='tests',`
`46`	`46`	`tests_require=test_requirements,`
`47`	`47`	`url='https://github.com/ratschlab/spladder',`
`48`		`- version='2.4.2',`
	`48`	`+ version='2.4.3',`
`49`	`49`	`zip_safe=False,`
`50`	`50`	`)`