Skip to content

Commit cd921e1

Browse files
committed
improve performance (preload and cache mzML scans) and add new feature (on_attr option for RSD calculation and RSD filter)
1 parent cde30e3 commit cd921e1

File tree

3 files changed

+18
-6
lines changed

3 files changed

+18
-6
lines changed

dimspy/models/peak_matrix.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -401,6 +401,7 @@ def rsd(self, *args, **kwargs):
401401
402402
:param args: tags or untyped tag values for RSD calculation, no value = calculate over all samples
403403
:param kwargs: typed tags for RSD calculation, no value = calculate over all samples
404+
:param on_attr: calculate RSD on given attribute. Default = "intensity"
404405
:param flagged_only: whether to calculate on flagged peaks only. Default = True
405406
:rtype: numpy array
406407
@@ -413,6 +414,7 @@ def rsd(self, *args, **kwargs):
413414
corresponding rsd value will be set to np.nan.
414415
415416
"""
417+
on_attr = kwargs.pop('on_attr') if kwargs.has_key('on_attr') else 'intensity'
416418
flagged_only = kwargs.pop('flagged_only') if kwargs.has_key('flagged_only') else True
417419

418420
if self.shape[0] < 2:
@@ -423,7 +425,7 @@ def rsd(self, *args, **kwargs):
423425
unmask_peakmatrix(self, *args, **kwargs)) as m:
424426
if m.shape[0] == 0: raise AttributeError('peak matrix does not have label(s) [%s]' %
425427
join(map(lambda x: str(x)[1:-1], (args, kwargs)), ', '))
426-
ints = m.attr_matrix('intensity', flagged_only)
428+
ints = m.attr_matrix(on_attr, flagged_only)
427429
rsd = m._present_std(ints, 0, flagged_only) / m._present_mean(ints, 0, flagged_only) * 100
428430

429431
rsd[np.where(map(lambda x: len(set(x[np.nonzero(x)])) == 1, ints.T))] = np.nan # only one valid value

dimspy/portals/mzml_portal.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,16 +14,21 @@
1414
import pymzml
1515
import numpy as np
1616
import zipfile
17+
from copy import deepcopy
1718
from dimspy.models.peaklist import PeakList
1819
from dimspy.experiment import mz_range_from_header
1920

2021

2122
class Mzml:
22-
def __init__(self, filename="", archive=None):
23+
def __init__(self, filename="", archive=None, preload=True):
2324
self.filename = filename
2425
self.archive = archive
26+
self._preload = preload
27+
self._cache = None
2528

2629
def run(self):
30+
if self._cache is not None: return self._cache
31+
2732
if not self.filename.lower().endswith(".mzml") and not self.filename.lower().endswith(".mzml.gz") and not self.filename.lower().endswith(".zip"):
2833
raise IOError('Incorrect file format for mzML parser')
2934
if self.archive is not None:
@@ -32,11 +37,15 @@ def run(self):
3237
zf = zipfile.ZipFile(self.archive, 'r')
3338
if self.filename not in zf.namelist():
3439
raise IOError("{} does not exist in zip file".format(self.filename))
35-
return pymzml.run.Reader('', file_object=zf.open(self.filename))
40+
dat = pymzml.run.Reader('', file_object=zf.open(self.filename))
41+
if self._preload: dat = self._cache = tuple(map(deepcopy, dat))
42+
return dat
3643
elif self.filename.lower().endswith(".mzml") or self.filename.lower().endswith(".mzml.gz"):
3744
if not os.path.isfile(self.filename):
3845
raise IOError("{} does not exist".format(self.filename))
39-
return pymzml.run.Reader(self.filename)
46+
dat = pymzml.run.Reader(self.filename)
47+
if self._preload: dat = self._cache = tuple(map(deepcopy, dat))
48+
return dat
4049
else:
4150
return None
4251

dimspy/process/peak_filters.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,21 +92,22 @@ def filter_mz_ranges(pl, mz_remove_rngs, flag_name='mz_range_remove_flag', flag_
9292

9393

9494
# PeakMatrix filters
95-
def filter_rsd(pm, rsd_threshold, qc_tag, flag_name='rsd_flag'):
95+
def filter_rsd(pm, rsd_threshold, qc_tag, on_attr = 'intensity', flag_name='rsd_flag'):
9696
"""
9797
PeakMatrix RSD filter.
9898
9999
:param pm: the target peak matrix
100100
:param rsd_threshold: threshold of the RSD of the QC samples
101101
:param qc_tag: tag (label) to unmask qc samples
102+
:param on_attr: calculate RSD on given attribute. Default = "intensity"
102103
:param flag_name: name of the new flag. Default = 'rsd_flag'
103104
:rtype: PeakMatrix object
104105
105106
This filter will calculate the RSD values of the QC samples. A peak with a QC RSD value larger than the
106107
threshold will be unflagged.
107108
108109
"""
109-
rsd_values = pm.rsd(qc_tag)
110+
rsd_values = pm.rsd(qc_tag, on_attr = on_attr)
110111
if np.any(np.isnan(rsd_values)):
111112
logging.warning('nan found in QC rsd values, filter might not work properly')
112113

0 commit comments

Comments
 (0)