Skip to content

Commit dcca71b

Browse files
committed
update portals for .mzml and .raw files
1 parent 1a44b41 commit dcca71b

File tree

2 files changed

+47
-45
lines changed

2 files changed

+47
-45
lines changed

dimspy/portals/mzml_portal.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -113,10 +113,21 @@ def tics(self):
113113
# print self.run()[2]
114114
for scan in self.run():
115115
if scan["id"] == "TIC":
116-
tics = zip(*scan.peaks)[1]
117-
return tics
116+
return zip(*scan.peaks)[1]
118117
return
119118

119+
def injection_times(self):
    """
    Collect the injection time of every scan yielded by ``self.run()``.

    For each scan the value is taken from the first element of
    ``scan.xmlTree`` whose ``accession`` attribute equals ``MS:1000927``
    (presumably the PSI-MS "ion injection time" term -- confirm against the
    controlled vocabulary).

    :return: scan id -> injection time as float, or None when the scan
        carries no element with that accession
    :rtype: dict
    """
    injection_times = {}
    for scan in self.run():
        scan_id = scan['id']
        # Register the scan up front so scans without the accession still
        # appear in the result; no second "missing key" check is needed.
        injection_times[scan_id] = None
        for element in scan.xmlTree:
            if element.get('accession') == "MS:1000927":
                injection_times[scan_id] = float(element.get("value"))
                break
    return injection_times
130+
120131
def scan_dependents(self):
121132
l = []
122133
for scan in self.run():

dimspy/portals/thermo_raw_portal.py

Lines changed: 34 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -24,66 +24,53 @@
2424

2525

2626
def mz_range_from_header(h):
    """
    Parse the first ``<low>-<high>`` pair found in a header string.

    :param h: header text containing a hyphen-separated pair such as ``70.00-170.00``
    :return: both parts of the first pair converted to float, in order of appearance
    :rtype: list
    :raises IndexError: if ``h`` contains no hyphen-separated pair
    :raises ValueError: if a part of the first pair is not a valid float
    """
    pairs = re.findall(r'([\w\.-]+)-([\w\.-]+)', h)
    first_pair = pairs[0]
    return [float(bound) for bound in first_pair]
3228

3329

3430
class ThermoRaw:
35-
"""
36-
Extract a list of headers / .
37-
:rtype: list
38-
"""
31+
3932
def __init__(self, filename):
    """
    Open ``filename`` through ``RawFileReaderAdapter.FileFactory`` and select
    instrument ``Business.Device.MS`` number 1 on the resulting handle.

    :param filename: path passed unchanged to ``FileFactory``; also kept on the instance
    """
    self.filename = filename
    raw_file = RawFileReader.RawFileReaderAdapter.FileFactory(filename)
    raw_file.SelectInstrument(Business.Device.MS, 1)
    self.run = raw_file
4236

4337
def headers(self):
    """
    Group the scan numbers of the run by their filter string.

    :return: filter string -> list of scan ids, keyed in order of first appearance
    :rtype: collections.OrderedDict
    """
    run_header = self.run.RunHeaderEx
    by_filter = collections.OrderedDict()
    for scan_id in range(run_header.FirstSpectrum, run_header.LastSpectrum + 1):
        filter_string = str(self.run.GetFilterForScanNumber(scan_id).Filter)
        if filter_string not in by_filter:
            by_filter[filter_string] = []
        by_filter[filter_string].append(scan_id)
    return by_filter
5243

5344
def scan_ids(self):
    """
    Map every scan number of the run to its filter string.

    :return: scan id -> filter string, in ascending scan order
    :rtype: collections.OrderedDict
    """
    run_header = self.run.RunHeaderEx
    scan_numbers = range(run_header.FirstSpectrum, run_header.LastSpectrum + 1)
    return collections.OrderedDict(
        (scan_id, str(self.run.GetFilterForScanNumber(scan_id).Filter))
        for scan_id in scan_numbers
    )
6250

6351
def peaklist(self, scan_id, function_noise="noise_packets"):
64-
"""
65-
Extract a particular scan from a *.raw file and return a PeakList objects
6652

67-
:param scan_ids:
68-
:rtype: list
69-
"""
7053
if function_noise not in ["noise_packets", "mean", "median", "mad"]:
7154
raise ValueError("select a function that is available [noise_packets, mean, median, mad]")
7255

7356
scan = self.run.GetCentroidStream(scan_id, False)
74-
75-
mz_ibn = zip(scan.Masses, scan.Intensities, scan.Baselines, scan.Noises) # SignalToNoise not available
76-
mz_ibn.sort()
77-
mzs, ints, baseline, noise = zip(*mz_ibn)
57+
if scan.Masses is not None:
58+
mz_ibn = zip(scan.Masses, scan.Intensities, scan.Baselines, scan.Noises) # SignalToNoise not available
59+
mz_ibn.sort()
60+
mzs, ints, baseline, noise = zip(*mz_ibn)
61+
else:
62+
mzs, ints, baseline, noise = [], [], [], []
7863

7964
if function_noise == "noise_packets":
8065
snr = [p.SignalToNoise for p in scan.GetCentroids()]
81-
elif function_noise == "median":
66+
elif function_noise == "median" and len(ints) > 0:
8267
snr = ints / np.median(ints)
83-
elif function_noise == "mean":
68+
elif function_noise == "mean" and len(ints) > 0:
8469
snr = ints / np.mean(ints)
85-
elif function_noise == "mad":
70+
elif function_noise == "mad" and len(ints) > 0:
8671
snr = ints / np.median(np.abs(np.subtract(ints, np.median(ints))))
72+
else:
73+
snr = []
8774

8875
scan_stats = self.run.GetScanStatsForScanNumber(scan_id)
8976

@@ -119,39 +106,43 @@ def peaklist(self, scan_id, function_noise="noise_packets"):
119106
tic=tic,
120107
function_noise=function_noise)
121108

122-
pl.add_attribute('snr', snr)
123-
pl.add_attribute('noise', noise)
124-
pl.add_attribute('baseline', baseline)
109+
if len(pl.mz) > 0:
110+
pl.add_attribute('snr', snr)
111+
pl.add_attribute('noise', noise)
112+
pl.add_attribute('baseline', baseline)
113+
125114
return pl
126115

127116
def peaklists(self, scan_ids, function_noise="noise_packets"):
    """
    Extract several scans by calling ``self.peaklist`` once per scan id.

    :param scan_ids: iterable of scan ids to extract
    :param function_noise: one of "noise_packets", "mean", "median" or "mad";
        forwarded to ``self.peaklist``
    :return: the ``self.peaklist`` results, in the order of ``scan_ids``
    :rtype: list
    :raises ValueError: if ``function_noise`` is not one of the available options
    """
    available = ("noise_packets", "mean", "median", "mad")
    if function_noise not in available:
        raise ValueError("select a function that is available [noise_packets, mean, median, mad]")

    extracted = []
    for scan_id in scan_ids:
        extracted.append(self.peaklist(scan_id, function_noise=function_noise))
    return extracted
139121

140122
def tics(self):
    """
    Collect the ``TIC`` value of every scan in the run.

    :return: scan id -> ``TIC`` attribute of ``GetScanStatsForScanNumber(scan_id)``,
        in ascending scan order
    :rtype: collections.OrderedDict
    """
    run_header = self.run.RunHeaderEx
    tics = collections.OrderedDict()
    for scan_id in range(run_header.FirstSpectrum, run_header.LastSpectrum + 1):
        scan_stats = self.run.GetScanStatsForScanNumber(scan_id)
        # Plain assignment: the mapping starts empty, so indexing it first
        # (``tics[scan_id].append(...)``) raised KeyError on the first scan.
        tics[scan_id] = scan_stats.TIC
    return tics
148128

129+
def injection_times(self):
    """
    Collect the ion injection time of every scan in the run.

    The value is read from the scan's trailer extra information, from the
    entry labelled ``"Ion Injection Time (ms):"``. If the label occurs more
    than once, the last occurrence wins.

    :return: scan id -> injection time as float, or None when the trailer
        has no such label; in ascending scan order
    :rtype: collections.OrderedDict
    """
    run_header = self.run.RunHeaderEx
    injection_times = collections.OrderedDict()
    for scan_id in range(run_header.FirstSpectrum, run_header.LastSpectrum + 1):
        # Fetch the trailer once per scan and reuse it for labels and values.
        trailer = self.run.GetTrailerExtraInformation(scan_id)
        injection_time = None
        for label, value in zip(trailer.Labels, trailer.Values):
            if label == "Ion Injection Time (ms):":
                injection_time = float(value)
        injection_times[scan_id] = injection_time
    return injection_times
140+
149141
def scan_dependents(self):
    """
    List the dependent scans of every scan in the run.

    ``GetScanDependents`` is queried with a fixed second argument of 5
    (NOTE(review): presumably the filter precision -- confirm against the
    RawFileReader API); scans for which it returns None are skipped.

    :return: ``[scan_id, ScanIndex]`` pairs, one per entry of each result's
        ``ScanDependentDetailArray``, in ascending scan order
    :rtype: list
    """
    run_header = self.run.RunHeaderEx
    dependents = []
    for scan_id in range(run_header.FirstSpectrum, run_header.LastSpectrum + 1):
        gsd = self.run.GetScanDependents(scan_id, 5)
        if gsd is None:
            continue
        dependents.extend([scan_id, detail.ScanIndex] for detail in gsd.ScanDependentDetailArray)
    return dependents

0 commit comments

Comments
 (0)