Skip to content

Commit de4ba98

Browse files
committed
Add fix for 'empty' scans after filtering
1 parent 666597c commit de4ba98

File tree

2 files changed

+39
-22
lines changed

2 files changed

+39
-22
lines changed

dimspy/experiment.py

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -175,25 +175,33 @@ def check_metadata(fn_tsv):
175175
return fm_dict
176176

177177

178-
def update_metadata_and_labels(peaklists, fl, pl_id=""):
    """Copy filelist metadata and tags onto each peaklist.

    ``fl`` is treated as a mapping of column name -> list of values. The
    list stored under the first key of ``fl`` is used as the ID column:
    the position of a peaklist's lookup ID in that list selects the row
    whose values are written to ``pl.metadata`` and, for a fixed set of
    column names, to ``pl.tags``.

    :param peaklists: list of PeakList objects; modified in place.
    :param fl: filelist mapping (column name -> list of values). When it
        is empty the peaklists are returned untouched.
    :param pl_id: optional ID to look up in the filelist instead of each
        peaklist's own ``ID`` (e.g. the file's basename when the peaklist
        ID carries an additional suffix). The default ``""`` means "use
        ``pl.ID``".
    :return: the same ``peaklists`` list that was passed in.
    :raises IOError: if the first element is not a PeakList, or if the
        lookup ID is not present in the filelist's ID column.
    """

    if not isinstance(peaklists[0], PeakList):
        raise IOError("PeakList object required")

    if len(fl) == 0:
        return peaklists

    # First column of the filelist holds the IDs. next(iter(...)) picks the
    # same key as fl.keys()[0] but also works where keys() is not indexable.
    id_column = fl[next(iter(fl))]

    for pl in peaklists:

        # An explicit pl_id overrides the peaklist's own ID; the original
        # condition was inverted and looked up "" by default.
        lookup_id = pl.ID if pl_id == "" else pl_id

        if lookup_id not in id_column:
            raise IOError("filelist and peaklist do not match {}".format(lookup_id))

        index = id_column.index(lookup_id)

        # Copy every filelist column for this row into the metadata.
        for k in fl.keys():
            pl.metadata[k] = fl[k][index]

        # Refresh the tags that the filelist provides, replacing any
        # existing tag of the same type.
        for tag_name in ["replicate", "replicates", "batch", "injectionOrder", "classLabel"]:
            if tag_name in fl:
                if pl.tags.has_tag_type(tag_name):
                    pl.tags.drop_tag_type(tag_name)
                pl.tags.add_tag(fl[tag_name][index], tag_name)

    return peaklists
199207

dimspy/tools.py

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,9 @@ def process_scans(source, function_noise, snr_thres, ppm, min_fraction=None, rsd
105105
if len(pls_scans[h]) >= 1:
106106
if sum(pl.shape[0] for pl in pls_scans[h]) == 0:
107107
logging.warning("No scan data available for {}".format(h))
108+
d = dict((k, [pls_scans[h][0].metadata[k]]) for k in pls_scans[h][0].metadata)
109+
pls_avg.append(PeakList(ID=h, mz=[], intensity=[], **d))
110+
n_peaks, median_rsd = 0, "NA"
108111
else:
109112
pl_avg = average_replicate_scans(h, pls_scans[h], ppm, min_fraction, rsd_thres, "intensity", block_size, ncpus)
110113
pls_avg.append(pl_avg)
@@ -115,21 +118,26 @@ def process_scans(source, function_noise, snr_thres, ppm, min_fraction=None, rsd
115118
if report is not None:
116119
out.write("{}\t{}\t{}\t{}\t{}\n".format(os.path.basename(filenames[i]), h, nscans, n_peaks, median_rsd))
117120

118-
if len(pls_avg) == 0:
119-
raise IOError("No peaks remaining after filtering. Remove file from Study (filelist).")
121+
if sum(pl.shape[0] for pl in pls_avg) == 0:
122+
logging.warning("No peaks remaining after filtering. Remove file from Study (filelist).")
120123

121124
if not skip_stitching or len(pls_scans.keys()) == 1:
122125
pl = join_peaklists(os.path.basename(filenames[i]), pls_avg)
123126
pl = update_metadata_and_labels([pl], fl)
124127
pls.extend(pl)
128+
129+
if hasattr(pl[0], 'rsd'):
130+
median_rsd = np.nanmedian(pl[0].rsd)
131+
else:
132+
median_rsd = "NA"
133+
125134
if len(pls_scans.keys()) > 1 and report is not None:
126-
out.write("{}\t{}\t{}\t{}\t{}\n".format(os.path.basename(filenames[i]), "SIM-Stitch", "NA", pl[0].shape[0], np.nanmedian(pl[0].rsd)))
135+
out.write("{}\t{}\t{}\t{}\t{}\n".format(os.path.basename(filenames[i]), "SIM-Stitch", "NA", pl[0].shape[0], median_rsd))
127136
else:
128137
for pl in pls_avg:
129-
pl = update_metadata_and_labels([pl], fl)
130-
pl = join_peaklists("{}#{}".format(os.path.basename(filenames[i]), pl[0].metadata["header"][0]), pl)
131-
pls.append(pl)
132-
138+
pl = join_peaklists("{}#{}".format(os.path.basename(filenames[i]), pl.metadata["header"][0]), [pl])
139+
pl = update_metadata_and_labels([pl], fl, os.path.basename(filenames[i]))
140+
pls.extend(pl)
133141
return pls
134142

135143

@@ -244,9 +252,10 @@ def replicate_filter(source, ppm, replicates, min_peaks, rsd_thres=None, filelis
244252

245253
if sum([comb[-1] for comb in temp]) == 0.0:
246254
logging.warning("insufficient data available to calculate scores for {}".format(str([comb[0].ID for comb in temp])))
247-
248-
# sort the scores from high to low
249-
temp.sort(key=operator.itemgetter(-1), reverse=True)
255+
temp.sort(key=operator.itemgetter(1), reverse=True)
256+
else:
257+
# sort the scores from high to low
258+
temp.sort(key=operator.itemgetter(-1), reverse=True)
250259
# select the replicate filtered peaklist that is ranked first
251260
pls_rep_filt.append(temp[0][0])
252261

0 commit comments

Comments
 (0)