Skip to content

Commit e544400

Browse files
authored
Merge pull request #35 from computational-metabolomics/new_tags_behaviour
Add new tags behaviour
2 parents ac4f087 + 8d7beb5 commit e544400

34 files changed

+519
-392
lines changed

dimspy/__main__.py

Lines changed: 9 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -224,7 +224,7 @@ def main():
224224
action='store_true', required=False,
225225
help="Remove blank samples from peak matrix.")
226226

227-
parser_bf.add_argument('-a', '--class-labels',
227+
parser_bf.add_argument('-a', '--labels',
228228
type=str, required=False,
229229
help="Tab delimited file with at least two columns named 'filename' and 'classLabel'.")
230230

@@ -257,7 +257,7 @@ def main():
257257
default=None, type=str, required=False,
258258
help="Class label for QCs")
259259

260-
parser_sf.add_argument('-a', '--class-labels',
260+
parser_sf.add_argument('-a', '--labels',
261261
type=str, required=False,
262262
help="Tab delimited file with at least two columns named 'filename' and 'classLabel'.")
263263

@@ -470,7 +470,7 @@ def main():
470470
min_fold_change=args.min_fold_change,
471471
function=args.function,
472472
rm_samples=args.remove_blank_samples,
473-
class_labels=args.class_labels)
473+
labels=args.labels)
474474
hdf5_portal.save_peak_matrix_as_hdf5(pm_bf, args.output)
475475

476476
elif args.step == "sample-filter":
@@ -479,7 +479,7 @@ def main():
479479
within=args.within,
480480
rsd=args.rsd_threshold,
481481
qc_label=args.qc_label,
482-
class_labels=args.class_labels)
482+
labels=args.labels)
483483
hdf5_portal.save_peak_matrix_as_hdf5(pm_sf, args.output)
484484

485485
elif args.step == "mv-sample-filter":
@@ -545,9 +545,11 @@ def main():
545545
tools.hdf5_peaklists_to_txt(args.input, path_out=args.output, delimiter=map_delimiter(args.delimiter))
546546

547547
elif args.step == "create-sample-list":
548-
pls = hdf5_portal.load_peaklists_from_hdf5(args.input)
549-
tools.create_sample_list(pls, args.output, delimiter=map_delimiter(args.delimiter))
550-
548+
try:
549+
inp = hdf5_portal.load_peaklists_from_hdf5(args.input)
550+
except:
551+
inp = hdf5_portal.load_peak_matrix_from_hdf5(args.input)
552+
tools.create_sample_list(inp, args.output, delimiter=map_delimiter(args.delimiter))
551553

552554
if __name__ == "__main__":
553555
main()

dimspy/experiment.py

Lines changed: 13 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -175,7 +175,7 @@ def check_metadata(fn_tsv):
175175
return fm_dict
176176

177177

178-
def update_metadata(peaklists, fl):
178+
def update_metadata_and_labels(peaklists, fl):
179179

180180
if not isinstance(peaklists[0], PeakList):
181181
raise IOError("PeakList object required")
@@ -189,34 +189,16 @@ def update_metadata(peaklists, fl):
189189
pl.metadata[k] = fl[k][index]
190190
#pl.metadata["filelist"] = {k:fl[k][index] for k in fl.keys()}
191191

192-
if "classLabel" in fl.keys():
193-
if pl.tags.has_tag_type("classLabel"):
194-
pl.tags.drop_tag_types("classLabel")
195-
pl.tags.add_tags(classLabel=fl["classLabel"][index])
196-
197-
if "batch" in fl.keys():
198-
if pl.tags.has_tag_type("batch"):
199-
pl.tags.drop_tag_types("batch")
200-
pl.tags.add_tags(batch=fl["batch"][index])
192+
for tag_name in ["replicate", "replicates", "batch", "injectionOrder", "classLabel"]:
193+
if tag_name in fl.keys():
194+
if pl.tags.has_tag_type(tag_name):
195+
pl.tags.drop_tag_type(tag_name)
196+
pl.tags.add_tag(fl[tag_name][index], tag_name)
201197

202198
return peaklists
203199

204200

205-
def copy_metadata(peak_list, peak_list_out, labels=None):
206-
if not isinstance(peak_list, PeakList) or not isinstance(peak_list_out, PeakList):
207-
raise IOError("PeakList object required")
208-
for k, v in peak_list.metadata.items():
209-
if labels is None:
210-
peak_list_out.metadata[k] = v
211-
elif k in labels:
212-
peak_list_out.metadata[k] = v
213-
214-
peak_list_out.tags.add_tags(*peak_list.tags.tag_of(None),
215-
**{t: peak_list.tags.tag_of(t) for t in peak_list.tags.tag_types})
216-
return peak_list
217-
218-
219-
def update_class_labels(pm, fn_tsv):
201+
def update_labels(pm, fn_tsv):
220202

221203
assert os.path.isfile(fn_tsv.encode('string-escape')), "{} does not exist".format(fn_tsv)
222204

@@ -228,17 +210,12 @@ def update_class_labels(pm, fn_tsv):
228210
assert "classLabel" in fm.dtype.names, "Column for class label (classLabel) not available"
229211
assert (fm[fm.dtype.names[0]] == pm.peaklist_ids).all(), "Sample ids do not match {}".format(np.setdiff1d(fm[fm.dtype.names[0]], pm.peaklist_ids))
230212

231-
for i in range(len(fm["classLabel"])):
232-
if pm.peaklist_tags[i].has_tag_type("classLabel"):
233-
pm.peaklist_tags[i].drop_tag_types("classLabel")
234-
pm.peaklist_tags[i].add_tags(classLabel=fm["classLabel"][i])
235-
236-
if "batch" in fm.keys():
237-
for i in range(len(fm["batch"])):
238-
if pm.peaklist_tags[i].has_tag_type("batch"):
239-
pm.peaklist_tags[i].drop_tag_types("batch")
240-
pm.peaklist_tags[i].add_tags(batch=fm["batch"][i])
241-
213+
for tag_name in ["replicate", "replicates", "batch", "injectionOrder", "classLabel"]:
214+
if tag_name in fm.dtype.names:
215+
for i in range(len(fm[tag_name])):
216+
if pm.peaklist_tags[i].has_tag_type(tag_name):
217+
pm.peaklist_tags[i].drop_tag_type(tag_name)
218+
pm.peaklist_tags[i].add_tag(fm[tag_name][i], tag_name)
242219
return pm
243220

244221

dimspy/models/peak_matrix.py

Lines changed: 35 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616
import numpy as np
1717
from collections import OrderedDict, Iterable
1818
from string import join
19+
from peaklist_tags import Tag
1920
from peaklist import PeakList
2021

2122

@@ -204,21 +205,21 @@ def peaklist_tag_types(self):
204205
Property of the source peaklist tag types.
205206
206207
:getter: returns a tuple including the types of the typed tags of the source peaklists
207-
:type: tuple
208+
:type: set
208209
209210
"""
210-
return tuple(set(reduce(lambda x, y: x + y, [t.tag_types for t in self.peaklist_tags], ())))
211+
return reduce(lambda x,y: x.union(y), map(lambda x: x.tag_types, self.peaklist_tags))
211212

212213
@property
213214
def peaklist_tag_values(self):
214215
"""
215216
Property of the source peaklist tag values.
216217
217218
:getter: returns a tuple including the values of the source peaklists tags, both typed and untyped
218-
:type: tuple
219+
:type: set
219220
220221
"""
221-
return tuple(set(reduce(lambda x, y: x + y, [t.tag_values for t in self.peaklist_tags], ())))
222+
return reduce(lambda x,y: x.union(y), map(lambda x: x.tag_values, self.peaklist_tags))
222223

223224
@property
224225
def shape(self):
@@ -398,8 +399,8 @@ def rsd(self, *args, **kwargs):
398399
"""
399400
Calculates relative standard deviation (RSD) array.
400401
401-
:param args: untyped tag label for RSD calculation, no value = calculate over all samples
402-
:param kwargs: typed tag label for RSD calculation, , no value = calculate over all samples
402+
:param args: tags or untyped tag values for RSD calculation, no value = calculate over all samples
403+
:param kwargs: typed tags for RSD calculation, , no value = calculate over all samples
403404
:param flagged_only: whether to calculate on flagged peaks only. Default = True
404405
:type: numpy array
405406
@@ -437,19 +438,19 @@ def tags_of(self, tag_type=None):
437438
:param tag_type: the type of the returning tags. Provide None to obtain untyped tags
438439
:rtype: tuple
439440
440-
"""
441-
if not (tag_type is None or all(map(lambda x: x.has_tag_type(tag_type), self.peaklist_tags))):
442-
raise ValueError('not all samples has tag type [%s]' % tag_type)
443-
tlst = [t.tag_of(tag_type) for t in self.peaklist_tags]
441+
"""
442+
if any(map(lambda x: not x.has_tag_type(tag_type), self.peaklist_tags)):
443+
raise KeyError('not all samples has tag type [%s]' % tag_type)
444+
tlst = filter(lambda x: x is not None, [t.tag_of(tag_type) for t in self.peaklist_tags])
444445
if tag_type is None: tlst = reduce(lambda x, y: x + y, tlst)
445-
return tuple(set(tlst))
446+
return reduce(lambda x, y: x + ((y,) if y not in x else ()), tlst, ())
446447

447448
def mask_tags(self, *args, **kwargs): # match to all
448449
"""
449450
Masks samples with particular tags.
450451
451-
:param args: target tag values, both typed and untyped
452-
:param kwargs: target typed tag types and values
452+
:param args: tags or untyped tag values for masking
453+
:param kwargs: typed tags for masking
453454
:param override: whether to override the current mask, default = False
454455
:rtype: PeakMatrix object (self)
455456
@@ -462,17 +463,19 @@ def mask_tags(self, *args, **kwargs): # match to all
462463
463464
"""
464465
override = kwargs.pop('override') if kwargs.has_key('override') else False
466+
if any(map(lambda x: isinstance(x, Tag), kwargs.values())):
467+
logging.warning('setting additional type for Tag object in kwargs will be ignored')
465468
mask = map(lambda x: all(map(lambda t: x.has_tag(t), args)) and
466-
all(map(lambda t: x.has_tag(**dict([t])), kwargs.items())), self._tags)
469+
all(map(lambda t: x.has_tag(t[1], tag_type = t[0]), kwargs.items())), self._tags)
467470
self.mask = np.logical_or(False if override else self._mask, mask)
468471
return self
469472

470473
def unmask_tags(self, *args, **kwargs): # match to all
471474
"""
472475
Unmasks samples with particular tags.
473476
474-
:param args: target tag values, both typed and untyped
475-
:param kwargs: target typed tag types and values
477+
:param args: tags or untyped tag values for unmasking
478+
:param kwargs: typed tags for unmasking
476479
:param override: whether to override the current mask, default = False
477480
:rtype: PeakMatrix object (self)
478481
@@ -486,8 +489,10 @@ def unmask_tags(self, *args, **kwargs): # match to all
486489
487490
"""
488491
override = kwargs.pop('override') if kwargs.has_key('override') else False
492+
if any(map(lambda x: isinstance(x, Tag), kwargs.values())):
493+
logging.warning('setting additional type for Tag object in kwargs will be ignored')
489494
mask = map(lambda x: not (all(map(lambda t: x.has_tag(t), args)) and
490-
all(map(lambda t: x.has_tag(**dict([t])), kwargs.items()))), self._tags)
495+
all(map(lambda t: x.has_tag(t[1], tag_type = t[0]), kwargs.items()))), self._tags)
491496
self.mask = np.logical_and(True if override else self._mask, mask)
492497
return self
493498

@@ -757,21 +762,22 @@ def to_str(self, attr_name='intensity', delimiter='\t', samples_in_rows=True, co
757762
[map(str, ln) for ln in self.attr_matrix(attr_name, flagged_only = not comprehensive).T]
758763

759764
if comprehensive:
760-
ttypes = set(reduce(lambda x, y: x + y, map(lambda x: x.tag_types, self.peaklist_tags)))
765+
ttypes = self.peaklist_tag_types
766+
if None in ttypes: ttypes.remove(None)
761767
tnum = len(ttypes)
762768
hd = [hd[0]] + ['missing values'] + map(lambda x: 'tags_' + x, ttypes) + ['tags_untyped'] + hd[1:]
763769
dm = [dm[0]] + \
764770
[map(str, self.missing_values)] + \
765-
[map(lambda x: str(x.tag_of(t)) if x.has_tag_type(t) else '', self.peaklist_tags) for t in ttypes] + \
766-
[map(lambda x: join(map(str, x.tag_of(None)), ';'), self.peaklist_tags)] + \
771+
[map(lambda x: (lambda v: str(v.value) if v else '')(x.tag_of(t)), self.peaklist_tags) for t in ttypes] + \
772+
[map(lambda x: join((lambda v: map(str,v) if v else ())(x.tag_of(None)), ';'), self.peaklist_tags)] + \
767773
dm[1:]
768774

769775
rsd_tags = tuple(rsd_tags) if isinstance(rsd_tags, Iterable) else (rsd_tags,)
770776

771777
prelst = ['present'] + ([''] * (tnum + 2)) + map(str, self.property('present', flagged_only = False))
772778
ocrlst = ['occurrence'] + ([''] * (tnum + 2)) + map(str, self.property('occurrence', flagged_only = False))
773779
puplst = ['purity'] + ([''] * (tnum + 2)) + map(str, self.property('purity', flagged_only = False))
774-
rsdmtx = [['rsd_' + rt] + ([''] * (tnum + 2)) + map(str, self.rsd(rt, flagged_only = False)) for rt in rsd_tags]
780+
rsdmtx = [['rsd_' + str(rt.value if isinstance(rt, Tag) else rt)] + ([''] * (tnum + 2)) + map(str, self.rsd(rt, flagged_only = False)) for rt in rsd_tags]
775781
rsdlst = ['rsd_all'] + ([''] * (tnum + 2)) + map(str, self.rsd(flagged_only = False))
776782
flgmtx = [[fn] + ([''] * (tnum + 2)) + map(str, self.flag_values(fn).astype(int)) for fn in self.flag_names]
777783
flglst = ['flags'] + ([''] * (tnum + 2)) + map(str, self.flags.astype(int))
@@ -806,13 +812,13 @@ class mask_peakmatrix:
806812

807813
def __init__(self, pm, *args, **kwargs):
808814
self._pm = pm
809-
self._utags = args
810-
self._ttags = kwargs
811-
if not self._ttags.has_key('override'): self._ttags['override'] = True # default for with statement
815+
self._args = args
816+
self._kwargs = kwargs
817+
if not self._kwargs.has_key('override'): self._kwargs['override'] = True # default for with statement
812818
self._oldmask = dict(zip(pm._pids, pm._mask))
813819

814820
def __enter__(self):
815-
self._pm.mask_tags(*self._utags, **self._ttags)
821+
self._pm.mask_tags(*self._args, **self._kwargs)
816822
return self._pm
817823

818824
def __exit__(self, exc_type, exc_val, exc_tb):
@@ -843,13 +849,13 @@ class unmask_peakmatrix:
843849

844850
def __init__(self, pm, *args, **kwargs):
845851
self._pm = pm
846-
self._utags = args
847-
self._ttags = kwargs
848-
if not self._ttags.has_key('override'): self._ttags['override'] = True # default for with statement
852+
self._args = args
853+
self._kwargs = kwargs
854+
if not self._kwargs.has_key('override'): self._kwargs['override'] = True # default for with statement
849855
self._oldmask = dict(zip(pm._pids, pm._mask))
850856

851857
def __enter__(self):
852-
self._pm.unmask_tags(*self._utags, **self._ttags)
858+
self._pm.unmask_tags(*self._args, **self._kwargs)
853859
return self._pm
854860

855861
def __exit__(self, exc_type, exc_val, exc_tb):

0 commit comments

Comments
 (0)