@@ -121,11 +121,14 @@ chromatograms as they are read from the disk. A simple implementation could look
121121
122122 which can then be used as follows:
123123
124- .. code-block :: output
124+ .. code-block :: python
125125
126126 filename = b " test.mzML"
127127 consumer = MSCallback()
128128 oms.MzMLFile().transform(filename, consumer)
129+
130+ .. code-block :: output
131+
129132 Read a spectrum
130133 Read a spectrum
131134 Read a spectrum
@@ -138,50 +141,61 @@ spectrum or chromatogram is read from disk, the function ``consumeSpectrum`` or
138141``consumeChromatogram`` is called and a specific action is performed. We can
139142use this to implement a simple filtering function for mass spectra:
140143
141- .. code-block :: output
144+ .. code-block :: python
142145
143- class FilteringConsumer:
144- """
145- Consumer that forwards all calls the internal consumer (after
146- filtering)
147- """
146+ import os
147+ import pyopenms as oms
148+ from urllib.request import urlretrieve
148149
149- def __init__(self, consumer, filter_string):
150- self._internal_consumer = consumer
151- self.filter_string = filter_string
150+ gh = " https://raw.githubusercontent.com/OpenMS/pyopenms-docs/master"
151+ urlretrieve(gh + " /src/data/tiny.mzML" , " test.mzML" )
152152
153- def setExperimentalSettings(self, s):
154- self._internal_consumer.setExperimentalSettings(s)
153+ print (" Current Working Directory where all files are stored:" , os.getcwd())
155154
156- def setExpectedSize(self, a, b):
157- self._internal_consumer.setExpectedSize(a, b)
155+ class FilteringConsumer :
156+ """
157+ Consumer that forwards all calls the internal consumer (after
158+ filtering out spectra with less than 'min_spec_size' peaks)
159+ """
158160
159- def consumeChromatogram (self, c ):
160- if c.getNativeID().find( self.filter_string) != -1:
161- self._internal_consumer.consumeChromatogram(c)
161+ def __init__ (self , consumer , min_spec_size = 0 ):
162+ self ._internal_consumer = consumer
163+ self ._min_spec_size = min_spec_size
162164
163- def consumeSpectrum(self, s):
164- if s.getNativeID().find(self.filter_string) != -1:
165- self._internal_consumer.consumeSpectrum(s)
165+ def setExperimentalSettings (self , s ):
166+ self ._internal_consumer.setExperimentalSettings(s)
167+
168+ def setExpectedSize (self , a , b ):
169+ self ._internal_consumer.setExpectedSize(a, b)
170+
171+ def consumeChromatogram (self , c ):
172+ # just forward; do nothing to chromatograms
173+ self ._internal_consumer.consumeChromatogram(c)
174+
175+ def consumeSpectrum (self , s ):
176+ print (" Spec has size: " , s.size())
177+ if s.size() >= self ._min_spec_size:
178+ print (" --> keep it" )
179+ self ._internal_consumer.consumeSpectrum(s)
180+ else :
181+ print (" --> discard it" )
166182
167- ###################################
168- filter_string = "DECOY"
169- inputfile = "in.mzML"
170- outputfile = "out.mzML"
171- ###################################
172183
173- consumer = oms.PlainMSDataWritingConsumer(outputfile)
174- consumer = FilteringConsumer(consumer, filter_string)
184+ min_spec_size = 11 # # we will keep spectra with 11 or more peaks and discard the others
185+ inputfile = " test.mzML"
186+ outputfile = " out.mzML"
175187
176- oms.MzMLFile().transform(inputfile, consumer)
188+ consumer = oms.PlainMSDataWritingConsumer(outputfile)
189+ consumer = FilteringConsumer(consumer, min_spec_size)
177190
191+ oms.MzMLFile().transform(inputfile, consumer)
178192
179- where the spectra and chromatograms are filtered by their native ids . It is
193+ where the spectra are filtered by their size. It is
180194similarly trivial to implement filtering by other attributes. Note how the data
181195are written to disk using the :py:class:`~.PlainMSDataWritingConsumer` which is one of
182196multiple available consumer classes -- this specific class will simply take the
183- spectrum ``s `` or chromatogram ``c `` and write it to disk (the location of the
184- output file is given by the ``outfile `` variable).
197+ :py:class:`~.MSSpectrum` ``s`` or :py:class:`~.MSChromatogram` ``c`` and write it to disk (the location of the
198+ output file is given by the ``outputfile`` variable).
185199
186200Note that this approach is memory efficient in cases where computation should
187201only occur on part of the data or the whole data may not fit into memory.
0 commit comments