Skip to content

Commit 36536fd

Browse files
committed
Implement helper functions for getting the logprobs of stuff
1 parent 974be4b commit 36536fd

File tree

9 files changed

+178
-18
lines changed

9 files changed

+178
-18
lines changed

src/IsoSpec++/cwrapper.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,16 +59,31 @@ double getLightestPeakMassIso(void* iso)
5959
return reinterpret_cast<Iso*>(iso)->getLightestPeakMass();
6060
}
6161

62+
double getLightestPeakLProbIso(void* iso)
63+
{
64+
return reinterpret_cast<Iso*>(iso)->getLightestPeakLProb();
65+
}
66+
6267
double getHeaviestPeakMassIso(void* iso)
6368
{
6469
return reinterpret_cast<Iso*>(iso)->getHeaviestPeakMass();
6570
}
6671

72+
double getHeaviestPeakLProbIso(void* iso)
73+
{
74+
return reinterpret_cast<Iso*>(iso)->getHeaviestPeakLProb();
75+
}
76+
6777
double getMonoisotopicPeakMassIso(void* iso)
6878
{
6979
return reinterpret_cast<Iso*>(iso)->getMonoisotopicPeakMass();
7080
}
7181

82+
double getMonoisotopicPeakLProbIso(void* iso)
83+
{
84+
return reinterpret_cast<Iso*>(iso)->getMonoisotopicPeakLProb();
85+
}
86+
7287
double getModeLProbIso(void* iso)
7388
{
7489
return reinterpret_cast<Iso*>(iso)->getModeLProb();

src/IsoSpec++/cwrapper.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,11 @@ ISOSPEC_C_API void * setupIso(int dimNumber,
4040
ISOSPEC_C_API void * isoFromFasta(const char* fasta, bool use_nominal_masses, bool add_water);
4141

4242
ISOSPEC_C_API double getLightestPeakMassIso(void* iso);
43+
ISOSPEC_C_API double getLightestPeakLProbIso(void* iso);
4344
ISOSPEC_C_API double getHeaviestPeakMassIso(void* iso);
45+
ISOSPEC_C_API double getHeaviestPeakLProbIso(void* iso);
4446
ISOSPEC_C_API double getMonoisotopicPeakMassIso(void* iso);
47+
ISOSPEC_C_API double getMonoisotopicPeakLProbIso(void* iso);
4548
ISOSPEC_C_API double getModeLProbIso(void* iso);
4649
ISOSPEC_C_API double getModeMassIso(void* iso);
4750
ISOSPEC_C_API double getTheoreticalAverageMassIso(void* iso);

src/IsoSpec++/isoSpec++.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,14 @@ double Iso::getLightestPeakMass() const
250250
return mass;
251251
}
252252

253+
double Iso::getLightestPeakLProb() const
254+
{
255+
double lprob = 0.0;
256+
for (int ii = 0; ii < dimNumber; ii++)
257+
lprob += marginals[ii]->getLightestConfLProb();
258+
return lprob;
259+
}
260+
253261
double Iso::getHeaviestPeakMass() const
254262
{
255263
double mass = 0.0;
@@ -258,6 +266,14 @@ double Iso::getHeaviestPeakMass() const
258266
return mass;
259267
}
260268

269+
double Iso::getHeaviestPeakLProb() const
270+
{
271+
double lprob = 0.0;
272+
for (int ii = 0; ii < dimNumber; ii++)
273+
lprob += marginals[ii]->getHeaviestConfLProb();
274+
return lprob;
275+
}
276+
261277
double Iso::getMonoisotopicPeakMass() const
262278
{
263279
double mass = 0.0;
@@ -266,6 +282,14 @@ double Iso::getMonoisotopicPeakMass() const
266282
return mass;
267283
}
268284

285+
double Iso::getMonoisotopicPeakLProb() const
286+
{
287+
double lprob = 0.0;
288+
for (int ii = 0; ii < dimNumber; ii++)
289+
lprob += marginals[ii]->getMonoisotopicConfLProb();
290+
return lprob;
291+
}
292+
269293
double Iso::getUnlikeliestPeakLProb() const
270294
{
271295
double lprob = 0.0;

src/IsoSpec++/isoSpec++.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,16 +135,29 @@ class ISOSPEC_EXPORT_SYMBOL Iso {
135135
//! Get the mass of the lightest peak in the isotopic distribution.
136136
double getLightestPeakMass() const;
137137

138+
//! Get the log-probability of the lightest peak in the isotopic distribution.
139+
double getLightestPeakLProb() const;
140+
138141
//! Get the mass of the heaviest peak in the isotopic distribution.
139142
double getHeaviestPeakMass() const;
140143

144+
//! Get the log-probability of the heaviest peak in the isotopic distribution.
145+
double getHeaviestPeakLProb() const;
146+
141147
/*!
142148
Get the mass of the monoisotopic peak in the isotopic distribution. Monoisotopc molecule is defined as
143149
consisting only of the most frequent isotopes of each element. These are often (but not always) the
144150
lightest ones, making this often (but again, not always) equal to getLightestPeakMass()
145151
*/
146152
double getMonoisotopicPeakMass() const;
147153

154+
/*!
155+
Get the log-probability of the monoisotopic peak in the isotopic distribution. Monoisotopc molecule is defined as
156+
consisting only of the most frequent isotopes of each element. These are often (but not always) the
157+
lightest ones. Making this often (but again, not always) equal to getLightestPeakLProb()
158+
*/
159+
double getMonoisotopicPeakLProb() const;
160+
148161
//! Get the log-probability of the mode-configuration (if there are many modes, they share this value).
149162
double getModeLProb() const;
150163

src/IsoSpec++/marginalTrek++.cpp

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,22 @@ double Marginal::getLightestConfMass() const
256256
return ret_mass*atomCnt;
257257
}
258258

259+
double Marginal::getLightestConfLProb() const
260+
{
261+
std::unique_ptr<int[]> lightest_conf = std::make_unique<int[]>(isotopeNo);
262+
size_t lightest_idx = 0;
263+
double min_mass = std::numeric_limits<double>::infinity();
264+
for(unsigned int ii = 0; ii < isotopeNo; ii++)
265+
if( min_mass > atom_masses[ii] )
266+
{
267+
min_mass = atom_masses[ii];
268+
lightest_idx = ii;
269+
}
270+
for(unsigned int ii = 0; ii < isotopeNo; ii++)
271+
lightest_conf[ii] = (ii == lightest_idx ? atomCnt : 0);
272+
return logProb(lightest_conf.get());
273+
}
274+
259275
double Marginal::getHeaviestConfMass() const
260276
{
261277
double ret_mass = 0.0;
@@ -265,6 +281,22 @@ double Marginal::getHeaviestConfMass() const
265281
return ret_mass*atomCnt;
266282
}
267283

284+
double Marginal::getHeaviestConfLProb() const
285+
{
286+
std::unique_ptr<int[]> heaviest_conf = std::make_unique<int[]>(isotopeNo);
287+
size_t heaviest_idx = 0;
288+
double max_mass = -std::numeric_limits<double>::infinity();
289+
for(unsigned int ii = 0; ii < isotopeNo; ii++)
290+
if( max_mass < atom_masses[ii] )
291+
{
292+
max_mass = atom_masses[ii];
293+
heaviest_idx = ii;
294+
}
295+
for(unsigned int ii = 0; ii < isotopeNo; ii++)
296+
heaviest_conf[ii] = (ii == heaviest_idx ? atomCnt : 0);
297+
return logProb(heaviest_conf.get());
298+
}
299+
268300
size_t Marginal::getMonoisotopicAtomIndex() const
269301
{
270302
double found_prob = -std::numeric_limits<double>::infinity();
@@ -284,6 +316,15 @@ double Marginal::getMonoisotopicConfMass() const
284316
return atom_masses[idx]*atomCnt;
285317
}
286318

319+
double Marginal::getMonoisotopicConfLProb() const
320+
{
321+
size_t idx = getMonoisotopicAtomIndex();
322+
std::unique_ptr<int[]> monoisotopic_conf = std::make_unique<int[]>(isotopeNo);
323+
for(unsigned int ii = 0; ii < isotopeNo; ii++)
324+
monoisotopic_conf[ii] = (ii == idx ? atomCnt : 0);
325+
return logProb(monoisotopic_conf.get());
326+
}
327+
287328
double Marginal::getAtomAverageMass() const
288329
{
289330
double ret = 0.0;

src/IsoSpec++/marginalTrek++.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,19 +96,28 @@ class Marginal
9696
*/
9797
double getLightestConfMass() const;
9898

99+
//! Get the log-probability of the lightest subisotopologue.
100+
double getLightestConfLProb() const;
101+
99102
//! Get the mass of the heaviest subisotopologue.
100103
/*! This is trivially obtained by considering all atomNo atoms to be the heaviest isotope possible.
101104
\return The mass of the heaviest subisotopologue.
102105
*/
103106
double getHeaviestConfMass() const;
104107

108+
//! Get the log-probability of the heaviest subisotopologue.
109+
double getHeaviestConfLProb() const;
110+
105111
//! Get the mass of the monoisotopic subisotopologue.
106112
/*! The monoisotopic subisotopologue is defined as the molecule consiting only
107113
of the most likely isotope. This is frequently the lightest subisotopologue,
108114
making this frequently (but not always) equal to getLightestConfMass()
109115
*/
110116
double getMonoisotopicConfMass() const;
111117

118+
//! Get the log-probability of the monoisotopic subisotopologue.
119+
double getMonoisotopicConfLProb() const;
120+
112121
//! Get the index of the monoisotopic (most probable) atom.
113122
/*!
114123
\return The index of the most probable isotope of the element.

src/IsoSpecPy/IsoSpecPy.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,14 +219,26 @@ def getLightestPeakMass(self):
219219
"""Get the lightest peak in the isotopic distribution."""
220220
return self.ffi.getLightestPeakMassIso(self.iso)
221221

222+
def getLightestPeakLProb(self):
223+
"""Get the log probability of the lightest peak in the isotopic distribution."""
224+
return self.ffi.getLightestPeakLProbIso(self.iso)
225+
222226
def getHeaviestPeakMass(self):
223227
"""Get the heaviest peak in the isotopic distribution."""
224228
return self.ffi.getHeaviestPeakMassIso(self.iso)
225229

230+
def getHeaviestPeakLProb(self):
231+
"""Get the log probability of the heaviest peak in the isotopic distribution."""
232+
return self.ffi.getHeaviestPeakLProbIso(self.iso)
233+
226234
def getMonoisotopicPeakMass(self):
227235
"""Get the monoisotopic mass of the peak."""
228236
return self.ffi.getMonoisotopicPeakMassIso(self.iso)
229237

238+
def getMonoisotopicPeakLProb(self):
239+
"""Get the log probability of the monoisotopic peak in the isotopic distribution."""
240+
return self.ffi.getMonoisotopicPeakLProbIso(self.iso)
241+
230242
def getModeLProb(self):
231243
"""Get the log probability of the most probable peak(s) in the isotopic distribution."""
232244
return self.ffi.getModeLProbIso(self.iso)

src/IsoSpecPy/isoFFI.py

Lines changed: 45 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,16 @@
33
import sys
44
import glob
55
from pathlib import Path
6+
67
if False:
78
import IsoSpecCppPy
89

10+
911
class IsoFFI:
1012
def __init__(self):
1113
self.ffi = cffi.FFI()
12-
self.ffi.cdef('''
14+
self.ffi.cdef(
15+
"""
1316
void * setupIso(int dimNumber,
1417
const int* isotopeNumbers,
1518
const int* atomCounts,
@@ -19,8 +22,11 @@ def __init__(self):
1922
void * isoFromFasta(const char* fasta, bool use_nominal_masses, bool add_water);
2023
2124
double getLightestPeakMassIso(void* iso);
25+
double getLightestPeakLProbIso(void* iso);
2226
double getHeaviestPeakMassIso(void* iso);
27+
double getHeaviestPeakLProbIso(void* iso);
2328
double getMonoisotopicPeakMassIso(void* iso);
29+
double getMonoisotopicPeakLProbIso(void* iso);
2430
double getModeLProbIso(void* iso);
2531
double getModeMassIso(void* iso);
2632
double getTheoreticalAverageMassIso(void* iso);
@@ -157,39 +163,47 @@ def __init__(self):
157163
extern const char* elem_table_element[NUMBER_OF_ISOTOPIC_ENTRIES];
158164
extern const char* elem_table_symbol[NUMBER_OF_ISOTOPIC_ENTRIES];
159165
extern const bool elem_table_Radioactive[NUMBER_OF_ISOTOPIC_ENTRIES];
160-
''');
166+
"""
167+
)
161168

162169
mod_dir = Path(__file__).resolve().parent
163170

164-
if (mod_dir.parent / 'setup.py').exists():
165-
raise ImportError('''Attempted to load IsoSpecPy module from its build directory. This usually
171+
if (mod_dir.parent / "setup.py").exists():
172+
raise ImportError(
173+
"""Attempted to load IsoSpecPy module from its build directory. This usually
166174
won't work and is generally a Bad Idea. Please cd somewhere else, or, if you're really
167-
sure you want to do that, edit the source and disable this check.''')
175+
sure you want to do that, edit the source and disable this check."""
176+
)
168177

169-
libnames = ['IsoSpecCppPy*', 'IsoSpec++*']
170-
libprefix = ['', 'lib', 'Lib']
171-
extension = ['.so', '.dylib', '.dll']
178+
libnames = ["IsoSpecCppPy*", "IsoSpec++*"]
179+
libprefix = ["", "lib", "Lib"]
180+
extension = [".so", ".dylib", ".dll"]
172181
try:
173-
if platform.system() == 'Linux':
174-
extension = ['.so', 'pyd']
175-
elif platform.system() == 'Windows':
176-
extension = ['.dll', '.pyd']
182+
if platform.system() == "Linux":
183+
extension = [".so", "pyd"]
184+
elif platform.system() == "Windows":
185+
extension = [".dll", ".pyd"]
177186
except:
178187
pass
179188

180-
prebuilt = ['', 'prebuilt-']
189+
prebuilt = ["", "prebuilt-"]
181190

182191
def cprod(ll1, ll2):
183192
res = []
184193
for l1 in ll1:
185194
for l2 in ll2:
186-
res.append(l1+l2)
195+
res.append(l1 + l2)
187196
return res
188197

189198
paths_to_check = cprod(prebuilt, cprod(libprefix, cprod(libnames, extension)))
190199
dpc = []
191200

192-
for dirpath in [mod_dir, mod_dir.parent, mod_dir.parent / 'bin', mod_dir.parent / 'lib']:
201+
for dirpath in [
202+
mod_dir,
203+
mod_dir.parent,
204+
mod_dir.parent / "bin",
205+
mod_dir.parent / "lib",
206+
]:
193207
dpc.extend([dirpath / p for p in paths_to_check])
194208

195209
paths_to_check = dpc
@@ -200,7 +214,10 @@ def cprod(ll1, ll2):
200214
paths_to_check = expanded
201215
try:
202216
import importlib
203-
paths_to_check.insert(0, Path(importlib.util.find_spec("IsoSpecCppPy").origin))
217+
218+
paths_to_check.insert(
219+
0, Path(importlib.util.find_spec("IsoSpecCppPy").origin)
220+
)
204221
except (ImportError, AttributeError):
205222
pass
206223

@@ -213,11 +230,21 @@ def cprod(ll1, ll2):
213230
self.libpath = libpath
214231
break
215232
except (IndexError, OSError) as e:
216-
errmsg = "Load libIsoSpec++.so, tried: " + libpath + '\n' + "Got error: " + str(type(e)) + ": " + str(e)
233+
errmsg = (
234+
"Load libIsoSpec++.so, tried: "
235+
+ libpath
236+
+ "\n"
237+
+ "Got error: "
238+
+ str(type(e))
239+
+ ": "
240+
+ str(e)
241+
)
217242
errors.append(errmsg)
218243

219244
if self.clib == None:
220-
raise ImportError("Cannot find or load the C++ part of the library\n" + '\n'.join(errors))
245+
raise ImportError(
246+
"Cannot find or load the C++ part of the library\n" + "\n".join(errors)
247+
)
221248

222249

223250
isoFFI = IsoFFI() # This is done while including the module

tests/Python/test_iface.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,21 @@ def test_get_monoisotopic_mass():
167167
assert math.isclose(68885.198515667, mono_mass, rel_tol=1e-9)
168168
print("OK!")
169169

170+
def test_lightest_peak():
171+
print("Checking lightest peak...", end=" ")
172+
formula = "C10B10H10Sn1"
173+
iso = IsoSpecPy.Iso(formula=formula)
174+
lightest_mass = iso.getLightestPeakMass()
175+
lightest_lprob = iso.getLightestPeakLProb()
176+
iso_threshold = IsoSpecPy.IsoThreshold(0.0, formula=formula)
177+
masses = list(iso_threshold.masses)
178+
probs = list(iso_threshold.probs)
179+
min_index = masses.index(min(masses))
180+
print(lightest_mass, lightest_lprob, end=" ")
181+
assert lightest_mass == masses[min_index]
182+
assert math.isclose(lightest_lprob, math.log(probs[min_index]), rel_tol=1e-9)
183+
print("OK!")
184+
170185
if __name__ == "__main__":
171186
test_wasserstein_distance()
172187
test_normalization()
@@ -181,3 +196,4 @@ def test_get_monoisotopic_mass():
181196
test_empiric_variance()
182197
test_empiric_stddev()
183198
test_get_monoisotopic_mass()
199+
test_lightest_peak()

0 commit comments

Comments
 (0)