Skip to content

Commit 71b8e05

Browse files
author
Kai Dührkop
committed
Merge branch 'preprocessing-fixes' into 'master'
Preprocessing fixes. See merge request bright-giant/sirius/sirius-frontend!398
2 parents e761dcc + 2186e03 commit 71b8e05

File tree

37 files changed

+1344
-472
lines changed

37 files changed

+1344
-472
lines changed

chemistry_base/src/main/java/de/unijena/bioinf/ChemistryBase/chem/PeriodicTable.java

Lines changed: 53 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -245,9 +245,9 @@ private void addDefaultIons() {
245245
this.NEUTRAL_IONIZATION_DUMMY = new IonMode(0, 1, "NEUTRAL_IONIZATION", MolecularFormula.emptyFormula());
246246
PROTONATION = new IonMode(1, "[M + H]+", MolecularFormula.parseOrThrow("H"));
247247
DEPROTONATION = new IonMode(-1, "[M - H]-", MolecularFormula.parseOrThrow("H").negate());
248-
this.UNKNOWN_NEGATIVE_IONTYPE = new PrecursorIonType(NEGATIVE_IONIZATION, MolecularFormula.emptyFormula(), MolecularFormula.emptyFormula(),1, PrecursorIonType.SPECIAL_TYPES.UNKNOWN);
249-
this.UNKNOWN_POSITIVE_IONTYPE = new PrecursorIonType(POSITIVE_IONIZATION, MolecularFormula.emptyFormula(), MolecularFormula.emptyFormula(), 1, PrecursorIonType.SPECIAL_TYPES.UNKNOWN);
250-
this.UNKNOWN_IONTYPE = new PrecursorIonType(UNKNOWN_IONIZATION, MolecularFormula.emptyFormula(), MolecularFormula.emptyFormula(), 1, PrecursorIonType.SPECIAL_TYPES.UNKNOWN);
248+
this.UNKNOWN_NEGATIVE_IONTYPE = new PrecursorIonType(NEGATIVE_IONIZATION, MolecularFormula.emptyFormula(), MolecularFormula.emptyFormula(),1,0, PrecursorIonType.SPECIAL_TYPES.UNKNOWN);
249+
this.UNKNOWN_POSITIVE_IONTYPE = new PrecursorIonType(POSITIVE_IONIZATION, MolecularFormula.emptyFormula(), MolecularFormula.emptyFormula(), 1,0, PrecursorIonType.SPECIAL_TYPES.UNKNOWN);
250+
this.UNKNOWN_IONTYPE = new PrecursorIonType(UNKNOWN_IONIZATION, MolecularFormula.emptyFormula(), MolecularFormula.emptyFormula(), 1,0, PrecursorIonType.SPECIAL_TYPES.UNKNOWN);
251251

252252
this.POSITIVE_ION_MODES = new IonMode[]{
253253
new IonMode(1, "[M + K]+", MolecularFormula.parseOrThrow("K")),
@@ -261,8 +261,8 @@ private void addDefaultIons() {
261261
new IonMode(-1, "[M + Br]-", MolecularFormula.parseOrThrow("Br")),
262262
DEPROTONATION
263263
};
264-
this.INTRINSICALLY_CHARGED_NEGATIVE = new PrecursorIonType(DEPROTONATION, MolecularFormula.emptyFormula(), MolecularFormula.emptyFormula(), 1, PrecursorIonType.SPECIAL_TYPES.INTRINSICAL_CHARGED);
265-
this.INTRINSICALLY_CHARGED_POSITIVE = new PrecursorIonType(PROTONATION, MolecularFormula.emptyFormula(), MolecularFormula.emptyFormula(), 1, PrecursorIonType.SPECIAL_TYPES.INTRINSICAL_CHARGED);
264+
this.INTRINSICALLY_CHARGED_NEGATIVE = new PrecursorIonType(DEPROTONATION, MolecularFormula.emptyFormula(), MolecularFormula.emptyFormula(), 1,0, PrecursorIonType.SPECIAL_TYPES.INTRINSICAL_CHARGED);
265+
this.INTRINSICALLY_CHARGED_POSITIVE = new PrecursorIonType(PROTONATION, MolecularFormula.emptyFormula(), MolecularFormula.emptyFormula(), 1,0, PrecursorIonType.SPECIAL_TYPES.INTRINSICAL_CHARGED);
266266
loadKnownIonTypes();
267267
}
268268

@@ -358,6 +358,9 @@ private PrecursorIonType parseIonType(String name) throws UnknownElementExceptio
358358
final ArrayList<MolecularFormula> adducts = new ArrayList<>();
359359
final ArrayList<MolecularFormula> insourceFrags = new ArrayList<MolecularFormula>();
360360

361+
int isotopes = 0;
362+
int fpos = 0;
363+
361364
boolean isAdd = true;
362365
int number = 1;
363366

@@ -389,58 +392,72 @@ private PrecursorIonType parseIonType(String name) throws UnknownElementExceptio
389392
isAdd = false;
390393
break;
391394
}
392-
case 'M':
393-
if (token.length() <= 1 || !(Character.isDigit(token.charAt(1)) || Character.isAlphabetic(token.charAt(1)))) {
394-
if (number != 1) {
395-
throw new IllegalArgumentException("Do not support multimeres: '" + name + "'");
396-
} else if (!isAdd) {
397-
throw new IllegalArgumentException("Invalid format of ion type: '" + name + "'");
398-
} else break;
399-
}
400395
default: {
401396
if (IONTYPE_NUM_PATTERN.matcher(token).find()) {
402397
// is a number
403398
number = Integer.parseInt(token);
404399
} else {
400+
final int prefixNumber;
405401
final String formulaString;
406402
final Matcher numm = IONTYPE_NUM_PATTERN_LEFT.matcher(token);
407403
if (numm.find()) {
408-
if (number != 1) {
409-
throw new IllegalArgumentException("Do not support nested groups in formula string: '" + name + "'");
410-
}
411-
number = Integer.parseInt(numm.group());
404+
prefixNumber = Integer.parseInt(numm.group());
405+
number = prefixNumber; // not sure about this...?
412406
formulaString = token.substring(numm.group().length());
413407
} else {
408+
prefixNumber=1;
414409
formulaString = token;
415410
}
416-
// should be a molecular formula
417-
MolecularFormula f;
418-
if (replacement.containsKey(formulaString.toUpperCase())) {
419-
f = MolecularFormula.parse(replacement.get(formulaString.toUpperCase()));
411+
++fpos;
412+
// could be isotope count
413+
if (formulaString.equals("i")) {
414+
if (!isAdd)
415+
throw new IllegalArgumentException("Cannot subtract isotopes from precursor ion type.");
416+
isotopes += number;
417+
number = 1;
418+
isAdd = true;
419+
} else if (formulaString.equals("M")) {
420+
// we already checked multimere count above
421+
if (!isAdd) throw new IllegalArgumentException("Cannot subtract neutral formula.");
422+
if (fpos>1) throw new IllegalArgumentException("Neutral formula M should be specified first in adduct string");
423+
isAdd=true; number=1;
420424
} else {
421-
f = MolecularFormula.parse(formulaString);
422-
}
423-
if (number != 1) {
424-
f = f.multiply(number);
425-
}
425+
// should be a molecular formula
426+
MolecularFormula f;
427+
if (replacement.containsKey(formulaString.toUpperCase())) {
428+
f = MolecularFormula.parse(replacement.get(formulaString.toUpperCase())).multiply(number);;
429+
} else {
430+
f = MolecularFormula.parse(formulaString);
431+
if (f.atomCount()==1) {
432+
f=f.multiply(number);
433+
} else if (prefixNumber != 1) {
434+
throw new IllegalArgumentException("Do not support nested groups in formula string: '" + name + "'");
435+
}
436+
}
426437

427-
possibleNewIonTypes.add(f);
438+
possibleNewIonTypes.add(f);
428439

429-
if (isAdd) {
430-
adducts.add(f);
431-
} else {
432-
insourceFrags.add(f);
440+
if (isAdd) {
441+
adducts.add(f);
442+
} else {
443+
insourceFrags.add(f);
444+
}
445+
isAdd = true;
446+
number = 1;
433447
}
434-
isAdd = true;
435-
number = 1;
436-
437448
}
438449
}
439450
}
440451
}
441452

442453
final int charge = (isAdd ? 1 : -1);
443454

455+
/*
456+
Given an ion type [M + X + Y]+, we cannot tell from the string alone which of X and Y is the adduct and which one carries the charge. This is because we distinguish between modifications that carry the charge and cannot be removed but only
457+
swapped with another adduct (e.g. Na+, K+, H+...) and modifications that can easily fragment off (e.g. NH3). Probably, the cleanest way would be to write adduct formulas like this:
458+
[M + X + Y+] but this is just not the convention. So we instead check if we find X or Y in our predefined list of ions.
459+
*/
460+
444461
// find ionization mode
445462
Ionization usedIonMode = null;
446463
final IonMode[] addModes = Arrays.stream((charge > 0) ? POSITIVE_ION_MODES : NEGATIVE_ION_MODES)
@@ -536,7 +553,7 @@ private PrecursorIonType parseIonType(String name) throws UnknownElementExceptio
536553
throw new RuntimeException("Cannot parse " + name);
537554
} else return new
538555

539-
PrecursorIonType(usedIonMode, insource, adduct, multimereCount, PrecursorIonType.SPECIAL_TYPES.REGULAR);
556+
PrecursorIonType(usedIonMode, insource, adduct, multimereCount,isotopes, PrecursorIonType.SPECIAL_TYPES.REGULAR);
540557
}
541558

542559

@@ -571,7 +588,7 @@ public PrecursorIonType getPrecursorIonTypeFromIonization(Ionization ion) {
571588
if (!i.isIntrinsicalCharged() && i.getIonization().equals(ion) && i.getAdduct().atomCount() == 0 && i.getInSourceFragmentation().atomCount() == 0 && !i.isMultimere())
572589
return i;
573590
}
574-
return new PrecursorIonType(ion, MolecularFormula.emptyFormula(), MolecularFormula.emptyFormula(), 1, PrecursorIonType.SPECIAL_TYPES.REGULAR);
591+
return new PrecursorIonType(ion, MolecularFormula.emptyFormula(), MolecularFormula.emptyFormula(), 1, 0,PrecursorIonType.SPECIAL_TYPES.REGULAR);
575592
}
576593

577594
public PrecursorIonType unknownPositivePrecursorIonType() {

chemistry_base/src/main/java/de/unijena/bioinf/ChemistryBase/chem/PrecursorIonType.java

Lines changed: 44 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,18 @@ protected enum SPECIAL_TYPES {
103103

104104
private final byte multimere;
105105

106+
/**
107+
* The "isotopic" shift (integer mass delta between the isotopologue with lowest mass and the isotopologue represented
108+
* by this precursor ion type)
109+
*/
110+
private final byte isotopes;
111+
112+
public boolean isIsotope() {
113+
return isotopes>0;
114+
}
115+
public int getIsotopicShift() {
116+
return isotopes;
117+
}
106118

107119
public boolean hasNeitherAdductNorInsource() {
108120
return inSourceFragmentation.isEmpty() && adduct.isEmpty();
@@ -148,10 +160,13 @@ public static PrecursorIonType unknownNegative() {
148160
return PeriodicTable.getInstance().unknownNegativePrecursorIonType();
149161
}
150162

151-
PrecursorIonType(Ionization ion, MolecularFormula insource, MolecularFormula adduct, int multimere, final SPECIAL_TYPES special) {
163+
PrecursorIonType(Ionization ion, MolecularFormula insource, MolecularFormula adduct, int multimere, int isotopes, final SPECIAL_TYPES special) {
152164
this.ionization = ion;
153165
this.inSourceFragmentation = insource == null ? MolecularFormula.emptyFormula() : insource;
154166
this.adduct = adduct == null ? MolecularFormula.emptyFormula() : adduct;
167+
if (isotopes<0) throw new IllegalArgumentException("Isotopic shift has to be a positive number.");
168+
if (isotopes>=Byte.MAX_VALUE) throw new IllegalArgumentException("Isotopic shift too large.");
169+
this.isotopes = (byte)isotopes;
155170
if (!this.inSourceFragmentation.isAllPositiveOrZero())
156171
throw new IllegalArgumentException("Negative element amounts are not allowed in in-source fragments.");
157172
if (!this.adduct.isAllPositiveOrZero())
@@ -200,7 +215,20 @@ public String substituteName(MolecularFormula neutralFormula) {
200215
}
201216

202217
private String multimereStr() {
203-
return multimere == 1 ? "" : String.valueOf(multimere);
218+
return multimere == 1 ? "M" : String.valueOf(multimere)+"M";
219+
}
220+
private String isoString() {
221+
return isotopes==0 ? "" : " + " + isotopes + "i";
222+
}
223+
private String chargeString() {
224+
int c = getCharge();
225+
if (c > 0) {
226+
if (c==1) return "+";
227+
else return c + "+";
228+
} else {
229+
if (c==-1) return "-";
230+
else return c + "-";
231+
}
204232
}
205233

206234
public boolean equals(PrecursorIonType other) {
@@ -216,23 +244,26 @@ public boolean equals(Object other) {
216244
}
217245

218246
public PrecursorIonType withoutAdduct() {
219-
return new PrecursorIonType(getIonization(), inSourceFragmentation, MolecularFormula.emptyFormula(), multimere, special);
247+
return new PrecursorIonType(getIonization(), inSourceFragmentation, MolecularFormula.emptyFormula(), multimere, isotopes, special);
220248
}
221249

222250
public PrecursorIonType withMultimere(int count) {
223-
return new PrecursorIonType(getIonization(), inSourceFragmentation, MolecularFormula.emptyFormula(), count, special);
251+
return new PrecursorIonType(getIonization(), inSourceFragmentation, MolecularFormula.emptyFormula(), count,isotopes, special);
252+
}
253+
public PrecursorIonType withIsotopes(int count) {
254+
return new PrecursorIonType(getIonization(), inSourceFragmentation, MolecularFormula.emptyFormula(), multimere,count, special);
224255
}
225256

226257
public PrecursorIonType withoutInsource() {
227-
return new PrecursorIonType(getIonization(), MolecularFormula.emptyFormula(), adduct, multimere, special);
258+
return new PrecursorIonType(getIonization(), MolecularFormula.emptyFormula(), adduct, multimere,isotopes, special);
228259
}
229260

230261
public PrecursorIonType substituteAdduct(MolecularFormula newAdduct) {
231-
return new PrecursorIonType(getIonization(), inSourceFragmentation, newAdduct, multimere, special);
262+
return new PrecursorIonType(getIonization(), inSourceFragmentation, newAdduct, multimere,isotopes, special);
232263
}
233264

234265
public PrecursorIonType substituteInsource(MolecularFormula newInsource) {
235-
return new PrecursorIonType(getIonization(), newInsource, adduct, multimere, special);
266+
return new PrecursorIonType(getIonization(), newInsource, adduct, multimere,isotopes, special);
236267
}
237268

238269
@Override
@@ -247,13 +278,14 @@ public String toString() {
247278

248279
private String formatToString() {
249280
if (isIonizationUnknown()) {
250-
return ionization.toString();
281+
//return ionization.toString();
282+
return "[" + multimereStr() + isoString() + " + ?]" + chargeString();
251283
}
252284
if (isIntrinsicalCharged()) {
253-
return "[" + multimereStr() + "M]" + (getCharge() > 0 ? "+" : "-");
285+
return "[" + multimereStr() +isoString() + "M]" + chargeString();
254286
}
255287
final StringBuilder buf = new StringBuilder(128);
256-
buf.append("[").append(multimereStr()).append("M");
288+
buf.append("[").append(multimereStr());
257289
if (!inSourceFragmentation.isEmpty()) {
258290
buf.append(" - ");
259291
buf.append(inSourceFragmentation);
@@ -271,13 +303,9 @@ private String formatToString() {
271303
buf.append(ionization.getAtoms().negate().toString());
272304
}
273305
}
306+
buf.append(isoString());
274307
buf.append("]");
275-
if (ionization.getCharge() == 1) buf.append("+");
276-
else if (ionization.getCharge() == -1) buf.append("-");
277-
else {
278-
buf.append(getCharge());
279-
buf.append(getCharge() > 0 ? "+" : "-");
280-
}
308+
buf.append(chargeString());
281309
return buf.toString();
282310
}
283311

chemistry_base/src/main/resources/de.unijena.bioinf.ms.defaults/chemistry_base.auto.config

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ PossibleAdductSwitches = [M+Na]+:[M+H]+,[M+K]+:[M+H]+,[M+Cl]-:[M-H]-
3636
# Enforced ion modes that are always considered.
3737
AdductSettings.enforced = ,
3838
# Detectable ion modes which are only considered if there is an indication in the MS1 scan (e.g. correct mass delta).
39-
AdductSettings.detectable = [M+H]+,[M+K]+,[M+Na]+,[M+H-H2O]+,[M+H-H4O2]+,[M+NH3+H]+,[M+FA+H]+,[M+ACN+H]+,[2M+H]+,[2M+K]+,[2M+Na]+,[M-H]-,[M+Cl]-,[M+Br]-,[M-H2O-H]-,[M+Na-2H]-,[M+CH2O2-H]-,[M+C2H4O2-H]-,[M+H2O-H]-,[M-H3N-H]-,[M-CO2-H]-,[M-CH2O3-H]-,[M-CH3-H]-,[2M+H]-,[2M+Cl]-,[2M+Br]-
39+
AdductSettings.detectable = [M+H]+,[M+K]+,[M+Na]+,[M+H-H2O]+,[M+H-H4O2]+,[M+NH3+H]+,[M+FA+H]+,[M+ACN+H]+,[2M+H]+,[2M+K]+,[2M+Na]+,[M-H]-,[M+Cl]-,[M+Br]-,[M-H2O-H]-,[M+Na-2H]-,[M+CH2O2-H]-,[M+C2H4O2-H]-,[M+H2O-H]-,[M-H3N-H]-,[M-CO2-H]-,[M-CH2O3-H]-,[M-CH3-H]-,[2M-H]-,[2M+Cl]-,[2M+Br]-
4040
# Fallback ion modes which are considered if the auto detection did not find any indication for an ion mode.
4141
AdductSettings.fallback = [M+H]+,[M-H]-,[M+Na]+,[M+K]+
4242
# Adducts specified in the input file are used as is independent of what enforced/detectable/fallback adducts are set.
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
ZodiacEpochs=de.unijena.bioinf.GibbsSampling.properties.ZodiacEpochs
21
ZodiacNumberOfConsideredCandidatesAt300Mz=de.unijena.bioinf.GibbsSampling.properties.ZodiacNumberOfConsideredCandidatesAt300Mz
2+
ZodiacEpochs=de.unijena.bioinf.GibbsSampling.properties.ZodiacEpochs
33
ZodiacRunInTwoSteps=de.unijena.bioinf.GibbsSampling.properties.ZodiacRunInTwoSteps
44
ZodiacRatioOfConsideredCandidatesPerIonization=de.unijena.bioinf.GibbsSampling.properties.ZodiacRatioOfConsideredCandidatesPerIonization
5-
ZodiacEdgeFilterThresholds=de.unijena.bioinf.GibbsSampling.properties.ZodiacEdgeFilterThresholds
65
ZodiacAnalogueNodes=de.unijena.bioinf.GibbsSampling.properties.ZodiacAnalogueNodes
6+
ZodiacEdgeFilterThresholds=de.unijena.bioinf.GibbsSampling.properties.ZodiacEdgeFilterThresholds
77
ZodiacClusterCompounds=de.unijena.bioinf.GibbsSampling.properties.ZodiacClusterCompounds
88
ZodiacLibraryScoring=de.unijena.bioinf.GibbsSampling.properties.ZodiacLibraryScoring
99
ZodiacNumberOfConsideredCandidatesAt800Mz=de.unijena.bioinf.GibbsSampling.properties.ZodiacNumberOfConsideredCandidatesAt800Mz

0 commit comments

Comments
 (0)