Skip to content

Commit e7f2b17

Browse files
committed
Add new MS protocol #428
1 parent 19226d3 commit e7f2b17

File tree

2 files changed

+162
-0
lines changed

2 files changed

+162
-0
lines changed
758 KB
Binary file not shown.

examples/protocols/4/protocol_4.py

Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
import matplotlib.pyplot as plt
2+
import numpy as np
3+
import io, sys, types
4+
from pathlib import Path
5+
import pandas as pd
6+
import matplotlib.pyplot as plt
7+
from IPython.display import display
8+
9+
# Fix for pandas compatibility with older pickle files
10+
mod = types.ModuleType('pandas.core.indexes.numeric')
11+
mod.Int64Index = pd.Index
12+
mod.UInt64Index = pd.Index
13+
mod.Float64Index = pd.Index
14+
sys.modules['pandas.core.indexes.numeric'] = mod
15+
16+
from mzmlripper import process_mzml_file
17+
# from ms_json import process_mzml_json
18+
# from recursive_ma.ms_tree import process, identify_parents
19+
# from recursive_ma import build_tree, estimate_MA
20+
# from recursive_ma.estimator import MAEstimator
21+
from rdkit import Chem
22+
from rdkit.Chem import Draw
23+
import assemblytheorytools as att
24+
25+
if __name__ == "__main__":
26+
SMILES = 'COC(=O)C(NC(=O)OC(C)(C)C)P(=O)(OC)OC'
27+
MW = 297.2
28+
MA_REFERENCE = 14
29+
TARGET_PARENT_MZ = 296.26
30+
31+
print(f"Compound: {SMILES}")
32+
print(f"Molecular Weight: {MW}")
33+
print(f"Reference MA: {MA_REFERENCE}")
34+
print(f"Target Parent m/z: {TARGET_PARENT_MZ}")
35+
36+
mol = Chem.MolFromSmiles(SMILES)
37+
display(Draw.MolToImage(mol, size=(320, 220)))
38+
39+
# Parse the mzML file
40+
mzml_file = Path('Sample_#15_Stepped_MS3.mzML')
41+
output_dir = Path('temp_mzml_output')
42+
output_dir.mkdir(exist_ok=True)
43+
44+
process_mzml_file(
45+
filename=str(mzml_file),
46+
out_dir=str(output_dir),
47+
rt_units='min',
48+
int_threshold=1000,
49+
relative=False
50+
)
51+
52+
json_file = output_dir / f"ripper_{mzml_file.stem}.json"
53+
54+
# # Process JSON to structured DataFrames
55+
# print("Converting JSON to DataFrames...")
56+
# raw_data = process_mzml_json(json_file)
57+
#
58+
# print(f"✓ MS levels: {list(raw_data.keys())}")
59+
# for level, df in raw_data.items():
60+
# print(f" MS{level}: {len(df):,} peaks")
61+
#
62+
# # Processing parameters (matching original pickle files)
63+
# MIN_REL_INTENSITY = 0.01
64+
# MAX_NUM_PEAKS = 20
65+
# MASS_TOL = 0.05
66+
# MS_N_DIGITS = 3
67+
# MIN_ABS_INTENSITY = {
68+
# 1: 10 ** 6,
69+
# 2: 10 ** 4,
70+
# 3: 10 ** 3
71+
# }
72+
# processed_data = process(
73+
# sample=raw_data,
74+
# max_num_peaks=MAX_NUM_PEAKS,
75+
# min_abs_intensity=MIN_ABS_INTENSITY,
76+
# min_rel_intensity=MIN_REL_INTENSITY,
77+
# n_digits=MS_N_DIGITS
78+
# )
79+
#
80+
# for level, df in processed_data.items():
81+
# print(f" MS{level}: {len(df):,} peaks (filtered from {len(raw_data[level]):,})")
82+
#
83+
# # Link parent-child relationships
84+
# rooted_data = identify_parents(
85+
# processed_data,
86+
# mass_tol=MASS_TOL,
87+
# ms_n_digits=MS_N_DIGITS
88+
# )
89+
#
90+
# # Build tree structure
91+
# tree = build_tree(rooted_data, max_level=3)
92+
#
93+
# # Filter out MS1 parents with no fragments (empty entries)
94+
# tree = {parent: children for parent, children in tree.items() if children}
95+
#
96+
# # Find the parent closest to our target
97+
# parent_mz = None
98+
# for mz in tree.keys():
99+
# if abs(mz - TARGET_PARENT_MZ) < 0.01:
100+
# parent_mz = mz
101+
# break
102+
#
103+
# if parent_mz is None:
104+
# parent_mz = list(tree.keys())[0]
105+
#
106+
# n_fragments = len(tree[parent_mz]) if parent_mz in tree else 0
107+
#
108+
# print(f"✓ Tree created: {len(tree)} parent(s) with MS2 fragments")
109+
# print(f" Parent m/z: {parent_mz:.4f} ({n_fragments} MS2 fragments)")
110+
#
111+
# # Plot MS2 spectrum - showing all fragments after processing (that go into the tree)
112+
# ms2_processed = processed_data[2]
113+
# parent_data = ms2_processed[ms2_processed['parent'].between(parent_mz - 0.01, parent_mz + 0.01)]
114+
# plt.figure(figsize=(10, 4))
115+
# if len(parent_data) > 0:
116+
# plot_df = parent_data.sort_values('mz')
117+
# plt.vlines(plot_df['mz'], 0, plot_df['intensity'], color='black', linewidth=1.5)
118+
# print(f"Plotting all {len(plot_df)} processed MS2 fragments")
119+
# else:
120+
# fragments = sorted(tree[parent_mz].keys())
121+
# plt.vlines(fragments, 0, 1, color='black', linewidth=1.5)
122+
# print(f"Plotting {len(fragments)} MS2 fragments from tree")
123+
# plt.axhline(y=0, color='gray', linewidth=1)
124+
# plt.xlabel('MS2 m/z', fontsize=11)
125+
# plt.ylabel('Intensity', fontsize=11)
126+
# plt.title(f'Sample #15 MS2 Spectrum - Processed Fragments (parent m/z {parent_mz:.2f})', fontsize=12,
127+
# fontweight='bold')
128+
# plt.xlim(0, max(plot_df['mz']) + 20 if len(plot_df) > 0 else 300)
129+
# plt.grid(alpha=0.3)
130+
# plt.tight_layout()
131+
# plt.show()
132+
#
133+
# # Initialize MA estimator
134+
# est = MAEstimator(same_level=True, n_samples=300, tol=0.05)
135+
# # First approximation (mass-only)
136+
# ma_first = float(est.estimate_by_MW(parent_mz, has_children=False).mean())
137+
# # Recursive MA (fragment-informed)
138+
# old_stdout = sys.stdout
139+
# sys.stdout = buffer = io.StringIO()
140+
# ma_recursive = float(est.estimate_MA(tree, parent_mz, progress_levels=2).mean())
141+
# sys.stdout = old_stdout
142+
# # Extract diagnostic information
143+
# interesting = [
144+
# line
145+
# for line in buffer.getvalue().split("\n")
146+
# if "Common precursors" in line or "HIT:" in line
147+
# ]
148+
#
149+
# if interesting:
150+
# print("Numbers in [brackets] are shared sub-fragments from both MS2 fragments.")
151+
# print("Common precursors found:")
152+
# import re
153+
#
154+
# for line in interesting:
155+
# formatted_line = re.sub(r"(\d+\.\d{3})\d+", r"\1", line)
156+
# print(f" {formatted_line}")
157+
#
158+
# print(f"Parent m/z: {parent_mz:.4f}")
159+
# print(f"First approximation (mass-only): {ma_first:.2f} Da")
160+
# print(f"Recursive MA (fragment-informed): {ma_recursive:.2f} Da")
161+
# print(f"Reference MA (known value): {MA_REFERENCE} Da")
162+
# print(tree)

0 commit comments

Comments
 (0)