1+ import matplotlib .pyplot as plt
2+ import numpy as np
3+ import io , sys , types
4+ from pathlib import Path
5+ import pandas as pd
6+ import matplotlib .pyplot as plt
7+ from IPython .display import display
8+
9+ # Fix for pandas compatibility with older pickle files
10+ mod = types .ModuleType ('pandas.core.indexes.numeric' )
11+ mod .Int64Index = pd .Index
12+ mod .UInt64Index = pd .Index
13+ mod .Float64Index = pd .Index
14+ sys .modules ['pandas.core.indexes.numeric' ] = mod
15+
16+ from mzmlripper import process_mzml_file
17+ # from ms_json import process_mzml_json
18+ # from recursive_ma.ms_tree import process, identify_parents
19+ # from recursive_ma import build_tree, estimate_MA
20+ # from recursive_ma.estimator import MAEstimator
21+ from rdkit import Chem
22+ from rdkit .Chem import Draw
23+ import assemblytheorytools as att
24+
25+ if __name__ == "__main__" :
26+ SMILES = 'COC(=O)C(NC(=O)OC(C)(C)C)P(=O)(OC)OC'
27+ MW = 297.2
28+ MA_REFERENCE = 14
29+ TARGET_PARENT_MZ = 296.26
30+
31+ print (f"Compound: { SMILES } " )
32+ print (f"Molecular Weight: { MW } " )
33+ print (f"Reference MA: { MA_REFERENCE } " )
34+ print (f"Target Parent m/z: { TARGET_PARENT_MZ } " )
35+
36+ mol = Chem .MolFromSmiles (SMILES )
37+ display (Draw .MolToImage (mol , size = (320 , 220 )))
38+
39+ # Parse the mzML file
40+ mzml_file = Path ('Sample_#15_Stepped_MS3.mzML' )
41+ output_dir = Path ('temp_mzml_output' )
42+ output_dir .mkdir (exist_ok = True )
43+
44+ process_mzml_file (
45+ filename = str (mzml_file ),
46+ out_dir = str (output_dir ),
47+ rt_units = 'min' ,
48+ int_threshold = 1000 ,
49+ relative = False
50+ )
51+
52+ json_file = output_dir / f"ripper_{ mzml_file .stem } .json"
53+
54+ # # Process JSON to structured DataFrames
55+ # print("Converting JSON to DataFrames...")
56+ # raw_data = process_mzml_json(json_file)
57+ #
58+ # print(f"✓ MS levels: {list(raw_data.keys())}")
59+ # for level, df in raw_data.items():
60+ # print(f" MS{level}: {len(df):,} peaks")
61+ #
62+ # # Processing parameters (matching original pickle files)
63+ # MIN_REL_INTENSITY = 0.01
64+ # MAX_NUM_PEAKS = 20
65+ # MASS_TOL = 0.05
66+ # MS_N_DIGITS = 3
67+ # MIN_ABS_INTENSITY = {
68+ # 1: 10 ** 6,
69+ # 2: 10 ** 4,
70+ # 3: 10 ** 3
71+ # }
72+ # processed_data = process(
73+ # sample=raw_data,
74+ # max_num_peaks=MAX_NUM_PEAKS,
75+ # min_abs_intensity=MIN_ABS_INTENSITY,
76+ # min_rel_intensity=MIN_REL_INTENSITY,
77+ # n_digits=MS_N_DIGITS
78+ # )
79+ #
80+ # for level, df in processed_data.items():
81+ # print(f" MS{level}: {len(df):,} peaks (filtered from {len(raw_data[level]):,})")
82+ #
83+ # # Link parent-child relationships
84+ # rooted_data = identify_parents(
85+ # processed_data,
86+ # mass_tol=MASS_TOL,
87+ # ms_n_digits=MS_N_DIGITS
88+ # )
89+ #
90+ # # Build tree structure
91+ # tree = build_tree(rooted_data, max_level=3)
92+ #
93+ # # Filter out MS1 parents with no fragments (empty entries)
94+ # tree = {parent: children for parent, children in tree.items() if children}
95+ #
96+ # # Find the parent closest to our target
97+ # parent_mz = None
98+ # for mz in tree.keys():
99+ # if abs(mz - TARGET_PARENT_MZ) < 0.01:
100+ # parent_mz = mz
101+ # break
102+ #
103+ # if parent_mz is None:
104+ # parent_mz = list(tree.keys())[0]
105+ #
106+ # n_fragments = len(tree[parent_mz]) if parent_mz in tree else 0
107+ #
108+ # print(f"✓ Tree created: {len(tree)} parent(s) with MS2 fragments")
109+ # print(f" Parent m/z: {parent_mz:.4f} ({n_fragments} MS2 fragments)")
110+ #
111+ # # Plot MS2 spectrum - showing all fragments after processing (that go into the tree)
112+ # ms2_processed = processed_data[2]
113+ # parent_data = ms2_processed[ms2_processed['parent'].between(parent_mz - 0.01, parent_mz + 0.01)]
114+ # plt.figure(figsize=(10, 4))
115+ # if len(parent_data) > 0:
116+ # plot_df = parent_data.sort_values('mz')
117+ # plt.vlines(plot_df['mz'], 0, plot_df['intensity'], color='black', linewidth=1.5)
118+ # print(f"Plotting all {len(plot_df)} processed MS2 fragments")
119+ # else:
120+ # fragments = sorted(tree[parent_mz].keys())
121+ # plt.vlines(fragments, 0, 1, color='black', linewidth=1.5)
122+ # print(f"Plotting {len(fragments)} MS2 fragments from tree")
123+ # plt.axhline(y=0, color='gray', linewidth=1)
124+ # plt.xlabel('MS2 m/z', fontsize=11)
125+ # plt.ylabel('Intensity', fontsize=11)
126+ # plt.title(f'Sample #15 MS2 Spectrum - Processed Fragments (parent m/z {parent_mz:.2f})', fontsize=12,
127+ # fontweight='bold')
128+ # plt.xlim(0, max(plot_df['mz']) + 20 if len(plot_df) > 0 else 300)
129+ # plt.grid(alpha=0.3)
130+ # plt.tight_layout()
131+ # plt.show()
132+ #
133+ # # Initialize MA estimator
134+ # est = MAEstimator(same_level=True, n_samples=300, tol=0.05)
135+ # # First approximation (mass-only)
136+ # ma_first = float(est.estimate_by_MW(parent_mz, has_children=False).mean())
137+ # # Recursive MA (fragment-informed)
138+ # old_stdout = sys.stdout
139+ # sys.stdout = buffer = io.StringIO()
140+ # ma_recursive = float(est.estimate_MA(tree, parent_mz, progress_levels=2).mean())
141+ # sys.stdout = old_stdout
142+ # # Extract diagnostic information
143+ # interesting = [
144+ # line
145+ # for line in buffer.getvalue().split("\n")
146+ # if "Common precursors" in line or "HIT:" in line
147+ # ]
148+ #
149+ # if interesting:
150+ # print("Numbers in [brackets] are shared sub-fragments from both MS2 fragments.")
151+ # print("Common precursors found:")
152+ # import re
153+ #
154+ # for line in interesting:
155+ # formatted_line = re.sub(r"(\d+\.\d{3})\d+", r"\1", line)
156+ # print(f" {formatted_line}")
157+ #
158+ # print(f"Parent m/z: {parent_mz:.4f}")
159+ # print(f"First approximation (mass-only): {ma_first:.2f} Da")
160+ # print(f"Recursive MA (fragment-informed): {ma_recursive:.2f} Da")
161+ # print(f"Reference MA (known value): {MA_REFERENCE} Da")
162+ # print(tree)
0 commit comments