|
1 | 1 | """Load a HDX-MS dataset (peptide list with D-uptake per peptide, csv format)""" |
2 | 2 | # %% |
3 | 3 | from pathlib import Path |
| 4 | + |
4 | 5 | import numpy as np |
5 | | -from pyhdx import read_dynamx, HDXMeasurement |
6 | | -from pyhdx.process import apply_control, correct_d_uptake |
| 6 | + |
| 7 | +from pyhdx import HDXMeasurement, read_dynamx |
7 | 8 | from pyhdx.datasets import filter_peptides |
| 9 | +from pyhdx.process import apply_control, correct_d_uptake |
8 | 10 |
|
9 | 11 | # %% |
10 | 12 |
|
|
41 | 43 |
|
42 | 44 | # Create HDX Measurement object with addtional experimental metadata (sequence, pH, temperature) |
43 | 45 | hdxm = HDXMeasurement(peptides_corrected, sequence=sequence, pH=pH, temperature=temperature) |
44 | | - |
45 | | -#%% |
46 | | -data = hdxm.data |
47 | | -data['id'] = data['start'].apply(str) + '_' + data['end'].apply(str) |
48 | | -peptides = data['id'].unique() |
49 | | -peptides |
50 | | -#%% |
51 | | - |
52 | | -peptide = peptides[15] |
53 | | -peptide = '118_125' |
54 | | - |
55 | | -df_f = data[data['id'] == peptide] |
56 | | -row_0 = df_f.iloc[0] |
57 | | -start, end = row_0['start'], row_0['end'] |
58 | | -start, end |
59 | | - |
60 | | -df_f |
61 | | - |
62 | | -#%% |
63 | | -from hdxrate import k_int_from_sequence |
64 | | -seq = df_f['_sequence'].iloc[0] |
65 | | - |
66 | | -k_int_from_sequence(seq, temperature, pH, d_percentage=90) |
67 | | - |
68 | | -exposure = np.array(df_f['exposure'])[1:] |
69 | | -upt = np.array(df_f['uptake_corrected'])[1:] |
70 | | - |
71 | | -if exposure[0] != 0: |
72 | | - exposure = np.insert(exposure, 0, 0) |
73 | | - upt = np.insert(upt, 0, 0) |
74 | | - |
75 | | -exposure, upt |
76 | | - |
77 | | -#%% |
78 | | -import proplot as pplt |
79 | | -fig, ax = pplt.subplots() |
80 | | -ax.scatter(exposure, upt) |
81 | | -ax.format(xscale='log') |
82 | | -pplt.show() |
83 | | - |
84 | | -#%% |
85 | | -peptide_info = hdxm.coverage.protein.loc[start:end] |
86 | | -k_int = peptide_info['k_int'][peptide_info['exchanges']] |
87 | | -k_arr = np.array(k_int) |
88 | | - |
89 | | -k_eff = np.exp(np.log(k_arr).mean()) |
90 | | - |
91 | | -k_int = np.array(peptide_info['k_int']) |
92 | | -k_int |
93 | | - |
94 | | -# TODO i need the inverse of this function to find the time difference; then take the log of it |
95 | | -d0 = 1 - np.exp(np.divide(-k_int[:, np.newaxis]*exposure[np.newaxis, :], 2)) |
96 | | -d0 |
97 | | - |
98 | | -#%% |
99 | | -np.log10(10) |
100 | | - |
101 | | -#%% |
102 | | - |
103 | | -np.log(2.72) |
104 | | - |
105 | | -#%% |
106 | | - |
107 | | -hdxm.metadata |
108 | | - |
109 | | -#%% |
110 | | - |
111 | | -data_wide = data.pivot(index='id', columns='exposure', values='uptake_corrected') |
112 | | - |
113 | | -max_d = data[data['id'] == peptide]['ex_residues'].iloc[0] |
114 | | - |
115 | | -import proplot as pplt |
116 | | - |
117 | | -t_eval = np.linspace(0, 6000*10, 100, endpoint=True) |
118 | | -k = 1e-3 |
119 | | -d_eval = max_d * (1 - np.exp(-k*t_eval)) |
120 | | -#%% |
121 | | - |
122 | | -from scipy.interpolate import PchipInterpolator |
123 | | - |
124 | | -s = data_wide.loc[peptide] |
125 | | -x = np.array(s.index) |
126 | | -y = np.array(s) |
127 | | - |
128 | | -x = np.append(x, x[-1]*5) |
129 | | -y = np.append(y, y[-1]) |
130 | | - |
131 | | -from scipy.interpolate import make_interp_spline |
132 | | - |
133 | | -bspl = make_interp_spline(x, y, k=2) |
134 | | - |
135 | | - |
136 | | -#%% |
137 | | -# parameterized spline |
138 | | -p = np.stack((x, y)) |
139 | | - |
140 | | -u_unif = x |
141 | | - |
142 | | -dp = p[:, 1:] - p[:, :-1] # 2-vector distances between points |
143 | | -l = (dp**2).sum(axis=0) # squares of lengths of 2-vectors between points |
144 | | -u_cord = np.sqrt(l).cumsum() # cumulative sums of 2-norms |
145 | | -u_cord = np.r_[0, u_cord] # the first point is parameterized at zero |
146 | | - |
147 | | -u_c = np.r_[0, np.cumsum((dp**2).sum(axis=0)**0.25)] |
148 | | - |
149 | | -from scipy.interpolate import make_interp_spline |
150 | | -import matplotlib.pyplot as plt |
151 | | - |
152 | | -fig, ax = plt.subplots(1, 3, figsize=(8, 3)) |
153 | | -parametrizations = ['uniform', 'cord length', 'centripetal'] |
154 | | -for j, u in enumerate([u_unif, u_cord, u_c]): |
155 | | - spl = make_interp_spline(u, p, axis=1) # note p is a 2D array |
156 | | - uu = np.linspace(u[0], u[-1], 51) |
157 | | - xx, yy = spl(uu) |
158 | | - ax[j].plot(xx, yy, '--') |
159 | | - ax[j].plot(p[0, :], p[1, :], 'o') |
160 | | - ax[j].set_title(parametrizations[j]) |
161 | | - |
162 | | -#plt.show() |
163 | | - |
164 | | -xx, uu |
165 | | -#%% |
166 | | - |
167 | | -spl_p = make_interp_spline(u_c, p, axis=1) |
168 | | -uu = np.linspace(u_c[0], u_c[-1], 51) |
169 | | -xx, yy = spl_p(uu) |
170 | | - |
171 | | -spl = PchipInterpolator(x, y) |
172 | | - |
173 | | -fig, ax = pplt.subplots() |
174 | | -ax.scatter(data_wide.loc[peptide]) |
175 | | -ax.plot(t_eval, d_eval, color='k', ls='--') |
176 | | -ax.plot(t_eval, spl(t_eval), color='g', ls='--') |
177 | | -#ax.plot(t_eval, bspl(t_eval), color='b', ls='--') |
178 | | -ax.plot(xx, yy, color='c', ls='--') |
179 | | -# ax.axhline(max_d, color='r', ls='--') |
180 | | -# ax.format(xscale='log') |
181 | | -ax.format(xlim=(0, 100), ylim=(0, 20)) |
182 | | - |
183 | | -#%% |
184 | | -exposures = np.array(data_wide.columns) |
185 | | -exposures |
186 | | - |
187 | | -#%% |
188 | | - |
189 | | -- np.log(1/2), np.log(2) |
190 | | - |
191 | | -#%% |
192 | | -hdxm.data_wide.iloc[0].xs('d_uptake') |
193 | | - |
194 | | -#%% |
195 | | - |
0 commit comments