Skip to content

Commit 91b55d0

Browse files
committed
chore: clean up template file
1 parent d889088 commit 91b55d0

File tree

1 file changed

+4
-154
lines changed

1 file changed

+4
-154
lines changed

templates/01_load_secb_data.py

Lines changed: 4 additions & 154 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
"""Load a HDX-MS dataset (peptide list with D-uptake per peptide, csv format)"""
22
# %%
33
from pathlib import Path
4+
45
import numpy as np
5-
from pyhdx import read_dynamx, HDXMeasurement
6-
from pyhdx.process import apply_control, correct_d_uptake
6+
7+
from pyhdx import HDXMeasurement, read_dynamx
78
from pyhdx.datasets import filter_peptides
9+
from pyhdx.process import apply_control, correct_d_uptake
810

911
# %%
1012

@@ -41,155 +43,3 @@
4143

4244
# Create HDX Measurement object with addtional experimental metadata (sequence, pH, temperature)
4345
hdxm = HDXMeasurement(peptides_corrected, sequence=sequence, pH=pH, temperature=temperature)
44-
45-
#%%
46-
data = hdxm.data
47-
data['id'] = data['start'].apply(str) + '_' + data['end'].apply(str)
48-
peptides = data['id'].unique()
49-
peptides
50-
#%%
51-
52-
peptide = peptides[15]
53-
peptide = '118_125'
54-
55-
df_f = data[data['id'] == peptide]
56-
row_0 = df_f.iloc[0]
57-
start, end = row_0['start'], row_0['end']
58-
start, end
59-
60-
df_f
61-
62-
#%%
63-
from hdxrate import k_int_from_sequence
64-
seq = df_f['_sequence'].iloc[0]
65-
66-
k_int_from_sequence(seq, temperature, pH, d_percentage=90)
67-
68-
exposure = np.array(df_f['exposure'])[1:]
69-
upt = np.array(df_f['uptake_corrected'])[1:]
70-
71-
if exposure[0] != 0:
72-
exposure = np.insert(exposure, 0, 0)
73-
upt = np.insert(upt, 0, 0)
74-
75-
exposure, upt
76-
77-
#%%
78-
import proplot as pplt
79-
fig, ax = pplt.subplots()
80-
ax.scatter(exposure, upt)
81-
ax.format(xscale='log')
82-
pplt.show()
83-
84-
#%%
85-
peptide_info = hdxm.coverage.protein.loc[start:end]
86-
k_int = peptide_info['k_int'][peptide_info['exchanges']]
87-
k_arr = np.array(k_int)
88-
89-
k_eff = np.exp(np.log(k_arr).mean())
90-
91-
k_int = np.array(peptide_info['k_int'])
92-
k_int
93-
94-
# TODO i need the inverse of this function to find the time difference; then take the log of it
95-
d0 = 1 - np.exp(np.divide(-k_int[:, np.newaxis]*exposure[np.newaxis, :], 2))
96-
d0
97-
98-
#%%
99-
np.log10(10)
100-
101-
#%%
102-
103-
np.log(2.72)
104-
105-
#%%
106-
107-
hdxm.metadata
108-
109-
#%%
110-
111-
data_wide = data.pivot(index='id', columns='exposure', values='uptake_corrected')
112-
113-
max_d = data[data['id'] == peptide]['ex_residues'].iloc[0]
114-
115-
import proplot as pplt
116-
117-
t_eval = np.linspace(0, 6000*10, 100, endpoint=True)
118-
k = 1e-3
119-
d_eval = max_d * (1 - np.exp(-k*t_eval))
120-
#%%
121-
122-
from scipy.interpolate import PchipInterpolator
123-
124-
s = data_wide.loc[peptide]
125-
x = np.array(s.index)
126-
y = np.array(s)
127-
128-
x = np.append(x, x[-1]*5)
129-
y = np.append(y, y[-1])
130-
131-
from scipy.interpolate import make_interp_spline
132-
133-
bspl = make_interp_spline(x, y, k=2)
134-
135-
136-
#%%
137-
# parameterized spline
138-
p = np.stack((x, y))
139-
140-
u_unif = x
141-
142-
dp = p[:, 1:] - p[:, :-1] # 2-vector distances between points
143-
l = (dp**2).sum(axis=0) # squares of lengths of 2-vectors between points
144-
u_cord = np.sqrt(l).cumsum() # cumulative sums of 2-norms
145-
u_cord = np.r_[0, u_cord] # the first point is parameterized at zero
146-
147-
u_c = np.r_[0, np.cumsum((dp**2).sum(axis=0)**0.25)]
148-
149-
from scipy.interpolate import make_interp_spline
150-
import matplotlib.pyplot as plt
151-
152-
fig, ax = plt.subplots(1, 3, figsize=(8, 3))
153-
parametrizations = ['uniform', 'cord length', 'centripetal']
154-
for j, u in enumerate([u_unif, u_cord, u_c]):
155-
spl = make_interp_spline(u, p, axis=1) # note p is a 2D array
156-
uu = np.linspace(u[0], u[-1], 51)
157-
xx, yy = spl(uu)
158-
ax[j].plot(xx, yy, '--')
159-
ax[j].plot(p[0, :], p[1, :], 'o')
160-
ax[j].set_title(parametrizations[j])
161-
162-
#plt.show()
163-
164-
xx, uu
165-
#%%
166-
167-
spl_p = make_interp_spline(u_c, p, axis=1)
168-
uu = np.linspace(u_c[0], u_c[-1], 51)
169-
xx, yy = spl_p(uu)
170-
171-
spl = PchipInterpolator(x, y)
172-
173-
fig, ax = pplt.subplots()
174-
ax.scatter(data_wide.loc[peptide])
175-
ax.plot(t_eval, d_eval, color='k', ls='--')
176-
ax.plot(t_eval, spl(t_eval), color='g', ls='--')
177-
#ax.plot(t_eval, bspl(t_eval), color='b', ls='--')
178-
ax.plot(xx, yy, color='c', ls='--')
179-
# ax.axhline(max_d, color='r', ls='--')
180-
# ax.format(xscale='log')
181-
ax.format(xlim=(0, 100), ylim=(0, 20))
182-
183-
#%%
184-
exposures = np.array(data_wide.columns)
185-
exposures
186-
187-
#%%
188-
189-
- np.log(1/2), np.log(2)
190-
191-
#%%
192-
hdxm.data_wide.iloc[0].xs('d_uptake')
193-
194-
#%%
195-

0 commit comments

Comments
 (0)