Skip to content

Commit fc27475

Browse files
committed
pass activation energies dictionary copy instead of modifying
1 parent 90405c7 commit fc27475

File tree

2 files changed

+129
-95
lines changed

2 files changed

+129
-95
lines changed

hdxrate/hdxrate.py

Lines changed: 127 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -22,23 +22,21 @@
2222

2323
R = 1.987
2424

25+
2526
# Activation energies (cal/mol)
2627
E_act = {
27-
'acid': 14000.,
28-
'base': 17000.,
29-
'water': 19000.,
30-
'D': 1000.,
31-
'E': 1083.,
32-
'H': 7500.
28+
"acid": 14000.0,
29+
"base": 17000.0,
30+
"water": 19000.0,
31+
"D": 1000.0,
32+
"E": 1083.0,
33+
"H": 7500.0,
3334
}
3435

35-
D_E_act = {
36-
'D_HD' : 1000.,
37-
'D_DH' : 1000. - 40,
38-
'D_HH' : 1000. - 40
39-
}
36+
D_E_act = {"D_HD": 1000.0, "D_DH": 1000.0 - 40, "D_HH": 1000.0 - 40}
37+
4038

41-
def get_side_chain_dictionary(temperature, pH, k_reference):
39+
def get_side_chain_dictionary(temperature, pH, k_reference, activation_energy):
4240
"""
4341
Returns a dictionary with inductive effects of side chains on H/D exchange rates.
4442
@@ -74,61 +72,94 @@ def get_side_chain_dictionary(temperature, pH, k_reference):
7472
"""
7573

7674
root_dir = Path(__file__).parent
77-
names = ['name', 'short_name', 'acid_lambda', 'acid_rho', 'base_lambda', 'base_rho']
78-
side_chain_array = np.genfromtxt(root_dir / 'constants.txt', comments='#', skip_header=2, delimiter='\t', dtype=None,
79-
names=names, encoding=None, autostrip=True)
80-
81-
side_chain_dict = {elem['short_name']: np.array(list(elem)[2:]) for elem in side_chain_array}
82-
for residue in ['D', 'E', 'H']: # residues D, E, H are calculated based on pH and pKa
83-
k_corrected = -np.log10(10**-k_reference[residue] * np.exp(-E_act[residue] * (1 / temperature - 1 / 278) / R)) # Check correct reference temperature
84-
85-
deprotenated = side_chain_dict[residue + '0']
86-
protenated = side_chain_dict[residue + '+']
87-
88-
values = np.log10(np.divide(10 ** (protenated - pH) + 10 ** (deprotenated - k_corrected),
89-
10 ** -k_corrected + 10 ** -pH))
75+
names = ["name", "short_name", "acid_lambda", "acid_rho", "base_lambda", "base_rho"]
76+
side_chain_array = np.genfromtxt(
77+
root_dir / "constants.txt",
78+
comments="#",
79+
skip_header=2,
80+
delimiter="\t",
81+
dtype=None,
82+
names=names,
83+
encoding=None,
84+
autostrip=True,
85+
)
86+
87+
side_chain_dict = {
88+
elem["short_name"]: np.array(list(elem)[2:]) for elem in side_chain_array
89+
}
90+
for residue in [
91+
"D",
92+
"E",
93+
"H",
94+
]: # residues D, E, H are calculated based on pH and pKa
95+
k_corrected = -np.log10(
96+
10 ** -k_reference[residue]
97+
* np.exp(-activation_energy[residue] * (1 / temperature - 1 / 278) / R)
98+
) # Check correct reference temperature
99+
100+
deprotenated = side_chain_dict[residue + "0"]
101+
protenated = side_chain_dict[residue + "+"]
102+
103+
values = np.log10(
104+
np.divide(
105+
10 ** (protenated - pH) + 10 ** (deprotenated - k_corrected),
106+
10**-k_corrected + 10**-pH,
107+
)
108+
)
90109
side_chain_dict[residue] = values
91-
if residue == 'E':
92-
side_chain_dict['CT'][0] = np.log10(np.divide(10 ** (0.05 - pH) + 10 ** (0.96 - k_corrected),
93-
10 ** -k_corrected + 10 ** -pH))
110+
if residue == "E":
111+
side_chain_dict["CT"][0] = np.log10(
112+
np.divide(
113+
10 ** (0.05 - pH) + 10 ** (0.96 - k_corrected),
114+
10**-k_corrected + 10**-pH,
115+
)
116+
)
94117

95118
return side_chain_dict
96119

97120

98-
def correct_pH(pH_read, d_percentage=100.):
121+
def correct_pH(pH_read, d_percentage=100.0):
122+
"""
123+
Correct for pH as described in Nguyen et al, 2018[1]_.
124+
This adds 0.4 to the pH value multiplied by the deuteration percentage.
125+
126+
Note that there is no consensus on this correction factor. See also Rubinson, 2017[2]_
127+
128+
Parameters
129+
----------
130+
pH_read: :obj:`float`
131+
pH value of the solution as read by a standard glass electrode pH meter.
132+
d_percentage: :obj:`float`
133+
Percentage of deuterium in the solution.
134+
135+
Returns
136+
-------
137+
pH_corrected : :obj:`float`
138+
Corrected pH value (pD)
139+
140+
References
141+
----------
142+
143+
.. [1] Nguyen, D., Mayne, L., Phillips, M. C. & Walter Englander, S. Reference Parameters for Protein
144+
Hydrogen Exchange Rates. J. Am. Soc. Mass Spectrom. 29, 1936–1939 (2018).
145+
.. [2] Rubinson, K. A. Practical corrections for p(H,D) measurements in mixed H 2 O/D 2 O biological buffers.
146+
Anal. Methods 9, 2744–2750 (2017).
99147
"""
100-
Correct for pH as described in Nguyen et al, 2018[1]_.
101-
This adds 0.4 to the pH value multiplied by the deuteration percentage.
102-
103-
Note that there is no consensus on this correction factor. See also Rubinson, 2017[2]_
104-
105-
Parameters
106-
----------
107-
pH_read: :obj:`float`
108-
pH value of the solution as read by a standard glass electrode pH meter.
109-
d_percentage: :obj:`float`
110-
Percentage of deuterium in the solution.
111-
112-
Returns
113-
-------
114-
pH_corrected : :obj:`float`
115-
Corrected pH value (pD)
116-
117-
References
118-
----------
119-
120-
.. [1] Nguyen, D., Mayne, L., Phillips, M. C. & Walter Englander, S. Reference Parameters for Protein
121-
Hydrogen Exchange Rates. J. Am. Soc. Mass Spectrom. 29, 1936–1939 (2018).
122-
.. [2] Rubinson, K. A. Practical corrections for p(H,D) measurements in mixed H 2 O/D 2 O biological buffers.
123-
Anal. Methods 9, 2744–2750 (2017).
124-
"""
125148

126149
pH_corrected = pH_read + 0.4 * d_percentage / 100
127150
return pH_corrected
128151

129152

130-
def k_int_from_sequence(sequence, temperature, pH_read, reference='poly', exchange_type='HD',
131-
d_percentage=100., ph_correction=True, wildcard='X'):
153+
def k_int_from_sequence(
154+
sequence,
155+
temperature,
156+
pH_read,
157+
reference="poly",
158+
exchange_type="HD",
159+
d_percentage=100.0,
160+
ph_correction=True,
161+
wildcard="X",
162+
):
132163
"""
133164
Calculated intrisic rates of exchange for amide hydrogens in proteins.
134165
@@ -176,56 +207,59 @@ def k_int_from_sequence(sequence, temperature, pH_read, reference='poly', exchan
176207
Exchange Rates. J. Am. Soc. Mass Spectrom. 29, 1936–1939 (2018).
177208
"""
178209

179-
if len(sequence) <3:
180-
raise ValueError('Sequence needs a minimum length of 3')
181-
if exchange_type not in ['HD', 'DH', 'HH']:
210+
if len(sequence) < 3:
211+
raise ValueError("Sequence needs a minimum length of 3")
212+
if exchange_type not in ["HD", "DH", "HH"]:
182213
raise ValueError(f"Unsupported exchange type '{exchange_type}'")
183214

184-
if exchange_type == 'HD':
215+
activation_energy = E_act.copy()
216+
if exchange_type == "HD":
185217
exponents = np.array([1.62, 10.18, -1.5])
186218
pD = correct_pH(pH_read, d_percentage) if ph_correction else pH_read
187219
pKD = 15.05
188-
k_reference = {'D': 4.48, 'E': 4.93, 'H': 7.42} # HD
189-
E_act['D'] = D_E_act['D_HD']
190-
elif exchange_type == 'DH':
191-
exponents = np.array([1.4, 10., -1.6])
220+
k_reference = {"D": 4.48, "E": 4.93, "H": 7.42} # HD
221+
activation_energy["D"] = D_E_act["D_HD"]
222+
elif exchange_type == "DH":
223+
exponents = np.array([1.4, 10.0, -1.6])
192224
pD = pH_read
193225
pKD = 14.17
194-
E_act['D'] = D_E_act['D_DH']
195-
k_reference = {'D': 3.87, 'E': 4.33, 'H': 7.0} #DH
196-
elif exchange_type == 'HH':
226+
k_reference = {"D": 3.87, "E": 4.33, "H": 7.0} # DH
227+
activation_energy["D"] = D_E_act["D_DH"]
228+
elif exchange_type == "HH":
197229
exponents = np.array([1.39, 10.08, -1.6])
198230
pD = pH_read
199231
pKD = 14.17
200-
E_act['D'] = D_E_act['D_HH']
201-
k_reference = {'D': 3.88, 'E': 4.35, 'H': 7.11} #HH
232+
k_reference = {"D": 3.88, "E": 4.35, "H": 7.11} # HH
233+
activation_energy["D"] = D_E_act["D_HH"]
202234

203-
conc_D = 10. ** -pD
204-
conc_OD = 10. ** (pD - pKD)
235+
conc_D = 10.0**-pD
236+
conc_OD = 10.0 ** (pD - pKD)
205237

206-
k_values = (10 ** exponents) / 60
238+
k_values = (10**exponents) / 60
207239
oligo_factors = [2.34, 1.35, 1.585]
208-
if reference == 'poly':
240+
if reference == "poly":
209241
k_acid_ref, k_base_ref, k_water_ref = k_values
210-
elif reference == 'oligo':
242+
elif reference == "oligo":
211243
k_acid_ref, k_base_ref, k_water_ref = k_values * oligo_factors
212244
else:
213245
raise ValueError("Value for 'reference' mush be either 'poly' or 'oligo'")
214246

215247
sequence = list(sequence)
216-
sequence.insert(0, 'NT')
217-
sequence.append('CT')
248+
sequence.insert(0, "NT")
249+
sequence.append("CT")
218250

219251
# Rates without inductive effects from neighbours, corrected for temperature
220-
k_acid = k_acid_ref * np.exp(-E_act['acid'] * (1 / temperature - 1 / 293) / R)
221-
k_base = k_base_ref * np.exp(-E_act['base'] * (1 / temperature - 1 / 293) / R)
222-
k_water = k_water_ref * np.exp(-E_act['water'] * (1 / temperature - 1 / 293) / R)
252+
k_acid = k_acid_ref * np.exp(-E_act["acid"] * (1 / temperature - 1 / 293) / R)
253+
k_base = k_base_ref * np.exp(-E_act["base"] * (1 / temperature - 1 / 293) / R)
254+
k_water = k_water_ref * np.exp(-E_act["water"] * (1 / temperature - 1 / 293) / R)
223255

224-
side_chain_dict = get_side_chain_dictionary(temperature, pD, k_reference)
256+
side_chain_dict = get_side_chain_dictionary(
257+
temperature, pD, k_reference, activation_energy
258+
)
225259

226260
k_int = []
227261
for i, residue in enumerate(sequence):
228-
if residue == 'NT':
262+
if residue == "NT":
229263
continue
230264
elif i == 1: # First residue
231265
k_int.append(np.inf)
@@ -234,9 +268,9 @@ def k_int_from_sequence(sequence, temperature, pH_read, reference='poly', exchan
234268
next_residue = sequence[i + 1]
235269
prev_residue = sequence[i - 1]
236270
# Proline or unknown residues are set to zero rate
237-
if residue in ['P', 'Pc'] or wildcard in [prev_residue, residue]:
238-
k_int.append(0.)
239-
if next_residue == 'CT':
271+
if residue in ["P", "Pc"] or wildcard in [prev_residue, residue]:
272+
k_int.append(0.0)
273+
if next_residue == "CT":
240274
break
241275
else:
242276
continue
@@ -245,29 +279,29 @@ def k_int_from_sequence(sequence, temperature, pH_read, reference='poly', exchan
245279
_, prev_rho_acid, _, prev_rho_base = side_chain_dict[prev_residue]
246280
curr_lambda_acid, _, curr_lambda_base, _ = side_chain_dict[residue]
247281

248-
if next_residue == 'CT':
249-
cterm_acid = side_chain_dict['CT'][0]
250-
cterm_base = side_chain_dict['CT'][2]
282+
if next_residue == "CT":
283+
cterm_acid = side_chain_dict["CT"][0]
284+
cterm_base = side_chain_dict["CT"][2]
251285

252286
Fa = 10 ** (prev_rho_acid + curr_lambda_acid + cterm_acid)
253287
Fb = 10 ** (curr_lambda_base + prev_rho_base + cterm_base)
254288
elif i == 2: # Second residue in the chain (starts at 1)
255-
nterm_acid = side_chain_dict['NT'][1]
256-
nterm_base = side_chain_dict['NT'][3]
289+
nterm_acid = side_chain_dict["NT"][1]
290+
nterm_base = side_chain_dict["NT"][3]
257291

258292
Fa = 10 ** (prev_rho_acid + curr_lambda_acid + nterm_acid)
259293
Fb = 10 ** (curr_lambda_base + prev_rho_base + nterm_base)
260294
else:
261295
Fa = 10 ** (prev_rho_acid + curr_lambda_acid)
262296
Fb = 10 ** (curr_lambda_base + prev_rho_base)
263297

264-
k_total_acid = Fa*k_acid*conc_D
265-
k_total_base = Fb*k_base*conc_OD
266-
k_total_water = Fb*k_water
298+
k_total_acid = Fa * k_acid * conc_D
299+
k_total_base = Fb * k_base * conc_OD
300+
k_total_water = Fb * k_water
267301

268302
k_int.append(k_total_acid + k_total_base + k_total_water)
269303

270-
if next_residue == 'CT':
304+
if next_residue == "CT":
271305
break
272306

273307
return np.array(k_int)

tests/test_hdxrate.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import numpy as np
44
from hdxrate import k_int_from_sequence
5-
from hdxrate.hdxrate import get_side_chain_dictionary
5+
from hdxrate.hdxrate import get_side_chain_dictionary, E_act
66
from pathlib import Path
77
from functools import reduce
88
from itertools import combinations
@@ -22,7 +22,7 @@ def seq1():
2222
def seq2():
2323
"""sequence two a sequence of the pairwise combination of all side chains"""
2424
k_reference = {"D": 3.87, "E": 4.33, "H": 7.0} # DH
25-
chains_dict = get_side_chain_dictionary(278, 8, k_reference)
25+
chains_dict = get_side_chain_dictionary(278, 8, k_reference, E_act)
2626
one_letter = [k for k in chains_dict.keys() if len(k) == 1]
2727
seq2 = reduce(add, [a + b for a, b in combinations(one_letter, 2)])
2828

0 commit comments

Comments
 (0)