2222
2323R = 1.987
2424
25+
2526# Activation energies (cal/mol)
2627E_act = {
27- ' acid' : 14000. ,
28- ' base' : 17000. ,
29- ' water' : 19000. ,
30- 'D' : 1000. ,
31- 'E' : 1083. ,
32- 'H' : 7500.
28+ " acid" : 14000.0 ,
29+ " base" : 17000.0 ,
30+ " water" : 19000.0 ,
31+ "D" : 1000.0 ,
32+ "E" : 1083.0 ,
33+ "H" : 7500.0 ,
3334}
3435
35- D_E_act = {
36- 'D_HD' : 1000. ,
37- 'D_DH' : 1000. - 40 ,
38- 'D_HH' : 1000. - 40
39- }
36+ D_E_act = {"D_HD" : 1000.0 , "D_DH" : 1000.0 - 40 , "D_HH" : 1000.0 - 40 }
37+
4038
41- def get_side_chain_dictionary (temperature , pH , k_reference ):
39+ def get_side_chain_dictionary (temperature , pH , k_reference , activation_energy ):
4240 """
4341 Returns a dictionary with inductive effects of side chains on H/D exchange rates.
4442
@@ -74,61 +72,94 @@ def get_side_chain_dictionary(temperature, pH, k_reference):
7472 """
7573
7674 root_dir = Path (__file__ ).parent
77- names = ['name' , 'short_name' , 'acid_lambda' , 'acid_rho' , 'base_lambda' , 'base_rho' ]
78- side_chain_array = np .genfromtxt (root_dir / 'constants.txt' , comments = '#' , skip_header = 2 , delimiter = '\t ' , dtype = None ,
79- names = names , encoding = None , autostrip = True )
80-
81- side_chain_dict = {elem ['short_name' ]: np .array (list (elem )[2 :]) for elem in side_chain_array }
82- for residue in ['D' , 'E' , 'H' ]: # residues D, E, H are calculated based on pH and pKa
83- k_corrected = - np .log10 (10 ** - k_reference [residue ] * np .exp (- E_act [residue ] * (1 / temperature - 1 / 278 ) / R )) # Check correct reference temperature
84-
85- deprotenated = side_chain_dict [residue + '0' ]
86- protenated = side_chain_dict [residue + '+' ]
87-
88- values = np .log10 (np .divide (10 ** (protenated - pH ) + 10 ** (deprotenated - k_corrected ),
89- 10 ** - k_corrected + 10 ** - pH ))
75+ names = ["name" , "short_name" , "acid_lambda" , "acid_rho" , "base_lambda" , "base_rho" ]
76+ side_chain_array = np .genfromtxt (
77+ root_dir / "constants.txt" ,
78+ comments = "#" ,
79+ skip_header = 2 ,
80+ delimiter = "\t " ,
81+ dtype = None ,
82+ names = names ,
83+ encoding = None ,
84+ autostrip = True ,
85+ )
86+
87+ side_chain_dict = {
88+ elem ["short_name" ]: np .array (list (elem )[2 :]) for elem in side_chain_array
89+ }
90+ for residue in [
91+ "D" ,
92+ "E" ,
93+ "H" ,
94+ ]: # residues D, E, H are calculated based on pH and pKa
95+ k_corrected = - np .log10 (
96+ 10 ** - k_reference [residue ]
97+ * np .exp (- activation_energy [residue ] * (1 / temperature - 1 / 278 ) / R )
98+ ) # Check correct reference temperature
99+
100+ deprotenated = side_chain_dict [residue + "0" ]
101+ protenated = side_chain_dict [residue + "+" ]
102+
103+ values = np .log10 (
104+ np .divide (
105+ 10 ** (protenated - pH ) + 10 ** (deprotenated - k_corrected ),
106+ 10 ** - k_corrected + 10 ** - pH ,
107+ )
108+ )
90109 side_chain_dict [residue ] = values
91- if residue == 'E' :
92- side_chain_dict ['CT' ][0 ] = np .log10 (np .divide (10 ** (0.05 - pH ) + 10 ** (0.96 - k_corrected ),
93- 10 ** - k_corrected + 10 ** - pH ))
110+ if residue == "E" :
111+ side_chain_dict ["CT" ][0 ] = np .log10 (
112+ np .divide (
113+ 10 ** (0.05 - pH ) + 10 ** (0.96 - k_corrected ),
114+ 10 ** - k_corrected + 10 ** - pH ,
115+ )
116+ )
94117
95118 return side_chain_dict
96119
97120
98- def correct_pH (pH_read , d_percentage = 100. ):
121+ def correct_pH (pH_read , d_percentage = 100.0 ):
122+ """
123+ Correct for pH as described in Nguyen et al, 2018[1]_.
124+ This adds 0.4 to the pH value multiplied by the deuteration percentage.
125+
126+ Note that there is no consensus on this correction factor. See also Rubinson, 2017[2]_
127+
128+ Parameters
129+ ----------
130+ pH_read: :obj:`float`
131+ pH value of the solution as read by a standard glass electrode pH meter.
132+ d_percentage: :obj:`float`
133+ Percentage of deuterium in the solution.
134+
135+ Returns
136+ -------
137+ pH_corrected : :obj:`float`
138+ Corrected pH value (pD)
139+
140+ References
141+ ----------
142+
143+ .. [1] Nguyen, D., Mayne, L., Phillips, M. C. & Walter Englander, S. Reference Parameters for Protein
144+ Hydrogen Exchange Rates. J. Am. Soc. Mass Spectrom. 29, 1936–1939 (2018).
145+ .. [2] Rubinson, K. A. Practical corrections for p(H,D) measurements in mixed H 2 O/D 2 O biological buffers.
146+ Anal. Methods 9, 2744–2750 (2017).
99147 """
100- Correct for pH as described in Nguyen et al, 2018[1]_.
101- This adds 0.4 to the pH value multiplied by the deuteration percentage.
102-
103- Note that there is no consensus on this correction factor. See also Rubinson, 2017[2]_
104-
105- Parameters
106- ----------
107- pH_read: :obj:`float`
108- pH value of the solution as read by a standard glass electrode pH meter.
109- d_percentage: :obj:`float`
110- Percentage of deuterium in the solution.
111-
112- Returns
113- -------
114- pH_corrected : :obj:`float`
115- Corrected pH value (pD)
116-
117- References
118- ----------
119-
120- .. [1] Nguyen, D., Mayne, L., Phillips, M. C. & Walter Englander, S. Reference Parameters for Protein
121- Hydrogen Exchange Rates. J. Am. Soc. Mass Spectrom. 29, 1936–1939 (2018).
122- .. [2] Rubinson, K. A. Practical corrections for p(H,D) measurements in mixed H 2 O/D 2 O biological buffers.
123- Anal. Methods 9, 2744–2750 (2017).
124- """
125148
126149 pH_corrected = pH_read + 0.4 * d_percentage / 100
127150 return pH_corrected
128151
129152
130- def k_int_from_sequence (sequence , temperature , pH_read , reference = 'poly' , exchange_type = 'HD' ,
131- d_percentage = 100. , ph_correction = True , wildcard = 'X' ):
153+ def k_int_from_sequence (
154+ sequence ,
155+ temperature ,
156+ pH_read ,
157+ reference = "poly" ,
158+ exchange_type = "HD" ,
159+ d_percentage = 100.0 ,
160+ ph_correction = True ,
161+ wildcard = "X" ,
162+ ):
132163 """
133164 Calculated intrisic rates of exchange for amide hydrogens in proteins.
134165
@@ -176,56 +207,59 @@ def k_int_from_sequence(sequence, temperature, pH_read, reference='poly', exchan
176207 Exchange Rates. J. Am. Soc. Mass Spectrom. 29, 1936–1939 (2018).
177208 """
178209
179- if len (sequence ) < 3 :
180- raise ValueError (' Sequence needs a minimum length of 3' )
181- if exchange_type not in ['HD' , 'DH' , 'HH' ]:
210+ if len (sequence ) < 3 :
211+ raise ValueError (" Sequence needs a minimum length of 3" )
212+ if exchange_type not in ["HD" , "DH" , "HH" ]:
182213 raise ValueError (f"Unsupported exchange type '{ exchange_type } '" )
183214
184- if exchange_type == 'HD' :
215+ activation_energy = E_act .copy ()
216+ if exchange_type == "HD" :
185217 exponents = np .array ([1.62 , 10.18 , - 1.5 ])
186218 pD = correct_pH (pH_read , d_percentage ) if ph_correction else pH_read
187219 pKD = 15.05
188- k_reference = {'D' : 4.48 , 'E' : 4.93 , 'H' : 7.42 } # HD
189- E_act [ 'D' ] = D_E_act [' D_HD' ]
190- elif exchange_type == 'DH' :
191- exponents = np .array ([1.4 , 10. , - 1.6 ])
220+ k_reference = {"D" : 4.48 , "E" : 4.93 , "H" : 7.42 } # HD
221+ activation_energy [ "D" ] = D_E_act [" D_HD" ]
222+ elif exchange_type == "DH" :
223+ exponents = np .array ([1.4 , 10.0 , - 1.6 ])
192224 pD = pH_read
193225 pKD = 14.17
194- E_act [ 'D' ] = D_E_act [ 'D_DH' ]
195- k_reference = { 'D' : 3.87 , 'E' : 4.33 , 'H' : 7.0 } #DH
196- elif exchange_type == 'HH' :
226+ k_reference = { "D" : 3.87 , "E" : 4.33 , "H" : 7.0 } # DH
227+ activation_energy [ "D" ] = D_E_act [ "D_DH" ]
228+ elif exchange_type == "HH" :
197229 exponents = np .array ([1.39 , 10.08 , - 1.6 ])
198230 pD = pH_read
199231 pKD = 14.17
200- E_act [ 'D' ] = D_E_act [ 'D_HH' ]
201- k_reference = { 'D' : 3.88 , 'E' : 4.35 , 'H' : 7.11 } #HH
232+ k_reference = { "D" : 3.88 , "E" : 4.35 , "H" : 7.11 } # HH
233+ activation_energy [ "D" ] = D_E_act [ "D_HH" ]
202234
203- conc_D = 10. ** - pD
204- conc_OD = 10. ** (pD - pKD )
235+ conc_D = 10.0 ** - pD
236+ conc_OD = 10.0 ** (pD - pKD )
205237
206- k_values = (10 ** exponents ) / 60
238+ k_values = (10 ** exponents ) / 60
207239 oligo_factors = [2.34 , 1.35 , 1.585 ]
208- if reference == ' poly' :
240+ if reference == " poly" :
209241 k_acid_ref , k_base_ref , k_water_ref = k_values
210- elif reference == ' oligo' :
242+ elif reference == " oligo" :
211243 k_acid_ref , k_base_ref , k_water_ref = k_values * oligo_factors
212244 else :
213245 raise ValueError ("Value for 'reference' mush be either 'poly' or 'oligo'" )
214246
215247 sequence = list (sequence )
216- sequence .insert (0 , 'NT' )
217- sequence .append ('CT' )
248+ sequence .insert (0 , "NT" )
249+ sequence .append ("CT" )
218250
219251 # Rates without inductive effects from neighbours, corrected for temperature
220- k_acid = k_acid_ref * np .exp (- E_act [' acid' ] * (1 / temperature - 1 / 293 ) / R )
221- k_base = k_base_ref * np .exp (- E_act [' base' ] * (1 / temperature - 1 / 293 ) / R )
222- k_water = k_water_ref * np .exp (- E_act [' water' ] * (1 / temperature - 1 / 293 ) / R )
252+ k_acid = k_acid_ref * np .exp (- E_act [" acid" ] * (1 / temperature - 1 / 293 ) / R )
253+ k_base = k_base_ref * np .exp (- E_act [" base" ] * (1 / temperature - 1 / 293 ) / R )
254+ k_water = k_water_ref * np .exp (- E_act [" water" ] * (1 / temperature - 1 / 293 ) / R )
223255
224- side_chain_dict = get_side_chain_dictionary (temperature , pD , k_reference )
256+ side_chain_dict = get_side_chain_dictionary (
257+ temperature , pD , k_reference , activation_energy
258+ )
225259
226260 k_int = []
227261 for i , residue in enumerate (sequence ):
228- if residue == 'NT' :
262+ if residue == "NT" :
229263 continue
230264 elif i == 1 : # First residue
231265 k_int .append (np .inf )
@@ -234,9 +268,9 @@ def k_int_from_sequence(sequence, temperature, pH_read, reference='poly', exchan
234268 next_residue = sequence [i + 1 ]
235269 prev_residue = sequence [i - 1 ]
236270 # Proline or unknown residues are set to zero rate
237- if residue in ['P' , 'Pc' ] or wildcard in [prev_residue , residue ]:
238- k_int .append (0. )
239- if next_residue == 'CT' :
271+ if residue in ["P" , "Pc" ] or wildcard in [prev_residue , residue ]:
272+ k_int .append (0.0 )
273+ if next_residue == "CT" :
240274 break
241275 else :
242276 continue
@@ -245,29 +279,29 @@ def k_int_from_sequence(sequence, temperature, pH_read, reference='poly', exchan
245279 _ , prev_rho_acid , _ , prev_rho_base = side_chain_dict [prev_residue ]
246280 curr_lambda_acid , _ , curr_lambda_base , _ = side_chain_dict [residue ]
247281
248- if next_residue == 'CT' :
249- cterm_acid = side_chain_dict ['CT' ][0 ]
250- cterm_base = side_chain_dict ['CT' ][2 ]
282+ if next_residue == "CT" :
283+ cterm_acid = side_chain_dict ["CT" ][0 ]
284+ cterm_base = side_chain_dict ["CT" ][2 ]
251285
252286 Fa = 10 ** (prev_rho_acid + curr_lambda_acid + cterm_acid )
253287 Fb = 10 ** (curr_lambda_base + prev_rho_base + cterm_base )
254288 elif i == 2 : # Second residue in the chain (starts at 1)
255- nterm_acid = side_chain_dict ['NT' ][1 ]
256- nterm_base = side_chain_dict ['NT' ][3 ]
289+ nterm_acid = side_chain_dict ["NT" ][1 ]
290+ nterm_base = side_chain_dict ["NT" ][3 ]
257291
258292 Fa = 10 ** (prev_rho_acid + curr_lambda_acid + nterm_acid )
259293 Fb = 10 ** (curr_lambda_base + prev_rho_base + nterm_base )
260294 else :
261295 Fa = 10 ** (prev_rho_acid + curr_lambda_acid )
262296 Fb = 10 ** (curr_lambda_base + prev_rho_base )
263297
264- k_total_acid = Fa * k_acid * conc_D
265- k_total_base = Fb * k_base * conc_OD
266- k_total_water = Fb * k_water
298+ k_total_acid = Fa * k_acid * conc_D
299+ k_total_base = Fb * k_base * conc_OD
300+ k_total_water = Fb * k_water
267301
268302 k_int .append (k_total_acid + k_total_base + k_total_water )
269303
270- if next_residue == 'CT' :
304+ if next_residue == "CT" :
271305 break
272306
273307 return np .array (k_int )
0 commit comments