Pass a copy of the activation-energy dictionary instead of mutating the global E_act

Jhsmit · Jhsmit · commit fc27475e1712 · 2025-06-27T12:02:54.000+02:00
diff --git a/hdxrate/hdxrate.py b/hdxrate/hdxrate.py
@@ -22,23 +22,21 @@
 
 R = 1.987
 
+
 # Activation energies (cal/mol)
 E_act = {
-    'acid': 14000.,
-    'base': 17000.,
-    'water': 19000.,
-    'D': 1000.,
-    'E': 1083.,
-    'H': 7500.
+    "acid": 14000.0,
+    "base": 17000.0,
+    "water": 19000.0,
+    "D": 1000.0,
+    "E": 1083.0,
+    "H": 7500.0,
 }
 
-D_E_act = {
-    'D_HD' : 1000.,
-    'D_DH' : 1000. - 40,
-    'D_HH' : 1000. - 40
-}
+D_E_act = {"D_HD": 1000.0, "D_DH": 1000.0 - 40, "D_HH": 1000.0 - 40}
+
 
-def get_side_chain_dictionary(temperature, pH, k_reference):
+def get_side_chain_dictionary(temperature, pH, k_reference, activation_energy):
     """
     Returns a dictionary with inductive effects of side chains on H/D exchange rates.
 
@@ -74,61 +72,94 @@ def get_side_chain_dictionary(temperature, pH, k_reference):
     """
 
     root_dir = Path(__file__).parent
-    names = ['name', 'short_name', 'acid_lambda', 'acid_rho', 'base_lambda', 'base_rho']
-    side_chain_array = np.genfromtxt(root_dir / 'constants.txt', comments='#', skip_header=2, delimiter='\t', dtype=None,
-                                     names=names, encoding=None, autostrip=True)
-
-    side_chain_dict = {elem['short_name']: np.array(list(elem)[2:]) for elem in side_chain_array}
-    for residue in ['D', 'E', 'H']: # residues D, E, H are calculated based on pH and pKa
-        k_corrected = -np.log10(10**-k_reference[residue] * np.exp(-E_act[residue] * (1 / temperature - 1 / 278) / R)) # Check correct reference temperature
-
-        deprotenated = side_chain_dict[residue + '0']
-        protenated = side_chain_dict[residue + '+']
-
-        values = np.log10(np.divide(10 ** (protenated - pH) + 10 ** (deprotenated - k_corrected),
-                                    10 ** -k_corrected + 10 ** -pH))
+    names = ["name", "short_name", "acid_lambda", "acid_rho", "base_lambda", "base_rho"]
+    side_chain_array = np.genfromtxt(
+        root_dir / "constants.txt",
+        comments="#",
+        skip_header=2,
+        delimiter="\t",
+        dtype=None,
+        names=names,
+        encoding=None,
+        autostrip=True,
+    )
+
+    side_chain_dict = {
+        elem["short_name"]: np.array(list(elem)[2:]) for elem in side_chain_array
+    }
+    for residue in [
+        "D",
+        "E",
+        "H",
+    ]:  # residues D, E, H are calculated based on pH and pKa
+        k_corrected = -np.log10(
+            10 ** -k_reference[residue]
+            * np.exp(-activation_energy[residue] * (1 / temperature - 1 / 278) / R)
+        )  # TODO: verify reference temperature (278 K here; backbone rates use 293 K)
+
+        deprotenated = side_chain_dict[residue + "0"]
+        protenated = side_chain_dict[residue + "+"]
+
+        values = np.log10(
+            np.divide(
+                10 ** (protenated - pH) + 10 ** (deprotenated - k_corrected),
+                10**-k_corrected + 10**-pH,
+            )
+        )
         side_chain_dict[residue] = values
-        if residue == 'E':
-            side_chain_dict['CT'][0] = np.log10(np.divide(10 ** (0.05 - pH) + 10 ** (0.96 - k_corrected),
-                                               10 ** -k_corrected + 10 ** -pH))
+        if residue == "E":
+            side_chain_dict["CT"][0] = np.log10(
+                np.divide(
+                    10 ** (0.05 - pH) + 10 ** (0.96 - k_corrected),
+                    10**-k_corrected + 10**-pH,
+                )
+            )
 
     return side_chain_dict
 
 
-def correct_pH(pH_read, d_percentage=100.):
+def correct_pH(pH_read, d_percentage=100.0):
+    """
+     Correct for pH as described in Nguyen et al, 2018[1]_.
+     This adds 0.4, scaled by the deuterium fraction (d_percentage / 100), to the pH reading.
+
+     Note that there is no consensus on this correction factor. See also Rubinson, 2017[2]_
+
+     Parameters
+     ----------
+     pH_read: :obj:`float`
+         pH value of the solution as read by a standard glass electrode pH meter.
+     d_percentage: :obj:`float`
+         Percentage of deuterium in the solution.
+
+     Returns
+     -------
+     pH_corrected : :obj:`float`
+         Corrected pH value (pD)
+
+     References
+     ----------
+
+    .. [1] Nguyen, D., Mayne, L., Phillips, M. C. & Walter Englander, S. Reference Parameters for Protein
+       Hydrogen Exchange Rates. J. Am. Soc. Mass Spectrom. 29, 1936–1939 (2018).
+    .. [2] Rubinson, K. A. Practical corrections for p(H,D) measurements in mixed H2O/D2O biological buffers.
+       Anal. Methods 9, 2744–2750 (2017).
     """
-    Correct for pH as described in Nguyen et al, 2018[1]_.
-    This adds 0.4 to the pH value multiplied by the deuteration percentage.
-
-    Note that there is no consensus on this correction factor. See also Rubinson, 2017[2]_
-
-    Parameters
-    ----------
-    pH_read: :obj:`float`
-        pH value of the solution as read by a standard glass electrode pH meter.
-    d_percentage: :obj:`float`
-        Percentage of deuterium in the solution.
-
-    Returns
-    -------
-    pH_corrected : :obj:`float`
-        Corrected pH value (pD)
-
-    References
-    ----------
-
-   .. [1] Nguyen, D., Mayne, L., Phillips, M. C. & Walter Englander, S. Reference Parameters for Protein
-      Hydrogen Exchange Rates. J. Am. Soc. Mass Spectrom. 29, 1936–1939 (2018).
-   .. [2] Rubinson, K. A. Practical corrections for p(H,D) measurements in mixed H 2 O/D 2 O biological buffers.
-      Anal. Methods 9, 2744–2750 (2017).
-   """
 
     pH_corrected = pH_read + 0.4 * d_percentage / 100
     return pH_corrected
 
 
-def k_int_from_sequence(sequence, temperature, pH_read, reference='poly', exchange_type='HD',
-                        d_percentage=100., ph_correction=True, wildcard='X'):
+def k_int_from_sequence(
+    sequence,
+    temperature,
+    pH_read,
+    reference="poly",
+    exchange_type="HD",
+    d_percentage=100.0,
+    ph_correction=True,
+    wildcard="X",
+):
     """
     Calculated intrisic rates of exchange for amide hydrogens in proteins.
 
@@ -176,56 +207,59 @@ def k_int_from_sequence(sequence, temperature, pH_read, reference='poly', exchan
        Exchange Rates. J. Am. Soc. Mass Spectrom. 29, 1936–1939 (2018).
     """
 
-    if len(sequence) <3:
-        raise ValueError('Sequence needs a minimum length of 3')
-    if exchange_type not in ['HD', 'DH', 'HH']:
+    if len(sequence) < 3:
+        raise ValueError("Sequence needs a minimum length of 3")
+    if exchange_type not in ["HD", "DH", "HH"]:
         raise ValueError(f"Unsupported exchange type '{exchange_type}'")
 
-    if exchange_type == 'HD':
+    activation_energy = E_act.copy()
+    if exchange_type == "HD":
         exponents = np.array([1.62, 10.18, -1.5])
         pD = correct_pH(pH_read, d_percentage) if ph_correction else pH_read
         pKD = 15.05
-        k_reference = {'D': 4.48, 'E': 4.93, 'H': 7.42}  # HD
-        E_act['D'] = D_E_act['D_HD']
-    elif exchange_type == 'DH':
-        exponents = np.array([1.4, 10., -1.6])
+        k_reference = {"D": 4.48, "E": 4.93, "H": 7.42}  # HD
+        activation_energy["D"] = D_E_act["D_HD"]
+    elif exchange_type == "DH":
+        exponents = np.array([1.4, 10.0, -1.6])
         pD = pH_read
         pKD = 14.17
-        E_act['D'] = D_E_act['D_DH']
-        k_reference = {'D': 3.87, 'E': 4.33, 'H': 7.0}  #DH
-    elif exchange_type == 'HH':
+        k_reference = {"D": 3.87, "E": 4.33, "H": 7.0}  # DH
+        activation_energy["D"] = D_E_act["D_DH"]
+    elif exchange_type == "HH":
         exponents = np.array([1.39, 10.08, -1.6])
         pD = pH_read
         pKD = 14.17
-        E_act['D'] = D_E_act['D_HH']
-        k_reference = {'D': 3.88, 'E': 4.35, 'H': 7.11}  #HH
+        k_reference = {"D": 3.88, "E": 4.35, "H": 7.11}  # HH
+        activation_energy["D"] = D_E_act["D_HH"]
 
-    conc_D = 10. ** -pD
-    conc_OD = 10. ** (pD - pKD)
+    conc_D = 10.0**-pD
+    conc_OD = 10.0 ** (pD - pKD)
 
-    k_values = (10 ** exponents) / 60
+    k_values = (10**exponents) / 60
     oligo_factors = [2.34, 1.35, 1.585]
-    if reference == 'poly':
+    if reference == "poly":
         k_acid_ref, k_base_ref, k_water_ref = k_values
-    elif reference == 'oligo':
+    elif reference == "oligo":
         k_acid_ref, k_base_ref, k_water_ref = k_values * oligo_factors
     else:
         raise ValueError("Value for 'reference' mush be either 'poly' or 'oligo'")
 
     sequence = list(sequence)
-    sequence.insert(0, 'NT')
-    sequence.append('CT')
+    sequence.insert(0, "NT")
+    sequence.append("CT")
 
     # Rates without inductive effects from neighbours, corrected for temperature
-    k_acid = k_acid_ref * np.exp(-E_act['acid'] * (1 / temperature - 1 / 293) / R)
-    k_base = k_base_ref * np.exp(-E_act['base'] * (1 / temperature - 1 / 293) / R)
-    k_water = k_water_ref * np.exp(-E_act['water'] * (1 / temperature - 1 / 293) / R)
+    k_acid = k_acid_ref * np.exp(-E_act["acid"] * (1 / temperature - 1 / 293) / R)
+    k_base = k_base_ref * np.exp(-E_act["base"] * (1 / temperature - 1 / 293) / R)
+    k_water = k_water_ref * np.exp(-E_act["water"] * (1 / temperature - 1 / 293) / R)
 
-    side_chain_dict = get_side_chain_dictionary(temperature, pD, k_reference)
+    side_chain_dict = get_side_chain_dictionary(
+        temperature, pD, k_reference, activation_energy
+    )
 
     k_int = []
     for i, residue in enumerate(sequence):
-        if residue == 'NT':
+        if residue == "NT":
             continue
         elif i == 1:  # First residue
             k_int.append(np.inf)
@@ -234,9 +268,9 @@ def k_int_from_sequence(sequence, temperature, pH_read, reference='poly', exchan
         next_residue = sequence[i + 1]
         prev_residue = sequence[i - 1]
         # Proline or unknown residues are set to zero rate
-        if residue in ['P', 'Pc'] or wildcard in [prev_residue, residue]:
-            k_int.append(0.)
-            if next_residue == 'CT':
+        if residue in ["P", "Pc"] or wildcard in [prev_residue, residue]:
+            k_int.append(0.0)
+            if next_residue == "CT":
                 break
             else:
                 continue
@@ -245,29 +279,29 @@ def k_int_from_sequence(sequence, temperature, pH_read, reference='poly', exchan
         _, prev_rho_acid, _, prev_rho_base = side_chain_dict[prev_residue]
         curr_lambda_acid, _, curr_lambda_base, _ = side_chain_dict[residue]
 
-        if next_residue == 'CT':
-            cterm_acid = side_chain_dict['CT'][0]
-            cterm_base = side_chain_dict['CT'][2]
+        if next_residue == "CT":
+            cterm_acid = side_chain_dict["CT"][0]
+            cterm_base = side_chain_dict["CT"][2]
 
             Fa = 10 ** (prev_rho_acid + curr_lambda_acid + cterm_acid)
             Fb = 10 ** (curr_lambda_base + prev_rho_base + cterm_base)
         elif i == 2:  # Second residue in the chain (starts at 1)
-            nterm_acid = side_chain_dict['NT'][1]
-            nterm_base = side_chain_dict['NT'][3]
+            nterm_acid = side_chain_dict["NT"][1]
+            nterm_base = side_chain_dict["NT"][3]
 
             Fa = 10 ** (prev_rho_acid + curr_lambda_acid + nterm_acid)
             Fb = 10 ** (curr_lambda_base + prev_rho_base + nterm_base)
         else:
             Fa = 10 ** (prev_rho_acid + curr_lambda_acid)
             Fb = 10 ** (curr_lambda_base + prev_rho_base)
 
-        k_total_acid = Fa*k_acid*conc_D
-        k_total_base = Fb*k_base*conc_OD
-        k_total_water = Fb*k_water
+        k_total_acid = Fa * k_acid * conc_D
+        k_total_base = Fb * k_base * conc_OD
+        k_total_water = Fb * k_water
 
         k_int.append(k_total_acid + k_total_base + k_total_water)
 
-        if next_residue == 'CT':
+        if next_residue == "CT":
             break
 
     return np.array(k_int)
diff --git a/tests/test_hdxrate.py b/tests/test_hdxrate.py
@@ -2,7 +2,7 @@
 
 import numpy as np
 from hdxrate import k_int_from_sequence
-from hdxrate.hdxrate import get_side_chain_dictionary
+from hdxrate.hdxrate import get_side_chain_dictionary, E_act
 from pathlib import Path
 from functools import reduce
 from itertools import combinations
@@ -22,7 +22,7 @@ def seq1():
 def seq2():
     """sequence two a sequence of the pairwise combination of all side chains"""
     k_reference = {"D": 3.87, "E": 4.33, "H": 7.0}  # DH
-    chains_dict = get_side_chain_dictionary(278, 8, k_reference)
+    chains_dict = get_side_chain_dictionary(278, 8, k_reference, E_act)
     one_letter = [k for k in chains_dict.keys() if len(k) == 1]
     seq2 = reduce(add, [a + b for a, b in combinations(one_letter, 2)])