Skip to content

Commit 41de358

Browse files
committed
Interaction energy for QM/MM, update docs
1 parent 750cf73 commit 41de358

File tree

2 files changed

+212
-0
lines changed

2 files changed

+212
-0
lines changed
Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
from CPET.source.calculator import calculator
2+
3+
import argparse
4+
import json
5+
import numpy as np
6+
from tqdm import tqdm
7+
from pyscf import gto, tools
8+
from pathlib import Path
9+
10+
def E_elec_qmmm(mol, x, q, dm, track=False):
11+
E_elec = 0.0
12+
if not track:
13+
for i in range(len(x)):
14+
mol.set_rinv_origin(x[i])
15+
V = (-1)*mol.intor('int1e_rinv')
16+
E_elec += q[i] * np.einsum('ij,ij', dm, V)
17+
else:
18+
for i in tqdm(range(len(x))):
19+
mol.set_rinv_origin(x[i])
20+
V = (-1)*mol.intor('int1e_rinv')
21+
E_elec += q[i] * np.einsum('ij,ij', dm, V)
22+
print(E_elec)
23+
return E_elec
24+
25+
def E_nuc_qmmm(x, x_qm, q, q_qm, track=False):
26+
E_nuc = 0.0
27+
if not track:
28+
for i in range(len(x)):
29+
for j in range(len(x_qm)):
30+
r = np.linalg.norm(x_qm[j] - x[i]) #Take care of cases where nuclei overlap in case filtering failed
31+
if r < 1e-4:
32+
print(f"Warning: Overlapping nuclei detected at QM index {i} and PDB index {j}. Skipping this interaction, make sure your filtering is set up correctly")
33+
continue
34+
E_nuc += q[i] * q_qm[j] / r
35+
else:
36+
for i in range(len(x)):
37+
for j in range(len(x_qm)):
38+
r = np.linalg.norm(x_qm[j] - x[i]) #Take care of cases where nuclei overlap in case filtering failed
39+
if r < 1e-4:
40+
print(f"Warning: Overlapping nuclei detected at QM index {i} and PDB index {j}. Skipping this interaction, make sure your filtering is set up correctly")
41+
continue
42+
E_nuc += q[i] * q_qm[j] / r
43+
print(E_nuc)
44+
return E_nuc
45+
46+
47+
def density_matrix(mo_coeff, mo_occ):
48+
if isinstance(mo_coeff, np.ndarray): # closed shell
49+
dm = (mo_coeff * np.sqrt(mo_occ)) @ (mo_coeff * np.sqrt(mo_occ)).T
50+
elif isinstance(mo_coeff, (tuple, list)) and len(mo_coeff) == 2: # open shell
51+
Ca, Cb = mo_coeff # alpha and beta coeffs
52+
occ_a, occ_b = mo_occ
53+
dm_a = (Ca * np.sqrt(occ_a)) @ (Ca * np.sqrt(occ_a)).T
54+
dm_b = (Cb * np.sqrt(occ_b)) @ (Cb * np.sqrt(occ_b)).T
55+
dm = dm_a + dm_b # total density
56+
else:
57+
raise ValueError("unexpected mo_coeff format")
58+
59+
return dm
60+
61+
def main():
62+
parser = argparse.ArgumentParser(description="Electrostatic Interaction Analysis of Charge Density with Surrouding Point Charges")
63+
parser.add_argument("-m", "--molden", help="Molden Input File", required=True)
64+
parser.add_argument("-p", "--pdb", help="PDB file with charges", required=True)
65+
parser.add_argument(
66+
"-r", "--res", help="Flag for residue breakdown. If provided, analysis will be done by residue as well", action="store_true", default=False
67+
)
68+
parser.add_argument(
69+
"-o", "--options", help="Path to options file (mainly for filtering purposes).", required=True
70+
)
71+
parser.add_argument(
72+
"-v", "--verbose", help="Verbose output", action="store_true", default=False
73+
)
74+
75+
ANGSTROM_TO_BOHR = 1.8897259886
76+
77+
args = parser.parse_args()
78+
molden = args.molden
79+
res = args.res
80+
verbose = args.verbose
81+
options = json.load(open(args.options, "r"))
82+
options["dtype"] = "float64" # Ensure that the dtype is set to float64 for consistency
83+
options["CPET_method"] = "point_field" #Preventative to ensure that time isn't wasted on creating other attributes
84+
options["center"] = [0,0,0]
85+
calculator_object = calculator(options, args.pdb) #Creates object, filters, etc. based on options file
86+
x = calculator_object.x * ANGSTROM_TO_BOHR # Convert Angstrom to Bohr
87+
q = calculator_object.Q # Charges in e
88+
print(x.dtype, q.dtype) # Print data types for debugging
89+
atom_number = calculator_object.atom_number # Atomic numbers
90+
atom_type = calculator_object.atom_type # Atomic types
91+
resids = calculator_object.resids # Residue names
92+
residue_number = calculator_object.residue_number # Residue numbers
93+
94+
print("Parsing Molden file...")
95+
parsed_molden = tools.molden.parse(molden, verbose=0)
96+
mol, mo_energy, mo_coeff, mo_occ = parsed_molden[0:4]
97+
x_qm = mol.atom_coords() # Get coordinates of atoms in Bohr
98+
q_qm = mol.atom_charges() # Get atomic numbers to get nuclear charges
99+
dm = density_matrix(mo_coeff, mo_occ) # Density matrix in AO basis
100+
print(f"Molden file parsed successfully. MO coefficients shape (AOxMO): {mo_coeff.shape}")
101+
102+
print(x[0:5], q[0:5]) # Print first 5 coordinates and charges for debugging
103+
print(x_qm[0:5], q_qm[0:5]) # Print first 5 QM coordinates and charges for debugging
104+
print(len(x), len(q))
105+
106+
# Create a PySCF molecule object
107+
mol.build()
108+
print("Molecule built successfully.")
109+
110+
if res:
111+
print("Residue breakdown enabled. Analysis will be done by residue and saved to file residue_breakdown.txt")
112+
res_breakdown_dict = {}
113+
114+
#Loop to build dictionary of residues
115+
print("Building residue breakdown dictionary...")
116+
for i in tqdm(range(len(x))):
117+
resn = residue_number[i]
118+
if resn not in res_breakdown_dict:
119+
"""
120+
x captures the coordinates of atoms in the residue.
121+
count keeps track of the number of atoms in the residue.
122+
E_elec and E_nuc are initialized to 0.0, they will be calculated later.
123+
"""
124+
res_breakdown_dict[resn] = {
125+
"E_elec": 0.0,
126+
"E_nuc": 0.0,
127+
"V_qmmm": 0.0,
128+
"count": 0,
129+
"x": [],
130+
"q": []
131+
}
132+
res_breakdown_dict[resn]["x"].append(x[i])
133+
res_breakdown_dict[resn]["q"].append(q[i]) # Store charge for the residue
134+
res_breakdown_dict[resn]["count"] += 1
135+
else:
136+
#First case, make sure that the previous residue is the same as the current, if it is present in the breakdown. If not, this indicates duplicate residue numbering...
137+
if resn == residue_number[i-1]: #Continuing the residue
138+
res_breakdown_dict[resn]["x"].append(x[i])
139+
res_breakdown_dict[resn]["q"].append(q[i])
140+
res_breakdown_dict[resn]["count"] += 1
141+
else: #Duplicate residue, throw an error
142+
raise ValueError(f"Duplicate residue numbering detected at index {i}. Residue number: {resn}. Please check your PDB file and options.")
143+
144+
#Loop to calculate interaction energies for each residue
145+
for resn, data in tqdm(res_breakdown_dict.items()):
146+
x_temp = np.array(data["x"]) # Coordinates of atoms in the residue
147+
q_temp = np.array(data["q"])
148+
count = data["count"]
149+
if count == 0:
150+
raise ValueError(f"No atoms found for residue {resn}. Please check your PDB file and options.")
151+
# Interaction energy of PC with charge density
152+
if verbose:
153+
print(f"Calculating interaction energy for residue {resn} with {count} atoms...")
154+
res_breakdown_dict[resn]["E_elec"] = E_elec_qmmm(mol, x_temp, q_temp, dm)
155+
res_breakdown_dict[resn]["E_nuc"] = E_nuc_qmmm(x_temp, x_qm, q_temp, q_qm)
156+
res_breakdown_dict[resn]["V_qmmm"] = res_breakdown_dict[resn]["E_elec"] + res_breakdown_dict[resn]["E_nuc"]
157+
158+
# Write the breakdown to a file from the dictionary, sorted by residue number
159+
print("Writing residue breakdown to file residue_breakdown.txt")
160+
with open("residue_breakdown.txt", "w") as f:
161+
f.write("Residue\tE_elec\tE_nuc\tV_qmmm\tCount\n")
162+
for resn in sorted(res_breakdown_dict.keys(), key=lambda x: int(x)):
163+
data = res_breakdown_dict[resn]
164+
f.write(f"{resn}\t{float(data['E_elec']):.8f}\t{float(data['E_nuc']):.8f}\t{float(data['V_qmmm']):.8f}\t{data['count']}\n")
165+
print(f"Writing complete. Overall interaction energy V_qmmm: {sum(data['V_qmmm'] for data in res_breakdown_dict.values())} Hartree")
166+
167+
else:
168+
print("Residue breakdown disabled. Analysis will be done for the entire molecule.")
169+
# Interaction energy of PC with charge density
170+
print("E_elec")
171+
#Print floating point precision of the variables
172+
print(f"x: {x.dtype}, q: {q.dtype}")
173+
174+
E_elec = E_elec_qmmm(mol, x, q, dm, track=True)
175+
# Interaction energy of PC with nuclear charges
176+
print("E_nuc")
177+
E_nuc = E_nuc_qmmm(x, x_qm, q, q_qm, track=True)
178+
V_qmmm = E_elec + E_nuc
179+
print(f"Total interaction energy V_qmmm: {V_qmmm} Hartree")
180+
181+
if __name__ == "__main__":
182+
main()

docs/GetStarted.rst

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
Getting Started
2+
=================
3+
4+
Examples for pycpet are located in the `examples` directory of the repository. The examples are designed to demonstrate how to use the package for various applications.
5+
Here, we will provide a fairly in-depth overview of *PyCPET*'s features, and how to use them.
6+
7+
PyCPET functionalities
8+
----------------------
9+
*PyCPET* is designed for the high throughput computation of electrostatic properties at enzyme active sites, including their dynamic behavior. This includes things like:
10+
- Computing point electric fields/electrostatic potentials at arbitrary points, and over dynamic trajectories
11+
- Computing fields/potentials in volumes, and their topological features defined by user input
12+
- Clustering fields/potentials to get representative structures and understand the fluctuation of these quantities
13+
- PCA on electric fields, useful for comparing fields across different enzyme classes
14+
- Residue breakdown analysis of classical electric field topologies
15+
- (Coming Soon) Electrostatic interaction energies between protein scaffold and active site charge densities
16+
- And more!
17+
18+
We outline the tested functionalities below, primarily focusing on making your own "options" file for the cpet.py executable.
19+
20+
The Options File
21+
-----------------
22+
The options file is the cornerstone of *PyCPET*. It is a .json file that contains all relevant parameters you wish to specify for the calculation. Examples can be found in `examples`
23+
24+
General Options File Parameters
25+
-------------------------------------------------
26+
These are some general options that are required for most/all calculations.
27+
- 'method': The type of calculation to run. The following methods are available:
28+
- 'point_field'
29+
- 'point_mag'
30+
-

0 commit comments

Comments
 (0)