|
| 1 | +from CPET.source.calculator import calculator |
| 2 | + |
| 3 | +import argparse |
| 4 | +import json |
| 5 | +import numpy as np |
| 6 | +from tqdm import tqdm |
| 7 | +from pyscf import gto, tools |
| 8 | +from pathlib import Path |
| 9 | + |
| 10 | +def E_elec_qmmm(mol, x, q, dm, track=False): |
| 11 | + E_elec = 0.0 |
| 12 | + if not track: |
| 13 | + for i in range(len(x)): |
| 14 | + mol.set_rinv_origin(x[i]) |
| 15 | + V = (-1)*mol.intor('int1e_rinv') |
| 16 | + E_elec += q[i] * np.einsum('ij,ij', dm, V) |
| 17 | + else: |
| 18 | + for i in tqdm(range(len(x))): |
| 19 | + mol.set_rinv_origin(x[i]) |
| 20 | + V = (-1)*mol.intor('int1e_rinv') |
| 21 | + E_elec += q[i] * np.einsum('ij,ij', dm, V) |
| 22 | + print(E_elec) |
| 23 | + return E_elec |
| 24 | + |
| 25 | +def E_nuc_qmmm(x, x_qm, q, q_qm, track=False): |
| 26 | + E_nuc = 0.0 |
| 27 | + if not track: |
| 28 | + for i in range(len(x)): |
| 29 | + for j in range(len(x_qm)): |
| 30 | + r = np.linalg.norm(x_qm[j] - x[i]) #Take care of cases where nuclei overlap in case filtering failed |
| 31 | + if r < 1e-4: |
| 32 | + print(f"Warning: Overlapping nuclei detected at QM index {i} and PDB index {j}. Skipping this interaction, make sure your filtering is set up correctly") |
| 33 | + continue |
| 34 | + E_nuc += q[i] * q_qm[j] / r |
| 35 | + else: |
| 36 | + for i in range(len(x)): |
| 37 | + for j in range(len(x_qm)): |
| 38 | + r = np.linalg.norm(x_qm[j] - x[i]) #Take care of cases where nuclei overlap in case filtering failed |
| 39 | + if r < 1e-4: |
| 40 | + print(f"Warning: Overlapping nuclei detected at QM index {i} and PDB index {j}. Skipping this interaction, make sure your filtering is set up correctly") |
| 41 | + continue |
| 42 | + E_nuc += q[i] * q_qm[j] / r |
| 43 | + print(E_nuc) |
| 44 | + return E_nuc |
| 45 | + |
| 46 | + |
| 47 | +def density_matrix(mo_coeff, mo_occ): |
| 48 | + if isinstance(mo_coeff, np.ndarray): # closed shell |
| 49 | + dm = (mo_coeff * np.sqrt(mo_occ)) @ (mo_coeff * np.sqrt(mo_occ)).T |
| 50 | + elif isinstance(mo_coeff, (tuple, list)) and len(mo_coeff) == 2: # open shell |
| 51 | + Ca, Cb = mo_coeff # alpha and beta coeffs |
| 52 | + occ_a, occ_b = mo_occ |
| 53 | + dm_a = (Ca * np.sqrt(occ_a)) @ (Ca * np.sqrt(occ_a)).T |
| 54 | + dm_b = (Cb * np.sqrt(occ_b)) @ (Cb * np.sqrt(occ_b)).T |
| 55 | + dm = dm_a + dm_b # total density |
| 56 | + else: |
| 57 | + raise ValueError("unexpected mo_coeff format") |
| 58 | + |
| 59 | + return dm |
| 60 | + |
| 61 | +def main(): |
| 62 | + parser = argparse.ArgumentParser(description="Electrostatic Interaction Analysis of Charge Density with Surrouding Point Charges") |
| 63 | + parser.add_argument("-m", "--molden", help="Molden Input File", required=True) |
| 64 | + parser.add_argument("-p", "--pdb", help="PDB file with charges", required=True) |
| 65 | + parser.add_argument( |
| 66 | + "-r", "--res", help="Flag for residue breakdown. If provided, analysis will be done by residue as well", action="store_true", default=False |
| 67 | + ) |
| 68 | + parser.add_argument( |
| 69 | + "-o", "--options", help="Path to options file (mainly for filtering purposes).", required=True |
| 70 | + ) |
| 71 | + parser.add_argument( |
| 72 | + "-v", "--verbose", help="Verbose output", action="store_true", default=False |
| 73 | + ) |
| 74 | + |
| 75 | + ANGSTROM_TO_BOHR = 1.8897259886 |
| 76 | + |
| 77 | + args = parser.parse_args() |
| 78 | + molden = args.molden |
| 79 | + res = args.res |
| 80 | + verbose = args.verbose |
| 81 | + options = json.load(open(args.options, "r")) |
| 82 | + options["dtype"] = "float64" # Ensure that the dtype is set to float64 for consistency |
| 83 | + options["CPET_method"] = "point_field" #Preventative to ensure that time isn't wasted on creating other attributes |
| 84 | + options["center"] = [0,0,0] |
| 85 | + calculator_object = calculator(options, args.pdb) #Creates object, filters, etc. based on options file |
| 86 | + x = calculator_object.x * ANGSTROM_TO_BOHR # Convert Angstrom to Bohr |
| 87 | + q = calculator_object.Q # Charges in e |
| 88 | + print(x.dtype, q.dtype) # Print data types for debugging |
| 89 | + atom_number = calculator_object.atom_number # Atomic numbers |
| 90 | + atom_type = calculator_object.atom_type # Atomic types |
| 91 | + resids = calculator_object.resids # Residue names |
| 92 | + residue_number = calculator_object.residue_number # Residue numbers |
| 93 | + |
| 94 | + print("Parsing Molden file...") |
| 95 | + parsed_molden = tools.molden.parse(molden, verbose=0) |
| 96 | + mol, mo_energy, mo_coeff, mo_occ = parsed_molden[0:4] |
| 97 | + x_qm = mol.atom_coords() # Get coordinates of atoms in Bohr |
| 98 | + q_qm = mol.atom_charges() # Get atomic numbers to get nuclear charges |
| 99 | + dm = density_matrix(mo_coeff, mo_occ) # Density matrix in AO basis |
| 100 | + print(f"Molden file parsed successfully. MO coefficients shape (AOxMO): {mo_coeff.shape}") |
| 101 | + |
| 102 | + print(x[0:5], q[0:5]) # Print first 5 coordinates and charges for debugging |
| 103 | + print(x_qm[0:5], q_qm[0:5]) # Print first 5 QM coordinates and charges for debugging |
| 104 | + print(len(x), len(q)) |
| 105 | + |
| 106 | + # Create a PySCF molecule object |
| 107 | + mol.build() |
| 108 | + print("Molecule built successfully.") |
| 109 | + |
| 110 | + if res: |
| 111 | + print("Residue breakdown enabled. Analysis will be done by residue and saved to file residue_breakdown.txt") |
| 112 | + res_breakdown_dict = {} |
| 113 | + |
| 114 | + #Loop to build dictionary of residues |
| 115 | + print("Building residue breakdown dictionary...") |
| 116 | + for i in tqdm(range(len(x))): |
| 117 | + resn = residue_number[i] |
| 118 | + if resn not in res_breakdown_dict: |
| 119 | + """ |
| 120 | + x captures the coordinates of atoms in the residue. |
| 121 | + count keeps track of the number of atoms in the residue. |
| 122 | + E_elec and E_nuc are initialized to 0.0, they will be calculated later. |
| 123 | + """ |
| 124 | + res_breakdown_dict[resn] = { |
| 125 | + "E_elec": 0.0, |
| 126 | + "E_nuc": 0.0, |
| 127 | + "V_qmmm": 0.0, |
| 128 | + "count": 0, |
| 129 | + "x": [], |
| 130 | + "q": [] |
| 131 | + } |
| 132 | + res_breakdown_dict[resn]["x"].append(x[i]) |
| 133 | + res_breakdown_dict[resn]["q"].append(q[i]) # Store charge for the residue |
| 134 | + res_breakdown_dict[resn]["count"] += 1 |
| 135 | + else: |
| 136 | + #First case, make sure that the previous residue is the same as the current, if it is present in the breakdown. If not, this indicates duplicate residue numbering... |
| 137 | + if resn == residue_number[i-1]: #Continuing the residue |
| 138 | + res_breakdown_dict[resn]["x"].append(x[i]) |
| 139 | + res_breakdown_dict[resn]["q"].append(q[i]) |
| 140 | + res_breakdown_dict[resn]["count"] += 1 |
| 141 | + else: #Duplicate residue, throw an error |
| 142 | + raise ValueError(f"Duplicate residue numbering detected at index {i}. Residue number: {resn}. Please check your PDB file and options.") |
| 143 | + |
| 144 | + #Loop to calculate interaction energies for each residue |
| 145 | + for resn, data in tqdm(res_breakdown_dict.items()): |
| 146 | + x_temp = np.array(data["x"]) # Coordinates of atoms in the residue |
| 147 | + q_temp = np.array(data["q"]) |
| 148 | + count = data["count"] |
| 149 | + if count == 0: |
| 150 | + raise ValueError(f"No atoms found for residue {resn}. Please check your PDB file and options.") |
| 151 | + # Interaction energy of PC with charge density |
| 152 | + if verbose: |
| 153 | + print(f"Calculating interaction energy for residue {resn} with {count} atoms...") |
| 154 | + res_breakdown_dict[resn]["E_elec"] = E_elec_qmmm(mol, x_temp, q_temp, dm) |
| 155 | + res_breakdown_dict[resn]["E_nuc"] = E_nuc_qmmm(x_temp, x_qm, q_temp, q_qm) |
| 156 | + res_breakdown_dict[resn]["V_qmmm"] = res_breakdown_dict[resn]["E_elec"] + res_breakdown_dict[resn]["E_nuc"] |
| 157 | + |
| 158 | + # Write the breakdown to a file from the dictionary, sorted by residue number |
| 159 | + print("Writing residue breakdown to file residue_breakdown.txt") |
| 160 | + with open("residue_breakdown.txt", "w") as f: |
| 161 | + f.write("Residue\tE_elec\tE_nuc\tV_qmmm\tCount\n") |
| 162 | + for resn in sorted(res_breakdown_dict.keys(), key=lambda x: int(x)): |
| 163 | + data = res_breakdown_dict[resn] |
| 164 | + f.write(f"{resn}\t{float(data['E_elec']):.8f}\t{float(data['E_nuc']):.8f}\t{float(data['V_qmmm']):.8f}\t{data['count']}\n") |
| 165 | + print(f"Writing complete. Overall interaction energy V_qmmm: {sum(data['V_qmmm'] for data in res_breakdown_dict.values())} Hartree") |
| 166 | + |
| 167 | + else: |
| 168 | + print("Residue breakdown disabled. Analysis will be done for the entire molecule.") |
| 169 | + # Interaction energy of PC with charge density |
| 170 | + print("E_elec") |
| 171 | + #Print floating point precision of the variables |
| 172 | + print(f"x: {x.dtype}, q: {q.dtype}") |
| 173 | + |
| 174 | + E_elec = E_elec_qmmm(mol, x, q, dm, track=True) |
| 175 | + # Interaction energy of PC with nuclear charges |
| 176 | + print("E_nuc") |
| 177 | + E_nuc = E_nuc_qmmm(x, x_qm, q, q_qm, track=True) |
| 178 | + V_qmmm = E_elec + E_nuc |
| 179 | + print(f"Total interaction energy V_qmmm: {V_qmmm} Hartree") |
| 180 | + |
| 181 | +if __name__ == "__main__": |
| 182 | + main() |
0 commit comments