Skip to content
Open
49 changes: 49 additions & 0 deletions atomate/qchem/drones.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,14 @@
from monty.json import jsanitize
from pymatgen.apps.borg.hive import AbstractDrone
from pymatgen.core import Molecule
from pymatgen.core.periodic_table import Element
from pymatgen.io.babel import BabelMolAdaptor
from pymatgen.io.qchem.inputs import QCInput
from pymatgen.io.qchem.outputs import QCOutput
from pymatgen.analysis.graphs import MoleculeGraph
from pymatgen.analysis.local_env import OpenBabelNN, metal_edge_extender
from pymatgen.symmetry.analyzer import PointGroupAnalyzer
from pymatgen.util.graph_hashing import weisfeiler_lehman_graph_hash

from atomate import __version__ as atomate_version
from atomate.utils.utils import get_logger
Expand All @@ -31,6 +35,13 @@
logger = get_logger(__name__)


# String form (``str(element)``) of every element with atomic number 1-118
# that pymatgen reports as a metal via ``Element.is_metal``.
METALS = {
    str(element)
    for element in map(Element.from_Z, range(1, 119))
    if element.is_metal
}


class QChemDrone(AbstractDrone):
"""
A QChem drone to parse QChem calculations and insert an organized, searchable entry into the database.
Expand All @@ -53,6 +64,9 @@ class QChemDrone(AbstractDrone):
"chemsys",
"pointgroup",
"formula_alphabetical",
"species_hash",
"coord_hash",
"species_hash_nometal"
},
"input": {"initial_molecule", "job_type"},
"output": {"initial_molecule", "job_type", "final_energy"},
Expand Down Expand Up @@ -346,6 +360,41 @@ def generate_doc(self, dir_name, qcinput_files, qcoutput_files, multirun):
smiles = pbmol.write("smi").split()[0]
d["smiles"] = smiles

# Add Weisfeiler-Lehman graph hashes of the molecule graph
# These are primarily used by the emmet builders
if "optimized_molecule" in d["output"]:
hash_mol = d["output"]["optimized_molecule"]
else:
hash_mol = d["output"]["initial_molecule"]

hash_mg = MoleculeGraph.with_local_env_strategy(hash_mol, OpenBabelNN())
hash_mg = metal_edge_extender(hash_mg)
undir_mg = hash_mg.graph.to_undirected()

metal_inds = [i for i, e in enumerate(hash_mol.species) if str(e) in METALS]

to_delete = list()
for bond in hash_mg.graph.edges():
if bond[0] in metal_inds or bond[1] in metal_inds:
to_delete.append((bond[0], bond[1]))

mg_nometal = copy.deepcopy(hash_mg)
for b in to_delete:
mg_nometal.break_edge(b[0], b[1], allow_reverse=True)

d["coord_hash"] = weisfeiler_lehman_graph_hash(
undir_mg,
node_attr="coords"
)
d["species_hash"] = weisfeiler_lehman_graph_hash(
undir_mg,
node_attr="specie"
)
d["species_hash_nometal"] = weisfeiler_lehman_graph_hash(
mg_nometal.graph.to_undirected(),
node_attr="specie"
)

d["state"] = "successful" if d_calc_final["completion"] else "unsuccessful"
if "special_run_type" in d:
if d["special_run_type"] in [
Expand Down
33 changes: 33 additions & 0 deletions atomate/qchem/tests/test_drones.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ def test_assimilate_opt(self):
self.assertEqual(doc["smiles"], "O1[C](O[Li])OC=C1")
self.assertEqual(doc["formula_pretty"], "LiH2(CO)3")
self.assertEqual(doc["formula_anonymous"], "AB2C3D3")
self.assertEqual(doc["species_hash"], "44fb6a8c8e99aed63f23e573e720018d")
self.assertEqual(doc["coord_hash"], "acbb408648e992fea44acb87e912fd5f")
self.assertEqual(doc["species_hash_nometal"], "48ba8b7456a39b5ee1f8d76772d9f4c8")
self.assertEqual(doc["chemsys"], "C-H-Li-O")
self.assertEqual(doc["pointgroup"], "Cs")
self.assertIn("custodian", doc)
Expand Down Expand Up @@ -77,6 +80,9 @@ def test_assimilate_pes_scan(self):
self.assertEqual(doc["smiles"], "[O]C(=O)[O]")
self.assertEqual(doc["formula_pretty"], "CO3")
self.assertEqual(doc["formula_anonymous"], "AB3")
self.assertEqual(doc["species_hash"], "75e7a4125709cb5a14d1ce2b84c3cdbd")
self.assertEqual(doc["coord_hash"], "65b93a5088773337b9372c4ce65aeb37")
self.assertEqual(doc["species_hash_nometal"], "75e7a4125709cb5a14d1ce2b84c3cdbd")
self.assertEqual(doc["chemsys"], "C-O")
self.assertEqual(doc["pointgroup"], "C2v")
self.assertIn("custodian", doc)
Expand Down Expand Up @@ -130,6 +136,9 @@ def test_assimilate_freq(self):
self.assertEqual(doc["smiles"], "O1[C](O[Li])OC=C1")
self.assertEqual(doc["formula_pretty"], "LiH2(CO)3")
self.assertEqual(doc["formula_anonymous"], "AB2C3D3")
self.assertEqual(doc["species_hash"], "44fb6a8c8e99aed63f23e573e720018d")
self.assertEqual(doc["coord_hash"], "acbb408648e992fea44acb87e912fd5f")
self.assertEqual(doc["species_hash_nometal"], "48ba8b7456a39b5ee1f8d76772d9f4c8")
self.assertEqual(doc["chemsys"], "C-H-Li-O")
self.assertEqual(doc["pointgroup"], "Cs")
self.assertIn("custodian", doc)
Expand Down Expand Up @@ -203,6 +212,9 @@ def test_assimilate_FF(self):
self.assertEqual(doc["smiles"], "O1[C](O[Li])OC=C1")
self.assertEqual(doc["formula_pretty"], "LiH2(CO)3")
self.assertEqual(doc["formula_anonymous"], "AB2C3D3")
self.assertEqual(doc["species_hash"], "44fb6a8c8e99aed63f23e573e720018d")
self.assertEqual(doc["coord_hash"], "acbb408648e992fea44acb87e912fd5f")
self.assertEqual(doc["species_hash_nometal"], "48ba8b7456a39b5ee1f8d76772d9f4c8")
self.assertEqual(doc["chemsys"], "C-H-Li-O")
self.assertEqual(doc["pointgroup"], "Cs")
self.assertIn("custodian", doc)
Expand Down Expand Up @@ -313,6 +325,9 @@ def test_assimilate_ffts(self):
self.assertEqual(doc["smiles"], "O(C(=O)[O])[Li].[CH2]COC(=O)O[Li]")
self.assertEqual(doc["formula_pretty"], "LiH2C2O3")
self.assertEqual(doc["formula_anonymous"], "AB2C2D3")
self.assertEqual(doc["species_hash"], "b58892da682cac0193cf85f25fe8c25b")
self.assertEqual(doc["coord_hash"], "ba40774a9d7a39f8354d0ca7efaff6d0")
self.assertEqual(doc["species_hash_nometal"], "d7ab8a26c0d207bad6bd1316a368c8d5")
self.assertEqual(doc["chemsys"], "C-H-Li-O")
self.assertEqual(doc["pointgroup"], "C1")
self.assertIn("custodian", doc)
Expand Down Expand Up @@ -396,6 +411,9 @@ def test_multirun(self):
self.assertEqual(doc["smiles"], "O1[C](O[Li])OC=C1")
self.assertEqual(doc["formula_pretty"], "LiH2(CO)3")
self.assertEqual(doc["formula_anonymous"], "AB2C3D3")
self.assertEqual(doc["species_hash"], "44fb6a8c8e99aed63f23e573e720018d")
self.assertEqual(doc["coord_hash"], "d11f6abeef573141250f38af1388ca0c")
self.assertEqual(doc["species_hash_nometal"], "48ba8b7456a39b5ee1f8d76772d9f4c8")
self.assertEqual(doc["chemsys"], "C-H-Li-O")
self.assertEqual(doc["pointgroup"], "C2")
self.assertIn("calcs_reversed", doc)
Expand Down Expand Up @@ -437,6 +455,9 @@ def test_assimilate_unstable_opt(self):
self.assertEqual(doc["cputime"], None)
self.assertEqual(doc["formula_pretty"], "CS2NO")
self.assertEqual(doc["formula_anonymous"], "ABCD2")
self.assertEqual(doc["species_hash"], "1559ce7584cf8c27f1c6044a6af76dd1")
self.assertEqual(doc["coord_hash"], "8698b987cdb70eed57bd0a7e77b7e00c")
self.assertEqual(doc["species_hash_nometal"], "1559ce7584cf8c27f1c6044a6af76dd1")
self.assertEqual(doc["chemsys"], "C-N-O-S")
self.assertEqual(doc["pointgroup"], "C1")
self.assertEqual(doc["orig"]["rem"], doc["calcs_reversed"][-1]["input"]["rem"])
Expand Down Expand Up @@ -471,6 +492,9 @@ def test_assimilate_opt_with_hidden_changes_from_handler(self):
self.assertEqual(doc["cputime"], 7471.17)
self.assertEqual(doc["formula_pretty"], "HC2O")
self.assertEqual(doc["formula_anonymous"], "ABC2")
self.assertEqual(doc["species_hash"], "6dc4aca792bcd6bd45bc5176f42f6aee")
self.assertEqual(doc["coord_hash"], "7cd547f71ddf74efcfb00743161d07f2")
self.assertEqual(doc["species_hash_nometal"], "6dc4aca792bcd6bd45bc5176f42f6aee")
self.assertEqual(doc["chemsys"], "C-H-O")
self.assertEqual(doc["pointgroup"], "C1")
self.assertEqual(doc["orig"]["rem"], doc["calcs_reversed"][-1]["input"]["rem"])
Expand Down Expand Up @@ -504,6 +528,9 @@ def test_assimilate_disconnected_opt(self):
self.assertEqual(doc["cputime"], 8825.76)
self.assertEqual(doc["formula_pretty"], "H2C2O3")
self.assertEqual(doc["formula_anonymous"], "A2B2C3")
self.assertEqual(doc["species_hash"], "c87c6b5a4bb8632cdb934e400a0237fb")
self.assertEqual(doc["coord_hash"], "6adeeb1d55585a35a6bcf9e1513218f2")
self.assertEqual(doc["species_hash_nometal"], "c87c6b5a4bb8632cdb934e400a0237fb")
self.assertEqual(doc["chemsys"], "C-H-O")
self.assertEqual(doc["pointgroup"], "C1")
self.assertEqual(doc["orig"]["rem"], doc["calcs_reversed"][-1]["input"]["rem"])
Expand All @@ -527,6 +554,9 @@ def test_assimilate_sp(self):
self.assertEqual(doc["smiles"], "[O]")
self.assertEqual(doc["formula_pretty"], "O2")
self.assertEqual(doc["formula_anonymous"], "A")
self.assertEqual(doc["species_hash"], "d6bfd1bfa289860fea9c71dd64fab914")
self.assertEqual(doc["coord_hash"], "d1e604b6971a7e2d889e3172456da7db")
self.assertEqual(doc["species_hash_nometal"], "d6bfd1bfa289860fea9c71dd64fab914")
self.assertEqual(doc["chemsys"], "O")
self.assertEqual(doc["pointgroup"], "Kh")
self.assertIn("custodian", doc)
Expand Down Expand Up @@ -572,6 +602,9 @@ def test_sp_with_orig(self):
self.assertEqual(doc["smiles"], "[O]")
self.assertEqual(doc["formula_pretty"], "O2")
self.assertEqual(doc["formula_anonymous"], "A")
self.assertEqual(doc["species_hash"], "d6bfd1bfa289860fea9c71dd64fab914")
self.assertEqual(doc["coord_hash"], "4b44e5b5c47ec269779254ae49ca0b51")
self.assertEqual(doc["species_hash_nometal"], "d6bfd1bfa289860fea9c71dd64fab914")
self.assertEqual(doc["chemsys"], "O")
self.assertEqual(doc["pointgroup"], "Kh")
self.assertIn("custodian", doc)
Expand Down