diff --git a/atomate/qchem/drones.py b/atomate/qchem/drones.py index 08c416b4b..28e54d06b 100644 --- a/atomate/qchem/drones.py +++ b/atomate/qchem/drones.py @@ -11,10 +11,14 @@ from monty.json import jsanitize from pymatgen.apps.borg.hive import AbstractDrone from pymatgen.core import Molecule +from pymatgen.core.periodic_table import Element from pymatgen.io.babel import BabelMolAdaptor from pymatgen.io.qchem.inputs import QCInput from pymatgen.io.qchem.outputs import QCOutput +from pymatgen.analysis.graphs import MoleculeGraph +from pymatgen.analysis.local_env import OpenBabelNN, metal_edge_extender from pymatgen.symmetry.analyzer import PointGroupAnalyzer +from pymatgen.util.graph_hashing import weisfeiler_lehman_graph_hash from atomate import __version__ as atomate_version from atomate.utils.utils import get_logger @@ -31,6 +35,13 @@ logger = get_logger(__name__) +METALS = { + str(e) + for e in [Element.from_Z(i) for i in range(1, 119)] + if e.is_metal +} + + class QChemDrone(AbstractDrone): """ A QChem drone to parse QChem calculations and insert an organized, searchable entry into the database. @@ -53,6 +64,9 @@ class QChemDrone(AbstractDrone): "chemsys", "pointgroup", "formula_alphabetical", + "species_hash", + "coord_hash", + "species_hash_nometal" }, "input": {"initial_molecule", "job_type"}, "output": {"initial_molecule", "job_type", "final_energy"}, @@ -346,6 +360,41 @@ def generate_doc(self, dir_name, qcinput_files, qcoutput_files, multirun): smiles = pbmol.write("smi").split()[0] d["smiles"] = smiles + # Add graph hashes + # This is primarily for emmet builders + if "optimized_molecule" in d["output"]: + hash_mol = d["output"]["optimized_molecule"] + else: + hash_mol = d["output"]["initial_molecule"] + + hash_mg = MoleculeGraph.with_local_env_strategy(hash_mol, OpenBabelNN()) + hash_mg = metal_edge_extender(hash_mg) + undir_mg = hash_mg.graph.to_undirected() + + metal_inds = [i for i, e in enumerate(hash_mol.species) if str(e) in METALS] + + to_delete = list() + for bond in hash_mg.graph.edges(): + if bond[0] in metal_inds or bond[1] in metal_inds: + to_delete.append((bond[0], bond[1])) + + mg_nometal = copy.deepcopy(hash_mg) + for b in to_delete: + mg_nometal.break_edge(b[0], b[1], allow_reverse=True) + + d["coord_hash"] = weisfeiler_lehman_graph_hash( + undir_mg, + node_attr="coords" + ) + d["species_hash"] = weisfeiler_lehman_graph_hash( + undir_mg, + node_attr="specie" + ) + d["species_hash_nometal"] = weisfeiler_lehman_graph_hash( + mg_nometal.graph.to_undirected(), + node_attr="specie" + ) + d["state"] = "successful" if d_calc_final["completion"] else "unsuccessful" if "special_run_type" in d: if d["special_run_type"] in [ diff --git a/atomate/qchem/tests/test_drones.py b/atomate/qchem/tests/test_drones.py index 64c8aff6d..464de18fe 100644 --- a/atomate/qchem/tests/test_drones.py +++ b/atomate/qchem/tests/test_drones.py @@ -41,6 +41,9 @@ def test_assimilate_opt(self): self.assertEqual(doc["smiles"], "O1[C](O[Li])OC=C1") self.assertEqual(doc["formula_pretty"], "LiH2(CO)3") self.assertEqual(doc["formula_anonymous"], "AB2C3D3") + self.assertEqual(doc["species_hash"], "44fb6a8c8e99aed63f23e573e720018d") + self.assertEqual(doc["coord_hash"], "acbb408648e992fea44acb87e912fd5f") + self.assertEqual(doc["species_hash_nometal"], "48ba8b7456a39b5ee1f8d76772d9f4c8") self.assertEqual(doc["chemsys"], "C-H-Li-O") self.assertEqual(doc["pointgroup"], "Cs") self.assertIn("custodian", doc) @@ -77,6 +80,9 @@ def test_assimilate_pes_scan(self): self.assertEqual(doc["smiles"], "[O]C(=O)[O]") self.assertEqual(doc["formula_pretty"], "CO3") self.assertEqual(doc["formula_anonymous"], "AB3") + self.assertEqual(doc["species_hash"], "75e7a4125709cb5a14d1ce2b84c3cdbd") + self.assertEqual(doc["coord_hash"], "65b93a5088773337b9372c4ce65aeb37") + self.assertEqual(doc["species_hash_nometal"], "75e7a4125709cb5a14d1ce2b84c3cdbd") self.assertEqual(doc["chemsys"], "C-O") self.assertEqual(doc["pointgroup"], "C2v") self.assertIn("custodian", doc) @@ -130,6 +136,9 @@ def test_assimilate_freq(self): self.assertEqual(doc["smiles"], "O1[C](O[Li])OC=C1") self.assertEqual(doc["formula_pretty"], "LiH2(CO)3") self.assertEqual(doc["formula_anonymous"], "AB2C3D3") + self.assertEqual(doc["species_hash"], "44fb6a8c8e99aed63f23e573e720018d") + self.assertEqual(doc["coord_hash"], "acbb408648e992fea44acb87e912fd5f") + self.assertEqual(doc["species_hash_nometal"], "48ba8b7456a39b5ee1f8d76772d9f4c8") self.assertEqual(doc["chemsys"], "C-H-Li-O") self.assertEqual(doc["pointgroup"], "Cs") self.assertIn("custodian", doc) @@ -203,6 +212,9 @@ def test_assimilate_FF(self): self.assertEqual(doc["smiles"], "O1[C](O[Li])OC=C1") self.assertEqual(doc["formula_pretty"], "LiH2(CO)3") self.assertEqual(doc["formula_anonymous"], "AB2C3D3") + self.assertEqual(doc["species_hash"], "44fb6a8c8e99aed63f23e573e720018d") + self.assertEqual(doc["coord_hash"], "acbb408648e992fea44acb87e912fd5f") + self.assertEqual(doc["species_hash_nometal"], "48ba8b7456a39b5ee1f8d76772d9f4c8") self.assertEqual(doc["chemsys"], "C-H-Li-O") self.assertEqual(doc["pointgroup"], "Cs") self.assertIn("custodian", doc) @@ -313,6 +325,9 @@ def test_assimilate_ffts(self): self.assertEqual(doc["smiles"], "O(C(=O)[O])[Li].[CH2]COC(=O)O[Li]") self.assertEqual(doc["formula_pretty"], "LiH2C2O3") self.assertEqual(doc["formula_anonymous"], "AB2C2D3") + self.assertEqual(doc["species_hash"], "b58892da682cac0193cf85f25fe8c25b") + self.assertEqual(doc["coord_hash"], "ba40774a9d7a39f8354d0ca7efaff6d0") + self.assertEqual(doc["species_hash_nometal"], "d7ab8a26c0d207bad6bd1316a368c8d5") self.assertEqual(doc["chemsys"], "C-H-Li-O") self.assertEqual(doc["pointgroup"], "C1") self.assertIn("custodian", doc) @@ -396,6 +411,9 @@ def test_multirun(self): self.assertEqual(doc["smiles"], "O1[C](O[Li])OC=C1") self.assertEqual(doc["formula_pretty"], "LiH2(CO)3") self.assertEqual(doc["formula_anonymous"], "AB2C3D3") + self.assertEqual(doc["species_hash"], "44fb6a8c8e99aed63f23e573e720018d") + self.assertEqual(doc["coord_hash"], "d11f6abeef573141250f38af1388ca0c") + self.assertEqual(doc["species_hash_nometal"], "48ba8b7456a39b5ee1f8d76772d9f4c8") self.assertEqual(doc["chemsys"], "C-H-Li-O") self.assertEqual(doc["pointgroup"], "C2") self.assertIn("calcs_reversed", doc) @@ -437,6 +455,9 @@ def test_assimilate_unstable_opt(self): self.assertEqual(doc["cputime"], None) self.assertEqual(doc["formula_pretty"], "CS2NO") self.assertEqual(doc["formula_anonymous"], "ABCD2") + self.assertEqual(doc["species_hash"], "1559ce7584cf8c27f1c6044a6af76dd1") + self.assertEqual(doc["coord_hash"], "8698b987cdb70eed57bd0a7e77b7e00c") + self.assertEqual(doc["species_hash_nometal"], "1559ce7584cf8c27f1c6044a6af76dd1") self.assertEqual(doc["chemsys"], "C-N-O-S") self.assertEqual(doc["pointgroup"], "C1") self.assertEqual(doc["orig"]["rem"], doc["calcs_reversed"][-1]["input"]["rem"]) @@ -471,6 +492,9 @@ def test_assimilate_opt_with_hidden_changes_from_handler(self): self.assertEqual(doc["cputime"], 7471.17) self.assertEqual(doc["formula_pretty"], "HC2O") self.assertEqual(doc["formula_anonymous"], "ABC2") + self.assertEqual(doc["species_hash"], "6dc4aca792bcd6bd45bc5176f42f6aee") + self.assertEqual(doc["coord_hash"], "7cd547f71ddf74efcfb00743161d07f2") + self.assertEqual(doc["species_hash_nometal"], "6dc4aca792bcd6bd45bc5176f42f6aee") self.assertEqual(doc["chemsys"], "C-H-O") self.assertEqual(doc["pointgroup"], "C1") self.assertEqual(doc["orig"]["rem"], doc["calcs_reversed"][-1]["input"]["rem"]) @@ -504,6 +528,9 @@ def test_assimilate_disconnected_opt(self): self.assertEqual(doc["cputime"], 8825.76) self.assertEqual(doc["formula_pretty"], "H2C2O3") self.assertEqual(doc["formula_anonymous"], "A2B2C3") + self.assertEqual(doc["species_hash"], "c87c6b5a4bb8632cdb934e400a0237fb") + self.assertEqual(doc["coord_hash"], "6adeeb1d55585a35a6bcf9e1513218f2") + self.assertEqual(doc["species_hash_nometal"], "c87c6b5a4bb8632cdb934e400a0237fb") self.assertEqual(doc["chemsys"], "C-H-O") self.assertEqual(doc["pointgroup"], "C1") self.assertEqual(doc["orig"]["rem"], doc["calcs_reversed"][-1]["input"]["rem"]) @@ -527,6 +554,9 @@ def test_assimilate_sp(self): self.assertEqual(doc["smiles"], "[O]") self.assertEqual(doc["formula_pretty"], "O2") self.assertEqual(doc["formula_anonymous"], "A") + self.assertEqual(doc["species_hash"], "d6bfd1bfa289860fea9c71dd64fab914") + self.assertEqual(doc["coord_hash"], "d1e604b6971a7e2d889e3172456da7db") + self.assertEqual(doc["species_hash_nometal"], "d6bfd1bfa289860fea9c71dd64fab914") self.assertEqual(doc["chemsys"], "O") self.assertEqual(doc["pointgroup"], "Kh") self.assertIn("custodian", doc) @@ -572,6 +602,9 @@ def test_sp_with_orig(self): self.assertEqual(doc["smiles"], "[O]") self.assertEqual(doc["formula_pretty"], "O2") self.assertEqual(doc["formula_anonymous"], "A") + self.assertEqual(doc["species_hash"], "d6bfd1bfa289860fea9c71dd64fab914") + self.assertEqual(doc["coord_hash"], "4b44e5b5c47ec269779254ae49ca0b51") + self.assertEqual(doc["species_hash_nometal"], "d6bfd1bfa289860fea9c71dd64fab914") self.assertEqual(doc["chemsys"], "O") self.assertEqual(doc["pointgroup"], "Kh") self.assertIn("custodian", doc)