From c9c49a46e3fd5c42a11d2b6356d464281d9430cc Mon Sep 17 00:00:00 2001
From: Reza Hemmati <50717617+QuChem@users.noreply.github.com>
Date: Wed, 29 Nov 2023 08:25:43 -0600
Subject: [PATCH 01/36] Create gaussian.py

---
 qcengine/programs/gaussian.py | 388 ++++++++++++++++++++++++++++++++++
 1 file changed, 388 insertions(+)
 create mode 100644 qcengine/programs/gaussian.py

diff --git a/qcengine/programs/gaussian.py b/qcengine/programs/gaussian.py
new file mode 100644
index 000000000..6fdaf1534
--- /dev/null
+++ b/qcengine/programs/gaussian.py
@@ -0,0 +1,388 @@
+"""
+Calls the GAUSSIAN excutable
+"""
+
+import os
+import re
+import tempfile
+import warnings
+from collections import defaultdict
+from typing import Any, Dict, List, Optional, Tuple
+
+import numpy as np
+from qcelemental import constants
+from qcelemental.models import AtomicInput, AtomicResult, Molecule, Provenance
+from qcelemental.molparse import regex
+from qcelemental.util import parse_version, safe_version, which
+
+from qcengine.config import TaskConfig, get_config
+
+from ..exceptions import InputError, UnknownError
+from ..util import disk_files, execute, temporary_directory
+from .model import ProgramHarness
+
+class GaussianHarness(ProgramHarness):
+
+    _defaults = {
+        "name": "Gaussian",
+        "scratch": True,
+        "thread_safe": False,
+        "thread_parallel": True,
+        "node_parallel": False,
+        "managed_memory": True,
+    }
+    version_cache: Dict[str, str] = {}
+
+    class Config(ProgramHarness.Config):
+        pass
+
+
+    def found(self, raise_error: bool = False) -> bool:
+        return which(
+            "g09",
+            return_bool=True,
+            raise_error=raise_error,
+            raise_msg="Please install Gaussian. Check it's in your PATH with `which g09`.",
+        )
+
+    def get_version(self) -> str:
+        self.found(raise_error=True)
+
+        # Get the node configuration
+        #config = get_config()
+
+        which_prog = which("g09")
+        if which_prog not in self.version_cache:
+            success, output = execute([which_prog, "v.com"], {"v.com": ""})
+
+            #mobj = re.search(r"Gaussian\s+([\d.]+)\s+for", exc["stdout"])
+            #if not mobj:
+            #    mobj = re.search(r"Gaussian version:\s+([\d.]+)\s+", exc["stdout"])
+
+            #if mobj:
+            #    self.version_cache[which_prog] = safe_version(mobj.group(1))
+
+            # if "QC not defined" in exc["stdout"]:
+            #else:
+            #    return safe_version("09")
+
+        return self.version_cache[which_prog]
+
+    def compute(self, input_model: "AtomicInput", config: TaskConfig) -> "AtomicResult":
+        """
+        Run Gaussian
+        """
+        # Check if Gaussian executable is found
+        self.found(raise_error=True)
+
+        # Setup the job
+        job_inputs = self.build_input(input_model, config)
+
+        # Run Gaussian
+        exe_success, proc = self.execute(job_inputs)
+
+        # Determine whether the calculation succeeded
+        print("SUCCESS", exe_success)
+        if exe_success:
+            # If execution succeeded, collect results
+            return self.parse_output(proc["outfiles"], input_model)
+
+
+    def build_input(
+        self, input_model: AtomicInput, config: TaskConfig, template: Optional[str] = None
+    ) -> Dict[str, Any]:
+        gaussian_ret = {
+            "infiles": {},
+            "commands": [which("g09"),  "input.inp"],
+            "scratch_directory": config.scratch_directory
+            }
+
+        input_file = '''%mem=6000000
+#P HF/6-31G(d) scf=tight
+
+test1 HF/6-31G(d) sp formaldehyde
+
+0 1
+C1
+O2  1  r2
+H3  1  r3  2  a3
+H4  1  r4  2  a4  3  d4
+
+r2=1.20
+r3=1.0
+r4=1.0
+a3=120.
+a4=120.
+d4=180.
+
+        '''
+        gaussian_ret['infiles']["input.inp"] = input_file
+
+        return gaussian_ret
+
+    def execute(self, inputs, extra_outfiles=None, extra_commands=None, scratch_name=None, timeout=None):
+
+        success, dexe = execute(
+        inputs["commands"],
+        inputs["infiles"],
+        )
+        print(dexe['proc'].stdout.read())
+        print(dexe['proc'].stderr.read())
+        return success, dexe
+
+    def parse_output(self, outfiles: Dict[str, str], input_model: AtomicInput) -> AtomicResult:
+        print("IN PARSE OUTPUT")
+
+        #output_data = {}
+        stdout = outfiles.pop('stdout')
+        stderr = outfiles.pop('stderr')
+
+        method = input_model.model.method.lower()
+        #method = method[4:] if method.startswith("") else method
+
+        try:
+            qcvars, gaussianmol, module = harvest(
+                input_model.molecule, method, stdout, **outfiles
+            )
+        except:
+            pass
+
+        #properties = {
+        #    "nuclear_repulsion_energy": bdata['99.0'][0],
+        #    "scf_total_energy": bdata["99.0"][1],
+        #    "return_energy": bdata["99.0"][-1],
+        #}
+
+        qcvars = {}
+
+        #props, prov = self._parse_logfile_common(outtext, input_model.dict())
+        #output_data["provenance"] = prov
+        #output_data["properties"] = properties
+        #output_data["properties"].update(props)
+        output_data["stdout"] = stdout
+        output_data["success"] = True
+
+        merged_data = {**input_model.dict(), **output_data}
+        #merged_data["extras"]["qcvars"] = qcvars
+        print(merged_data)
+
+        return AtomicResult(**merged_data)
+
+    # DELETE THIS FUNC
+    def _parse_logfile_common(self, outtext: str, input_dict: Dict[str, Any]):
+        """
+        Parses fields from log file that are not parsed from QCSCRATCH in parse_output
+        """
+
+        properties = {}
+        provenance = Provenance(creator="QChem", version=self.get_version(), routine="qchem").dict()
+        mobj = re.search(r"This is a multi-thread run using ([0-9]+) threads", outtext)
+        if mobj:
+            provenance["nthreads"] = int(mobj.group(1))
+
+        mobj = re.search(r"Total job time:\s*" + NUMBER + r"s\(wall\)", outtext)
+        if mobj:
+            provenance["wall_time"] = float(mobj.group(1))
+
+        mobj = re.search(r"Archival summary:\s*\n[0-9]+\\[0-9+]\\([\w\.\-]+)\\", outtext)
+        if mobj:
+            provenance["hostname"] = mobj.group(1)
+
+        mobj = re.search(r"\n\s*There are\s+(\d+) alpha and\s+(\d+) beta electrons\s*\n", outtext)
+        if mobj:
+            properties["calcinfo_nalpha"] = int(mobj.group(1))
+            properties["calcinfo_nbeta"] = int(mobj.group(2))
+
+        mobj = re.search(r"\n\s*There are\s+\d+ shells and\s+(\d+) basis functions\s*\n", outtext)
+        if mobj:
+            properties["calcinfo_nbasis"] = int(mobj.group(1))
+
+        mobj = re.search(r"\n\s*RI-MP2 CORRELATION ENERGY\s+=\s+" + NUMBER + r"\s+au\s*\n", outtext)
+        if mobj:
+            properties["mp2_correlation_energy"] = float(mobj.group(1))
+
+        mobj = re.search(r"\n\s*RI-MP2 SINGLES ENERGY\s+=\s+" + NUMBER + r"\s+au\s*\n", outtext)
+        if mobj:
+            properties["mp2_singles_energy"] = float(mobj.group(1))
+
+        mobj_aaaa = re.search(r"\n\s*RI-MP2 ENERGY \(aa\|aa\)\s+=\s+" + NUMBER + r"\s+au\s*\n", outtext)
+        mobj_bbbb = re.search(r"\n\s*RI-MP2 ENERGY \(bb\|bb\)\s+=\s+" + NUMBER + r"\s+au\s*\n", outtext)
+        if mobj_aaaa and mobj_bbbb:
+            properties["mp2_same_spin_correlation_energy"] = float(mobj_aaaa.group(1)) + float(mobj_bbbb.group(1))
+
+        mobj_aabb = re.search(r"\n\s*RI-MP2 ENERGY \(aa\|bb\)\s+=\s+" + NUMBER + r"\s+au\s*\n", outtext)
+        mobj_bbaa = re.search(r"\n\s*RI-MP2 ENERGY \(bb\|aa\)\s+=\s+" + NUMBER + r"\s+au\s*\n", outtext)
+        if mobj_aaaa and mobj_bbbb:
+            properties["mp2_opposite_spin_correlation_energy"] = float(mobj_aabb.group(1)) + float(mobj_bbaa.group(1))
+
+        properties["calcinfo_natom"] = len(input_dict["molecule"]["symbols"])
+
+        mobj = re.search(r"\n\s*(\d+)\s+" + NUMBER + r"\s+" + NUMBER + r"\s+Convergence criterion met\s*\n", outtext)
+        if mobj:
+            properties["scf_iterations"] = int(mobj.group(1))
+
+        mobj = re.search(
+            r"\n\s+Dipole Moment \(Debye\)\s*\n\s+X\s+" + NUMBER + r"\s+Y\s+" + NUMBER + r"\s+Z\s+" + NUMBER + r"\s*\n",
+            outtext,
+        )
+        if mobj:
+            cf = constants.conversion_factor("debye", "e * bohr")
+            properties["scf_dipole_moment"] = [float(mobj.group(i)) * cf for i in range(1, 4)]
+
+        return properties, provenance
+
+    def parse_logfile(self, outfiles: Dict[str, str]) -> AtomicResult:
+        """
+        Parses a log file.
+        """
+        warnings.warn(
+            "parse_logfile will result in precision loss for some fields due to trunctation in " "Q-Chem output files."
+        )
+
+        outtext = outfiles["dispatch.log"]
+
+        mobj = re.search(r"(?:User input\:|Running Job?)\s+\d+\s+of\s+(\d+)", outtext)
+        if mobj:
+            if int(mobj.group(1)) > 1:
+                raise InputError("Multi-job Q-Chem log files not supported.")
+
+        input_dict = {}
+        mobj = re.search(r"\n-{20,}\nUser input:\n-{20,}\n(.+)\n-{20,}", outtext, re.DOTALL)
+        if mobj:
+            inputtext = mobj.group(1)
+
+            rem_match = re.search(r"\$rem\s*\n([^\$]+)\n\s*\$end", inputtext, re.DOTALL | re.IGNORECASE)
+            if rem_match:
+                rem_text = rem_match.group(1)
+                lines = rem_text.split("\n")
+                keywords = {}
+                for line in lines:
+                    s = re.sub(r"(^|[^\\])!.*", "", line).split()
+                    if len(s) == 0:
+                        continue
+                    keywords[s[0].lower()] = s[1].lower()
+                input_dict["model"] = {}
+                input_dict["model"]["method"] = keywords.pop("method").lower()
+                input_dict["model"]["basis"] = keywords.pop("basis").lower()
+                if "jobtype" in keywords:
+                    jobtype = keywords.pop("jobtype")
+                else:
+                    jobtype = "sp"
+                _jobtype_to_driver = {
+                    "sp": "energy",
+                    "force": "gradient",
+                    "freq": "hessian",
+                }  # optimization intentionally not supported
+                try:
+                    input_dict["driver"] = _jobtype_to_driver[jobtype]
+                except KeyError:
+                    raise KeyError(f"Jobtype {jobtype} not supported in qchem log file parser.")
+
+                for key in keywords:
+                    if keywords[key] == "false":
+                        keywords[key] = False
+                    if keywords[key] == "true":
+                        keywords[key] = True
+                input_dict["keywords"] = keywords
+
+            molecule_match = re.search(r"\$molecule\s*\n([^\$]+)\n\s*\$end", inputtext, re.DOTALL | re.IGNORECASE)
+            if molecule_match:
+                molecule_text = molecule_match.group(1)
+                if keywords.get("input_bohr", False):
+                    molecule_text += "\nunits au"
+                molecule = Molecule.from_data(molecule_text, dtype="psi4")
+                input_dict["molecule"] = molecule.dict()
+
+            _excluded_rem = {
+                "input_bohr",
+                "mem_total",
+                "mem_static",
+            }  # parts of the rem normally written by build_input, and which do not affect results
+            for item in _excluded_rem:
+                if item in input_dict["keywords"]:
+                    input_dict["keywords"].pop(item)
+
+        try:
+            qcscr_result = self.parse_output(outfiles, AtomicInput(**input_dict)).dict()
+        except KeyError:
+            props, prov = self._parse_logfile_common(outtext, input_dict)
+            qcscr_result = {"properties": props, "provenance": prov, **input_dict}
+
+        mobj = re.search(r"\n\s*Total\s+energy in the final basis set =\s+" + NUMBER + r"\s*\n", outtext)
+        if mobj and qcscr_result["properties"].get("scf_total_energy", None) is None:
+            qcscr_result["properties"]["scf_total_energy"] = float(mobj.group(1))
+
+        mobj = re.search(r"\n\s*Nuclear Repulsion Energy =\s+" + NUMBER + r"\s+hartrees\s*\n", outtext)
+        if mobj and qcscr_result["properties"].get("nuclear_repulsion_energy", None) is None:
+            qcscr_result["properties"]["nuclear_repulsion_energy"] = float(mobj.group(1))
+
+        mobj = re.search(r"\n\s*RI-MP2 TOTAL ENERGY\s+=\s+" + NUMBER + r"\s+au\s*\n", outtext)
+        if mobj and qcscr_result["properties"].get("mp2_total_energy", None) is None:
+            qcscr_result["properties"]["mp2_total_energy"] = float(mobj.group(1))
+
+        _mp2_methods = {"rimp2"}
+
+        method = input_dict["model"]["method"].lower()
+        if qcscr_result["properties"].get("return_energy", None) is None:
+            if method in _mp2_methods:
+                qcscr_result["properties"]["return_energy"] = qcscr_result["properties"]["mp2_total_energy"]
+            elif method in _scf_methods:
+                qcscr_result["properties"]["return_energy"] = qcscr_result["properties"]["scf_total_energy"]
+            else:
+                raise NotImplementedError(f"Method {method} not supported by logfile parser for energy driver.")
+
+        if input_dict["driver"] == "gradient" and qcscr_result.get("return_result", None) is None:
+
+            def read_matrix(text):
+                lines = text.split("\n")
+                i = 0
+                mdict = defaultdict(dict)
+                maxcol = 0
+                maxrow = 0
+                while i < len(lines):
+                    cols = [int(idx) for idx in lines[i].split()]
+                    maxcol = max(maxcol, *cols)
+                    i += 1
+                    while i < len(lines):
+                        s = lines[i].split()
+                        if len(s) <= len(cols):
+                            break
+                        assert len(s) == len(cols) + 1, s
+                        row = int(s[0])
+                        maxrow = max(maxrow, row)
+                        data = [float(field) for field in s[1:]]
+                        for col_idx, col in enumerate(cols):
+                            mdict[row - 1][col - 1] = data[col_idx]
+                        i += 1
+
+                ret = np.zeros((maxrow, maxcol))
+                for row in mdict:
+                    for col in mdict[row]:
+                        ret[row, col] = mdict[row][col]
+                return ret
+
+            if method in _mp2_methods:
+                mobj = re.search(
+                    r"\n\s+Full Analytical Gradient of MP2 Energy \(in au.\)\s*\n"
+                    r"([\s\d\.EDed\+\-]+)\n"
+                    r"\s*Gradient time:",
+                    outtext,
+                )
+
+            elif method in _scf_methods:
+                mobj = re.search(
+                    r"\n\s+Gradient of SCF Energy\s*\n([\s\d\.EDed\+\-]+)\n\s*Max gradient component =", outtext
+                )
+
+            else:
+                raise NotImplementedError(f"Method {method} not supported by the logfile parser for gradient driver.")
+
+            if mobj:
+                qcscr_result["return_result"] = read_matrix(mobj.group(1)).T
+
+        qcscr_result["success"] = True  # XXX: have a nice day?
+        qcscr_result["stdout"] = outtext
+        if input_dict["driver"] == "energy" and qcscr_result.get("return_result", None) is None:
+            qcscr_result["return_result"] = qcscr_result["properties"]["return_energy"]
+
+        return AtomicResult(**qcscr_result)

From 70596ae0fd076f9ea14e8aef8ea0007bac188d63 Mon Sep 17 00:00:00 2001
From: Reza Hemmati <50717617+QuChem@users.noreply.github.com>
Date: Wed, 29 Nov 2023 09:42:11 -0600
Subject: [PATCH 02/36] Update gaussian.py

---
 qcengine/programs/gaussian.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/qcengine/programs/gaussian.py b/qcengine/programs/gaussian.py
index 6fdaf1534..cc2694059 100644
--- a/qcengine/programs/gaussian.py
+++ b/qcengine/programs/gaussian.py
@@ -97,7 +97,7 @@ def build_input(
             "scratch_directory": config.scratch_directory
             }
 
-        input_file = '''%mem=6000000
+        input_file = '''%mem=20MW
 #P HF/6-31G(d) scf=tight
 
 test1 HF/6-31G(d) sp formaldehyde

From b4c5d3023aa0967286fc84073659e562209cbfa9 Mon Sep 17 00:00:00 2001
From: Reza Hemmati <50717617+QuChem@users.noreply.github.com>
Date: Wed, 29 Nov 2023 10:29:28 -0600
Subject: [PATCH 03/36] Cleaning gaussian.py

---
 qcengine/programs/gaussian.py | 219 ----------------------------------
 1 file changed, 219 deletions(-)

diff --git a/qcengine/programs/gaussian.py b/qcengine/programs/gaussian.py
index cc2694059..1968c0dea 100644
--- a/qcengine/programs/gaussian.py
+++ b/qcengine/programs/gaussian.py
@@ -167,222 +167,3 @@ def parse_output(self, outfiles: Dict[str, str], input_model: AtomicInput) -> At
         print(merged_data)
 
         return AtomicResult(**merged_data)
-
-    # DELETE THIS FUNC
-    def _parse_logfile_common(self, outtext: str, input_dict: Dict[str, Any]):
-        """
-        Parses fields from log file that are not parsed from QCSCRATCH in parse_output
-        """
-
-        properties = {}
-        provenance = Provenance(creator="QChem", version=self.get_version(), routine="qchem").dict()
-        mobj = re.search(r"This is a multi-thread run using ([0-9]+) threads", outtext)
-        if mobj:
-            provenance["nthreads"] = int(mobj.group(1))
-
-        mobj = re.search(r"Total job time:\s*" + NUMBER + r"s\(wall\)", outtext)
-        if mobj:
-            provenance["wall_time"] = float(mobj.group(1))
-
-        mobj = re.search(r"Archival summary:\s*\n[0-9]+\\[0-9+]\\([\w\.\-]+)\\", outtext)
-        if mobj:
-            provenance["hostname"] = mobj.group(1)
-
-        mobj = re.search(r"\n\s*There are\s+(\d+) alpha and\s+(\d+) beta electrons\s*\n", outtext)
-        if mobj:
-            properties["calcinfo_nalpha"] = int(mobj.group(1))
-            properties["calcinfo_nbeta"] = int(mobj.group(2))
-
-        mobj = re.search(r"\n\s*There are\s+\d+ shells and\s+(\d+) basis functions\s*\n", outtext)
-        if mobj:
-            properties["calcinfo_nbasis"] = int(mobj.group(1))
-
-        mobj = re.search(r"\n\s*RI-MP2 CORRELATION ENERGY\s+=\s+" + NUMBER + r"\s+au\s*\n", outtext)
-        if mobj:
-            properties["mp2_correlation_energy"] = float(mobj.group(1))
-
-        mobj = re.search(r"\n\s*RI-MP2 SINGLES ENERGY\s+=\s+" + NUMBER + r"\s+au\s*\n", outtext)
-        if mobj:
-            properties["mp2_singles_energy"] = float(mobj.group(1))
-
-        mobj_aaaa = re.search(r"\n\s*RI-MP2 ENERGY \(aa\|aa\)\s+=\s+" + NUMBER + r"\s+au\s*\n", outtext)
-        mobj_bbbb = re.search(r"\n\s*RI-MP2 ENERGY \(bb\|bb\)\s+=\s+" + NUMBER + r"\s+au\s*\n", outtext)
-        if mobj_aaaa and mobj_bbbb:
-            properties["mp2_same_spin_correlation_energy"] = float(mobj_aaaa.group(1)) + float(mobj_bbbb.group(1))
-
-        mobj_aabb = re.search(r"\n\s*RI-MP2 ENERGY \(aa\|bb\)\s+=\s+" + NUMBER + r"\s+au\s*\n", outtext)
-        mobj_bbaa = re.search(r"\n\s*RI-MP2 ENERGY \(bb\|aa\)\s+=\s+" + NUMBER + r"\s+au\s*\n", outtext)
-        if mobj_aaaa and mobj_bbbb:
-            properties["mp2_opposite_spin_correlation_energy"] = float(mobj_aabb.group(1)) + float(mobj_bbaa.group(1))
-
-        properties["calcinfo_natom"] = len(input_dict["molecule"]["symbols"])
-
-        mobj = re.search(r"\n\s*(\d+)\s+" + NUMBER + r"\s+" + NUMBER + r"\s+Convergence criterion met\s*\n", outtext)
-        if mobj:
-            properties["scf_iterations"] = int(mobj.group(1))
-
-        mobj = re.search(
-            r"\n\s+Dipole Moment \(Debye\)\s*\n\s+X\s+" + NUMBER + r"\s+Y\s+" + NUMBER + r"\s+Z\s+" + NUMBER + r"\s*\n",
-            outtext,
-        )
-        if mobj:
-            cf = constants.conversion_factor("debye", "e * bohr")
-            properties["scf_dipole_moment"] = [float(mobj.group(i)) * cf for i in range(1, 4)]
-
-        return properties, provenance
-
-    def parse_logfile(self, outfiles: Dict[str, str]) -> AtomicResult:
-        """
-        Parses a log file.
-        """
-        warnings.warn(
-            "parse_logfile will result in precision loss for some fields due to trunctation in " "Q-Chem output files."
-        )
-
-        outtext = outfiles["dispatch.log"]
-
-        mobj = re.search(r"(?:User input\:|Running Job?)\s+\d+\s+of\s+(\d+)", outtext)
-        if mobj:
-            if int(mobj.group(1)) > 1:
-                raise InputError("Multi-job Q-Chem log files not supported.")
-
-        input_dict = {}
-        mobj = re.search(r"\n-{20,}\nUser input:\n-{20,}\n(.+)\n-{20,}", outtext, re.DOTALL)
-        if mobj:
-            inputtext = mobj.group(1)
-
-            rem_match = re.search(r"\$rem\s*\n([^\$]+)\n\s*\$end", inputtext, re.DOTALL | re.IGNORECASE)
-            if rem_match:
-                rem_text = rem_match.group(1)
-                lines = rem_text.split("\n")
-                keywords = {}
-                for line in lines:
-                    s = re.sub(r"(^|[^\\])!.*", "", line).split()
-                    if len(s) == 0:
-                        continue
-                    keywords[s[0].lower()] = s[1].lower()
-                input_dict["model"] = {}
-                input_dict["model"]["method"] = keywords.pop("method").lower()
-                input_dict["model"]["basis"] = keywords.pop("basis").lower()
-                if "jobtype" in keywords:
-                    jobtype = keywords.pop("jobtype")
-                else:
-                    jobtype = "sp"
-                _jobtype_to_driver = {
-                    "sp": "energy",
-                    "force": "gradient",
-                    "freq": "hessian",
-                }  # optimization intentionally not supported
-                try:
-                    input_dict["driver"] = _jobtype_to_driver[jobtype]
-                except KeyError:
-                    raise KeyError(f"Jobtype {jobtype} not supported in qchem log file parser.")
-
-                for key in keywords:
-                    if keywords[key] == "false":
-                        keywords[key] = False
-                    if keywords[key] == "true":
-                        keywords[key] = True
-                input_dict["keywords"] = keywords
-
-            molecule_match = re.search(r"\$molecule\s*\n([^\$]+)\n\s*\$end", inputtext, re.DOTALL | re.IGNORECASE)
-            if molecule_match:
-                molecule_text = molecule_match.group(1)
-                if keywords.get("input_bohr", False):
-                    molecule_text += "\nunits au"
-                molecule = Molecule.from_data(molecule_text, dtype="psi4")
-                input_dict["molecule"] = molecule.dict()
-
-            _excluded_rem = {
-                "input_bohr",
-                "mem_total",
-                "mem_static",
-            }  # parts of the rem normally written by build_input, and which do not affect results
-            for item in _excluded_rem:
-                if item in input_dict["keywords"]:
-                    input_dict["keywords"].pop(item)
-
-        try:
-            qcscr_result = self.parse_output(outfiles, AtomicInput(**input_dict)).dict()
-        except KeyError:
-            props, prov = self._parse_logfile_common(outtext, input_dict)
-            qcscr_result = {"properties": props, "provenance": prov, **input_dict}
-
-        mobj = re.search(r"\n\s*Total\s+energy in the final basis set =\s+" + NUMBER + r"\s*\n", outtext)
-        if mobj and qcscr_result["properties"].get("scf_total_energy", None) is None:
-            qcscr_result["properties"]["scf_total_energy"] = float(mobj.group(1))
-
-        mobj = re.search(r"\n\s*Nuclear Repulsion Energy =\s+" + NUMBER + r"\s+hartrees\s*\n", outtext)
-        if mobj and qcscr_result["properties"].get("nuclear_repulsion_energy", None) is None:
-            qcscr_result["properties"]["nuclear_repulsion_energy"] = float(mobj.group(1))
-
-        mobj = re.search(r"\n\s*RI-MP2 TOTAL ENERGY\s+=\s+" + NUMBER + r"\s+au\s*\n", outtext)
-        if mobj and qcscr_result["properties"].get("mp2_total_energy", None) is None:
-            qcscr_result["properties"]["mp2_total_energy"] = float(mobj.group(1))
-
-        _mp2_methods = {"rimp2"}
-
-        method = input_dict["model"]["method"].lower()
-        if qcscr_result["properties"].get("return_energy", None) is None:
-            if method in _mp2_methods:
-                qcscr_result["properties"]["return_energy"] = qcscr_result["properties"]["mp2_total_energy"]
-            elif method in _scf_methods:
-                qcscr_result["properties"]["return_energy"] = qcscr_result["properties"]["scf_total_energy"]
-            else:
-                raise NotImplementedError(f"Method {method} not supported by logfile parser for energy driver.")
-
-        if input_dict["driver"] == "gradient" and qcscr_result.get("return_result", None) is None:
-
-            def read_matrix(text):
-                lines = text.split("\n")
-                i = 0
-                mdict = defaultdict(dict)
-                maxcol = 0
-                maxrow = 0
-                while i < len(lines):
-                    cols = [int(idx) for idx in lines[i].split()]
-                    maxcol = max(maxcol, *cols)
-                    i += 1
-                    while i < len(lines):
-                        s = lines[i].split()
-                        if len(s) <= len(cols):
-                            break
-                        assert len(s) == len(cols) + 1, s
-                        row = int(s[0])
-                        maxrow = max(maxrow, row)
-                        data = [float(field) for field in s[1:]]
-                        for col_idx, col in enumerate(cols):
-                            mdict[row - 1][col - 1] = data[col_idx]
-                        i += 1
-
-                ret = np.zeros((maxrow, maxcol))
-                for row in mdict:
-                    for col in mdict[row]:
-                        ret[row, col] = mdict[row][col]
-                return ret
-
-            if method in _mp2_methods:
-                mobj = re.search(
-                    r"\n\s+Full Analytical Gradient of MP2 Energy \(in au.\)\s*\n"
-                    r"([\s\d\.EDed\+\-]+)\n"
-                    r"\s*Gradient time:",
-                    outtext,
-                )
-
-            elif method in _scf_methods:
-                mobj = re.search(
-                    r"\n\s+Gradient of SCF Energy\s*\n([\s\d\.EDed\+\-]+)\n\s*Max gradient component =", outtext
-                )
-
-            else:
-                raise NotImplementedError(f"Method {method} not supported by the logfile parser for gradient driver.")
-
-            if mobj:
-                qcscr_result["return_result"] = read_matrix(mobj.group(1)).T
-
-        qcscr_result["success"] = True  # XXX: have a nice day?
-        qcscr_result["stdout"] = outtext
-        if input_dict["driver"] == "energy" and qcscr_result.get("return_result", None) is None:
-            qcscr_result["return_result"] = qcscr_result["properties"]["return_energy"]
-
-        return AtomicResult(**qcscr_result)

From 1aa3522a1e34be19841e1c49f8497e0a4a476bab Mon Sep 17 00:00:00 2001
From: Reza Hemmati <50717617+QuChem@users.noreply.github.com>
Date: Thu, 30 Nov 2023 09:33:36 -0600
Subject: [PATCH 04/36] Removed the empty line

---
 qcengine/programs/gaussian.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/qcengine/programs/gaussian.py b/qcengine/programs/gaussian.py
index 1968c0dea..c47658390 100644
--- a/qcengine/programs/gaussian.py
+++ b/qcengine/programs/gaussian.py
@@ -114,8 +114,7 @@ def build_input(
 a3=120.
 a4=120.
 d4=180.
-
-        '''
+'''
         gaussian_ret['infiles']["input.inp"] = input_file
 
         return gaussian_ret

From 10ae387cab8b1e8a53a4e7afbd8760328937a16a Mon Sep 17 00:00:00 2001
From: Reza Hemmati <50717617+QuChem@users.noreply.github.com>
Date: Tue, 12 Dec 2023 10:16:50 -0600
Subject: [PATCH 05/36] implement cclib into gaussian.py

---
 qcengine/programs/gaussian.py | 68 ++++++++++++++++++++---------------
 1 file changed, 40 insertions(+), 28 deletions(-)

diff --git a/qcengine/programs/gaussian.py b/qcengine/programs/gaussian.py
index c47658390..16ff89503 100644
--- a/qcengine/programs/gaussian.py
+++ b/qcengine/programs/gaussian.py
@@ -1,5 +1,5 @@
 """
-Calls the GAUSSIAN excutable
+Calls the GAUSSIAN executable
 """
 
 import os
@@ -8,6 +8,8 @@
 import warnings
 from collections import defaultdict
 from typing import Any, Dict, List, Optional, Tuple
+import cclib
+from cclib.method import Nuclear
 
 import numpy as np
 from qcelemental import constants
@@ -42,7 +44,7 @@ def found(self, raise_error: bool = False) -> bool:
             "g09",
             return_bool=True,
             raise_error=raise_error,
-            raise_msg="Please install Gaussian. Check it's in your PATH with `which g09`.",
+            raise_msg="Please install Gaussian. Check it's in your PATH with `which g09`."
         )
 
     def get_version(self) -> str:
@@ -55,17 +57,6 @@ def get_version(self) -> str:
         if which_prog not in self.version_cache:
             success, output = execute([which_prog, "v.com"], {"v.com": ""})
 
-            #mobj = re.search(r"Gaussian\s+([\d.]+)\s+for", exc["stdout"])
-            #if not mobj:
-            #    mobj = re.search(r"Gaussian version:\s+([\d.]+)\s+", exc["stdout"])
-
-            #if mobj:
-            #    self.version_cache[which_prog] = safe_version(mobj.group(1))
-
-            # if "QC not defined" in exc["stdout"]:
-            #else:
-            #    return safe_version("09")
-
         return self.version_cache[which_prog]
 
     def compute(self, input_model: "AtomicInput", config: TaskConfig) -> "AtomicResult":
@@ -82,18 +73,28 @@ def compute(self, input_model: "AtomicInput", config: TaskConfig) -> "AtomicResu
         exe_success, proc = self.execute(job_inputs)
 
         # Determine whether the calculation succeeded
-        print("SUCCESS", exe_success)
         if exe_success:
             # If execution succeeded, collect results
-            return self.parse_output(proc["outfiles"], input_model)
-
+            result = self.parse_output(proc, input_model)
+            
+            return result
+ 
+        else:
+            proc['outfiles']['stderr'] = proc['outfiles']['output.log']
+            outfile = proc['outfiles']['output.log']
+            
+            if 'Error termination via ' in outfile:
+                raise InputError(proc['outfiles']['output.log'])
+            else:
+                # Return UnknownError for error propagation
+                raise UnknownError(proc["outfiles"]["output.log"])
 
     def build_input(
         self, input_model: AtomicInput, config: TaskConfig, template: Optional[str] = None
     ) -> Dict[str, Any]:
         gaussian_ret = {
             "infiles": {},
-            "commands": [which("g09"),  "input.inp"],
+            "commands": [which("g09"),  "input.inp", "output.log"],
             "scratch_directory": config.scratch_directory
             }
 
@@ -115,24 +116,35 @@ def build_input(
 a4=120.
 d4=180.
 '''
-        gaussian_ret['infiles']["input.inp"] = input_file
+        gaussian_ret['infiles']['input.inp'] = input_file
 
         return gaussian_ret
 
-    def execute(self, inputs, extra_outfiles=None, extra_commands=None, scratch_name=None, timeout=None):
+    def execute(self,
+                inputs,
+                extra_outfiles: Optional[Dict[str, str]] = None,
+                extra_commands: Optional[List[str]] = None,
+                scratch_name = None,
+                timeout: Optional[int] = None
+               ):
 
         success, dexe = execute(
-        inputs["commands"],
-        inputs["infiles"],
-        )
-        print(dexe['proc'].stdout.read())
-        print(dexe['proc'].stderr.read())
+            inputs["commands"],
+            inputs["infiles"],
+            outfiles = ['output.log'],
+            scratch_messy = True
+            )
+        
+        if (dexe['outfiles']['output.log'] is None) or (
+           'Error termination via' in dexe['outfiles']['output.log']):
+           print ('THERE IS AN ERROR!')
+           
+           success = False
+        
         return success, dexe
-
+ 
     def parse_output(self, outfiles: Dict[str, str], input_model: AtomicInput) -> AtomicResult:
-        print("IN PARSE OUTPUT")
-
-        #output_data = {}
+        output_data = {}
         stdout = outfiles.pop('stdout')
         stderr = outfiles.pop('stderr')
 

From 11fc09784e37253efe7e027e422a4a828e43a647 Mon Sep 17 00:00:00 2001
From: Reza Hemmati <50717617+QuChem@users.noreply.github.com>
Date: Wed, 20 Dec 2023 14:42:44 -0600
Subject: [PATCH 06/36] Convert QCElemental inputs to Gaussian inputs

---
 qcengine/programs/gaussian.py | 48 ++++++++++++++++++-----------------
 1 file changed, 25 insertions(+), 23 deletions(-)

diff --git a/qcengine/programs/gaussian.py b/qcengine/programs/gaussian.py
index 16ff89503..cda8ca6fb 100644
--- a/qcengine/programs/gaussian.py
+++ b/qcengine/programs/gaussian.py
@@ -92,32 +92,34 @@ def compute(self, input_model: "AtomicInput", config: TaskConfig) -> "AtomicResu
     def build_input(
         self, input_model: AtomicInput, config: TaskConfig, template: Optional[str] = None
     ) -> Dict[str, Any]:
+
+        # Build keywords
+        keywords = {k.upper(): v for k, v in input_model.keywords.items()}
+        keywords['MEM_TOTAL'] = str(int(config.memory * 1024 / 100))
+
+        # Begin input file
+        input_file = []
+        input_file.append('%mem={}MW'.format(keywords['MEM_TOTAL]))
+        input_file.append("#P HF/6-31G(d) scf=tight\n")
+        input_file.append("write your comment here\n")
+  
+        # Create a mol object
+        mol = input_model.molecule
+        input_file.append(f'{int(mol.molecular_charge)} {mol.molecular_multiplicity}')
+
+        # Write the geometry
+        for real, sym, geom in zip(mol.real, mol.symbols, mol.geometry):
+            if real is False:
+                raise InputError("Cannot handle ghost atoms yet.")
+            input_file.append(f"{sym} {geom[0]:14.8f} {geom[1]:14.8f} {geom[2]:14.8f}")
+        input_file.append("\n")
+
         gaussian_ret = {
-            "infiles": {},
-            "commands": [which("g09"),  "input.inp", "output.log"],
-            "scratch_directory": config.scratch_directory
+            'infiles': {'input.inp': '\n'.join(input_file)},
+            'commands': [which("g09"),  'input.inp', 'output.log'],
+            'scratch_directory": config.scratch_directory
             }
 
-        input_file = '''%mem=20MW
-#P HF/6-31G(d) scf=tight
-
-test1 HF/6-31G(d) sp formaldehyde
-
-0 1
-C1
-O2  1  r2
-H3  1  r3  2  a3
-H4  1  r4  2  a4  3  d4
-
-r2=1.20
-r3=1.0
-r4=1.0
-a3=120.
-a4=120.
-d4=180.
-'''
-        gaussian_ret['infiles']['input.inp'] = input_file
-
         return gaussian_ret
 
     def execute(self,

From ee0dcb4445578ecc6e71a0ee168c69fedfc30657 Mon Sep 17 00:00:00 2001
From: Reza Hemmati <50717617+QuChem@users.noreply.github.com>
Date: Wed, 20 Dec 2023 14:45:25 -0600
Subject: [PATCH 07/36] Convert QCElemental inputs to Gaussian inputs

---
 qcengine/programs/gaussian.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/qcengine/programs/gaussian.py b/qcengine/programs/gaussian.py
index cda8ca6fb..03164ee31 100644
--- a/qcengine/programs/gaussian.py
+++ b/qcengine/programs/gaussian.py
@@ -117,7 +117,7 @@ def build_input(
         gaussian_ret = {
             'infiles': {'input.inp': '\n'.join(input_file)},
             'commands': [which("g09"),  'input.inp', 'output.log'],
-            'scratch_directory": config.scratch_directory
+            'scratch_directory': config.scratch_directory
             }
 
         return gaussian_ret

From f93420c1b535cefaf4bcbc656fc22782f50382ef Mon Sep 17 00:00:00 2001
From: Reza Hemmati <50717617+QuChem@users.noreply.github.com>
Date: Wed, 10 Jan 2024 12:35:05 -0600
Subject: [PATCH 08/36] Add input generator

---
 qcengine/programs/gaussian.py | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/qcengine/programs/gaussian.py b/qcengine/programs/gaussian.py
index 03164ee31..b97871667 100644
--- a/qcengine/programs/gaussian.py
+++ b/qcengine/programs/gaussian.py
@@ -95,12 +95,25 @@ def build_input(
 
         # Build keywords
         keywords = {k.upper(): v for k, v in input_model.keywords.items()}
-        keywords['MEM_TOTAL'] = str(int(config.memory * 1024 / 100))
+        keywords = {'scf_damp': 'true',
+                    'scf_diis': 'false'
+                    }
+        if input_model.driver == "energy":
+            keywords["JOBTYPE"] = "sp"
+        elif input_model.driver == "gradient":
+            keywords["JOBTYPE"] = "force"
+        elif input_model.driver == "hessian":
+            keywords["JOBTYPE"] = "freq"
+        else:
+            raise InputError(f"Driver {input_model.driver} not implemented for Gaussian.")
 
+        if input_model.molecule.fix_com or input_model.molecule.fix_orientation:
+            keywords["SYM_IGNORE"] = "TRUE"
+        
         # Begin input file
         input_file = []
-        input_file.append('%mem={}MW'.format(keywords['MEM_TOTAL]))
-        input_file.append("#P HF/6-31G(d) scf=tight\n")
+        input_file.append('%mem={}MW'.format(int(config.memory * 1024 / 100))
+        input_file.append("#P {}/{}".format(input_model.model.method, input_model.model.basis) + '\n')
         input_file.append("write your comment here\n")
   
         # Create a mol object
@@ -110,8 +123,8 @@ def build_input(
         # Write the geometry
         for real, sym, geom in zip(mol.real, mol.symbols, mol.geometry):
             if real is False:
-                raise InputError("Cannot handle ghost atoms yet.")
-            input_file.append(f"{sym} {geom[0]:14.8f} {geom[1]:14.8f} {geom[2]:14.8f}")
+                raise InputError('Cannot handle ghost atoms yet.')
+            input_file.append(f'{sym} {geom[0]:14.8f} {geom[1]:14.8f} {geom[2]:14.8f}')
         input_file.append("\n")
 
         gaussian_ret = {

From 305015550e64c587f26e22774d4b64e0222eaa77 Mon Sep 17 00:00:00 2001
From: Reza Hemmati <50717617+QuChem@users.noreply.github.com>
Date: Wed, 10 Jan 2024 12:41:18 -0600
Subject: [PATCH 09/36] Improved the parse_output function

---
 qcengine/programs/gaussian.py | 82 +++++++++++++++++++++++++----------
 1 file changed, 59 insertions(+), 23 deletions(-)

diff --git a/qcengine/programs/gaussian.py b/qcengine/programs/gaussian.py
index b97871667..12199e2ff 100644
--- a/qcengine/programs/gaussian.py
+++ b/qcengine/programs/gaussian.py
@@ -160,36 +160,72 @@ def execute(self,
  
     def parse_output(self, outfiles: Dict[str, str], input_model: AtomicInput) -> AtomicResult:
         output_data = {}
+        properties = {}
+        
+        tmp_output_path = outfiles['scratch_directory']
+        tmp_output_file = os.path.join(tmp_output_path, 'output.log')
+        data = cclib.io.ccread(tmp_output_file)
+        
+        last_occupied_energy = data.moenergies[0][data.homos[0]]
+        output_data['HOMO ENERGY'] = last_occupied_energy
+        #print (F'HOMO ENERGY: {last_occupied_energy:2.6f} eV')
+        
+        scf_energy = data.scfenergies[0]
+        output_data['SCF ENERGY'] = scf_energy
+        print (F'SCF ENERGY: {scf_energy:3.6f} eV')
+        
+        #if input_model.driver == 'energy':
+           #output_data['return_result'] = 
+        #print (os.system('ccget --list ' + tmp_output_file)) #data available in the output for parsing
+        
+        #if input_model.driver == 'energy':
+        #   print (cclib.__version__)
+        #   print (output_data)
+        #print (input_model)
+        
+        #provenance = Provenance(creator="Gaussian", version=self.get_version(), routine="g09").dict()
+        
+        #properties = {
+        #    'nuclear_repulsion_energy': Nuclear(data).repulsion_energy(),
+        #    'scf_total_energy': data.scfenergies[0],
+        #    'return_energy': data.scfenergies[0]
+        #    }
+
+        outtext = ''
+        cnt = 0
+        f = open(os.path.join(tmp_output_path, 'output.log'), 'r')
+        outtext = f.readlines()
+        for num, line in enumerate(outtext, 1):
+           if 'Version=' in line:
+              version_line = line.split('Version=')[-1]
+              version_line = version_line.strip()
+              version_line = version_line.split('\\')[0]
+              print ('version : ', version_line)
+              #cnt = num + 1
+           #if cnt == num:
+              #version_line += line.split("\\")[0]
+              #print ('version is: ', version_line)
+              
+
+        provenance = Provenance(creator="Gaussian 09", version=self.get_version(), routine='g09').dict()
+        print ('we are in mobj')
+        mobj = re.search(r"^Job cpu time:*seconds.$", outfiles['output.log'])
+        print ('we are in mobj')
+        if (mobj):
+           print ('mobj true')
+
         stdout = outfiles.pop('stdout')
         stderr = outfiles.pop('stderr')
-
+        #print("\nPRINT STDOUT: \n", stdout)
+        
         method = input_model.model.method.lower()
         #method = method[4:] if method.startswith("") else method
 
-        try:
-            qcvars, gaussianmol, module = harvest(
-                input_model.molecule, method, stdout, **outfiles
-            )
-        except:
-            pass
-
-        #properties = {
-        #    "nuclear_repulsion_energy": bdata['99.0'][0],
-        #    "scf_total_energy": bdata["99.0"][1],
-        #    "return_energy": bdata["99.0"][-1],
-        #}
-
-        qcvars = {}
-
-        #props, prov = self._parse_logfile_common(outtext, input_model.dict())
-        #output_data["provenance"] = prov
-        #output_data["properties"] = properties
-        #output_data["properties"].update(props)
-        output_data["stdout"] = stdout
+        #output_data["stdout"] = stdout
         output_data["success"] = True
 
         merged_data = {**input_model.dict(), **output_data}
-        #merged_data["extras"]["qcvars"] = qcvars
-        print(merged_data)
+
+        print('\nPRINT MERGED DATA: \n', merged_data)
 
         return AtomicResult(**merged_data)

From b1097be92186e8f77e0354a91662eb13e86325dc Mon Sep 17 00:00:00 2001
From: Reza Hemmati <50717617+QuChem@users.noreply.github.com>
Date: Thu, 18 Jan 2024 09:25:42 -0600
Subject: [PATCH 10/36] Add versioning

---
 qcengine/programs/gaussian.py | 27 ++++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/qcengine/programs/gaussian.py b/qcengine/programs/gaussian.py
index 12199e2ff..29b0b3773 100644
--- a/qcengine/programs/gaussian.py
+++ b/qcengine/programs/gaussian.py
@@ -50,13 +50,30 @@ def found(self, raise_error: bool = False) -> bool:
     def get_version(self) -> str:
         self.found(raise_error=True)
 
-        # Get the node configuration
-        #config = get_config()
-
         which_prog = which("g09")
-        if which_prog not in self.version_cache:
-            success, output = execute([which_prog, "v.com"], {"v.com": ""})
+        v_input = '''%mem=20MW
+#P HF/sto-3g
+
+#test HF/sto-3g for H atom
 
+0 2
+H
+
+'''
+        if which_prog not in self.version_cache:
+            success, output = execute([which_prog, 'v.com', 'v.log'],
+                                      {'v.com': v_input},
+                                      ['v.log']
+                                     )
+            if success:
+                outtext = output['outfiles']['v.log']
+                outtext = outtext.splitlines()
+                for line in outtext:
+                    if 'Gaussian 09' in line:
+                        version_line = line.split('Gaussian 09:')[-1]
+                        version_line = version_line.split()[0]
+                        self.version_cache[which_prog] = safe_version(version_line)
+                        
         return self.version_cache[which_prog]
 
     def compute(self, input_model: "AtomicInput", config: TaskConfig) -> "AtomicResult":

From a5265b651db9c5e87282a3a9d5e7b242092fe10c Mon Sep 17 00:00:00 2001
From: Reza Hemmati <50717617+QuChem@users.noreply.github.com>
Date: Tue, 23 Jan 2024 11:15:32 -0600
Subject: [PATCH 11/36] Improved parse_output function

---
 qcengine/programs/gaussian.py | 97 +++++++++++++++--------------------
 1 file changed, 41 insertions(+), 56 deletions(-)

diff --git a/qcengine/programs/gaussian.py b/qcengine/programs/gaussian.py
index 29b0b3773..41aae49ab 100644
--- a/qcengine/programs/gaussian.py
+++ b/qcengine/programs/gaussian.py
@@ -38,7 +38,6 @@ class GaussianHarness(ProgramHarness):
     class Config(ProgramHarness.Config):
         pass
 
-
     def found(self, raise_error: bool = False) -> bool:
         return which(
             "g09",
@@ -51,6 +50,7 @@ def get_version(self) -> str:
         self.found(raise_error=True)
 
         which_prog = which("g09")
+        
         v_input = '''%mem=20MW
 #P HF/sto-3g
 
@@ -102,6 +102,7 @@ def compute(self, input_model: "AtomicInput", config: TaskConfig) -> "AtomicResu
             
             if 'Error termination via ' in outfile:
                 raise InputError(proc['outfiles']['output.log'])
+         
             else:
                 # Return UnknownError for error propagation
                 raise UnknownError(proc["outfiles"]["output.log"])
@@ -112,25 +113,31 @@ def build_input(
 
         # Build keywords
         keywords = {k.upper(): v for k, v in input_model.keywords.items()}
-        keywords = {'scf_damp': 'true',
-                    'scf_diis': 'false'
-                    }
+
+        gaussian_kw = []
+
         if input_model.driver == "energy":
-            keywords["JOBTYPE"] = "sp"
+            gaussian_kw.append("sp")
         elif input_model.driver == "gradient":
-            keywords["JOBTYPE"] = "force"
+            gaussian_kw.append("force")
         elif input_model.driver == "hessian":
-            keywords["JOBTYPE"] = "freq"
+            gaussian_kw.append("freq")
         else:
             raise InputError(f"Driver {input_model.driver} not implemented for Gaussian.")
 
-        if input_model.molecule.fix_com or input_model.molecule.fix_orientation:
-            keywords["SYM_IGNORE"] = "TRUE"
-        
+        #if input_model.molecule.fix_com or input_model.molecule.fix_orientation:
+        #    keywords["SYM_IGNORE"] = "TRUE"
+        if 'SCF_CONVERGENCE' in keywords:
+           gaussian_kw.append('SCF=' + keywords["SCF_CONVERGENCE"])
+        if 'POPULATION' in keywords:
+           gaussian_kw.append('Pop=' + keywords['POPULATION'])   
+           
+        keywords = {'scf_damp': 'true',
+                    'scf_diis': 'false'}
         # Begin input file
         input_file = []
-        input_file.append('%mem={}MW'.format(int(config.memory * 1024 / 100))
-        input_file.append("#P {}/{}".format(input_model.model.method, input_model.model.basis) + '\n')
+        input_file.append('%mem={}MB'.format(int(config.memory * 1024)))
+        input_file.append("#P {}/{}".format(input_model.model.method, input_model.model.basis) + ' ' + ' '.join(gaussian_kw) + '\n')
         input_file.append("write your comment here\n")
   
         # Create a mol object
@@ -147,13 +154,14 @@ def build_input(
         gaussian_ret = {
             'infiles': {'input.inp': '\n'.join(input_file)},
             'commands': [which("g09"),  'input.inp', 'output.log'],
-            'scratch_directory': config.scratch_directory
+            #'scratch_directory': config.scratch_directory
             }
 
         return gaussian_ret
 
     def execute(self,
                 inputs,
+                *,
                 extra_outfiles: Optional[Dict[str, str]] = None,
                 extra_commands: Optional[List[str]] = None,
                 scratch_name = None,
@@ -184,65 +192,42 @@ def parse_output(self, outfiles: Dict[str, str], input_model: AtomicInput) -> At
         data = cclib.io.ccread(tmp_output_file)
         
         last_occupied_energy = data.moenergies[0][data.homos[0]]
-        output_data['HOMO ENERGY'] = last_occupied_energy
-        #print (F'HOMO ENERGY: {last_occupied_energy:2.6f} eV')
+        #output_data['HOMO ENERGY'] = last_occupied_energy
         
-        scf_energy = data.scfenergies[0]
-        output_data['SCF ENERGY'] = scf_energy
-        print (F'SCF ENERGY: {scf_energy:3.6f} eV')
+        scf_energy = data.scfenergies[0] / 27.21138505  # to Hartree
+        #output_data['SCF ENERGY'] = scf_energy
         
-        #if input_model.driver == 'energy':
-           #output_data['return_result'] = 
+        if input_model.driver == 'energy':
+            output_data['return_result'] = scf_energy
         #print (os.system('ccget --list ' + tmp_output_file)) #data available in the output for parsing
-        
+
         #if input_model.driver == 'energy':
-        #   print (cclib.__version__)
+        #   print (cclib.__version__)        
         #   print (output_data)
         #print (input_model)
+
+        properties = {
+            'nuclear_repulsion_energy': Nuclear(data).repulsion_energy(),
+            'scf_total_energy': scf_energy,
+            'return_energy': scf_energy
+            }
         
-        #provenance = Provenance(creator="Gaussian", version=self.get_version(), routine="g09").dict()
-        
-        #properties = {
-        #    'nuclear_repulsion_energy': Nuclear(data).repulsion_energy(),
-        #    'scf_total_energy': data.scfenergies[0],
-        #    'return_energy': data.scfenergies[0]
-        #    }
-
-        outtext = ''
-        cnt = 0
-        f = open(os.path.join(tmp_output_path, 'output.log'), 'r')
-        outtext = f.readlines()
-        for num, line in enumerate(outtext, 1):
-           if 'Version=' in line:
-              version_line = line.split('Version=')[-1]
-              version_line = version_line.strip()
-              version_line = version_line.split('\\')[0]
-              print ('version : ', version_line)
-              #cnt = num + 1
-           #if cnt == num:
-              #version_line += line.split("\\")[0]
-              #print ('version is: ', version_line)
-              
+        output_data['properties'] = properties
+        output_data['stdout'] = outfiles['outfiles']['output.log']
+        output_data['success'] = True
+        #print ('output_data: ', output_data)
 
         provenance = Provenance(creator="Gaussian 09", version=self.get_version(), routine='g09').dict()
-        print ('we are in mobj')
-        mobj = re.search(r"^Job cpu time:*seconds.$", outfiles['output.log'])
-        print ('we are in mobj')
-        if (mobj):
-           print ('mobj true')
 
         stdout = outfiles.pop('stdout')
         stderr = outfiles.pop('stderr')
         #print("\nPRINT STDOUT: \n", stdout)
         
+
         method = input_model.model.method.lower()
         #method = method[4:] if method.startswith("") else method
-
-        #output_data["stdout"] = stdout
-        output_data["success"] = True
-
+        
         merged_data = {**input_model.dict(), **output_data}
 
-        print('\nPRINT MERGED DATA: \n', merged_data)
-
         return AtomicResult(**merged_data)
+

From 5d73c9f6df4445b7fe9b8024e360de6a2d16ba54 Mon Sep 17 00:00:00 2001
From: Reza Hemmati <50717617+QuChem@users.noreply.github.com>
Date: Wed, 31 Jan 2024 18:27:53 -0600
Subject: [PATCH 12/36] the parse_output function extracts more data

---
 qcengine/programs/gaussian.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/qcengine/programs/gaussian.py b/qcengine/programs/gaussian.py
index 41aae49ab..6e495cba1 100644
--- a/qcengine/programs/gaussian.py
+++ b/qcengine/programs/gaussian.py
@@ -184,17 +184,20 @@ def execute(self,
         return success, dexe
  
     def parse_output(self, outfiles: Dict[str, str], input_model: AtomicInput) -> AtomicResult:
+        
         output_data = {}
         properties = {}
+        cclib_vars = {}
         
         tmp_output_path = outfiles['scratch_directory']
         tmp_output_file = os.path.join(tmp_output_path, 'output.log')
         data = cclib.io.ccread(tmp_output_file)
+        cclib_vars = data.getattributes(True)
         
         last_occupied_energy = data.moenergies[0][data.homos[0]]
         #output_data['HOMO ENERGY'] = last_occupied_energy
         
-        scf_energy = data.scfenergies[0] / 27.21138505  # to Hartree
+        scf_energy = data.scfenergies[0] / constants.conversion_factor("hartree", "eV") # Change from the eV unit to the Hartree unit
         #output_data['SCF ENERGY'] = scf_energy
         
         if input_model.driver == 'energy':
@@ -217,7 +220,7 @@ def parse_output(self, outfiles: Dict[str, str], input_model: AtomicInput) -> At
         output_data['success'] = True
         #print ('output_data: ', output_data)
 
-        provenance = Provenance(creator="Gaussian 09", version=self.get_version(), routine='g09').dict()
+        provenance = Provenance(creator="Gaussian", version=self.get_version(), routine='g09').dict()
 
         stdout = outfiles.pop('stdout')
         stderr = outfiles.pop('stderr')
@@ -226,6 +229,10 @@ def parse_output(self, outfiles: Dict[str, str], input_model: AtomicInput) -> At
 
         method = input_model.model.method.lower()
         #method = method[4:] if method.startswith("") else method
+
+        # filter unwanted data
+        to_remove = ['atomnos', 'atomcoords', 'natom']
+        output_data['extras'] = {'cclib': {k:v for k, v in cclib_vars.items() if k not in to_remove}}
         
         merged_data = {**input_model.dict(), **output_data}
 

From 6eb55e49dc896337aad46b539ae3cd675046ff7e Mon Sep 17 00:00:00 2001
From: Reza Hemmati <50717617+QuChem@users.noreply.github.com>
Date: Wed, 14 Feb 2024 09:07:32 -0600
Subject: [PATCH 13/36] Cleaning gaussian.py

---
 qcengine/programs/gaussian.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/qcengine/programs/gaussian.py b/qcengine/programs/gaussian.py
index 6e495cba1..8cf7b08fa 100644
--- a/qcengine/programs/gaussian.py
+++ b/qcengine/programs/gaussian.py
@@ -61,8 +61,8 @@ def get_version(self) -> str:
 
 '''
         if which_prog not in self.version_cache:
-            success, output = execute([which_prog, 'v.com', 'v.log'],
-                                      {'v.com': v_input},
+            success, output = execute([which_prog, 'v.inp', 'v.log'],
+                                      {'v.inp': v_input},
                                       ['v.log']
                                      )
             if success:
@@ -105,7 +105,7 @@ def compute(self, input_model: "AtomicInput", config: TaskConfig) -> "AtomicResu
          
             else:
                 # Return UnknownError for error propagation
-                raise UnknownError(proc["outfiles"]["output.log"])
+                raise UnknownError(proc['outfiles']['output.log'])
 
     def build_input(
         self, input_model: AtomicInput, config: TaskConfig, template: Optional[str] = None
@@ -153,7 +153,7 @@ def build_input(
 
         gaussian_ret = {
             'infiles': {'input.inp': '\n'.join(input_file)},
-            'commands': [which("g09"),  'input.inp', 'output.log'],
+            'commands': [which("g09"),  'input.inp', 'output.log']
             #'scratch_directory': config.scratch_directory
             }
 
@@ -169,8 +169,8 @@ def execute(self,
                ):
 
         success, dexe = execute(
-            inputs["commands"],
-            inputs["infiles"],
+            inputs['commands'],
+            inputs['infiles'],
             outfiles = ['output.log'],
             scratch_messy = True
             )

From aad52cf5dad327cd77988212772b54c245753ca7 Mon Sep 17 00:00:00 2001
From: Reza Hemmati <50717617+QuChem@users.noreply.github.com>
Date: Fri, 16 Feb 2024 08:40:01 -0600
Subject: [PATCH 14/36] Create programs.py

---
 qcengine/programs.py | 142 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 142 insertions(+)
 create mode 100644 qcengine/programs.py

diff --git a/qcengine/programs.py b/qcengine/programs.py
new file mode 100644
index 000000000..0a9b1cc11
--- /dev/null
+++ b/qcengine/programs.py
@@ -0,0 +1,142 @@
+"""
+Imports the various compute backends
+"""
+
+from typing import Set
+
+from ..exceptions import InputError, ResourceError
+from .model import ProgramHarness
+from .adcc import AdccHarness
+from .cfour import CFOURHarness
+from .dftd3 import DFTD3Harness
+from .dftd_ng import DFTD4Harness, SDFTD3Harness
+from .gamess import GAMESSHarness
+from .gcp import GCPHarness, MCTCGCPHarness
+from .molpro import MolproHarness
+from .mopac import MopacHarness
+from .mp2d import MP2DHarness
+from .mrchem import MRChemHarness
+from .nwchem import NWChemHarness
+from .openmm import OpenMMHarness
+from .psi4 import Psi4Harness
+from .qchem import QChemHarness
+from .qcore import EntosHarness, QcoreHarness
+from .rdkit import RDKitHarness
+from .terachem import TeraChemHarness
+from .terachem_frontend import TeraChemFrontEndHarness
+from .terachem_pbs import TeraChemPBSHarness
+from .torchani import TorchANIHarness
+from .turbomole import TurbomoleHarness
+from .xtb import XTBHarness
+from .gaussian import GaussianHarness
+
+
+__all__ = ["register_program", "get_program", "list_all_programs", "list_available_programs"]
+
+programs = {}
+
+
+def register_program(entry_point: ProgramHarness) -> None:
+    """
+    Register a new ProgramHarness with QCEngine.
+    """
+
+    name = entry_point.name
+    if name.lower() in programs.keys():
+        raise ValueError("{} is already a registered program.".format(name))
+
+    programs[name.lower()] = entry_point
+
+
+def unregister_program(name: str) -> None:
+    """
+    Unregisters a given program.
+    """
+
+    ret = programs.pop(name.lower(), None)
+    if ret is None:
+        raise KeyError(f"Program {name} is not registered with QCEngine")
+
+
+def get_program(name: str, check: bool = True) -> ProgramHarness:
+    """
+    Returns a program's executor class
+
+    Parameters
+    ----------
+    check
+        ``True`` Do raise error if program not found. ``False`` is handy for
+        the specialized case of calling non-execution methods (like parsing for testing)
+        on the returned ``Harness``.
+
+    """
+    name = name.lower()
+
+    if name not in programs:
+        raise InputError(f"Program {name} is not registered to QCEngine.")
+
+    ret = programs[name]
+    if check:
+        try:
+            ret.found(raise_error=True)
+        except ModuleNotFoundError as err:
+            raise ResourceError(f"Program {name} is registered with QCEngine, but cannot be found.") from err
+
+    return ret
+
+
+def list_all_programs() -> Set[str]:
+    """
+    List all programs registered by QCEngine.
+    """
+    return set(programs.keys())
+
+
+def list_available_programs() -> Set[str]:
+    """
+    List all programs that can be exectued (found) by QCEngine.
+    """
+
+    ret = set()
+    for k, p in programs.items():
+        if p.found():
+            ret.add(k)
+
+    return ret
+
+
+# Quantum
+register_program(AdccHarness())
+register_program(CFOURHarness())
+register_program(EntosHarness())  # Duplicate of Qcore harness to transition the namespace, to be deprecated
+register_program(GAMESSHarness())
+register_program(MRChemHarness())
+register_program(MolproHarness())
+register_program(NWChemHarness())
+register_program(Psi4Harness())
+register_program(QChemHarness())
+register_program(QcoreHarness())
+register_program(TeraChemHarness())
+register_program(TurbomoleHarness())
+register_program(TeraChemFrontEndHarness())
+register_program(TeraChemPBSHarness())
+register_program(GaussianHarness())
+
+# Semi-empirical
+register_program(MopacHarness())
+register_program(XTBHarness())
+
+# AI
+register_program(TorchANIHarness())
+
+# Molecular Mechanics
+register_program(RDKitHarness())
+register_program(OpenMMHarness())
+
+# Analytical Corrections
+register_program(DFTD3Harness())
+register_program(DFTD4Harness())
+register_program(SDFTD3Harness())
+register_program(GCPHarness())
+register_program(MCTCGCPHarness())
+register_program(MP2DHarness())

From ce55302956780a8dae00af28bea2b8a6cda1bf02 Mon Sep 17 00:00:00 2001
From: Reza Hemmati <50717617+QuChem@users.noreply.github.com>
Date: Fri, 16 Feb 2024 13:30:48 -0600
Subject: [PATCH 15/36] Add gaussian to base.py

---
 qcengine/programs/base.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/qcengine/programs/base.py b/qcengine/programs/base.py
index 8d9cfc9ff..ebc3f4237 100644
--- a/qcengine/programs/base.py
+++ b/qcengine/programs/base.py
@@ -28,6 +28,7 @@
 from .torchani import TorchANIHarness
 from .turbomole import TurbomoleHarness
 from .xtb import XTBHarness
+from .gaussian import GaussianHarness
 
 __all__ = ["register_program", "get_program", "list_all_programs", "list_available_programs"]
 
@@ -118,6 +119,7 @@ def list_available_programs() -> Set[str]:
 register_program(TurbomoleHarness())
 register_program(TeraChemFrontEndHarness())
 register_program(TeraChemPBSHarness())
+register_program(GaussianHarness())
 
 # Semi-empirical
 register_program(MopacHarness())

From 0ccc8ef9fb27714b99c7c7bb7a0c775a93ce91a9 Mon Sep 17 00:00:00 2001
From: Reza Hemmati <50717617+QuChem@users.noreply.github.com>
Date: Fri, 1 Mar 2024 10:50:07 -0600
Subject: [PATCH 16/36] Delete qcengine/programs.py

---
 qcengine/programs.py | 142 -------------------------------------------
 1 file changed, 142 deletions(-)
 delete mode 100644 qcengine/programs.py

diff --git a/qcengine/programs.py b/qcengine/programs.py
deleted file mode 100644
index 0a9b1cc11..000000000
--- a/qcengine/programs.py
+++ /dev/null
@@ -1,142 +0,0 @@
-"""
-Imports the various compute backends
-"""
-
-from typing import Set
-
-from ..exceptions import InputError, ResourceError
-from .model import ProgramHarness
-from .adcc import AdccHarness
-from .cfour import CFOURHarness
-from .dftd3 import DFTD3Harness
-from .dftd_ng import DFTD4Harness, SDFTD3Harness
-from .gamess import GAMESSHarness
-from .gcp import GCPHarness, MCTCGCPHarness
-from .molpro import MolproHarness
-from .mopac import MopacHarness
-from .mp2d import MP2DHarness
-from .mrchem import MRChemHarness
-from .nwchem import NWChemHarness
-from .openmm import OpenMMHarness
-from .psi4 import Psi4Harness
-from .qchem import QChemHarness
-from .qcore import EntosHarness, QcoreHarness
-from .rdkit import RDKitHarness
-from .terachem import TeraChemHarness
-from .terachem_frontend import TeraChemFrontEndHarness
-from .terachem_pbs import TeraChemPBSHarness
-from .torchani import TorchANIHarness
-from .turbomole import TurbomoleHarness
-from .xtb import XTBHarness
-from .gaussian import GaussianHarness
-
-
-__all__ = ["register_program", "get_program", "list_all_programs", "list_available_programs"]
-
-programs = {}
-
-
-def register_program(entry_point: ProgramHarness) -> None:
-    """
-    Register a new ProgramHarness with QCEngine.
-    """
-
-    name = entry_point.name
-    if name.lower() in programs.keys():
-        raise ValueError("{} is already a registered program.".format(name))
-
-    programs[name.lower()] = entry_point
-
-
-def unregister_program(name: str) -> None:
-    """
-    Unregisters a given program.
-    """
-
-    ret = programs.pop(name.lower(), None)
-    if ret is None:
-        raise KeyError(f"Program {name} is not registered with QCEngine")
-
-
-def get_program(name: str, check: bool = True) -> ProgramHarness:
-    """
-    Returns a program's executor class
-
-    Parameters
-    ----------
-    check
-        ``True`` Do raise error if program not found. ``False`` is handy for
-        the specialized case of calling non-execution methods (like parsing for testing)
-        on the returned ``Harness``.
-
-    """
-    name = name.lower()
-
-    if name not in programs:
-        raise InputError(f"Program {name} is not registered to QCEngine.")
-
-    ret = programs[name]
-    if check:
-        try:
-            ret.found(raise_error=True)
-        except ModuleNotFoundError as err:
-            raise ResourceError(f"Program {name} is registered with QCEngine, but cannot be found.") from err
-
-    return ret
-
-
-def list_all_programs() -> Set[str]:
-    """
-    List all programs registered by QCEngine.
-    """
-    return set(programs.keys())
-
-
-def list_available_programs() -> Set[str]:
-    """
-    List all programs that can be exectued (found) by QCEngine.
-    """
-
-    ret = set()
-    for k, p in programs.items():
-        if p.found():
-            ret.add(k)
-
-    return ret
-
-
-# Quantum
-register_program(AdccHarness())
-register_program(CFOURHarness())
-register_program(EntosHarness())  # Duplicate of Qcore harness to transition the namespace, to be deprecated
-register_program(GAMESSHarness())
-register_program(MRChemHarness())
-register_program(MolproHarness())
-register_program(NWChemHarness())
-register_program(Psi4Harness())
-register_program(QChemHarness())
-register_program(QcoreHarness())
-register_program(TeraChemHarness())
-register_program(TurbomoleHarness())
-register_program(TeraChemFrontEndHarness())
-register_program(TeraChemPBSHarness())
-register_program(GaussianHarness())
-
-# Semi-empirical
-register_program(MopacHarness())
-register_program(XTBHarness())
-
-# AI
-register_program(TorchANIHarness())
-
-# Molecular Mechanics
-register_program(RDKitHarness())
-register_program(OpenMMHarness())
-
-# Analytical Corrections
-register_program(DFTD3Harness())
-register_program(DFTD4Harness())
-register_program(SDFTD3Harness())
-register_program(GCPHarness())
-register_program(MCTCGCPHarness())
-register_program(MP2DHarness())

From 30e2579fd5df995485efa506ff7c258c123e2feb Mon Sep 17 00:00:00 2001
From: Reza Hemmati <50717617+QuChem@users.noreply.github.com>
Date: Thu, 14 Mar 2024 12:08:25 -0500
Subject: [PATCH 17/36] Changed "Gaussian" to "gaussian"

---
 qcengine/programs/gaussian.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/qcengine/programs/gaussian.py b/qcengine/programs/gaussian.py
index 8cf7b08fa..428379c14 100644
--- a/qcengine/programs/gaussian.py
+++ b/qcengine/programs/gaussian.py
@@ -26,7 +26,7 @@
 class GaussianHarness(ProgramHarness):
 
     _defaults = {
-        "name": "Gaussian",
+        "name": "gaussian",
         "scratch": True,
         "thread_safe": False,
         "thread_parallel": True,
@@ -70,9 +70,10 @@ def get_version(self) -> str:
                 outtext = outtext.splitlines()
                 for line in outtext:
                     if 'Gaussian 09' in line:
-                        version_line = line.split('Gaussian 09:')[-1]
-                        version_line = version_line.split()[0]
-                        self.version_cache[which_prog] = safe_version(version_line)
+                        #version_line = line.split('Gaussian 09:')[-1]
+                        #version_line = version_line.split()[0]
+                        #self.version_cache[which_prog] = safe_version(version_line)
+                        self.version_cache[which_prog] = '2009'
                         
         return self.version_cache[which_prog]
 
@@ -220,7 +221,7 @@ def parse_output(self, outfiles: Dict[str, str], input_model: AtomicInput) -> At
         output_data['success'] = True
         #print ('output_data: ', output_data)
 
-        provenance = Provenance(creator="Gaussian", version=self.get_version(), routine='g09').dict()
+        provenance = Provenance(creator="gaussian", version=self.get_version(), routine='g09').dict()
 
         stdout = outfiles.pop('stdout')
         stderr = outfiles.pop('stderr')
@@ -233,6 +234,10 @@ def parse_output(self, outfiles: Dict[str, str], input_model: AtomicInput) -> At
         # filter unwanted data
         to_remove = ['atomnos', 'atomcoords', 'natom']
         output_data['extras'] = {'cclib': {k:v for k, v in cclib_vars.items() if k not in to_remove}}
+
+        # HACK - scf_values can have NaN
+        # remove for now
+        output_data['extras']['cclib'].pop('scfvalues')
         
         merged_data = {**input_model.dict(), **output_data}
 

From d76c5e7261bc3ca4358b91cf1437729325de74e7 Mon Sep 17 00:00:00 2001
From: Reza Hemmati <50717617+QuChem@users.noreply.github.com>
Date: Thu, 11 Apr 2024 12:15:19 -0400
Subject: [PATCH 18/36] Try to handle .fchk file format in gaussian.py

---
 qcengine/programs/gaussian.py | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/qcengine/programs/gaussian.py b/qcengine/programs/gaussian.py
index 428379c14..ca8f4184d 100644
--- a/qcengine/programs/gaussian.py
+++ b/qcengine/programs/gaussian.py
@@ -135,10 +135,26 @@ def build_input(
            
         keywords = {'scf_damp': 'true',
                     'scf_diis': 'false'}
+
+        save_fchk = False
+        
+        if input_model.protocols.native_files == 'all':
+            save_fchk = True
+            keywords['native_files'] = 'fcheck'
+            output['native_files'] = {'input': str}
+        if (save_fchk):
+            with open('Test.FChk', 'r') as f:
+                output['native_files']['fchk'] = f.read()
+                       
         # Begin input file
         input_file = []
         input_file.append('%mem={}MB'.format(int(config.memory * 1024)))
-        input_file.append("#P {}/{}".format(input_model.model.method, input_model.model.basis) + ' ' + ' '.join(gaussian_kw) + '\n')
+        input_file.append("#P {}/{}".format(input_model.model.method, input_model.model.basis) + ' ' + ' '.join(gaussian_kw))
+
+        if (save_fchk):
+            input_file.append(' formcheck')
+
+        input_file.append("\n")
         input_file.append("write your comment here\n")
   
         # Create a mol object
@@ -163,6 +179,7 @@ def build_input(
     def execute(self,
                 inputs,
                 *,
+                extra_infiles: Optional[Dict[str, str]] = None,
                 extra_outfiles: Optional[Dict[str, str]] = None,
                 extra_commands: Optional[List[str]] = None,
                 scratch_name = None,

From 95d16f0646e7e1f9a066d525c823999370b8c283 Mon Sep 17 00:00:00 2001
From: Reza Hemmati <50717617+QuChem@users.noreply.github.com>
Date: Fri, 12 Apr 2024 13:27:09 -0400
Subject: [PATCH 19/36] Handle the fchk file in gaussian.py

---
 qcengine/programs/gaussian.py | 37 +++++++++++++++++++++--------------
 1 file changed, 22 insertions(+), 15 deletions(-)

diff --git a/qcengine/programs/gaussian.py b/qcengine/programs/gaussian.py
index ca8f4184d..8e1bfeb59 100644
--- a/qcengine/programs/gaussian.py
+++ b/qcengine/programs/gaussian.py
@@ -137,25 +137,16 @@ def build_input(
                     'scf_diis': 'false'}
 
         save_fchk = False
-        
+
         if input_model.protocols.native_files == 'all':
             save_fchk = True
-            keywords['native_files'] = 'fcheck'
-            output['native_files'] = {'input': str}
-        if (save_fchk):
-            with open('Test.FChk', 'r') as f:
-                output['native_files']['fchk'] = f.read()
-                       
+            gaussian_kw.append('formcheck')
+
         # Begin input file
         input_file = []
         input_file.append('%mem={}MB'.format(int(config.memory * 1024)))
-        input_file.append("#P {}/{}".format(input_model.model.method, input_model.model.basis) + ' ' + ' '.join(gaussian_kw))
-
-        if (save_fchk):
-            input_file.append(' formcheck')
-
-        input_file.append("\n")
-        input_file.append("write your comment here\n")
+        input_file.append('#P {}/{}'.format(input_model.model.method, input_model.model.basis) + ' ' + ' '.join(gaussian_kw) + '\n')
+        input_file.append('write your comment here\n')
   
         # Create a mol object
         mol = input_model.molecule
@@ -168,6 +159,10 @@ def build_input(
             input_file.append(f'{sym} {geom[0]:14.8f} {geom[1]:14.8f} {geom[2]:14.8f}')
         input_file.append("\n")
 
+#        print ('*' * 100)
+#        print ('\n'.join(input_file))
+#        print ('*' * 100)
+
         gaussian_ret = {
             'infiles': {'input.inp': '\n'.join(input_file)},
             'commands': [which("g09"),  'input.inp', 'output.log']
@@ -255,7 +250,19 @@ def parse_output(self, outfiles: Dict[str, str], input_model: AtomicInput) -> At
         # HACK - scf_values can have NaN
         # remove for now
         output_data['extras']['cclib'].pop('scfvalues')
-        
+
+        if (input_model.protocols.native_files == 'all'):
+            output_data['native_files'] = {}
+            
+            tmp_input_file = os.path.join(tmp_output_path, 'input.inp')
+            with open(tmp_input_file, 'rt') as f:
+                output_data['native_files']['input.inp'] = f.read()
+                
+            # formcheck keyword always creates the Test.FChk file
+            tmp_check_file = os.path.join(tmp_output_path, 'Test.FChk')
+            with open(tmp_check_file, 'rt') as f:
+                output_data['native_files']['gaussian.fchk'] = f.read()
+                
         merged_data = {**input_model.dict(), **output_data}
 
         return AtomicResult(**merged_data)

From 61216dd2f8cab98494478998793d1399087636e5 Mon Sep 17 00:00:00 2001
From: Reza Hemmati <50717617+QuChem@users.noreply.github.com>
Date: Wed, 12 Jun 2024 10:53:29 -0400
Subject: [PATCH 20/36] Dependency cclib check

---
 qcengine/programs/gaussian.py | 34 +++++++++++++++++++++++++++++-----
 1 file changed, 29 insertions(+), 5 deletions(-)

diff --git a/qcengine/programs/gaussian.py b/qcengine/programs/gaussian.py
index 8e1bfeb59..4d74cae20 100644
--- a/qcengine/programs/gaussian.py
+++ b/qcengine/programs/gaussian.py
@@ -15,7 +15,7 @@
 from qcelemental import constants
 from qcelemental.models import AtomicInput, AtomicResult, Molecule, Provenance
 from qcelemental.molparse import regex
-from qcelemental.util import parse_version, safe_version, which
+from qcelemental.util import parse_version, safe_version, which, which_import
 
 from qcengine.config import TaskConfig, get_config
 
@@ -39,13 +39,37 @@ class Config(ProgramHarness.Config):
         pass
 
     def found(self, raise_error: bool = False) -> bool:
-        return which(
+        '''
+        It checks to see if GaussianHarness is ready for operation
+        
+        Parameters
+        ----------
+        raise_error: bool
+           Passed on to control negative return between False and ModuleNotFoundError raised.
+
+        Returns
+        -------
+        bool
+           If both gaussian and its dependency cclib are found, returns True
+           If gaussian or cclib are missing, returns False.
+           If raise_error is True and gaussian or cclib are missing, the error message for the missing one is raised.
+        '''
+        qc = which(
             "g09",
-            return_bool=True,
-            raise_error=raise_error,
-            raise_msg="Please install Gaussian. Check it's in your PATH with `which g09`."
+            return_bool = True,
+            raise_error = raise_error,
+            raise_msg = "Please install Gaussian. Check it's in your PATH with `which g09`."
         )
 
+        dep = which_import(
+                "cclib",
+                return_bool = True,
+                raise_error = raise_error,
+                raise_msg = "For gaussian harness, please install cclib by typing in `conda install -c conda-forge cclib`."
+                )
+        
+        return qc, dep
+
     def get_version(self) -> str:
         self.found(raise_error=True)
 

From a2ff5c86a2c4310f966148d697ade53420538355 Mon Sep 17 00:00:00 2001
From: Reza Hemmati <50717617+QuChem@users.noreply.github.com>
Date: Wed, 12 Jun 2024 12:49:36 -0400
Subject: [PATCH 21/36]  gaussian.py encompasses g09 and g16

---
 qcengine/programs/gaussian.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/qcengine/programs/gaussian.py b/qcengine/programs/gaussian.py
index 4d74cae20..8b4221c82 100644
--- a/qcengine/programs/gaussian.py
+++ b/qcengine/programs/gaussian.py
@@ -55,12 +55,20 @@ def found(self, raise_error: bool = False) -> bool:
            If raise_error is True and gaussian or cclib are missing, the error message for the missing one is raised.
         '''
         qc = which(
+            "g16",
+            return_bool = True,
+            raise_error = False,
+            raise_msg = "Please install Gaussian. Check it's in your PATH with `which g16`."
+        )
+
+        if not qc:
+            qc = which(
             "g09",
             return_bool = True,
             raise_error = raise_error,
-            raise_msg = "Please install Gaussian. Check it's in your PATH with `which g09`."
+            raise_msg = "Please install Gaussian. Check it's in your PATH with `which g09` or `which g16`."
         )
-
+        
         dep = which_import(
                 "cclib",
                 return_bool = True,
@@ -68,12 +76,12 @@ def found(self, raise_error: bool = False) -> bool:
                 raise_msg = "For gaussian harness, please install cclib by typing in `conda install -c conda-forge cclib`."
                 )
         
-        return qc, dep
+        return qc & dep
 
     def get_version(self) -> str:
         self.found(raise_error=True)
 
-        which_prog = which("g09")
+        #which_prog = which("g09")
         
         v_input = '''%mem=20MW
 #P HF/sto-3g

From ab23bb6b18c1a0f3833d0e6c0ca2611e424dd846 Mon Sep 17 00:00:00 2001
From: Reza Hemmati <50717617+QuChem@users.noreply.github.com>
Date: Mon, 15 Jul 2024 11:10:20 -0400
Subject: [PATCH 22/36] Extract different energy values

---
 qcengine/programs/gaussian.py | 38 +++++++++++++++++++++++++++++------
 1 file changed, 32 insertions(+), 6 deletions(-)

diff --git a/qcengine/programs/gaussian.py b/qcengine/programs/gaussian.py
index 8b4221c82..61fda41f3 100644
--- a/qcengine/programs/gaussian.py
+++ b/qcengine/programs/gaussian.py
@@ -81,7 +81,9 @@ def found(self, raise_error: bool = False) -> bool:
     def get_version(self) -> str:
         self.found(raise_error=True)
 
-        #which_prog = which("g09")
+        which_prog = which("g16")
+        if which_prog is None:
+            which_prog = which('g09')
         
         v_input = '''%mem=20MW
 #P HF/sto-3g
@@ -106,6 +108,9 @@ def get_version(self) -> str:
                         #version_line = version_line.split()[0]
                         #self.version_cache[which_prog] = safe_version(version_line)
                         self.version_cache[which_prog] = '2009'
+                    
+                    if "Gaussian 16" in line:
+                        self.version_cache[which_prog] = '2016'
                         
         return self.version_cache[which_prog]
 
@@ -242,11 +247,15 @@ def parse_output(self, outfiles: Dict[str, str], input_model: AtomicInput) -> At
         last_occupied_energy = data.moenergies[0][data.homos[0]]
         #output_data['HOMO ENERGY'] = last_occupied_energy
         
-        scf_energy = data.scfenergies[0] / constants.conversion_factor("hartree", "eV") # Change from the eV unit to the Hartree unit
+        #scf_energy = data.scfenergies[0] / constants.conversion_factor("hartree", "eV") # Change from the eV unit to the Hartree unit
         #output_data['SCF ENERGY'] = scf_energy
         
         if input_model.driver == 'energy':
             output_data['return_result'] = scf_energy
+        if input_model.driver == 'gradient':
+            output_data['return_result'] = data.grads
+            #output_data['return_gradient'] = data.grads
+        
         #print (os.system('ccget --list ' + tmp_output_file)) #data available in the output for parsing
 
         #if input_model.driver == 'energy':
@@ -255,10 +264,27 @@ def parse_output(self, outfiles: Dict[str, str], input_model: AtomicInput) -> At
         #print (input_model)
 
         properties = {
-            'nuclear_repulsion_energy': Nuclear(data).repulsion_energy(),
-            'scf_total_energy': scf_energy,
-            'return_energy': scf_energy
-            }
+            'nuclear_repulsion_energy': Nuclear(data).repulsion_energy()
+        }
+
+        if input_model.model.method.lower() == 'hf':
+            scf_energy = data.scfenergies[0] / constants.conversion_factor("hartree", "eV") # Change from the eV unit to the Hartree unit
+            properties['scf_total_energy'] = scf_energy
+            properties['return_energy'] = scf_energy
+            output_data['return_result'] = scf_energy
+
+        if input_model.model.method.lower() == 'mp2':
+            mp2_energy = data.mpenergies[0] / constants.conversion_factor("hartree", "eV") # Change from the eV unit to the Hartree unit
+            #print ('mp2 energy is: ', mp2_energy[0])
+            properties['mp2_total_energy'] = mp2_energy[0]
+            properties['return_energy'] = mp2_energy[0]
+            output_data['return_result'] = mp2_energy[0]
+
+        if input_model.model.method.lower() == 'ccsd(t)':
+           cc_energy = data.ccenergies[0] / constants.conversion_factor("hartree", "eV") # Change from the eV unit to the Hartree unit
+           properties['ccsd_prt_pr_total_energy'] = cc_energy
+           properties['return_energy'] = cc_energy
+           output_data['return_result'] = cc_energy           
         
         output_data['properties'] = properties
         output_data['stdout'] = outfiles['outfiles']['output.log']

From 5df82727986e5ffd1d7d6794cf0b300ccab6cb54 Mon Sep 17 00:00:00 2001
From: Reza Hemmati <50717617+QuChem@users.noreply.github.com>
Date: Tue, 16 Jul 2024 12:33:51 -0400
Subject: [PATCH 23/36] reordered the if-else statement

---
 qcengine/programs/gaussian.py | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/qcengine/programs/gaussian.py b/qcengine/programs/gaussian.py
index 61fda41f3..75245835a 100644
--- a/qcengine/programs/gaussian.py
+++ b/qcengine/programs/gaussian.py
@@ -267,24 +267,27 @@ def parse_output(self, outfiles: Dict[str, str], input_model: AtomicInput) -> At
             'nuclear_repulsion_energy': Nuclear(data).repulsion_energy()
         }
 
-        if input_model.model.method.lower() == 'hf':
+        if input_model.model.method.lower().startswith('mp'):
             scf_energy = data.scfenergies[0] / constants.conversion_factor("hartree", "eV") # Change from the eV unit to the Hartree unit
+            mp2_energy = data.mpenergies[0] / constants.conversion_factor("hartree", "eV")
             properties['scf_total_energy'] = scf_energy
-            properties['return_energy'] = scf_energy
-            output_data['return_result'] = scf_energy
-
-        if input_model.model.method.lower() == 'mp2':
-            mp2_energy = data.mpenergies[0] / constants.conversion_factor("hartree", "eV") # Change from the eV unit to the Hartree unit
-            #print ('mp2 energy is: ', mp2_energy[0])
             properties['mp2_total_energy'] = mp2_energy[0]
             properties['return_energy'] = mp2_energy[0]
             output_data['return_result'] = mp2_energy[0]
 
-        if input_model.model.method.lower() == 'ccsd(t)':
-           cc_energy = data.ccenergies[0] / constants.conversion_factor("hartree", "eV") # Change from the eV unit to the Hartree unit
-           properties['ccsd_prt_pr_total_energy'] = cc_energy
-           properties['return_energy'] = cc_energy
-           output_data['return_result'] = cc_energy           
+        elif input_model.model.method.lower().startswith('cc'):
+            scf_energy = data.scfenergies[0] / constants.conversion_factor("hartree", "eV")
+            cc_energy = data.ccenergies[0] / constants.conversion_factor("hartree", "eV")
+            properties['scf_total_energy'] = scf_energy
+            properties['ccsd_prt_pr_total_energy'] = cc_energy
+            properties['return_energy'] = cc_energy
+            output_data['return_result'] = cc_energy           
+
+        else: # input_model.model.method.lower() in ['hf', 'scf']:
+            scf_energy = data.scfenergies[0] / constants.conversion_factor("hartree", "eV")
+            properties['scf_total_energy'] = scf_energy
+            properties['return_energy'] = scf_energy
+            output_data['return_result'] = scf_energy
         
         output_data['properties'] = properties
         output_data['stdout'] = outfiles['outfiles']['output.log']

From 6377cb0171bba56c1541152ce96f49ca1805d171 Mon Sep 17 00:00:00 2001
From: Reza Hemmati <50717617+QuChem@users.noreply.github.com>
Date: Thu, 16 Jan 2025 08:11:13 -0600
Subject: [PATCH 24/36] Add additional libraries to gaussian.py

---
 qcengine/programs/gaussian.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/qcengine/programs/gaussian.py b/qcengine/programs/gaussian.py
index 75245835a..bf1a3641a 100644
--- a/qcengine/programs/gaussian.py
+++ b/qcengine/programs/gaussian.py
@@ -6,23 +6,30 @@
 import re
 import tempfile
 import warnings
+import pprint
+import copy
+
 from collections import defaultdict
+
 from typing import Any, Dict, List, Optional, Tuple
+
 import cclib
 from cclib.method import Nuclear
 
-import numpy as np
 from qcelemental import constants
 from qcelemental.models import AtomicInput, AtomicResult, Molecule, Provenance
+from qcelemental.models import OptimizationInput, OptimizationResult, BasisSet
 from qcelemental.molparse import regex
 from qcelemental.util import parse_version, safe_version, which, which_import
 
-from qcengine.config import TaskConfig, get_config
-
 from ..exceptions import InputError, UnknownError
 from ..util import disk_files, execute, temporary_directory
+from .util import error_stamp
 from .model import ProgramHarness
 
+from qcengine.config import TaskConfig, get_config
+from qcengine.procedures.model import ProcedureHarness
+
 class GaussianHarness(ProgramHarness):
 
     _defaults = {

From c7c0c1c81af63d1f05140002c6361828343bd862 Mon Sep 17 00:00:00 2001
From: Reza Hemmati <50717617+QuChem@users.noreply.github.com>
Date: Thu, 16 Jan 2025 12:51:53 -0600
Subject: [PATCH 25/36] Add major updates to most of the methos in gaussian.py

---
 qcengine/programs/gaussian.py | 239 +++++++++++++++++-----------------
 1 file changed, 123 insertions(+), 116 deletions(-)

diff --git a/qcengine/programs/gaussian.py b/qcengine/programs/gaussian.py
index bf1a3641a..7fafe5cc9 100644
--- a/qcengine/programs/gaussian.py
+++ b/qcengine/programs/gaussian.py
@@ -92,69 +92,52 @@ def get_version(self) -> str:
         if which_prog is None:
             which_prog = which('g09')
         
-        v_input = '''%mem=20MW
-#P HF/sto-3g
-
-#test HF/sto-3g for H atom
-
-0 2
-H
-
-'''
-        if which_prog not in self.version_cache:
-            success, output = execute([which_prog, 'v.inp', 'v.log'],
-                                      {'v.inp': v_input},
-                                      ['v.log']
-                                     )
-            if success:
-                outtext = output['outfiles']['v.log']
-                outtext = outtext.splitlines()
-                for line in outtext:
-                    if 'Gaussian 09' in line:
-                        #version_line = line.split('Gaussian 09:')[-1]
-                        #version_line = version_line.split()[0]
-                        #self.version_cache[which_prog] = safe_version(version_line)
-                        self.version_cache[which_prog] = '2009'
-                    
-                    if "Gaussian 16" in line:
-                        self.version_cache[which_prog] = '2016'
-                        
+        self.version_cache[which_prog] = safe_version(which_prog.split('/')[-1])
+        
         return self.version_cache[which_prog]
 
-    def compute(self, input_model: "AtomicInput", config: TaskConfig) -> "AtomicResult":
+    def compute(self, input_model: 'AtomicInput', config: 'TaskConfig') -> 'AtomicResult':
         """
-        Run Gaussian
+        Run Gaussian program
         """
+        
         # Check if Gaussian executable is found
         self.found(raise_error=True)
-
+        
         # Setup the job
         job_inputs = self.build_input(input_model, config)
-
+        
         # Run Gaussian
-        exe_success, proc = self.execute(job_inputs)
+        success, _exe = self.execute(job_inputs)
+
+        stdin = job_inputs['infiles']['input.inp']
+
+        if isinstance(input_model.model.basis, BasisSet):
+            raise InputError("QCSchema BasisSet for model.basis not implemented. Use string basis name.")
 
         # Determine whether the calculation succeeded
-        if exe_success:
+        if success:
             # If execution succeeded, collect results
-            result = self.parse_output(proc, input_model)
-            
-            return result
- 
+            return self.parse_output(_exe, input_model)
         else:
-            proc['outfiles']['stderr'] = proc['outfiles']['output.log']
-            outfile = proc['outfiles']['output.log']
+            _exe['outfiles']['stderr'] = _exe['outfiles']['output.log']
+            outfile = _exe['outfiles']['output.log']
             
             if 'Error termination via ' in outfile:
-                raise InputError(proc['outfiles']['output.log'])
+                raise InputError(error_stamp(stdin, outfile))
          
             else:
                 # Return UnknownError for error propagation
-                raise UnknownError(proc['outfiles']['output.log'])
+                raise UnknownError(error_stamp(stdin, outfile))
 
     def build_input(
-        self, input_model: AtomicInput, config: TaskConfig, template: Optional[str] = None
-    ) -> Dict[str, Any]:
+            self, input_model: AtomicInput,
+            config: TaskConfig,
+            template: Optional[str] = None) -> Dict[str, Any]:
+
+        if template is None:
+            input_file = []
+            caseless_keywords = {k.lower(): v for k, v in input_model.keywords.items()}
 
         # Build keywords
         keywords = {k.upper(): v for k, v in input_model.keywords.items()}
@@ -188,20 +171,13 @@ def build_input(
 
         # Begin input file
         input_file = []
-        input_file.append('%mem={}MB'.format(int(config.memory * 1024)))
+        input_file.append('%mem={}MB'.format(int(config.memory * 1024))) # In MB
         input_file.append('#P {}/{}'.format(input_model.model.method, input_model.model.basis) + ' ' + ' '.join(gaussian_kw) + '\n')
         input_file.append('write your comment here\n')
   
-        # Create a mol object
-        mol = input_model.molecule
-        input_file.append(f'{int(mol.molecular_charge)} {mol.molecular_multiplicity}')
-
-        # Write the geometry
-        for real, sym, geom in zip(mol.real, mol.symbols, mol.geometry):
-            if real is False:
-                raise InputError('Cannot handle ghost atoms yet.')
-            input_file.append(f'{sym} {geom[0]:14.8f} {geom[1]:14.8f} {geom[2]:14.8f}')
-        input_file.append("\n")
+        # Handle the geometry
+        molcmd, moldata = input_model.molecule.to_string(dtype = 'gaussian', units = 'Angstrom', return_data = True)
+        input_file.append(molcmd.lstrip())
 
 #        print ('*' * 100)
 #        print ('\n'.join(input_file))
@@ -209,15 +185,15 @@ def build_input(
 
         gaussian_ret = {
             'infiles': {'input.inp': '\n'.join(input_file)},
-            'commands': [which("g09"),  'input.inp', 'output.log']
-            #'scratch_directory': config.scratch_directory
-            }
-
+            'commands': [which("g09"),  'input.inp', 'output.log'],
+            'scratch_directory': config.scratch_directory,
+            'scratch_messy': config.scratch_messy
+        }
+        
         return gaussian_ret
 
     def execute(self,
                 inputs,
-                *,
                 extra_infiles: Optional[Dict[str, str]] = None,
                 extra_outfiles: Optional[Dict[str, str]] = None,
                 extra_commands: Optional[List[str]] = None,
@@ -229,39 +205,56 @@ def execute(self,
             inputs['commands'],
             inputs['infiles'],
             outfiles = ['output.log'],
-            scratch_messy = True
+            scratch_directory = inputs['scratch_directory'],
+            scratch_messy = True #inputs['scratch_messy']
             )
         
-        if (dexe['outfiles']['output.log'] is None) or (
-           'Error termination via' in dexe['outfiles']['output.log']):
-           print ('THERE IS AN ERROR!')
-           
-           success = False
-        
         return success, dexe
  
-    def parse_output(self, outfiles: Dict[str, str], input_model: AtomicInput) -> AtomicResult:
-        
-        output_data = {}
-        properties = {}
-        cclib_vars = {}
-        
+    def parse_output(self, outfiles: Dict[str, str], input_model: 'AtomicInput') -> 'AtomicResult':
+
         tmp_output_path = outfiles['scratch_directory']
+        print ('tmp_output_path=',tmp_output_path)
         tmp_output_file = os.path.join(tmp_output_path, 'output.log')
+        print ('tmp_output_file=',tmp_output_file)
         data = cclib.io.ccread(tmp_output_file)
-        cclib_vars = data.getattributes(True)
+        print ('DATA=', data)
+        scf_energy = data.scfenergies[0] / constants.conversion_factor("hartree", "eV") # Change from the eV unit to the Hartree unit
+        print ('SCF ENERGY: ', scf_energy)
+        
+        output_data = {
+        	"schema_version": 1,
+        	"molecule": input_model.molecule,
+        	'extras': input_model.extras,
+            "native_files": {k: v for k, v in outfiles.items() if v is not None},
+            'properties': '',
+            'provenance': Provenance(creator="gaussian", version=self.get_version(), routine='g09').dict(),
+            'return_result': '500',
+            "stderr": '100',
+            "stdout": outfiles['outfiles']['output.log'],
+            "success": True,
+        		}
+
+        #output_data = {}
+        #properties = {}
+        #cclib_vars = {}
         
-        last_occupied_energy = data.moenergies[0][data.homos[0]]
+        #tmp_output_path = outfiles['scratch_directory']
+        #tmp_output_file = os.path.join(tmp_output_path, 'output.log')
+        #data = cclib.io.ccread(tmp_output_file)
+        #cclib_vars = data.getattributes(True)
+        
+        #last_occupied_energy = data.moenergies[0][data.homos[0]]
         #output_data['HOMO ENERGY'] = last_occupied_energy
         
         #scf_energy = data.scfenergies[0] / constants.conversion_factor("hartree", "eV") # Change from the eV unit to the Hartree unit
         #output_data['SCF ENERGY'] = scf_energy
         
-        if input_model.driver == 'energy':
-            output_data['return_result'] = scf_energy
-        if input_model.driver == 'gradient':
-            output_data['return_result'] = data.grads
-            #output_data['return_gradient'] = data.grads
+        #if input_model.driver == 'energy':
+        #    output_data['return_result'] = scf_energy
+        #if input_model.driver == 'gradient':
+        #    output_data['return_result'] = data.grads
+        #    #output_data['return_gradient'] = data.grads
         
         #print (os.system('ccget --list ' + tmp_output_file)) #data available in the output for parsing
 
@@ -270,64 +263,65 @@ def parse_output(self, outfiles: Dict[str, str], input_model: AtomicInput) -> At
         #   print (output_data)
         #print (input_model)
 
-        properties = {
-            'nuclear_repulsion_energy': Nuclear(data).repulsion_energy()
-        }
-
-        if input_model.model.method.lower().startswith('mp'):
-            scf_energy = data.scfenergies[0] / constants.conversion_factor("hartree", "eV") # Change from the eV unit to the Hartree unit
-            mp2_energy = data.mpenergies[0] / constants.conversion_factor("hartree", "eV")
-            properties['scf_total_energy'] = scf_energy
-            properties['mp2_total_energy'] = mp2_energy[0]
-            properties['return_energy'] = mp2_energy[0]
-            output_data['return_result'] = mp2_energy[0]
-
-        elif input_model.model.method.lower().startswith('cc'):
-            scf_energy = data.scfenergies[0] / constants.conversion_factor("hartree", "eV")
-            cc_energy = data.ccenergies[0] / constants.conversion_factor("hartree", "eV")
-            properties['scf_total_energy'] = scf_energy
-            properties['ccsd_prt_pr_total_energy'] = cc_energy
-            properties['return_energy'] = cc_energy
-            output_data['return_result'] = cc_energy           
-
-        else: # input_model.model.method.lower() in ['hf', 'scf']:
-            scf_energy = data.scfenergies[0] / constants.conversion_factor("hartree", "eV")
-            properties['scf_total_energy'] = scf_energy
-            properties['return_energy'] = scf_energy
-            output_data['return_result'] = scf_energy
+        #properties = {
+        #    'nuclear_repulsion_energy': Nuclear(data).repulsion_energy()
+        #}
+
+        #if input_model.model.method.lower().startswith('mp'):
+        #    scf_energy = data.scfenergies[0] / constants.conversion_factor("hartree", "eV") # Change from the eV unit to the Hartree unit
+        #    mp2_energy = data.mpenergies[0] / constants.conversion_factor("hartree", "eV")
+        #    properties['scf_total_energy'] = scf_energy
+        #    properties['mp2_total_energy'] = mp2_energy[0]
+        #    properties['return_energy'] = mp2_energy[0]
+        #    output_data['return_result'] = mp2_energy[0]
+
+        #elif input_model.model.method.lower().startswith('cc'):
+        #    scf_energy = data.scfenergies[0] / constants.conversion_factor("hartree", "eV")
+        #    cc_energy = data.ccenergies[0] / constants.conversion_factor("hartree", "eV")
+        #    properties['scf_total_energy'] = scf_energy
+        #    properties['ccsd_prt_pr_total_energy'] = cc_energy
+        #    properties['return_energy'] = cc_energy
+        #    output_data['return_result'] = cc_energy           
+
+        #else: # input_model.model.method.lower() in ['hf', 'scf']:
+        #    scf_energy = data.scfenergies[0] / constants.conversion_factor("hartree", "eV")
+        #    properties['scf_total_energy'] = scf_energy
+        #    properties['return_energy'] = scf_energy
+        #    output_data['return_result'] = scf_energy
         
-        output_data['properties'] = properties
-        output_data['stdout'] = outfiles['outfiles']['output.log']
-        output_data['success'] = True
-        #print ('output_data: ', output_data)
+        #output_data['properties'] = properties
+        #output_data['stdout'] = outfiles['outfiles']['output.log']
+        #output_data['success'] = True
+        ##print ('output_data: ', output_data)
 
-        provenance = Provenance(creator="gaussian", version=self.get_version(), routine='g09').dict()
+        #provenance = Provenance(creator="gaussian", version=self.get_version(), routine='g09').dict()
 
-        stdout = outfiles.pop('stdout')
-        stderr = outfiles.pop('stderr')
+        #stdout = outfiles.pop('stdout')
+        #stderr = outfiles.pop('stderr')
         #print("\nPRINT STDOUT: \n", stdout)
         
 
-        method = input_model.model.method.lower()
+        #method = input_model.model.method.lower()
         #method = method[4:] if method.startswith("") else method
 
         # filter unwanted data
-        to_remove = ['atomnos', 'atomcoords', 'natom']
-        output_data['extras'] = {'cclib': {k:v for k, v in cclib_vars.items() if k not in to_remove}}
+        #to_remove = ['atomnos', 'atomcoords', 'natom']
+        #output_data['extras'] = {'cclib': {k:v for k, v in cclib_vars.items() if k not in to_remove}}
 
         # HACK - scf_values can have NaN
         # remove for now
-        output_data['extras']['cclib'].pop('scfvalues')
+        #output_data['extras']['cclib'].pop('scfvalues')
 
         if (input_model.protocols.native_files == 'all'):
             output_data['native_files'] = {}
-            
             tmp_input_file = os.path.join(tmp_output_path, 'input.inp')
+            
             with open(tmp_input_file, 'rt') as f:
                 output_data['native_files']['input.inp'] = f.read()
                 
             # formcheck keyword always creates the Test.FChk file
             tmp_check_file = os.path.join(tmp_output_path, 'Test.FChk')
+            
             with open(tmp_check_file, 'rt') as f:
                 output_data['native_files']['gaussian.fchk'] = f.read()
                 
@@ -335,3 +329,16 @@ def parse_output(self, outfiles: Dict[str, str], input_model: AtomicInput) -> At
 
         return AtomicResult(**merged_data)
 
+class GaussianDriverProcedure(ProcedureHarness):
+    '''
+    Optimize a geometry
+    '''
+
+    _defaults = {'name': 'GaussianDriver', 'procedure': 'optimization'}
+
+    class Config(ProcedureHarness.Config):
+        pass
+
+    def found(self, raise_error: bool = False) -> bool:
+        gaussian_harness = GaussianHarness()
+        return gaussian_harness(raise_error)

From 39d565abe0d71b594077820f0b7e5376c08d1b0b Mon Sep 17 00:00:00 2001
From: Reza Hemmati <50717617+QuChem@users.noreply.github.com>
Date: Wed, 29 Jan 2025 08:09:02 -0600
Subject: [PATCH 26/36] Update parse_output method

---
 qcengine/programs/gaussian.py | 67 ++++++++++++++++++-----------------
 1 file changed, 35 insertions(+), 32 deletions(-)

diff --git a/qcengine/programs/gaussian.py b/qcengine/programs/gaussian.py
index 7fafe5cc9..6d9774875 100644
--- a/qcengine/programs/gaussian.py
+++ b/qcengine/programs/gaussian.py
@@ -229,10 +229,10 @@ def parse_output(self, outfiles: Dict[str, str], input_model: 'AtomicInput') ->
             "native_files": {k: v for k, v in outfiles.items() if v is not None},
             'properties': '',
             'provenance': Provenance(creator="gaussian", version=self.get_version(), routine='g09').dict(),
-            'return_result': '500',
-            "stderr": '100',
+            'return_result': scf_energy,
+            "stderr": outfiles['outfiles']['output.log'],,
             "stdout": outfiles['outfiles']['output.log'],
-            "success": True,
+            "success": True
         		}
 
         #output_data = {}
@@ -242,7 +242,7 @@ def parse_output(self, outfiles: Dict[str, str], input_model: 'AtomicInput') ->
         #tmp_output_path = outfiles['scratch_directory']
         #tmp_output_file = os.path.join(tmp_output_path, 'output.log')
         #data = cclib.io.ccread(tmp_output_file)
-        #cclib_vars = data.getattributes(True)
+        cclib_vars = data.getattributes(True)
         
         #last_occupied_energy = data.moenergies[0][data.homos[0]]
         #output_data['HOMO ENERGY'] = last_occupied_energy
@@ -263,39 +263,42 @@ def parse_output(self, outfiles: Dict[str, str], input_model: 'AtomicInput') ->
         #   print (output_data)
         #print (input_model)
 
-        #properties = {
-        #    'nuclear_repulsion_energy': Nuclear(data).repulsion_energy()
-        #}
-
-        #if input_model.model.method.lower().startswith('mp'):
-        #    scf_energy = data.scfenergies[0] / constants.conversion_factor("hartree", "eV") # Change from the eV unit to the Hartree unit
-        #    mp2_energy = data.mpenergies[0] / constants.conversion_factor("hartree", "eV")
-        #    properties['scf_total_energy'] = scf_energy
-        #    properties['mp2_total_energy'] = mp2_energy[0]
-        #    properties['return_energy'] = mp2_energy[0]
-        #    output_data['return_result'] = mp2_energy[0]
-
-        #elif input_model.model.method.lower().startswith('cc'):
-        #    scf_energy = data.scfenergies[0] / constants.conversion_factor("hartree", "eV")
-        #    cc_energy = data.ccenergies[0] / constants.conversion_factor("hartree", "eV")
-        #    properties['scf_total_energy'] = scf_energy
-        #    properties['ccsd_prt_pr_total_energy'] = cc_energy
-        #    properties['return_energy'] = cc_energy
-        #    output_data['return_result'] = cc_energy           
-
-        #else: # input_model.model.method.lower() in ['hf', 'scf']:
-        #    scf_energy = data.scfenergies[0] / constants.conversion_factor("hartree", "eV")
-        #    properties['scf_total_energy'] = scf_energy
-        #    properties['return_energy'] = scf_energy
-        #    output_data['return_result'] = scf_energy
+        properties = {
+            'nuclear_repulsion_energy': Nuclear(data).repulsion_energy()
+        }
+
+
+        if input_model.model.method.lower() in ['hf', 'scf']:
+            scf_energy = data.scfenergies[0] / constants.conversion_factor("hartree", "eV")
+            properties['scf_total_energy'] = scf_energy
+            properties['return_energy'] = scf_energy
+            output_data['return_result'] = scf_energy
+        
+        if input_model.model.method.lower().startswith('mp'):
+            scf_energy = data.scfenergies[0] / constants.conversion_factor("hartree", "eV") # Change from the eV unit to the Hartree unit
+            mp2_energy = data.mpenergies[0] / constants.conversion_factor("hartree", "eV")
+            properties['scf_total_energy'] = scf_energy
+            properties['mp2_total_energy'] = mp2_energy[0]
+            properties['return_energy'] = mp2_energy[0]
+            output_data['return_result'] = mp2_energy[0]
+
+        if input_model.model.method.lower().startswith('cc'):
+            scf_energy = data.scfenergies[0] / constants.conversion_factor("hartree", "eV")
+            cc_energy = data.ccenergies[0] / constants.conversion_factor("hartree", "eV")
+            properties['scf_total_energy'] = scf_energy
+            properties['ccsd_prt_pr_total_energy'] = cc_energy
+            properties['return_energy'] = cc_energy
+            output_data['return_result'] = cc_energy           
+
+        properties['calcinfo_nbasis'] = data.nbasis
+        properties['calcinfo_nmo'] = data.nmo
+        properties['calcinfo_natom'] = data.natom
         
-        #output_data['properties'] = properties
+        output_data['properties'] = properties
         #output_data['stdout'] = outfiles['outfiles']['output.log']
         #output_data['success'] = True
         ##print ('output_data: ', output_data)
 
-        #provenance = Provenance(creator="gaussian", version=self.get_version(), routine='g09').dict()
-
         #stdout = outfiles.pop('stdout')
         #stderr = outfiles.pop('stderr')
         #print("\nPRINT STDOUT: \n", stdout)

From 1d0497976f8fd223d55eb4abae28ad557d780b18 Mon Sep 17 00:00:00 2001
From: Reza Hemmati <50717617+QuChem@users.noreply.github.com>
Date: Wed, 29 Jan 2025 09:49:50 -0600
Subject: [PATCH 27/36] Update compute method

---
 qcengine/programs/gaussian.py | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/qcengine/programs/gaussian.py b/qcengine/programs/gaussian.py
index 6d9774875..af60331dd 100644
--- a/qcengine/programs/gaussian.py
+++ b/qcengine/programs/gaussian.py
@@ -11,7 +11,7 @@
 
 from collections import defaultdict
 
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Dict, List, Optional, Tuple, Union
 
 import cclib
 from cclib.method import Nuclear
@@ -19,7 +19,6 @@
 from qcelemental import constants
 from qcelemental.models import AtomicInput, AtomicResult, Molecule, Provenance
 from qcelemental.models import OptimizationInput, OptimizationResult, BasisSet
-from qcelemental.molparse import regex
 from qcelemental.util import parse_version, safe_version, which, which_import
 
 from ..exceptions import InputError, UnknownError
@@ -87,7 +86,9 @@ def found(self, raise_error: bool = False) -> bool:
 
     def get_version(self) -> str:
         self.found(raise_error=True)
-
+        
+        config = get_config()
+        
         which_prog = which("g16")
         if which_prog is None:
             which_prog = which('g09')
@@ -96,7 +97,7 @@ def get_version(self) -> str:
         
         return self.version_cache[which_prog]
 
-    def compute(self, input_model: 'AtomicInput', config: 'TaskConfig') -> 'AtomicResult':
+    def compute(self, input_model: 'AtomicInput', config: 'TaskConfig') -> Union[AtomicResult, FailedOperation]:
         """
         Run Gaussian program
         """
@@ -104,16 +105,16 @@ def compute(self, input_model: 'AtomicInput', config: 'TaskConfig') -> 'AtomicRe
         # Check if Gaussian executable is found
         self.found(raise_error=True)
         
+        if isinstance(input_model.model.basis, BasisSet):
+            raise InputError("QCSchema BasisSet for model.basis not implemented. Use string basis name.")
+
         # Setup the job
         job_inputs = self.build_input(input_model, config)
         
-        # Run Gaussian
-        success, _exe = self.execute(job_inputs)
-
         stdin = job_inputs['infiles']['input.inp']
 
-        if isinstance(input_model.model.basis, BasisSet):
-            raise InputError("QCSchema BasisSet for model.basis not implemented. Use string basis name.")
+        # Run Gaussian
+        success, _exe = self.execute(job_inputs)
 
         # Determine whether the calculation succeeded
         if success:

From 62b0b9bebf9404257d1e17035f5eff5c34cb7040 Mon Sep 17 00:00:00 2001
From: Reza Hemmati <50717617+QuChem@users.noreply.github.com>
Date: Wed, 29 Jan 2025 11:19:53 -0600
Subject: [PATCH 28/36] Update build_input method

---
 qcengine/programs/gaussian.py | 79 +++++++++++++++++------------------
 1 file changed, 38 insertions(+), 41 deletions(-)

diff --git a/qcengine/programs/gaussian.py b/qcengine/programs/gaussian.py
index af60331dd..8a6502d11 100644
--- a/qcengine/programs/gaussian.py
+++ b/qcengine/programs/gaussian.py
@@ -138,55 +138,52 @@ def build_input(
 
         if template is None:
             input_file = []
+            gaussian_kw = []
+            
             caseless_keywords = {k.lower(): v for k, v in input_model.keywords.items()}
 
-        # Build keywords
-        keywords = {k.upper(): v for k, v in input_model.keywords.items()}
+            if input_model.driver == "energy":
+                gaussian_kw.append("sp")
+            elif input_model.driver == "gradient":
+                gaussian_kw.append("force")
+            elif input_model.driver == "hessian":
+                gaussian_kw.append("freq")
+            else:
+                raise InputError(f"Driver {input_model.driver} not implemented for Gaussian.")
+
+            if 'scf_convergence' in caseless_keywords:
+                gaussian_kw.append('scf=' + caseless_keywords["scf_convergence"])
+            if 'population' in caseless_keywords:
+                gaussian_kw.append('pop=' + caseless_keywords['population'])
+
+            keywords = {'scf_damp': 'true',
+                        'scf_diis': 'false'}
+            
+            save_fchk = False
 
-        gaussian_kw = []
+            if input_model.protocols.native_files == 'all':
+                save_fchk = True
+                gaussian_kw.append('formcheck')
 
-        if input_model.driver == "energy":
-            gaussian_kw.append("sp")
-        elif input_model.driver == "gradient":
-            gaussian_kw.append("force")
-        elif input_model.driver == "hessian":
-            gaussian_kw.append("freq")
+            # Begin input file
+            input_file.append('%mem={}MB'.format(int(config.memory * 1024))) # In MB
+            input_file.append('#P {}/{}'.format(input_model.model.method, input_model.model.basis) + ' ' + ' '.join(gaussian_kw) + '\n')
+            iput_file.append('write your comment here\n')
+
+            # Handle the geometry
+            molcmd, moldata = input_model.molecule.to_string(dtype = 'gaussian', units = 'Angstrom', return_data = True)
+            input_file.append(molcmd.lstrip())
+
+            #        print ('*' * 100)
+            #        print ('\n'.join(input_file))
+            #        print ('*' * 100)
         else:
-            raise InputError(f"Driver {input_model.driver} not implemented for Gaussian.")
-
-        #if input_model.molecule.fix_com or input_model.molecule.fix_orientation:
-        #    keywords["SYM_IGNORE"] = "TRUE"
-        if 'SCF_CONVERGENCE' in keywords:
-           gaussian_kw.append('SCF=' + keywords["SCF_CONVERGENCE"])
-        if 'POPULATION' in keywords:
-           gaussian_kw.append('Pop=' + keywords['POPULATION'])   
-           
-        keywords = {'scf_damp': 'true',
-                    'scf_diis': 'false'}
-
-        save_fchk = False
-
-        if input_model.protocols.native_files == 'all':
-            save_fchk = True
-            gaussian_kw.append('formcheck')
-
-        # Begin input file
-        input_file = []
-        input_file.append('%mem={}MB'.format(int(config.memory * 1024))) # In MB
-        input_file.append('#P {}/{}'.format(input_model.model.method, input_model.model.basis) + ' ' + ' '.join(gaussian_kw) + '\n')
-        input_file.append('write your comment here\n')
-  
-        # Handle the geometry
-        molcmd, moldata = input_model.molecule.to_string(dtype = 'gaussian', units = 'Angstrom', return_data = True)
-        input_file.append(molcmd.lstrip())
-
-#        print ('*' * 100)
-#        print ('\n'.join(input_file))
-#        print ('*' * 100)
+            str_template = string.Template(template)
+            input_file = str_template.substitute()
 
         gaussian_ret = {
-            'infiles': {'input.inp': '\n'.join(input_file)},
             'commands': [which("g09"),  'input.inp', 'output.log'],
+            'infiles': {'input.inp': '\n'.join(input_file)},
             'scratch_directory': config.scratch_directory,
             'scratch_messy': config.scratch_messy
         }

From 3ccdc937635f03db0ca281032b4602b41493b162 Mon Sep 17 00:00:00 2001
From: Reza Hemmati <50717617+QuChem@users.noreply.github.com>
Date: Wed, 29 Jan 2025 11:43:03 -0600
Subject: [PATCH 29/36] Update the execute method

---
 qcengine/programs/gaussian.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/qcengine/programs/gaussian.py b/qcengine/programs/gaussian.py
index 8a6502d11..971e503dd 100644
--- a/qcengine/programs/gaussian.py
+++ b/qcengine/programs/gaussian.py
@@ -191,13 +191,12 @@ def build_input(
         return gaussian_ret
 
     def execute(self,
-                inputs,
-                extra_infiles: Optional[Dict[str, str]] = None,
-                extra_outfiles: Optional[Dict[str, str]] = None,
+                inputs: Dict[str, Any],
+                extra_outfiles: Optional[List[str]] = None,
                 extra_commands: Optional[List[str]] = None,
-                scratch_name = None,
+                scratch_name: Optional[str] = None,
                 timeout: Optional[int] = None
-               ):
+               ) -> Tuple[boll, Dict[str, Any]]:
 
         success, dexe = execute(
             inputs['commands'],

From 9c39b945341c04c169c1822c63f0fd99755b52b5 Mon Sep 17 00:00:00 2001
From: Reza Hemmati <50717617+QuChem@users.noreply.github.com>
Date: Fri, 31 Jan 2025 08:06:11 -0600
Subject: [PATCH 30/36] Update the parse_output method

---
 qcengine/programs/gaussian.py | 41 +++++++++++++++--------------------
 1 file changed, 18 insertions(+), 23 deletions(-)

diff --git a/qcengine/programs/gaussian.py b/qcengine/programs/gaussian.py
index 971e503dd..e121662f9 100644
--- a/qcengine/programs/gaussian.py
+++ b/qcengine/programs/gaussian.py
@@ -211,13 +211,15 @@ def execute(self,
     def parse_output(self, outfiles: Dict[str, str], input_model: 'AtomicInput') -> 'AtomicResult':
 
         tmp_output_path = outfiles['scratch_directory']
-        print ('tmp_output_path=',tmp_output_path)
+        #print ('tmp_output_path=',tmp_output_path)
         tmp_output_file = os.path.join(tmp_output_path, 'output.log')
-        print ('tmp_output_file=',tmp_output_file)
+        #print ('tmp_output_file=',tmp_output_file)
         data = cclib.io.ccread(tmp_output_file)
-        print ('DATA=', data)
+        #print ('DATA=', data)
         scf_energy = data.scfenergies[0] / constants.conversion_factor("hartree", "eV") # Change from the eV unit to the Hartree unit
-        print ('SCF ENERGY: ', scf_energy)
+        #print ('SCF ENERGY: ', scf_energy)
+        grad = data.grads
+        grad_list = grad.ravel().tolist() # Stores gradient as a single list where the ordering is [a1, b1, c1, a2, b2, c2, ...]
         
         output_data = {
         	"schema_version": 1,
@@ -232,27 +234,19 @@ def parse_output(self, outfiles: Dict[str, str], input_model: 'AtomicInput') ->
             "success": True
         		}
 
-        #output_data = {}
-        #properties = {}
-        #cclib_vars = {}
+        if input_model.driver == 'energy':
+            output_data['return_result'] = scf_energy
+        if input_model.driver == 'gradient':
+            output_data['return_result'] = grad_list
         
         #tmp_output_path = outfiles['scratch_directory']
         #tmp_output_file = os.path.join(tmp_output_path, 'output.log')
         #data = cclib.io.ccread(tmp_output_file)
-        cclib_vars = data.getattributes(True)
+        #cclib_vars = data.getattributes(True)
         
         #last_occupied_energy = data.moenergies[0][data.homos[0]]
         #output_data['HOMO ENERGY'] = last_occupied_energy
         
-        #scf_energy = data.scfenergies[0] / constants.conversion_factor("hartree", "eV") # Change from the eV unit to the Hartree unit
-        #output_data['SCF ENERGY'] = scf_energy
-        
-        #if input_model.driver == 'energy':
-        #    output_data['return_result'] = scf_energy
-        #if input_model.driver == 'gradient':
-        #    output_data['return_result'] = data.grads
-        #    #output_data['return_gradient'] = data.grads
-        
         #print (os.system('ccget --list ' + tmp_output_file)) #data available in the output for parsing
 
         #if input_model.driver == 'energy':
@@ -265,11 +259,11 @@ def parse_output(self, outfiles: Dict[str, str], input_model: 'AtomicInput') ->
         }
 
 
-        if input_model.model.method.lower() in ['hf', 'scf']:
-            scf_energy = data.scfenergies[0] / constants.conversion_factor("hartree", "eV")
-            properties['scf_total_energy'] = scf_energy
-            properties['return_energy'] = scf_energy
-            output_data['return_result'] = scf_energy
+        #if input_model.model.method.lower() in ['hf', 'scf']:
+        #    scf_energy = data.scfenergies[0] / constants.conversion_factor("hartree", "eV")
+        #    properties['scf_total_energy'] = scf_energy
+        #    properties['return_energy'] = scf_energy
+        #    output_data['return_result'] = scf_energy
         
         if input_model.model.method.lower().startswith('mp'):
             scf_energy = data.scfenergies[0] / constants.conversion_factor("hartree", "eV") # Change from the eV unit to the Hartree unit
@@ -290,11 +284,12 @@ def parse_output(self, outfiles: Dict[str, str], input_model: 'AtomicInput') ->
         properties['calcinfo_nbasis'] = data.nbasis
         properties['calcinfo_nmo'] = data.nmo
         properties['calcinfo_natom'] = data.natom
+        properties['return_energy'] = scf_energy
         
         output_data['properties'] = properties
         #output_data['stdout'] = outfiles['outfiles']['output.log']
         #output_data['success'] = True
-        ##print ('output_data: ', output_data)
+        #print ('output_data: ', output_data)
 
         #stdout = outfiles.pop('stdout')
         #stderr = outfiles.pop('stderr')

From f9819c52e4ea64ee7b9a1e6a733d0e511cb62cce Mon Sep 17 00:00:00 2001
From: Reza Hemmati <50717617+QuChem@users.noreply.github.com>
Date: Mon, 24 Mar 2025 20:15:50 -0500
Subject: [PATCH 31/36] Create gaussian_opt.py

---
 .../procedures/gaussian_opt/gaussian_opt.py   | 112 ++++++++++++++++++
 1 file changed, 112 insertions(+)
 create mode 100644 qcengine/procedures/gaussian_opt/gaussian_opt.py

diff --git a/qcengine/procedures/gaussian_opt/gaussian_opt.py b/qcengine/procedures/gaussian_opt/gaussian_opt.py
new file mode 100644
index 000000000..248c80bec
--- /dev/null
+++ b/qcengine/procedures/gaussian_opt/gaussian_opt.py
@@ -0,0 +1,112 @@
+import re, os
+
+import cclib
+
+from typing import Union, Dict, Any, List, Tuple
+
+from qcelemental.models import (OptimizationInput, OptimizationResult,
+                                AtomicInput, AtomicResult,
+                                Molecule, Provenance)
+
+from qcengine.exceptions import UnknownError, InputError
+from qcengine.procedures.model import ProcedureHarness
+from qcengine.config import TaskConfig
+from qcengine.programs.gaussian import GaussianHarness
+from qcengine.programs.util import PreservingDict, error_stamp
+
+class GaussianDriverProcedure(ProcedureHarness):
+    '''
+    This makes a structure for geometry optimization in Gaussian.
+    '''
+    
+    _defaults = {'name': 'GaussianDriver', 'procedure': 'optimization'}
+    
+    class Config(ProcedureHarness.Config):
+        pass
+    
+    def found(self, raise_error: bool = False) -> bool:
+        ga_harness = GaussianHarness()
+        return ga_harness.found(raise_error)
+        
+    def get_version(self) -> str:
+        ga_harness = GaussianHarness()
+        return ga_harness.get_version()
+    
+    def build_input_model(self, input_data: Union[Dict[str, Any], 'OptimizationInput']) -> OptimizationInput:
+        return self._build_model(input_data, OptimizationInput)
+    
+    
+    def compute(self, input_data: OptimizationInput, config: TaskConfig) -> 'BaseModel':
+        ga_harness = GaussianHarness()
+        self.found(raise_error = True)
+        
+        keywords = input_data.keywords.copy()
+        keywords.update(input_data.input_specification.keywords)
+        
+        if keywords.get('program', 'gaussian').lower() != 'gaussian' :
+            raise InputError('GaussianDriver procedure only works with Gaussian software package.')
+        
+        input_data.input_specification.extras['is_driver'] = True
+        
+        # Make an atomic input
+        atomic_input = AtomicInput(
+            molecule = input_data.initial_molecule,
+            driver = 'energy',
+            keywords = keywords,
+            **input_data.input_specification.dict(exclude = {'driver', 'keywords'}),
+        )
+        
+        # Build an input
+        job_inputs = ga_harness.build_input(atomic_input, config, 'opt')
+        stdin = job_inputs['infiles']['input.inp']
+        #print ('STDIN:', stdin)
+                
+        # Run the input
+        success, _exe = ga_harness.execute(job_inputs)
+        
+        # Determine whether the calculation succeeded
+        if success:
+        	# If execution succeeded, collect results
+            return self.parse_opt_geom_output(_exe, atomic_input) 
+        else:
+            _exe['outfiles']['stderr'] = _exe['outfiles']['output.log']
+            outfile = _exe['outfiles']['output.log']
+            
+            if 'Error termination via ' in outfile:
+                raise InputError(error_stamp(stdin, outfile))
+         
+            else:
+                # Return UnknownError for error propagation
+                raise UnknownError(error_stamp(stdin, outfile))
+       
+    def parse_opt_geom_output(self, outfiles: Dict[str, str], input_model: 'OptimizationInput') -> 'OptimizationResult':
+
+        ga_harness = GaussianHarness()
+        
+        tmp_output_path = outfiles['scratch_directory']
+        #print ('tmp_output_path=',tmp_output_path)
+        tmp_output_file = os.path.join(tmp_output_path, 'output.log')
+        #print ('tmp_output_file=',tmp_output_file)
+        data = cclib.io.ccread(tmp_output_file)
+        #data = parser.parse()
+        print ('There are %i atoms and %i MO' %(data.natom, data.nmo))
+        print ('\nOptimized geometries at each step:\n')
+        print (data.atomcoords)
+       
+       # Get the stdout from the calculation (required)
+        stdout = outfiles.pop("stdout")
+        stderr = outfiles.pop("stderr")
+
+        return OptimizationResult(
+            initial_molecule = input_model.initial_molecule,
+            input_specification = input_model.input_specification,
+            final_molecule = '', #final_step.molecule,
+            trajectory = atomic_results,
+            energies = [float(r.extras["qcvars"]["CURRENT ENERGY"]) for r in atomic_results],
+            stdout = stdout,
+            stderr = stderr,
+            success = True,
+            #provenance = Provenance(creator= '', version = self.get_version(), routine = 'gaussian_opt'),
+        )
+    
+ 

From 9006ffdc96c6787027037ea93bb02cd9aec1b209 Mon Sep 17 00:00:00 2001
From: Reza Hemmati <50717617+QuChem@users.noreply.github.com>
Date: Mon, 24 Mar 2025 20:16:42 -0500
Subject: [PATCH 32/36] Create __init__.py

---
 qcengine/procedures/gaussian_opt/__init__.py | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 qcengine/procedures/gaussian_opt/__init__.py

diff --git a/qcengine/procedures/gaussian_opt/__init__.py b/qcengine/procedures/gaussian_opt/__init__.py
new file mode 100644
index 000000000..138a3379a
--- /dev/null
+++ b/qcengine/procedures/gaussian_opt/__init__.py
@@ -0,0 +1 @@
+from .gaussian_opt import GaussianDriverProcedure

From 60a4270eef06d97bd18be92fff898e35dbada907 Mon Sep 17 00:00:00 2001
From: Reza Hemmati <50717617+QuChem@users.noreply.github.com>
Date: Mon, 24 Mar 2025 22:03:20 -0500
Subject: [PATCH 33/36] Added GaussianDriverProcedure

---
 qcengine/procedures/base.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/qcengine/procedures/base.py b/qcengine/procedures/base.py
index 2e9979d4a..ee2985a53 100644
--- a/qcengine/procedures/base.py
+++ b/qcengine/procedures/base.py
@@ -11,6 +11,7 @@
 from .optking import OptKingProcedure
 from .torsiondrive import TorsionDriveProcedure
 from .model import ProcedureHarness
+from .gaussian_opt import GaussianDriverProcedure
 
 __all__ = ["register_procedure", "get_procedure", "list_all_procedures", "list_available_procedures"]
 
@@ -71,3 +72,4 @@ def list_available_procedures() -> Set[str]:
 register_procedure(BernyProcedure())
 register_procedure(NWChemDriverProcedure())
 register_procedure(TorsionDriveProcedure())
+register_procedure(GaussianDriverProcedure())

From e7829be3c63b20e672e5522a4dd63bf146e1fe3b Mon Sep 17 00:00:00 2001
From: Reza Hemmati <50717617+QuChem@users.noreply.github.com>
Date: Wed, 26 Mar 2025 12:44:39 -0500
Subject: [PATCH 34/36] Update gaussian.py

---
 qcengine/programs/gaussian.py | 40 ++++++++++++++++-------------------
 1 file changed, 18 insertions(+), 22 deletions(-)

diff --git a/qcengine/programs/gaussian.py b/qcengine/programs/gaussian.py
index e121662f9..52df777b6 100644
--- a/qcengine/programs/gaussian.py
+++ b/qcengine/programs/gaussian.py
@@ -99,9 +99,8 @@ def get_version(self) -> str:
 
     def compute(self, input_model: 'AtomicInput', config: 'TaskConfig') -> Union[AtomicResult, FailedOperation]:
         """
-        Run Gaussian program
+        Runs Gaussian program and returns the output file if everything goes well. Otherwise, it raises an error.
         """
-        
         # Check if Gaussian executable is found
         self.found(raise_error=True)
         
@@ -118,6 +117,7 @@ def compute(self, input_model: 'AtomicInput', config: 'TaskConfig') -> Union[Ato
 
         # Determine whether the calculation succeeded
         if success:
+            _exe['outfiles']['input'] = stdin
             # If execution succeeded, collect results
             return self.parse_output(_exe, input_model)
         else:
@@ -134,16 +134,18 @@ def compute(self, input_model: 'AtomicInput', config: 'TaskConfig') -> Union[Ato
     def build_input(
             self, input_model: AtomicInput,
             config: TaskConfig,
+            keys: Optional[str] = None,
             template: Optional[str] = None) -> Dict[str, Any]:
 
         if template is None:
             input_file = []
             gaussian_kw = []
-            
+
+            # Build keywords
             caseless_keywords = {k.lower(): v for k, v in input_model.keywords.items()}
 
             if input_model.driver == "energy":
-                gaussian_kw.append("sp")
+                gaussian_kw.append(" ")
             elif input_model.driver == "gradient":
                 gaussian_kw.append("force")
             elif input_model.driver == "hessian":
@@ -174,18 +176,26 @@ def build_input(
             molcmd, moldata = input_model.molecule.to_string(dtype = 'gaussian', units = 'Angstrom', return_data = True)
             input_file.append(molcmd.lstrip())
 
-            #        print ('*' * 100)
-            #        print ('\n'.join(input_file))
-            #        print ('*' * 100)
+            #print ('*' * 100)
+            #print ('\n'.join(input_file))
+            #print ('*' * 100)
+                
         else:
             str_template = string.Template(template)
             input_file = str_template.substitute()
 
+        if keys is not None:
+            for i in range(len(input_file)):
+                if (input_file[i].startswith('#P ')):
+                    input_file[i] = '#P {}/{}'.format(input_model.model.method, input_model.model.basis) + \
+                          ' ' + ' '.join(gaussian_kw) + ' ' + keys + '\n'
+        
         gaussian_ret = {
             'commands': [which("g09"),  'input.inp', 'output.log'],
             'infiles': {'input.inp': '\n'.join(input_file)},
             'scratch_directory': config.scratch_directory,
-            'scratch_messy': config.scratch_messy
+            'scratch_messy': config.scratch_messy,
+            #"input_result": input_model.copy(deep=True),
         }
         
         return gaussian_ret
@@ -323,17 +333,3 @@ def parse_output(self, outfiles: Dict[str, str], input_model: 'AtomicInput') ->
         merged_data = {**input_model.dict(), **output_data}
 
         return AtomicResult(**merged_data)
-
-class GaussianDriverProcedure(ProcedureHarness):
-    '''
-    Optimize a geometry
-    '''
-
-    _defaults = {'name': 'GaussianDriver', 'procedure': 'optimization'}
-
-    class Config(ProcedureHarness.Config):
-        pass
-
-    def found(self, raise_error: bool = False) -> bool:
-        gaussian_harness = GaussianHarness()
-        return gaussian_harness(raise_error)

From 6af8c3ceb158c82cea4a9ec59cd0383ce9e9eff1 Mon Sep 17 00:00:00 2001
From: Reza Hemmati <50717617+QuChem@users.noreply.github.com>
Date: Tue, 1 Apr 2025 10:05:39 -0500
Subject: [PATCH 35/36] Added some comments to methods

---
 qcengine/programs/gaussian.py | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/qcengine/programs/gaussian.py b/qcengine/programs/gaussian.py
index 52df777b6..f727f1308 100644
--- a/qcengine/programs/gaussian.py
+++ b/qcengine/programs/gaussian.py
@@ -219,6 +219,9 @@ def execute(self,
         return success, dexe
  
     def parse_output(self, outfiles: Dict[str, str], input_model: 'AtomicInput') -> 'AtomicResult':
+        '''
+        Reads the output file and extracts the information from it.
+        '''
 
         tmp_output_path = outfiles['scratch_directory']
         #print ('tmp_output_path=',tmp_output_path)
@@ -228,31 +231,30 @@ def parse_output(self, outfiles: Dict[str, str], input_model: 'AtomicInput') ->
         #print ('DATA=', data)
         scf_energy = data.scfenergies[0] / constants.conversion_factor("hartree", "eV") # Change from the eV unit to the Hartree unit
         #print ('SCF ENERGY: ', scf_energy)
-        grad = data.grads
-        grad_list = grad.ravel().tolist() # Stores gradient as a single list where the ordering is [a1, b1, c1, a2, b2, c2, ...]
-        
+
         output_data = {
         	"schema_version": 1,
         	"molecule": input_model.molecule,
-        	'extras': input_model.extras,
+        	'extras': {**input_model.extras},
             "native_files": {k: v for k, v in outfiles.items() if v is not None},
             'properties': '',
-            'provenance': Provenance(creator="gaussian", version=self.get_version(), routine='g09').dict(),
-            'return_result': scf_energy,
+            'provenance': Provenance(creator="gaussian", version=self.get_version(), routine=self.get_version()).dict(),
             "stderr": outfiles['outfiles']['output.log'],,
             "stdout": outfiles['outfiles']['output.log'],
-            "success": True
+            "success": True,
         		}
 
         if input_model.driver == 'energy':
             output_data['return_result'] = scf_energy
         if input_model.driver == 'gradient':
+            grad = data.grads
+            grad_list = grad.ravel().tolist() # Stores gradient as a single list where the ordering is [a1, b1, c1, a2, b2, c2, ...]
             output_data['return_result'] = grad_list
         
         #tmp_output_path = outfiles['scratch_directory']
         #tmp_output_file = os.path.join(tmp_output_path, 'output.log')
         #data = cclib.io.ccread(tmp_output_file)
-        #cclib_vars = data.getattributes(True)
+        cclib_vars = data.getattributes(True)
         
         #last_occupied_energy = data.moenergies[0][data.homos[0]]
         #output_data['HOMO ENERGY'] = last_occupied_energy
@@ -265,10 +267,9 @@ def parse_output(self, outfiles: Dict[str, str], input_model: 'AtomicInput') ->
         #print (input_model)
 
         properties = {
-            'nuclear_repulsion_energy': Nuclear(data).repulsion_energy()
+            'nuclear_repulsion_energy': Nuclear(data).repulsion_energy() / constants.conversion_factor("hartree", "eV")
         }
 
-
         #if input_model.model.method.lower() in ['hf', 'scf']:
         #    scf_energy = data.scfenergies[0] / constants.conversion_factor("hartree", "eV")
         #    properties['scf_total_energy'] = scf_energy
@@ -294,7 +295,7 @@ def parse_output(self, outfiles: Dict[str, str], input_model: 'AtomicInput') ->
         properties['calcinfo_nbasis'] = data.nbasis
         properties['calcinfo_nmo'] = data.nmo
         properties['calcinfo_natom'] = data.natom
-        properties['return_energy'] = scf_energy
+        #properties['return_energy'] = scf_energy
         
         output_data['properties'] = properties
         #output_data['stdout'] = outfiles['outfiles']['output.log']

From 4a7dca6b170e19ca41d190ff440fd268f4ebb11b Mon Sep 17 00:00:00 2001
From: Benjamin Pritchard <ben@bennyp.org>
Date: Thu, 1 May 2025 11:45:57 -0400
Subject: [PATCH 36/36] Cleanup & add additional_route

---
 qcengine/programs/gaussian.py | 37 +++++++++++++++--------------------
 1 file changed, 16 insertions(+), 21 deletions(-)

diff --git a/qcengine/programs/gaussian.py b/qcengine/programs/gaussian.py
index f727f1308..5e7ff0e93 100644
--- a/qcengine/programs/gaussian.py
+++ b/qcengine/programs/gaussian.py
@@ -3,31 +3,22 @@
 """
 
 import os
-import re
-import tempfile
-import warnings
-import pprint
-import copy
-
-from collections import defaultdict
-
+import string
 from typing import Any, Dict, List, Optional, Tuple, Union
 
 import cclib
 from cclib.method import Nuclear
-
 from qcelemental import constants
-from qcelemental.models import AtomicInput, AtomicResult, Molecule, Provenance
-from qcelemental.models import OptimizationInput, OptimizationResult, BasisSet
-from qcelemental.util import parse_version, safe_version, which, which_import
+from qcelemental.models import AtomicInput, AtomicResult, Provenance, FailedOperation
+from qcelemental.models import BasisSet
+from qcelemental.util import safe_version, which, which_import
 
-from ..exceptions import InputError, UnknownError
-from ..util import disk_files, execute, temporary_directory
-from .util import error_stamp
+from qcengine.config import TaskConfig, get_config
 from .model import ProgramHarness
+from .util import error_stamp
+from ..exceptions import InputError, UnknownError
+from ..util import execute
 
-from qcengine.config import TaskConfig, get_config
-from qcengine.procedures.model import ProcedureHarness
 
 class GaussianHarness(ProgramHarness):
 
@@ -157,6 +148,10 @@ def build_input(
                 gaussian_kw.append('scf=' + caseless_keywords["scf_convergence"])
             if 'population' in caseless_keywords:
                 gaussian_kw.append('pop=' + caseless_keywords['population'])
+            if 'extra_route' in caseless_keywords:
+                extra_route = caseless_keywords['extra_route']
+            else:
+                extra_route = ''
 
             keywords = {'scf_damp': 'true',
                         'scf_diis': 'false'}
@@ -169,8 +164,8 @@ def build_input(
 
             # Begin input file
             input_file.append('%mem={}MB'.format(int(config.memory * 1024))) # In MB
-            input_file.append('#P {}/{}'.format(input_model.model.method, input_model.model.basis) + ' ' + ' '.join(gaussian_kw) + '\n')
-            iput_file.append('write your comment here\n')
+            input_file.append('#P {}/{}'.format(input_model.model.method, input_model.model.basis) + ' ' + ' '.join(gaussian_kw) + " " + extra_route + '\n')
+            input_file.append('write your comment here\n')
 
             # Handle the geometry
             molcmd, moldata = input_model.molecule.to_string(dtype = 'gaussian', units = 'Angstrom', return_data = True)
@@ -206,7 +201,7 @@ def execute(self,
                 extra_commands: Optional[List[str]] = None,
                 scratch_name: Optional[str] = None,
                 timeout: Optional[int] = None
-               ) -> Tuple[boll, Dict[str, Any]]:
+               ) -> Tuple[bool, Dict[str, Any]]:
 
         success, dexe = execute(
             inputs['commands'],
@@ -239,7 +234,7 @@ def parse_output(self, outfiles: Dict[str, str], input_model: 'AtomicInput') ->
             "native_files": {k: v for k, v in outfiles.items() if v is not None},
             'properties': '',
             'provenance': Provenance(creator="gaussian", version=self.get_version(), routine=self.get_version()).dict(),
-            "stderr": outfiles['outfiles']['output.log'],,
+            "stderr": outfiles['outfiles']['output.log'],
             "stdout": outfiles['outfiles']['output.log'],
             "success": True,
         		}