diff --git a/qcengine/procedures/base.py b/qcengine/procedures/base.py index 2e9979d4a..ee2985a53 100644 --- a/qcengine/procedures/base.py +++ b/qcengine/procedures/base.py @@ -11,6 +11,7 @@ from .optking import OptKingProcedure from .torsiondrive import TorsionDriveProcedure from .model import ProcedureHarness +from .gaussian_opt import GaussianDriverProcedure __all__ = ["register_procedure", "get_procedure", "list_all_procedures", "list_available_procedures"] @@ -71,3 +72,4 @@ def list_available_procedures() -> Set[str]: register_procedure(BernyProcedure()) register_procedure(NWChemDriverProcedure()) register_procedure(TorsionDriveProcedure()) +register_procedure(GaussianDriverProcedure()) diff --git a/qcengine/procedures/gaussian_opt/__init__.py b/qcengine/procedures/gaussian_opt/__init__.py new file mode 100644 index 000000000..138a3379a --- /dev/null +++ b/qcengine/procedures/gaussian_opt/__init__.py @@ -0,0 +1 @@ +from .gaussian_opt import GaussianDriverProcedure diff --git a/qcengine/procedures/gaussian_opt/gaussian_opt.py b/qcengine/procedures/gaussian_opt/gaussian_opt.py new file mode 100644 index 000000000..248c80bec --- /dev/null +++ b/qcengine/procedures/gaussian_opt/gaussian_opt.py @@ -0,0 +1,112 @@ +import re, os + +import cclib + +from typing import Union, Dict, Any, List, Tuple + +from qcelemental.models import (OptimizationInput, OptimizationResult, + AtomicInput, AtomicResult, + Molecule, Provenance) + +from qcengine.exceptions import UnknownError, InputError +from qcengine.procedures.model import ProcedureHarness +from qcengine.config import TaskConfig +from qcengine.programs.gaussian import GaussianHarness +from qcengine.programs.util import PreservingDict, error_stamp + +class GaussianDriverProcedure(ProcedureHarness): + ''' + This makes a structure for geometry optimization in Gaussian. + ''' + + _defaults = {'name': 'GaussianDriver', 'procedure': 'optimization'} + + class Config(ProcedureHarness.Config): + pass + + def found(self, raise_error: bool = False) -> bool: + ga_harness = GaussianHarness() + return ga_harness.found(raise_error) + + def get_version(self) -> str: + ga_harness = GaussianHarness() + return ga_harness.get_version() + + def build_input_model(self, input_data: Union[Dict[str, Any], 'OptimizationInput']) -> OptimizationInput: + return self._build_model(input_data, OptimizationInput) + + + def compute(self, input_data: OptimizationInput, config: TaskConfig) -> 'BaseModel': + ga_harness = GaussianHarness() + self.found(raise_error = True) + + keywords = input_data.keywords.copy() + keywords.update(input_data.input_specification.keywords) + + if keywords.get('program', 'gaussian').lower() != 'gaussian' : + raise InputError('GaussianDriver procedure only works with Gaussian software package.') + + input_data.input_specification.extras['is_driver'] = True + + # Make an atomic input + atomic_input = AtomicInput( + molecule = input_data.initial_molecule, + driver = 'energy', + keywords = keywords, + **input_data.input_specification.dict(exclude = {'driver', 'keywords'}), + ) + + # Build an input + job_inputs = ga_harness.build_input(atomic_input, config, 'opt') + stdin = job_inputs['infiles']['input.inp'] + #print ('STDIN:', stdin) + + # Run the input + success, _exe = ga_harness.execute(job_inputs) + + # Determine whether the calculation succeeded + if success: + # If execution succeeded, collect results + return self.parse_opt_geom_output(_exe, atomic_input) + else: + _exe['outfiles']['stderr'] = _exe['outfiles']['output.log'] + outfile = _exe['outfiles']['output.log'] + + if 'Error termination via ' in outfile: + raise InputError(error_stamp(stdin, outfile)) + + else: + # Return UnknownError for error propagation + raise UnknownError(error_stamp(stdin, outfile)) + + def parse_opt_geom_output(self, outfiles: Dict[str, str], input_model: 'OptimizationInput') -> 'OptimizationResult': + + ga_harness = GaussianHarness() + + tmp_output_path = outfiles['scratch_directory'] + #print ('tmp_output_path=',tmp_output_path) + tmp_output_file = os.path.join(tmp_output_path, 'output.log') + #print ('tmp_output_file=',tmp_output_file) + data = cclib.io.ccread(tmp_output_file) + #data = parser.parse() + print ('There are %i atoms and %i MO' %(data.natom, data.nmo)) + print ('\nOptimized geometries at each step:\n') + print (data.atomcoords) + + # Get the stdout from the calculation (required) + stdout = outfiles.pop("stdout") + stderr = outfiles.pop("stderr") + + return OptimizationResult( + initial_molecule = input_model.initial_molecule, + input_specification = input_model.input_specification, + final_molecule = '', #final_step.molecule, + trajectory = atomic_results, + energies = [float(r.extras["qcvars"]["CURRENT ENERGY"]) for r in atomic_results], + stdout = stdout, + stderr = stderr, + success = True, + #provenance = Provenance(creator= '', version = self.get_version(), routine = 'gaussian_opt'), + ) + + diff --git a/qcengine/programs/base.py b/qcengine/programs/base.py index 8d9cfc9ff..ebc3f4237 100644 --- a/qcengine/programs/base.py +++ b/qcengine/programs/base.py @@ -28,6 +28,7 @@ from .torchani import TorchANIHarness from .turbomole import TurbomoleHarness from .xtb import XTBHarness +from .gaussian import GaussianHarness __all__ = ["register_program", "get_program", "list_all_programs", "list_available_programs"] @@ -118,6 +119,7 @@ def list_available_programs() -> Set[str]: register_program(TurbomoleHarness()) register_program(TeraChemFrontEndHarness()) register_program(TeraChemPBSHarness()) +register_program(GaussianHarness()) # Semi-empirical register_program(MopacHarness()) diff --git a/qcengine/programs/gaussian.py b/qcengine/programs/gaussian.py new file mode 100644 index 000000000..5e7ff0e93 --- /dev/null +++ b/qcengine/programs/gaussian.py @@ -0,0 +1,331 @@ +""" +Calls the GAUSSIAN executable +""" + +import os +import string +from typing import Any, Dict, List, Optional, Tuple, Union + +import cclib +from cclib.method import Nuclear +from qcelemental import constants +from qcelemental.models import AtomicInput, AtomicResult, Provenance, FailedOperation +from qcelemental.models import BasisSet +from qcelemental.util import safe_version, which, which_import + +from qcengine.config import TaskConfig, get_config +from .model import ProgramHarness +from .util import error_stamp +from ..exceptions import InputError, UnknownError +from ..util import execute + + +class GaussianHarness(ProgramHarness): + + _defaults = { + "name": "gaussian", + "scratch": True, + "thread_safe": False, + "thread_parallel": True, + "node_parallel": False, + "managed_memory": True, + } + version_cache: Dict[str, str] = {} + + class Config(ProgramHarness.Config): + pass + + def found(self, raise_error: bool = False) -> bool: + ''' + It checks to see if GaussianHarness is ready for operation + + Parameters + ---------- + raise_error: bool + Passed on to control negative return between False and ModuleNotFoundError raised. + + Returns + ------- + bool + If both gaussian and its dependency cclib are found, returns True + If gaussian or cclib are missing, returns False. + If raise_error is True and gaussian or cclib are missing, the error message for the missing one is raised. + ''' + qc = which( + "g16", + return_bool = True, + raise_error = False, + raise_msg = "Please install Gaussian. Check it's in your PATH with `which g16`." + ) + + if not qc: + qc = which( + "g09", + return_bool = True, + raise_error = raise_error, + raise_msg = "Please install Gaussian. Check it's in your PATH with `which g09` or `which g16`." + ) + + dep = which_import( + "cclib", + return_bool = True, + raise_error = raise_error, + raise_msg = "For gaussian harness, please install cclib by typing in `conda install -c conda-forge cclib`." + ) + + return qc & dep + + def get_version(self) -> str: + self.found(raise_error=True) + + config = get_config() + + which_prog = which("g16") + if which_prog is None: + which_prog = which('g09') + + self.version_cache[which_prog] = safe_version(which_prog.split('/')[-1]) + + return self.version_cache[which_prog] + + def compute(self, input_model: 'AtomicInput', config: 'TaskConfig') -> Union[AtomicResult, FailedOperation]: + """ + Runs Gaussian program and returns the output file if everything goes well. Otherwise, it raises an error. + """ + # Check if Gaussian executable is found + self.found(raise_error=True) + + if isinstance(input_model.model.basis, BasisSet): + raise InputError("QCSchema BasisSet for model.basis not implemented. Use string basis name.") + + # Setup the job + job_inputs = self.build_input(input_model, config) + + stdin = job_inputs['infiles']['input.inp'] + + # Run Gaussian + success, _exe = self.execute(job_inputs) + + # Determine whether the calculation succeeded + if success: + _exe['outfiles']['input'] = stdin + # If execution succeeded, collect results + return self.parse_output(_exe, input_model) + else: + _exe['outfiles']['stderr'] = _exe['outfiles']['output.log'] + outfile = _exe['outfiles']['output.log'] + + if 'Error termination via ' in outfile: + raise InputError(error_stamp(stdin, outfile)) + + else: + # Return UnknownError for error propagation + raise UnknownError(error_stamp(stdin, outfile)) + + def build_input( + self, input_model: AtomicInput, + config: TaskConfig, + keys: Optional[str] = None, + template: Optional[str] = None) -> Dict[str, Any]: + + if template is None: + input_file = [] + gaussian_kw = [] + + # Build keywords + caseless_keywords = {k.lower(): v for k, v in input_model.keywords.items()} + + if input_model.driver == "energy": + gaussian_kw.append(" ") + elif input_model.driver == "gradient": + gaussian_kw.append("force") + elif input_model.driver == "hessian": + gaussian_kw.append("freq") + else: + raise InputError(f"Driver {input_model.driver} not implemented for Gaussian.") + + if 'scf_convergence' in caseless_keywords: + gaussian_kw.append('scf=' + caseless_keywords["scf_convergence"]) + if 'population' in caseless_keywords: + gaussian_kw.append('pop=' + caseless_keywords['population']) + if 'extra_route' in caseless_keywords: + extra_route = caseless_keywords['extra_route'] + else: + extra_route = '' + + keywords = {'scf_damp': 'true', + 'scf_diis': 'false'} + + save_fchk = False + + if input_model.protocols.native_files == 'all': + save_fchk = True + gaussian_kw.append('formcheck') + + # Begin input file + input_file.append('%mem={}MB'.format(int(config.memory * 1024))) # In MB + input_file.append('#P {}/{}'.format(input_model.model.method, input_model.model.basis) + ' ' + ' '.join(gaussian_kw) + " " + extra_route + '\n') + input_file.append('write your comment here\n') + + # Handle the geometry + molcmd, moldata = input_model.molecule.to_string(dtype = 'gaussian', units = 'Angstrom', return_data = True) + input_file.append(molcmd.lstrip()) + + #print ('*' * 100) + #print ('\n'.join(input_file)) + #print ('*' * 100) + + else: + str_template = string.Template(template) + input_file = str_template.substitute() + + if keys is not None: + for i in range(len(input_file)): + if (input_file[i].startswith('#P ')): + input_file[i] = '#P {}/{}'.format(input_model.model.method, input_model.model.basis) + \ + ' ' + ' '.join(gaussian_kw) + ' ' + keys + '\n' + + gaussian_ret = { + 'commands': [which("g09"), 'input.inp', 'output.log'], + 'infiles': {'input.inp': '\n'.join(input_file)}, + 'scratch_directory': config.scratch_directory, + 'scratch_messy': config.scratch_messy, + #"input_result": input_model.copy(deep=True), + } + + return gaussian_ret + + def execute(self, + inputs: Dict[str, Any], + extra_outfiles: Optional[List[str]] = None, + extra_commands: Optional[List[str]] = None, + scratch_name: Optional[str] = None, + timeout: Optional[int] = None + ) -> Tuple[bool, Dict[str, Any]]: + + success, dexe = execute( + inputs['commands'], + inputs['infiles'], + outfiles = ['output.log'], + scratch_directory = inputs['scratch_directory'], + scratch_messy = True #inputs['scratch_messy'] + ) + + return success, dexe + + def parse_output(self, outfiles: Dict[str, str], input_model: 'AtomicInput') -> 'AtomicResult': + ''' + Reads the output file and extracts the information from it. + ''' + + tmp_output_path = outfiles['scratch_directory'] + #print ('tmp_output_path=',tmp_output_path) + tmp_output_file = os.path.join(tmp_output_path, 'output.log') + #print ('tmp_output_file=',tmp_output_file) + data = cclib.io.ccread(tmp_output_file) + #print ('DATA=', data) + scf_energy = data.scfenergies[0] / constants.conversion_factor("hartree", "eV") # Change from the eV unit to the Hartree unit + #print ('SCF ENERGY: ', scf_energy) + + output_data = { + "schema_version": 1, + "molecule": input_model.molecule, + 'extras': {**input_model.extras}, + "native_files": {k: v for k, v in outfiles.items() if v is not None}, + 'properties': '', + 'provenance': Provenance(creator="gaussian", version=self.get_version(), routine=self.get_version()).dict(), + "stderr": outfiles['outfiles']['output.log'], + "stdout": outfiles['outfiles']['output.log'], + "success": True, + } + + if input_model.driver == 'energy': + output_data['return_result'] = scf_energy + if input_model.driver == 'gradient': + grad = data.grads + grad_list = grad.ravel().tolist() # Stores gradient as a single list where the ordering is [a1, b1, c1, a2, b2, c2, ...] + output_data['return_result'] = grad_list + + #tmp_output_path = outfiles['scratch_directory'] + #tmp_output_file = os.path.join(tmp_output_path, 'output.log') + #data = cclib.io.ccread(tmp_output_file) + cclib_vars = data.getattributes(True) + + #last_occupied_energy = data.moenergies[0][data.homos[0]] + #output_data['HOMO ENERGY'] = last_occupied_energy + + #print (os.system('ccget --list ' + tmp_output_file)) #data available in the output for parsing + + #if input_model.driver == 'energy': + # print (cclib.__version__) + # print (output_data) + #print (input_model) + + properties = { + 'nuclear_repulsion_energy': Nuclear(data).repulsion_energy() / constants.conversion_factor("hartree", "eV") + } + + #if input_model.model.method.lower() in ['hf', 'scf']: + # scf_energy = data.scfenergies[0] / constants.conversion_factor("hartree", "eV") + # properties['scf_total_energy'] = scf_energy + # properties['return_energy'] = scf_energy + # output_data['return_result'] = scf_energy + + if input_model.model.method.lower().startswith('mp'): + scf_energy = data.scfenergies[0] / constants.conversion_factor("hartree", "eV") # Change from the eV unit to the Hartree unit + mp2_energy = data.mpenergies[0] / constants.conversion_factor("hartree", "eV") + properties['scf_total_energy'] = scf_energy + properties['mp2_total_energy'] = mp2_energy[0] + properties['return_energy'] = mp2_energy[0] + output_data['return_result'] = mp2_energy[0] + + if input_model.model.method.lower().startswith('cc'): + scf_energy = data.scfenergies[0] / constants.conversion_factor("hartree", "eV") + cc_energy = data.ccenergies[0] / constants.conversion_factor("hartree", "eV") + properties['scf_total_energy'] = scf_energy + properties['ccsd_prt_pr_total_energy'] = cc_energy + properties['return_energy'] = cc_energy + output_data['return_result'] = cc_energy + + properties['calcinfo_nbasis'] = data.nbasis + properties['calcinfo_nmo'] = data.nmo + properties['calcinfo_natom'] = data.natom + #properties['return_energy'] = scf_energy + + output_data['properties'] = properties + #output_data['stdout'] = outfiles['outfiles']['output.log'] + #output_data['success'] = True + #print ('output_data: ', output_data) + + #stdout = outfiles.pop('stdout') + #stderr = outfiles.pop('stderr') + #print("\nPRINT STDOUT: \n", stdout) + + + #method = input_model.model.method.lower() + #method = method[4:] if method.startswith("") else method + + # filter unwanted data + #to_remove = ['atomnos', 'atomcoords', 'natom'] + #output_data['extras'] = {'cclib': {k:v for k, v in cclib_vars.items() if k not in to_remove}} + + # HACK - scf_values can have NaN + # remove for now + #output_data['extras']['cclib'].pop('scfvalues') + + if (input_model.protocols.native_files == 'all'): + output_data['native_files'] = {} + tmp_input_file = os.path.join(tmp_output_path, 'input.inp') + + with open(tmp_input_file, 'rt') as f: + output_data['native_files']['input.inp'] = f.read() + + # formcheck keyword always creates the Test.FChk file + tmp_check_file = os.path.join(tmp_output_path, 'Test.FChk') + + with open(tmp_check_file, 'rt') as f: + output_data['native_files']['gaussian.fchk'] = f.read() + + merged_data = {**input_model.dict(), **output_data} + + return AtomicResult(**merged_data)