ORCA Optimisation Fixes (#812)

calvinp0 · web-flow · commit 6fcd667df203 · 2026-01-04T12:02:50.000+02:00
When ORCA is used for optimisation, we attempt to compute the frequency
scaling factor; however, the parser was broken, which caused a scaling
factor of 0.002 to be returned at wb97x-d3. With this PR the output is now:
```
FREQ: A PROGRAM FOR OPTIMIZING SCALE FACTORS (Version 1)
                 written by                 
Haoyu S. Yu, Lucas J. Fiedler, I.M. Alecu, and Donald G. Truhlar
Department of Chemistry and Supercomputing Institute
University of Minnesota, Minnesota 55455-0431
CITATIONS:
1. I.M., Alecu, J. Zheng, Y. Zhao, D.G. Truhlar, J. Chem. Theory Comput. 2010, 6, 9, 2872-2887,
   DOI: 10.1021/ct100326h
2. H.S. Yu, L.J. Fiedler, I.M. Alecu,, D.G. Truhlar, Computer Physics Communications 2017, 210, 132-138,
   DOI: 10.1016/j.cpc.2016.09.004



Level of theory: wb97xd3/def2tzvp
Scale Factor for Zero-Point Energies     = 0.973
Scale Factor for Harmonic Frequencies    = 0.987
Scale Factor for Fundamental Frequencies = 0.948
(execution time: 00:11:35)



You may copy-paste the computed harmonic frequency scaling factor(s) to ARC (under the `freq_dict` in ARC/data/freq_scale_factors.yml):
  'wb97xd3/def2tzvp': 0.987,  # [4]
```

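Also adjusted the optimisation keywords, as they appear to be out of
date: the DFT grid keywords `Grid5 NoFinalGrid` / `Grid6 NoFinalGrid`
are replaced by `defgrid2` / `defgrid3`.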
diff --git a/arc/job/adapters/orca.py b/arc/job/adapters/orca.py
@@ -32,6 +32,50 @@
 
 logger = get_logger()
 
+ORCA_METHOD_ALIASES = {
+    'wb97xd3': 'wb97x-d3',
+}
+
+
+def _format_orca_method(method: str) -> str:
+    """
+    Convert ARC method names to ORCA-friendly labels when needed.
+    """
+    if not method:
+        return method
+    if method.lower() == 'wb97xd':
+        logger.warning('ORCA does not support wb97xd; use wb97x or wb97x-d3.')
+    return ORCA_METHOD_ALIASES.get(method.lower(), method)
+
+
+def _format_orca_basis_token(token: str) -> str:
+    """
+    Convert def2 basis tokens to ORCA formatting (e.g., def2tzvp -> def2-tzvp).
+    """
+    if not token:
+        return token
+    parts = token.split('/')
+    base = parts[0]
+    if base.lower().startswith('def2'):
+        base_rest = base[4:]
+        if base_rest.startswith('-'):
+            base_rest = base_rest[1:]
+        if base_rest:
+            base = f"def2-{base_rest.lower()}"
+    if len(parts) > 1:
+        parts = [base] + [part.lower() for part in parts[1:]]
+        return '/'.join(parts)
+    return base
+
+
+def _format_orca_basis(basis: str) -> str:
+    """
+    Convert basis strings to ORCA-friendly labels where applicable.
+    """
+    if not basis:
+        return basis
+    return ' '.join(_format_orca_basis_token(token) for token in basis.split())
+
 default_job_settings, global_ess_settings, input_filenames, output_filenames, servers, submit_filenames = \
     settings['default_job_settings'], settings['global_ess_settings'], settings['input_filenames'], \
     settings['output_filenames'], settings['servers'], settings['submit_filenames']
@@ -219,13 +263,13 @@ def write_input_file(self) -> None:
                     'keywords',
                     ]:
             input_dict[key] = ''
-        input_dict['auxiliary_basis'] = self.level.auxiliary_basis or ''
-        input_dict['basis'] = self.level.basis or ''
+        input_dict['auxiliary_basis'] = _format_orca_basis(self.level.auxiliary_basis or '')
+        input_dict['basis'] = _format_orca_basis(self.level.basis or '')
         input_dict['charge'] = self.charge
         input_dict['cpus'] = self.cpu_cores
         input_dict['label'] = self.species_label
         input_dict['memory'] = self.input_file_memory
-        input_dict['method'] = self.level.method
+        input_dict['method'] = _format_orca_method(self.level.method)
         input_dict['multiplicity'] = self.multiplicity
         input_dict['xyz'] = xyz_to_str(self.xyz)
 
@@ -241,9 +285,9 @@ def write_input_file(self) -> None:
             input_dict['method_class'] = 'KS'
             # DFT grid must be the same for both opt and freq
             if self.fine:
-                self.add_to_args(val='Grid6 NoFinalGrid', key1='keyword')
+                self.add_to_args(val='defgrid3', key1='keyword')
             else:
-                self.add_to_args(val='Grid5 NoFinalGrid', key1='keyword')
+                self.add_to_args(val='defgrid2', key1='keyword')
         elif self.level.method_type == 'wavefunction':
             input_dict['method_class'] = 'HF'
             if 'dlpno' in self.level.method:
diff --git a/arc/job/adapters/orca_test.py b/arc/job/adapters/orca_test.py
@@ -12,7 +12,11 @@
 import unittest
 
 from arc.common import ARC_PATH
-from arc.job.adapters.orca import OrcaAdapter
+from arc.job.adapters.orca import (OrcaAdapter,
+                                   _format_orca_basis,
+                                   _format_orca_basis_token,
+                                   _format_orca_method,
+                                   )
 from arc.level import Level
 from arc.settings.settings import input_filenames, output_filenames
 from arc.species import ARCSpecies
@@ -173,6 +177,28 @@ def test_write_input_file_with_CPCM_solvation(self):
 """
         self.assertEqual(content_3, job_3_expected_input_file)
 
+    def test_format_orca_method(self):
+        """Test ORCA method formatting helper."""
+        self.assertEqual(_format_orca_method('wb97xd3'), 'wb97x-d3')
+        self.assertEqual(_format_orca_method('wb97xd'), 'wb97xd')
+        self.assertEqual(_format_orca_method('B3LYP'), 'B3LYP')
+
+    def test_format_orca_basis_token(self):
+        """Test ORCA basis token formatting helper."""
+        self.assertEqual(_format_orca_basis_token('def2tzvp'), 'def2-tzvp')
+        self.assertEqual(_format_orca_basis_token('def2-TZVP'), 'def2-tzvp')
+        self.assertEqual(_format_orca_basis_token('def2tzvp/c'), 'def2-tzvp/c')
+        self.assertEqual(_format_orca_basis_token('def2-TZVP/C'), 'def2-tzvp/c')
+        self.assertEqual(_format_orca_basis_token('cc-pvtz'), 'cc-pvtz')
+
+    def test_format_orca_basis(self):
+        """Test ORCA basis formatting helper."""
+        self.assertEqual(_format_orca_basis('def2tzvp'), 'def2-tzvp')
+        self.assertEqual(_format_orca_basis('def2-TZVP'), 'def2-tzvp')
+        self.assertEqual(_format_orca_basis('def2tzvp/c'), 'def2-tzvp/c')
+        self.assertEqual(_format_orca_basis('def2tzvp def2tzvp/c'),
+                         'def2-tzvp def2-tzvp/c')
+
     def test_set_files(self):
         """Test setting files"""
         job_1_files_to_upload = [{'file_name': 'submit.sub',
diff --git a/arc/parser/adapters/orca.py b/arc/parser/adapters/orca.py
@@ -224,7 +224,11 @@ def parse_zpe_correction(self) -> Optional[float]:
                 if 'Zero point energy' in line:
                     # Example: Zero point energy      ...    0.025410 Eh
                     try:
-                        zpe = float(line.split()[-2])
+                        parts = line.split()
+                        if 'Eh' in parts:
+                            zpe = float(parts[parts.index('Eh') - 1])
+                        else:
+                            zpe = float(parts[-2])
                         break
                     except (ValueError, IndexError):
                         continue