Skip to content

Commit 43d59cf

Browse files
authored
Hydrolysis families and tests (#770)
new hydrolysis families and tests
2 parents 6fcd667 + 38d792b commit 43d59cf

19 files changed

+2316
-324
lines changed

arc/family/arc_families_test.py

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
#!/usr/bin/env python3
2+
# encoding: utf-8
3+
4+
"""
5+
This module contains unit tests for the kinetic families defined under arc.data.families.
6+
"""
7+
8+
import unittest
9+
import os
10+
11+
from arc.family.family import ReactionFamily, get_reaction_family_products, get_recipe_actions
12+
from arc.imports import settings
13+
from arc.reaction.reaction import ARCReaction
14+
from arc.species.species import ARCSpecies
15+
16+
ARC_FAMILIES_PATH = settings['ARC_FAMILIES_PATH']
17+
18+
19+
class TestCarbonylBasedHydrolysisReactionFamily(unittest.TestCase):
    """
    Unit tests covering the carbonyl-based hydrolysis reaction family.
    """

    @classmethod
    def setUpClass(cls):
        """Create the carbonyl-based hydrolysis ReactionFamily once for the whole test case."""
        cls.family = ReactionFamily('carbonyl_based_hydrolysis')

    def test_carbonyl_based_hydrolysis_reaction(self):
        """Check the products generated for the hydrolysis of CC(=O)OC by water."""
        reactants = [
            ARCSpecies(label='carbonyl', smiles='CC(=O)OC'),
            ARCSpecies(label='H2O', smiles='O'),
        ]
        expected = [
            ARCSpecies(label='acid', smiles='CC(=O)O'),
            ARCSpecies(label='alcohol', smiles='CO'),
        ]
        reaction = ARCReaction(r_species=reactants, p_species=expected)
        family_products = get_reaction_family_products(reaction)
        # Only the first entry returned for the reaction is inspected.
        generated_smiles = list()
        for product in family_products[0]['products']:
            generated_smiles.append(product.to_smiles())
        self.assertEqual(generated_smiles, ['CC(=O)O', 'CO'])

    def test_recipe_actions(self):
        """Check the recipe actions parsed from the carbonyl_based_hydrolysis family file."""
        family_file = os.path.join(ARC_FAMILIES_PATH, 'carbonyl_based_hydrolysis.py')
        with open(family_file, 'r') as handle:
            family_lines = handle.readlines()
        parsed_actions = get_recipe_actions(family_lines)
        self.assertEqual(
            parsed_actions,
            [
                ['BREAK_BOND', '*1', 1, '*2'],
                ['BREAK_BOND', '*3', 1, '*4'],
                ['FORM_BOND', '*1', 1, '*4'],
                ['FORM_BOND', '*2', 1, '*3'],
            ],
        )

    def test_carbonyl_based_hydrolysis_withP(self):
        """Check the products generated for a phosphorus analog, CP(=O)(OC)O, reacting with water."""
        reactants = [
            ARCSpecies(label='carbonyl', smiles='CP(=O)(OC)O'),
            ARCSpecies(label='H2O', smiles='O'),
        ]
        expected = [
            ARCSpecies(label='acid', smiles='CP(=O)(O)O'),
            ARCSpecies(label='alcohol', smiles='CO'),
        ]
        reaction = ARCReaction(r_species=reactants, p_species=expected)
        family_products = get_reaction_family_products(reaction)
        # Only the first entry returned for the reaction is inspected.
        generated_smiles = list()
        for product in family_products[0]['products']:
            generated_smiles.append(product.to_smiles())
        self.assertEqual(generated_smiles, ['CP(=O)(O)O', 'CO'])
66+
67+
68+
class TestNitrileHydrolysisReactionFamily(unittest.TestCase):
    """
    Unit tests covering the nitrile hydrolysis reaction family.
    """

    @classmethod
    def setUpClass(cls):
        """Create the nitrile hydrolysis ReactionFamily once for the whole test case."""
        cls.family = ReactionFamily('nitrile_hydrolysis')

    def test_nitrile_hydrolysis_reaction(self):
        """Check the single product generated for the hydrolysis of CC#N by water."""
        reactants = [
            ARCSpecies(label='nitrile', smiles='CC#N'),
            ARCSpecies(label='H2O', smiles='O'),
        ]
        expected = [ARCSpecies(label='acid', smiles='CC(=N)O')]
        reaction = ARCReaction(r_species=reactants, p_species=expected)
        family_products = get_reaction_family_products(reaction)
        # Only the first entry returned for the reaction is inspected.
        generated_smiles = list()
        for product in family_products[0]['products']:
            generated_smiles.append(product.to_smiles())
        self.assertEqual(generated_smiles, ['CC(=N)O'])

    def test_recipe_actions(self):
        """Check the recipe actions parsed from the nitrile_hydrolysis family file."""
        family_file = os.path.join(ARC_FAMILIES_PATH, 'nitrile_hydrolysis.py')
        with open(family_file, 'r') as handle:
            family_lines = handle.readlines()
        parsed_actions = get_recipe_actions(family_lines)
        # Unlike the other hydrolysis recipes in this module, the first action lowers
        # the *1-*2 bond order (CHANGE_BOND by -1) rather than breaking the bond.
        self.assertEqual(
            parsed_actions,
            [
                ['CHANGE_BOND', '*1', -1, '*2'],
                ['BREAK_BOND', '*3', 1, '*4'],
                ['FORM_BOND', '*1', 1, '*4'],
                ['FORM_BOND', '*2', 1, '*3'],
            ],
        )
102+
103+
104+
class TestEtherHydrolysisReactionFamily(unittest.TestCase):
    """
    Unit tests covering the ether hydrolysis reaction family.
    """

    @classmethod
    def setUpClass(cls):
        """Create the ether hydrolysis ReactionFamily once for the whole test case."""
        cls.family = ReactionFamily('ether_hydrolysis')

    def test_ether_hydrolysis_reaction(self):
        """Check the two alcohols generated for the hydrolysis of CCOC by water."""
        reactants = [
            ARCSpecies(label='ether', smiles='CCOC'),
            ARCSpecies(label='H2O', smiles='O'),
        ]
        expected = [
            ARCSpecies(label='alcohol1', smiles='CCO'),
            ARCSpecies(label='alcohol2', smiles='CO'),
        ]
        reaction = ARCReaction(r_species=reactants, p_species=expected)
        family_products = get_reaction_family_products(reaction)
        # Only the first entry returned for the reaction is inspected.
        generated_smiles = list()
        for product in family_products[0]['products']:
            generated_smiles.append(product.to_smiles())
        self.assertEqual(generated_smiles, ['CCO', 'CO'])

    def test_recipe_actions(self):
        """Check the recipe actions parsed from the ether_hydrolysis family file."""
        family_file = os.path.join(ARC_FAMILIES_PATH, 'ether_hydrolysis.py')
        with open(family_file, 'r') as handle:
            family_lines = handle.readlines()
        parsed_actions = get_recipe_actions(family_lines)
        self.assertEqual(
            parsed_actions,
            [
                ['BREAK_BOND', '*1', 1, '*2'],
                ['BREAK_BOND', '*3', 1, '*4'],
                ['FORM_BOND', '*1', 1, '*4'],
                ['FORM_BOND', '*2', 1, '*3'],
            ],
        )
139+
140+
141+
if __name__ == '__main__':
142+
unittest.main()

arc/family/family.py

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -588,11 +588,12 @@ def get_all_families(rmg_family_set: Union[List[str], str] = 'default',
588588
rmg_families.extend(list(families))
589589
else:
590590
rmg_families = list(family_sets[rmg_family_set]) \
591-
if isinstance(rmg_family_set, str) and rmg_family_set in family_sets else rmg_family_set
591+
if isinstance(rmg_family_set, str) and rmg_family_set in family_sets else [rmg_family_set]
592592
if consider_arc_families:
593-
arc_families = [os.path.splitext(family)[0] for family in os.listdir(ARC_FAMILIES_PATH)]
594-
rmg_families = [rmg_families] if isinstance(rmg_families, str) else rmg_families
595-
arc_families = [arc_families] if isinstance(arc_families, str) else arc_families
593+
for family in os.listdir(ARC_FAMILIES_PATH):
594+
if family.startswith('.') or family.startswith('_'):
595+
continue
596+
arc_families.append(os.path.splitext(family)[0])
596597
return rmg_families + arc_families if rmg_families is not None else arc_families
597598

598599

@@ -862,3 +863,18 @@ def isomorphic_products(rxn: 'ARCReaction',
862863
"""
863864
p_species = rxn.get_reactants_and_products(return_copies=True)[1]
864865
return check_product_isomorphism(products, p_species)
866+
867+
def check_family_name(family: Union[str, None]) -> bool:
    """
    Check whether the family name is defined.

    ``None`` is accepted and is always considered valid.

    Args:
        family (Union[str, None]): The family name, or ``None``.

    Raises:
        TypeError: If ``family`` is neither a string nor ``None``.

    Returns:
        bool: ``True`` if ``family`` is ``None`` or is one of the names returned by
              ``get_all_families()`` (called with its default arguments), ``False`` otherwise.
    """
    if family is None:
        # Nothing to look up, so avoid building the full list of families.
        return True
    if not isinstance(family, str):
        raise TypeError("Family name must be a string or None.")
    return family in get_all_families()

arc/family/family_test.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
get_rmg_recommended_family_sets,
2929
is_own_reverse,
3030
is_reversible,
31+
check_family_name
3132
)
3233
from arc.molecule import Group, Molecule
3334
from arc.molecule.resonance import generate_resonance_structures_safely
@@ -701,7 +702,9 @@ def test_get_all_families(self):
701702
self.assertIn('intra_OH_migration', families)
702703
families = get_all_families(consider_rmg_families=False)
703704
self.assertIsInstance(families, list)
704-
self.assertIn('hydrolysis', families)
705+
self.assertIn('carbonyl_based_hydrolysis', families)
706+
self.assertIn('ether_hydrolysis', families)
707+
self.assertIn('nitrile_hydrolysis', families)
705708
families = get_all_families(rmg_family_set=['H_Abstraction'])
706709
self.assertEqual(families, ['H_Abstraction'])
707710

@@ -1059,6 +1062,16 @@ def test_get_isomorphic_subgraph(self):
10591062
)
10601063
self.assertEqual(isomorphic_subgraph, {0: '*3', 4: '*1', 7: '*2'})
10611064

1065+
def test_check_family_name(self):
1066+
"""Test check family name function"""
1067+
self.assertTrue(check_family_name('H_Abstraction'))
1068+
self.assertTrue(check_family_name('ether_hydrolysis'))
1069+
self.assertFalse(check_family_name('etherhydrolysis'))
1070+
self.assertFalse(check_family_name('amine_hydrolysis'))
1071+
self.assertTrue(check_family_name(None))
1072+
with self.assertRaises(TypeError):
1073+
check_family_name(123)
1074+
10621075

10631076
if __name__ == '__main__':
10641077
unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))

arc/job/adapters/common.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
default_job_settings, global_ess_settings, rotor_scan_resolution = \
2727
settings['default_job_settings'], settings['global_ess_settings'], settings['rotor_scan_resolution']
2828

29-
3029
ts_adapters_by_rmg_family = {'1+2_Cycloaddition': ['kinbot'],
3130
'1,2_Insertion_CO': ['kinbot'],
3231
'1,2_Insertion_carbene': ['kinbot'],
@@ -43,6 +42,9 @@
4342
'Cyclopentadiene_scission': ['gcn', 'xtb_gsm'],
4443
'Diels_alder_addition': ['kinbot'],
4544
'H_Abstraction': ['heuristics', 'autotst'],
45+
'carbonyl_based_hydrolysis': ['heuristics'],
46+
'ether_hydrolysis': ['heuristics'],
47+
'nitrile_hydrolysis': ['heuristics'],
4648
'HO2_Elimination_from_PeroxyRadical': ['kinbot'],
4749
'Intra_2+2_cycloaddition_Cd': ['gcn', 'xtb_gsm'],
4850
'Intra_5_membered_conjugated_C=C_C=C_addition': ['gcn', 'xtb_gsm'],
@@ -117,7 +119,7 @@ def _initialize_adapter(obj: 'JobAdapter',
117119
times_rerun: int = 0,
118120
torsions: Optional[List[List[int]]] = None,
119121
tsg: Optional[int] = None,
120-
xyz: Optional[Union[dict,List[dict]]] = None,
122+
xyz: Optional[Union[dict, List[dict]]] = None,
121123
):
122124
"""
123125
A common Job adapter initializer function.
@@ -161,7 +163,7 @@ def _initialize_adapter(obj: 'JobAdapter',
161163
obj.job_num = job_num
162164
obj.job_server_name = job_server_name
163165
obj.job_status = job_status \
164-
or ['initializing', {'status': 'initializing', 'keywords': list(), 'error': '', 'line': ''}]
166+
or ['initializing', {'status': 'initializing', 'keywords': list(), 'error': '', 'line': ''}]
165167
obj.job_type = job_type if isinstance(job_type, str) else job_type[0] # always a string
166168
obj.job_types = job_type if isinstance(job_type, list) else [job_type] # always a list
167169
# When restarting ARC and re-setting the jobs, ``level`` is a string, convert it to a Level object instance
@@ -211,7 +213,7 @@ def _initialize_adapter(obj: 'JobAdapter',
211213
obj.is_ts = obj.species[0].is_ts
212214
obj.species_label = list()
213215
for spc in obj.species:
214-
obj.charge.append(spc.charge)
216+
obj.charge.append(spc.charge)
215217
obj.multiplicity.append(spc.multiplicity)
216218
obj.species_label.append(spc.label)
217219
elif obj.reactions is not None:
@@ -286,9 +288,9 @@ def is_species_restricted(obj: 'JobAdapter',
286288
bool: Whether to run as restricted (``True``) or not (``False``).
287289
"""
288290

289-
if obj.level.method_type in ['force_field','composite','semiempirical']:
291+
if obj.level.method_type in ['force_field', 'composite', 'semiempirical']:
290292
return True
291-
293+
292294
multiplicity = obj.multiplicity if species is None else species.multiplicity
293295
number_of_radicals = obj.species[0].number_of_radicals if species is None else species.number_of_radicals
294296
species_label = obj.species[0].label if species is None else species.label
@@ -322,7 +324,8 @@ def check_argument_consistency(obj: 'JobAdapter'):
322324
raise NotImplementedError(f'The {obj.job_adapter} job adapter does not support ESS scans.')
323325
if obj.job_type == 'scan' and divmod(360, obj.scan_res)[1]:
324326
raise ValueError(f'Got an illegal rotor scan resolution of {obj.scan_res}.')
325-
if obj.job_type == 'scan' and ((not obj.species[0].rotors_dict or obj.rotor_index is None) and obj.torsions is None):
327+
if obj.job_type == 'scan' and (
328+
(not obj.species[0].rotors_dict or obj.rotor_index is None) and obj.torsions is None):
326329
# If this is a scan job type and species.rotors_dict is empty (e.g., via pipe), then torsions must be set up.
327330
raise ValueError('Either torsions or a species rotors_dict along with a rotor_index argument '
328331
'must be specified for an ESS scan job.')
@@ -406,7 +409,7 @@ def update_input_dict_with_args(args: dict,
406409
else:
407410
if 'keywords' not in input_dict.keys():
408411
input_dict['keywords'] = ''
409-
# Check if input_dict['keywords'] already contains a value
412+
# Check if input_dict['keywords'] already contains a value
410413
if input_dict['keywords']:
411414
input_dict['keywords'] += f' {value}'
412415
else:
@@ -444,6 +447,7 @@ def update_input_dict_with_args(args: dict,
444447

445448
return input_dict
446449

450+
447451
def input_dict_strip(input_dict: dict) -> dict:
448452
"""
449453
Strip all values in the input dict of leading and trailing whitespace.
@@ -536,6 +540,7 @@ def which(command: Union[str, list],
536540
else:
537541
return ans
538542

543+
539544
def combine_parameters(input_dict: dict, terms: list) -> Tuple[dict, List]:
540545
"""
541546
Extract and combine specific parameters from a dictionary's string values based on a list of terms.

0 commit comments

Comments
 (0)