Skip to content

Commit 4034772

Browse files
authored
Merge pull request #2796 from ReactionMechanismGenerator/feat/rdkit_sssr
Use `rdkit` for SSSR and RCs (bug fix + Python upgrade)
2 parents 6b1368d + d1c4b84 commit 4034772

28 files changed

+881
-915
lines changed

.conda/meta.yaml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,6 @@ requirements:
6464
- conda-forge::gprof2dot
6565
- conda-forge::numdifftools
6666
- conda-forge::quantities !=0.16.0,!=0.16.1
67-
- conda-forge::ringdecomposerlib-python
6867
- rmg::pydas >=1.0.3
6968
- rmg::pydqed >=1.0.3
7069
- rmg::symmetry
@@ -114,7 +113,6 @@ requirements:
114113
- conda-forge::gprof2dot
115114
- conda-forge::numdifftools
116115
- conda-forge::quantities !=0.16.0,!=0.16.1
117-
- conda-forge::ringdecomposerlib-python
118116
- rmg::pydas >=1.0.3
119117
- rmg::pydqed >=1.0.3
120118
- rmg::symmetry
@@ -165,7 +163,6 @@ test:
165163
- conda-forge::gprof2dot
166164
- conda-forge::numdifftools
167165
- conda-forge::quantities !=0.16.0,!=0.16.1
168-
- conda-forge::ringdecomposerlib-python
169166
- rmg::pydas >=1.0.3
170167
- rmg::pydqed >=1.0.3
171168
- rmg::symmetry

.github/workflows/CI.yml

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -65,12 +65,9 @@ jobs:
6565
strategy:
6666
fail-fast: false
6767
matrix:
68-
python-version: ["3.9"]
68+
python-version: ["3.9", "3.10", "3.11"]
6969
os: [macos-15-intel, macos-latest, ubuntu-latest]
7070
include-rms: ["", "with RMS"]
71-
exclude:
72-
- os: macos-15-intel # GitHub's runners just aren't up to the task of installing Julia
73-
include-rms: 'with RMS'
7471
runs-on: ${{ matrix.os }}
7572
name: Python ${{ matrix.python-version }} ${{ matrix.os }} Build and Test ${{ matrix.include-rms }}
7673
# skip scheduled runs from forks

.github/workflows/conda_build.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ jobs:
1717
matrix:
1818
os: [ubuntu-latest, macos-15-intel, macos-latest]
1919
numpy-version: ["1.26"]
20-
python-version: ["3.9"]
20+
python-version: ["3.9", "3.10", "3.11"]
2121
runs-on: ${{ matrix.os }}
2222
name: Build ${{ matrix.os }} Python ${{ matrix.python-version }} Numpy ${{ matrix.numpy-version }}
2323
defaults:

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ clean-solver:
2828

2929
install:
3030
@ python utilities.py check-pydas
31-
python -m pip install -vv -e .
31+
python -m pip install --no-build-isolation -vv -e .
3232

3333
q2dtor:
3434
@ echo -e "\nInstalling Q2DTor...\n"

environment.yml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ dependencies:
5858
- conda-forge::rdkit >=2022.09.1
5959

6060
# Python tools
61-
- conda-forge::python >=3.9 # leave as GEQ so that GitHub actions can add EQ w/o breaking (contradictory deps)
61+
- conda-forge::python >=3.9,<3.12 # leave as GEQ so that GitHub actions can add EQ w/o breaking (contradictory deps)
6262
- conda-forge::setuptools <80
6363
- conda-forge::coverage
6464
- conda-forge::cython >=0.25.2,<3.1
@@ -84,7 +84,6 @@ dependencies:
8484
# bug in quantities, see:
8585
# https://github.com/ReactionMechanismGenerator/RMG-Py/pull/2694#issuecomment-2489286263
8686
- conda-forge::quantities !=0.16.0,!=0.16.1
87-
- conda-forge::ringdecomposerlib-python
8887

8988
# packages we maintain
9089
- rmg::pydas >=1.0.3

rmgpy/data/thermo.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -467,7 +467,7 @@ def is_ring_partial_matched(ring, matched_group):
467467
else:
468468
submol_ring, _ = convert_ring_to_sub_molecule(ring)
469469
sssr = submol_ring.get_smallest_set_of_smallest_rings()
470-
sssr_grp = matched_group.get_smallest_set_of_smallest_rings()
470+
sssr_grp = matched_group.make_sample_molecule().get_smallest_set_of_smallest_rings()
471471
if sorted([len(sr) for sr in sssr]) == sorted([len(sr_grp) for sr_grp in sssr_grp]):
472472
return False
473473
else:
@@ -483,7 +483,7 @@ def bicyclic_decomposition_for_polyring(polyring):
483483
"""
484484

485485
submol, _ = convert_ring_to_sub_molecule(polyring)
486-
sssr = submol.get_deterministic_sssr()
486+
sssr = submol.get_smallest_set_of_smallest_rings()
487487

488488
ring_pair_with_common_atoms_list = []
489489
ring_occurances_dict = {}
@@ -555,7 +555,7 @@ def split_bicyclic_into_single_rings(bicyclic_submol):
555555
Splits a given bicyclic submolecule into two individual single
556556
ring submolecules (a list of `Molecule`s ).
557557
"""
558-
sssr = bicyclic_submol.get_deterministic_sssr()
558+
sssr = bicyclic_submol.get_smallest_set_of_smallest_rings()
559559

560560
return [convert_ring_to_sub_molecule(sssr[0])[0],
561561
convert_ring_to_sub_molecule(sssr[1])[0]]

rmgpy/molecule/adjlist.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1093,7 +1093,7 @@ def to_adjacency_list(atoms, multiplicity, metal='', facet='', label=None, group
10931093
# numbers if doesn't work
10941094
try:
10951095
adjlist += bond.get_order_str()
1096-
except ValueError:
1096+
except (ValueError, TypeError):
10971097
adjlist += str(bond.get_order_num())
10981098
adjlist += '}'
10991099

rmgpy/molecule/converter.pxd

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ cimport rmgpy.molecule.molecule as mm
2929
cimport rmgpy.molecule.element as elements
3030

3131

32-
cpdef to_rdkit_mol(mm.Molecule mol, bint remove_h=*, bint return_mapping=*, bint sanitize=*, bint save_order=?)
32+
cpdef to_rdkit_mol(mm.Molecule mol, bint remove_h=*, bint return_mapping=*, object sanitize=*, bint save_order=?, bint ignore_bond_orders=?)
3333

3434
cpdef mm.Molecule from_rdkit_mol(mm.Molecule mol, object rdkitmol, bint raise_atomtype_exception=?)
3535

rmgpy/molecule/converter.py

Lines changed: 42 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
import cython
3939
# Assume that rdkit is installed
4040
from rdkit import Chem
41+
from rdkit.Chem.rdchem import KekulizeException, AtomKekulizeException
4142
# Test if openbabel is installed
4243
try:
4344
from openbabel import openbabel
@@ -49,55 +50,61 @@
4950
from rmgpy.exceptions import DependencyError
5051

5152

52-
def to_rdkit_mol(mol, remove_h=True, return_mapping=False, sanitize=True, save_order=False):
53+
def to_rdkit_mol(mol, remove_h=True, return_mapping=False, sanitize=True,
54+
save_order=False, ignore_bond_orders=False):
5355
"""
5456
Convert a molecular structure to a RDKit rdmol object. Uses
5557
`RDKit <https://rdkit.org/>`_ to perform the conversion.
5658
Perceives aromaticity and, unless remove_h==False, removes Hydrogen atoms.
5759
5860
If return_mapping==True then it also returns a dictionary mapping the
5961
atoms to RDKit's atom indices.
62+
63+
If ignore_bond_orders==True, all bonds are converted to unknown bonds, and
64+
sanitization is skipped. This is helpful when all you want is ring perception,
65+
for example. Must also set sanitize=False.
6066
"""
61-
from rmgpy.molecule.fragment import Fragment
67+
if ignore_bond_orders and sanitize:
68+
raise ValueError("If ignore_bond_orders is True, sanitize must be False")
69+
from rmgpy.molecule.fragment import Fragment, CuttingLabel
6270
# Sort the atoms before converting to ensure output is consistent
6371
# between different runs
6472
if not save_order:
6573
mol.sort_atoms()
6674
atoms = mol.vertices
6775
rd_atom_indices = {} # dictionary of RDKit atom indices
68-
label_dict = {} # store label of atom for Framgent
76+
label_dict = {} # For fragment cutting labels. Key is rdkit atom index, value is label string
6977
rdkitmol = Chem.rdchem.EditableMol(Chem.rdchem.Mol())
7078
for index, atom in enumerate(mol.vertices):
7179
if atom.element.symbol == 'X':
7280
rd_atom = Chem.rdchem.Atom('Pt') # not sure how to do this with linear scaling when this might not be Pt
81+
elif atom.element.symbol in ['R', 'L']:
82+
rd_atom = Chem.rdchem.Atom(0)
7383
else:
7484
rd_atom = Chem.rdchem.Atom(atom.element.symbol)
7585
if atom.element.isotope != -1:
7686
rd_atom.SetIsotope(atom.element.isotope)
7787
rd_atom.SetNumRadicalElectrons(atom.radical_electrons)
7888
rd_atom.SetFormalCharge(atom.charge)
79-
if atom.element.symbol == 'C' and atom.lone_pairs == 1 and mol.multiplicity == 1: rd_atom.SetNumRadicalElectrons(
80-
2)
89+
if atom.element.symbol == 'C' and atom.lone_pairs == 1 and mol.multiplicity == 1:
90+
rd_atom.SetNumRadicalElectrons(2)
8191
rdkitmol.AddAtom(rd_atom)
8292
if remove_h and atom.symbol == 'H':
8393
pass
8494
else:
8595
rd_atom_indices[atom] = index
8696

87-
# Check if a cutting label is present. If preserve this so that it is added to the SMILES string
88-
# Fragment's representative species is Molecule (with CuttingLabel replaced by Si but label as CuttingLabel)
89-
# so we use detect_cutting_label to check atom.label
90-
_, cutting_label_list = Fragment().detect_cutting_label(atom.label)
91-
if cutting_label_list != []:
92-
saved_index = index
93-
label = atom.label
94-
if label in label_dict:
95-
label_dict[label].append(saved_index)
96-
else:
97-
label_dict[label] = [saved_index]
97+
# Save cutting labels to add to the SMILES string
98+
if atom.label and atom.label in ('R', 'L'):
99+
label_dict[index] = atom.label
100+
98101
rd_bonds = Chem.rdchem.BondType
99-
orders = {'S': rd_bonds.SINGLE, 'D': rd_bonds.DOUBLE, 'T': rd_bonds.TRIPLE, 'B': rd_bonds.AROMATIC,
100-
'Q': rd_bonds.QUADRUPLE}
102+
# no vdW bond in RDKit, so "ZERO" or "OTHER" might be OK
103+
orders = {'S': rd_bonds.SINGLE, 'D': rd_bonds.DOUBLE,
104+
'T': rd_bonds.TRIPLE, 'B': rd_bonds.AROMATIC,
105+
'Q': rd_bonds.QUADRUPLE, 'vdW': rd_bonds.ZERO,
106+
'H': rd_bonds.HYDROGEN, 'R': rd_bonds.UNSPECIFIED,
107+
None: rd_bonds.UNSPECIFIED}
101108
# Add the bonds
102109
for atom1 in mol.vertices:
103110
for atom2, bond in atom1.edges.items():
@@ -106,23 +113,31 @@ def to_rdkit_mol(mol, remove_h=True, return_mapping=False, sanitize=True, save_o
106113
index1 = atoms.index(atom1)
107114
index2 = atoms.index(atom2)
108115
if index1 < index2:
109-
order_string = bond.get_order_str()
110-
order = orders[order_string]
116+
if ignore_bond_orders:
117+
order = rd_bonds.UNSPECIFIED
118+
else:
119+
order_string = bond.get_order_str()
120+
order = orders[order_string]
111121
rdkitmol.AddBond(index1, index2, order)
112122

113123
# Make editable mol into a mol and rectify the molecule
114124
rdkitmol = rdkitmol.GetMol()
115-
if label_dict:
116-
for label, ind_list in label_dict.items():
117-
for ind in ind_list:
118-
Chem.SetSupplementalSmilesLabel(rdkitmol.GetAtomWithIdx(ind), label)
125+
for index, label in label_dict.items():
126+
Chem.SetSupplementalSmilesLabel(rdkitmol.GetAtomWithIdx(index), label)
119127
for atom in rdkitmol.GetAtoms():
120128
if atom.GetAtomicNum() > 1:
121129
atom.SetNoImplicit(True)
122-
if sanitize:
123-
Chem.SanitizeMol(rdkitmol)
124130
if remove_h:
125-
rdkitmol = Chem.RemoveHs(rdkitmol, sanitize=sanitize)
131+
rdkitmol = Chem.RemoveHs(rdkitmol, sanitize=False) # skip sanitization here, do it later if requested
132+
if sanitize == True:
133+
Chem.SanitizeMol(rdkitmol)
134+
elif sanitize == "partial":
135+
try:
136+
Chem.SanitizeMol(rdkitmol, sanitizeOps=Chem.SANITIZE_ALL ^ Chem.SANITIZE_PROPERTIES)
137+
except (KekulizeException, AtomKekulizeException):
138+
logging.warning("Kekulization failed; sanitizing without Kekulize")
139+
Chem.SanitizeMol(rdkitmol, sanitizeOps=Chem.SANITIZE_ALL ^ Chem.SANITIZE_PROPERTIES ^ Chem.SANITIZE_KEKULIZE)
140+
126141
if return_mapping:
127142
return rdkitmol, rd_atom_indices
128143
return rdkitmol

rmgpy/molecule/draw.py

Lines changed: 43 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ def clear(self):
155155
self.surface = None
156156
self.cr = None
157157

158-
def draw(self, molecule, file_format, target=None):
158+
def draw(self, molecule, file_format, target=None, use_rdkit=True):
159159
"""
160160
Draw the given `molecule` using the given image `file_format` - pdf, svg, ps, or
161161
png. If `path` is given, the drawing is saved to that location on disk. The
@@ -165,6 +165,9 @@ def draw(self, molecule, file_format, target=None):
165165
This function returns the Cairo surface and context used to create the
166166
drawing, as well as a bounding box for the molecule being drawn as the
167167
tuple (`left`, `top`, `width`, `height`).
168+
169+
If `use_rdkit` is True, then the RDKit 2D coordinate generation is used to generate the coordinates.
170+
If `use_rdkit` is False, then the molecule is drawn using our (deprecated) original algorithm.
168171
"""
169172

170173
# The Cairo 2D graphics library (and its Python wrapper) is required for
@@ -219,13 +222,13 @@ def draw(self, molecule, file_format, target=None):
219222
if molecule.contains_surface_site():
220223
try:
221224
self._connect_surface_sites()
222-
self._generate_coordinates()
225+
self._generate_coordinates(use_rdkit=use_rdkit)
223226
self._disconnect_surface_sites()
224227
except AdsorbateDrawingError as e:
225228
self._disconnect_surface_sites()
226-
self._generate_coordinates(fix_surface_sites=False)
229+
self._generate_coordinates(fix_surface_sites=False, use_rdkit=use_rdkit)
227230
else:
228-
self._generate_coordinates()
231+
self._generate_coordinates(use_rdkit=use_rdkit)
229232
self._replace_bonds(old_bond_dictionary)
230233

231234
# Generate labels to use
@@ -341,7 +344,7 @@ def _find_ring_groups(self):
341344
if not found:
342345
self.ringSystems.append([cycle])
343346

344-
def _generate_coordinates(self, fix_surface_sites=True):
347+
def _generate_coordinates(self, fix_surface_sites=True, use_rdkit=True):
345348
"""
346349
Generate the 2D coordinates to be used when drawing the current
347350
molecule. The function uses rdKits 2D coordinate generation.
@@ -372,15 +375,42 @@ def _generate_coordinates(self, fix_surface_sites=True):
372375
self.coordinates[1, :] = [0.5, 0.0]
373376
return self.coordinates
374377

375-
# Decide whether we can use RDKit or have to generate coordinates ourselves
376-
for atom in self.molecule.atoms:
377-
if atom.charge != 0:
378-
use_rdkit = False
379-
break
380-
else: # didn't break
381-
use_rdkit = True
378+
if use_rdkit == True:
379+
# Use RDKit 2D coordinate generation:
380+
# Generate the RDkit molecule from the RMG molecule, saving mapping
381+
# in order to match the atoms in the rdmol with the atoms in the
382+
# RMG molecule (which is required to extract coordinates).
383+
#
384+
# partial sanitization is used to allow molecules to fail RDKit's
385+
# bond order and implicit hydrogen assignments, and possibly also
386+
# RDKit's aromaticity perception, while still drawing the molecule.
387+
# this can happen because RMG uses partial bond orders and atom types
388+
# that RDKit doesn't understand, though RDKit can still generate
389+
# coordinates for the molecule.
390+
rdmol, rd_atom_idx = self.molecule.to_rdkit_mol(remove_h=False,
391+
return_mapping=True,
392+
sanitize="partial")
393+
394+
AllChem.Compute2DCoords(rdmol)
395+
396+
# Extract the coordinates from each atom.
397+
rd_conformer = rdmol.GetConformer(0)
398+
for atom in atoms:
399+
index = rd_atom_idx[atom]
400+
point = rd_conformer.GetAtomPosition(index)
401+
coordinates[index, :] = [point.x * 0.6, point.y * 0.6]
402+
403+
# RDKit generates some molecules more vertically than horizontally,
404+
# especially linear ones. This will reflect any molecule taller than
405+
# it is wide across the line y=x.
406+
ranges = np.ptp(coordinates, axis=0)
407+
if ranges[1] > ranges[0]:
408+
temp = np.copy(coordinates)
409+
coordinates[:, 0] = temp[:, 1]
410+
coordinates[:, 1] = temp[:, 0]
382411

383-
if not use_rdkit:
412+
else:
413+
logging.warning("Using deprecated molecule drawing algorithm; undesired behavior may occur. Consider using use_rdkit=True.")
384414
if len(self.cycles) > 0:
385415
# Cyclic molecule
386416
backbone = self._find_cyclic_backbone()
@@ -438,32 +468,6 @@ def _generate_coordinates(self, fix_surface_sites=True):
438468
# minimize likelihood of overlap
439469
self._generate_neighbor_coordinates(backbone)
440470

441-
else:
442-
# Use RDKit 2D coordinate generation:
443-
444-
# Generate the RDkit molecule from the RDkit molecule, use geometry
445-
# in order to match the atoms in the rdmol with the atoms in the
446-
# RMG molecule (which is required to extract coordinates).
447-
self.geometry = Geometry(None, None, self.molecule, None)
448-
449-
rdmol, rd_atom_idx = self.geometry.rd_build()
450-
AllChem.Compute2DCoords(rdmol)
451-
452-
# Extract the coordinates from each atom.
453-
for atom in atoms:
454-
index = rd_atom_idx[atom]
455-
point = rdmol.GetConformer(0).GetAtomPosition(index)
456-
coordinates[index, :] = [point.x * 0.6, point.y * 0.6]
457-
458-
# RDKit generates some molecules more vertically than horizontally,
459-
# Especially linear ones. This will reflect any molecule taller than
460-
# it is wide across the line y=x
461-
ranges = np.ptp(coordinates, axis=0)
462-
if ranges[1] > ranges[0]:
463-
temp = np.copy(coordinates)
464-
coordinates[:, 0] = temp[:, 1]
465-
coordinates[:, 1] = temp[:, 0]
466-
467471
# For surface species
468472
if fix_surface_sites and self.molecule.contains_surface_site():
469473
if len(self.molecule.atoms) == 1:

0 commit comments

Comments
 (0)