-
Notifications
You must be signed in to change notification settings - Fork 25
Expand file tree
/
Copy pathvalidators.py
More file actions
106 lines (85 loc) · 3.67 KB
/
validators.py
File metadata and controls
106 lines (85 loc) · 3.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import functools
from collections.abc import Callable, Sequence
from typing import Any
from rdkit.Chem import Mol, MolFromInchi, MolFromSmiles, MolToSmiles
from rdkit.Chem.PropertyMol import PropertyMol
def ensure_mols(X: Sequence[Any]) -> list[Mol]:
    """
    Ensure that all input sequence elements are RDKit ``Mol`` objects.

    Elements that are already ``Mol`` objects are passed through unchanged. Strings
    are parsed with default settings: those starting with ``"InChI="`` as InChI,
    all others as SMILES. The format is detected for each string separately.
    Empty input results in an empty list.

    Raises TypeError if any element is neither a string nor a ``Mol``, or if
    a string cannot be parsed as a molecule.
    """
    if not all(isinstance(x, (Mol, PropertyMol, str)) for x in X):
        types = {type(x) for x in X}
        raise TypeError(
            f"Passed values must be RDKit Mol objects, SMILES or InChI strings, got types: {types}"
        )

    mols = []
    for idx, x in enumerate(X):
        if isinstance(x, str):
            # Decide per element instead of inspecting only X[0]: this works for
            # empty input and never leaves unparsed strings in the output.
            parser = MolFromInchi if x.startswith("InChI=") else MolFromSmiles
            mol = parser(x)
            # The parsers report failure by returning None rather than raising.
            if mol is None:
                raise TypeError(f"Could not parse '{x}' at index {idx} as molecule")
        else:
            mol = x
        mols.append(mol)
    return mols
def ensure_smiles(X: Sequence[Any]) -> list[str]:
    """
    Convert the input sequence to a list of SMILES strings.

    Accepts strings and RDKit ``Mol`` objects. ``Mol`` objects are converted to
    SMILES strings with default settings, strings are passed through unchanged.
    Raises TypeError if any element is neither a string nor a ``Mol``.
    """
    if any(not isinstance(x, (Mol, PropertyMol, str)) for x in X):
        types = {type(x) for x in X}
        raise TypeError(f"Passed values must be SMILES strings, got types: {types}")

    smiles = []
    for item in X:
        if isinstance(item, Mol):
            smiles.append(MolToSmiles(item))
        else:
            # already a string, kept as-is
            smiles.append(item)
    return smiles
def require_mols(X: Sequence[Any]) -> None:
    """
    Validate that every input element is an RDKit ``Mol`` object.
    Raises TypeError reporting the type and index of the first element that is not.
    """
    for position, candidate in enumerate(X):
        if isinstance(candidate, (Mol, PropertyMol)):
            continue
        raise TypeError(
            f"Passed values must be RDKit Mol objects, got type {type(candidate)} at index {position}"
        )
def require_mols_with_conf_ids(X: Sequence[Any]) -> Sequence[Mol]:
    """
    Validate that every input element is an RDKit ``Mol`` object that has
    the ``"conf_id"`` property set. Returns the input unchanged on success,
    raises TypeError otherwise.
    """
    for candidate in X:
        is_mol = isinstance(candidate, (Mol, PropertyMol))
        # the property is queried only for actual molecules
        if not is_mol or not candidate.HasProp("conf_id"):
            raise TypeError(
                "Passed data must be molecules (RDKit Mol objects) "
                "and each must have conf_id property set. "
                "You can use ConformerGenerator to add them."
            )
    return X
def require_strings(X: Sequence[Any]) -> None:
    """
    Validate that every input element is a string.
    Raises TypeError reporting the type and index of the first element that is not.
    """
    first_invalid = next(
        ((pos, item) for pos, item in enumerate(X) if not isinstance(item, str)),
        None,
    )
    if first_invalid is None:
        return

    pos, item = first_invalid
    raise TypeError(
        f"Passed values must be strings, got type {type(item)} at index {pos}"
    )
def require_atoms(min_atoms: int = 1, only_explicit=True) -> Callable:
    """
    Decorator factory for functions that take a single molecule as their first
    argument. The wrapped function is called only if the molecule has at least
    ``min_atoms`` atoms, as reported by ``GetNumAtoms`` with ``onlyExplicit`` set
    to ``only_explicit``. Raises ValueError otherwise.
    """

    def decorator(func: Callable) -> Callable:
        @functools.wraps(func)
        def wrapper(mol: Mol, *args, **kwargs):
            num_atoms = mol.GetNumAtoms(onlyExplicit=only_explicit)
            if num_atoms >= min_atoms:
                return func(mol, *args, **kwargs)

            raise ValueError(
                f"The molecule must have at least {min_atoms} atom(s), "
                f"{func.__name__} cannot be calculated."
            )

        return wrapper

    return decorator