Skip to content

Commit bdd0f92

Browse files
author
kpzn768
committed
Adding features for 4.4.0 release
1 parent 8877e4e commit bdd0f92

35 files changed

+3303
-1419
lines changed

CHANGELOG.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,18 @@
11
# CHANGELOG
22

3+
## Version 4.4.0 2025-06-30
4+
5+
## Features
6+
7+
- Add scorers that are wrappers around rxnutils scorers
8+
- Add filter policy for keeping substructures frozen
9+
- Add stock option based on molecular weight
10+
11+
## Miscellaneous
12+
13+
- Add rxnutils as dependency
14+
- Remove routines that are now provided by rxnutils to reduce code duplication
15+
316
## Version 4.3.2 2025-04-07
417

518
### Trivial changes

aizynthfinder/analysis/routes.py

Lines changed: 20 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,24 @@
11
""" Module containing classes to store and manipulate collections of synthetic routes.
22
"""
3+
34
from __future__ import annotations
45

56
import copy
67
from typing import TYPE_CHECKING
78

89
import numpy as np
10+
from rxnutils.routes.comparison import simple_route_similarity
11+
from rxnutils.routes.readers import read_aizynthfinder_dict
912

1013
try:
1114
from route_distances.clustering import ClusteringHelper
12-
from route_distances.route_distances import route_distances_calculator
1315
except ImportError:
14-
pass
16+
SUPPORT_CLUSTERING = False
17+
else:
18+
SUPPORT_CLUSTERING = True
1519

1620
from aizynthfinder.analysis.utils import CombinedReactionTrees, RouteSelectionArguments
17-
from aizynthfinder.reactiontree import SUPPORT_DISTANCES, ReactionTree
21+
from aizynthfinder.reactiontree import ReactionTree
1822
from aizynthfinder.search.mcts import MctsNode, MctsSearchTree
1923

2024
if TYPE_CHECKING:
@@ -27,7 +31,6 @@
2731
PilImage,
2832
Sequence,
2933
StrDict,
30-
Union,
3134
)
3235

3336

@@ -77,7 +80,7 @@ def __init__(self, reaction_trees: Sequence[ReactionTree], **kwargs) -> None:
7780
self.clusters: Optional[Sequence[RouteCollection]] = self._unpack_kwarg(
7881
"clusters", **kwargs
7982
)
80-
self._distance_matrix: Dict[str, np.ndarray] = {}
83+
self._distance_matrix: Optional[np.ndarray] = None
8184
self._combined_reaction_trees: Optional[CombinedReactionTrees] = None
8285

8386
@classmethod
@@ -142,41 +145,30 @@ def cluster(
142145
self,
143146
n_clusters: int,
144147
max_clusters: int = 5,
145-
distances_model: str = "ted",
146148
**kwargs: Any,
147149
) -> np.ndarray:
148150
"""
149151
Cluster the route collection into a number of clusters.
150152
151-
Additional arguments to the distance or clustering algorithm
152-
can be passed in as key-word arguments.
153-
154-
When `distances_model` is "lstm", a key-word argument `model_path` needs to be given
155-
when `distances_model` is "ted", two optional key-word arguments `timeout` and `content`
156-
can be given.
153+
Additional arguments to the clustering algorithm can be passed in as key-word arguments.
157154
158155
If the number of reaction trees is less than 3, no clustering will be performed
159156
160157
:param n_clusters: the desired number of clusters, if less than 2 triggers optimization
161158
:param max_clusters: the maximum number of clusters to consider
162-
:param distances_model: can be ted or lstm and determines how the route distances are computed
163159
:return: the cluster labels
160+
:raises ValueError: if the route_distances package is not installed
164161
"""
165-
if not SUPPORT_DISTANCES:
162+
if not SUPPORT_CLUSTERING:
166163
raise ValueError(
167164
"Clustering is not supported by this installation."
168165
" Please install aizynthfinder with extras dependencies."
169166
)
170167

171168
if len(self.reaction_trees) < 3:
172169
return np.asarray([])
173-
dist_kwargs = {
174-
"content": kwargs.pop("content", "both"),
175-
"timeout": kwargs.pop("timeout", None),
176-
"model_path": kwargs.pop("model_path", None),
177-
}
178170
try:
179-
distances = self.distance_matrix(model=distances_model, **dist_kwargs)
171+
distances = self.distance_matrix()
180172
except ValueError:
181173
return np.asarray([])
182174

@@ -213,7 +205,7 @@ def compute_scores(self, *scorers: Scorer) -> None:
213205
for scorer in scorers:
214206
for idx, score in enumerate(scorer(list_)): # type: ignore
215207
self.all_scores[idx][repr(scorer)] = score
216-
self._update_route_dict(self.all_scores, "all_score")
208+
self._update_route_dict(self.all_scores, "all_scores")
217209

218210
def dict_with_extra(
219211
self, include_scores=False, include_metadata=False
@@ -244,41 +236,19 @@ def dict_with_scores(self) -> Sequence[StrDict]:
244236
"""
245237
return self.dict_with_extra(include_scores=True)
246238

247-
def distance_matrix(
248-
self, recreate: bool = False, model: str = "ted", **kwargs: Any
249-
) -> np.ndarray:
239+
def distance_matrix(self, recreate: bool = False) -> np.ndarray:
250240
"""
251241
Compute the distance matrix between each pair of reaction trees
252242
253-
All key-word arguments are passed along to the `route_distance_calculator`
254-
function from the `route_distances` package.
255-
256-
When `model` is "lstm", a key-word argument `model_path` needs to be given
257-
when `model` is "ted", two optional key-word arguments `timeout` and `content`
258-
can be given.
259-
260243
:param recreate: if False, use a cached one if available
261-
:param model: the type of model to use "ted" or "lstm"
262244
:return: the square distance matrix
263245
"""
264-
if not SUPPORT_DISTANCES:
265-
raise ValueError(
266-
"Distance calculations are not supported by this installation."
267-
" Please install aizynthfinder with extras dependencies."
268-
)
269-
270-
if model == "lstm" and not kwargs.get("model_path"):
271-
raise KeyError(
272-
"Need to provide 'model_path' argument when using LSTM model for computing distances"
273-
)
274-
content = kwargs.get("content", "both")
275-
cache_key = kwargs.get("model_path", "") if model == "lstm" else content
276-
if self._distance_matrix.get(cache_key) is not None and not recreate:
277-
return self._distance_matrix[cache_key]
278-
calculator = route_distances_calculator(model, **kwargs)
279-
distances = calculator(self.dicts)
280-
self._distance_matrix[cache_key] = distances
281-
return distances
246+
if self._distance_matrix is not None and not recreate:
247+
return self._distance_matrix
248+
routes = [read_aizynthfinder_dict(dict_) for dict_ in self.dicts]
249+
self._distance_matrix = 1.0 - simple_route_similarity(routes)
250+
assert self._distance_matrix is not None
251+
return self._distance_matrix
282252

283253
def make_dicts(self) -> Sequence[StrDict]:
284254
"""Convert all reaction trees to dictionaries"""

aizynthfinder/context/policy/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from aizynthfinder.context.policy.filter_strategies import (
1111
BondFilter,
1212
FilterStrategy,
13+
FrozenSubstructureFilter,
1314
QuickKerasFilter,
1415
ReactantsCountFilter,
1516
)

aizynthfinder/context/policy/filter_strategies.py

Lines changed: 41 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,18 @@
11
""" Module containing classes that implements different filter policy strategies
22
"""
3+
34
from __future__ import annotations
45

56
import abc
67
from typing import TYPE_CHECKING
78

89
import numpy as np
10+
from rdkit import Chem
911

1012
from aizynthfinder.chem import TemplatedRetroReaction
1113
from aizynthfinder.context.policy.utils import _make_fingerprint
1214
from aizynthfinder.utils.bonds import BrokenBonds
13-
from aizynthfinder.utils.exceptions import (
14-
PolicyException,
15-
RejectionException,
16-
)
15+
from aizynthfinder.utils.exceptions import PolicyException, RejectionException
1716
from aizynthfinder.utils.logging import logger
1817
from aizynthfinder.utils.models import load_model
1918

@@ -182,8 +181,46 @@ def apply(self, reaction: RetroReaction) -> None:
182181
)
183182

184183

184+
class FrozenSubstructureFilter(FilterStrategy):
    """
    Filter for rejecting reactions that break up substructures

    A reaction is rejected when a substructure that matches the product
    does not match any of the reactants of the selected reaction outcome.
    Substructures that do not match the product are ignored.

    :param key: the key or label
    :param config: the configuration of the tree search
    :param smarts_list: the SMARTS patterns of the sub-structures
    :raises PolicyException: if any of the SMARTS patterns cannot be parsed
    """

    _required_kwargs: List[str] = ["smarts_list"]

    def __init__(self, key: str, config: Configuration, **kwargs: Any) -> None:
        super().__init__(key, config, **kwargs)
        self._smarts_list = kwargs.get("smarts_list", [])
        self._mol_lists = []
        for smarts in self._smarts_list:
            mol = Chem.MolFromSmarts(smarts)
            # RDKit signals an unparsable pattern by returning None; fail here
            # rather than with an obscure argument error during the search.
            if mol is None:
                raise PolicyException(
                    f"Invalid SMARTS pattern given to frozen substructure filter {key}: {smarts}"
                )
            self._mol_lists.append(mol)
        self._logger.info(
            f"Loading frozen substructure filter to {key} with {len(self._mol_lists)} substructures"
        )

    def apply(self, reaction: RetroReaction) -> None:
        """
        Reject the reaction if it breaks any of the frozen substructures

        :param reaction: the reaction to check
        :raises RejectionException: if a substructure found in the product
            is not found in any of the reactants
        """
        for mol in self._mol_lists:
            # If it did not exist in the product, we cannot expect it to be present in the reactants
            if not reaction.mol.rd_mol.HasSubstructMatch(mol):
                continue

            reactants = reaction.reactants[reaction.index]
            if not any(reactant.rd_mol.HasSubstructMatch(mol) for reactant in reactants):
                raise RejectionException(
                    f"{reaction} was filtered out because of broken substructure: {Chem.MolToSmarts(mol)}"
                )
219+
220+
185221
FILTER_STRATEGY_ALIAS = {
186222
"feasibility": "QuickKerasFilter",
187223
"quick_keras_filter": "QuickKerasFilter",
188224
"reactants_count": "ReactantsCountFilter",
225+
"frozen_substructure": "FrozenSubstructureFilter",
189226
}

aizynthfinder/context/scoring/__init__.py

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,22 +3,30 @@
33

44
from aizynthfinder.context.scoring.collection import ScorerCollection
55
from aizynthfinder.context.scoring.scorers import (
6-
AverageTemplateOccurrenceScorer,
6+
SUPPORT_DISTANCES,
77
BrokenBondsScorer,
88
CombinedScorer,
9+
DeepSetScorer,
10+
RouteCostScorer,
11+
RouteSimilarityScorer,
12+
StateScorer,
13+
)
14+
from aizynthfinder.context.scoring.scorers_base import Scorer
15+
from aizynthfinder.context.scoring.scorers_mols import (
916
DeltaSyntheticComplexityScorer,
17+
FractionInSourceStockScorer,
1018
FractionInStockScorer,
11-
MaxTransformScorerer,
19+
FractionOfIntermediatesInStockScorer,
1220
NumberOfPrecursorsInStockScorer,
1321
NumberOfPrecursorsScorer,
14-
NumberOfReactionsScorer,
1522
PriceSumScorer,
16-
ReactionClassMembershipScorer,
17-
RouteCostScorer,
18-
RouteSimilarityScorer,
19-
Scorer,
20-
StateScorer,
2123
StockAvailabilityScorer,
22-
SUPPORT_DISTANCES,
24+
)
25+
from aizynthfinder.context.scoring.scorers_reactions import (
26+
AverageTemplateOccurrenceScorer,
27+
MaxTransformScorer,
28+
NumberOfReactionsScorer,
29+
ReactionClassMembershipScorer,
30+
ReactionClassRankScorer,
2331
)
2432
from aizynthfinder.utils.exceptions import ScorerException

aizynthfinder/context/scoring/collection.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,21 @@
11
""" Module containing classes used to score the reaction routes.
22
"""
3+
34
from __future__ import annotations
45

56
from typing import TYPE_CHECKING
67

78
from aizynthfinder.context.collection import ContextCollection
8-
from aizynthfinder.context.scoring.scorers import (
9-
AverageTemplateOccurrenceScorer,
9+
from aizynthfinder.context.scoring.scorers import StateScorer
10+
from aizynthfinder.context.scoring.scorers import __name__ as scorers_module
11+
from aizynthfinder.context.scoring.scorers_mols import (
1012
NumberOfPrecursorsInStockScorer,
1113
NumberOfPrecursorsScorer,
14+
)
15+
from aizynthfinder.context.scoring.scorers_reactions import (
1216
NumberOfReactionsScorer,
1317
Scorer,
14-
StateScorer,
1518
)
16-
from aizynthfinder.context.scoring.scorers import __name__ as scorers_module
1719
from aizynthfinder.reactiontree import ReactionTree
1820
from aizynthfinder.search.mcts import MctsNode
1921
from aizynthfinder.utils.exceptions import ScorerException
@@ -33,7 +35,6 @@
3335
NumberOfReactionsScorer,
3436
NumberOfPrecursorsScorer,
3537
NumberOfPrecursorsInStockScorer,
36-
AverageTemplateOccurrenceScorer,
3738
]
3839

3940

0 commit comments

Comments
 (0)