Skip to content

Commit 2271f66

Browse files
committed
#45 implemented as the rest/v2/search/synthon-expand endpoint
1 parent f566f85 commit 2271f66

File tree

3 files changed

+261
-13
lines changed

3 files changed

+261
-13
lines changed

fragnet-search/README.md

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -617,6 +617,56 @@ The response would look like this:
617617
]
618618
```
619619

620+
### Synthon expansion search
621+
622+
This search finds molecules in the neighbourhood of the query molecule that incorporate a specific "synthon". Those
623+
synthons are child fragments that can be found using the fragments search endpoint. This can be useful to generate
624+
"fragment merges", molecules that incorporate parts of 2 different molecules.
625+
626+
Parameters:
627+
628+
| Name | Type | Required | Description |
629+
|------------|-------|----------|-------------|
630+
| smiles | URL | Yes | The smiles string for the query molecule. |
631+
| synthon | Query | Yes | The smiles string for the synthon molecule. |
632+
| hacMin | Query | No | The minimum heavy atom count of the resulting molecules. |
633+
| hacMax | Query | No | The maximum heavy atom count of the resulting molecules. |
634+
| racMin | Query | No | The minimum ring atom count of the resulting molecules. |
635+
| racMax | Query | No | The maximum ring atom count of the resulting molecules. |
636+
| hops | Query | Yes | The number of graph edges to traverse from the query molecule. Typically use 3 or 4. |
637+
638+
Note that these searches can be quite slow. Start with a small number of hops, and increase if you have no results.
639+
640+
Typical execution:
641+
```
642+
curl "$FRAGNET_SERVER/fragnet-search/rest/v2/search/synthon-expand/OC(Cn1ccnn1)C1CC1?synthon=%5BXe%5Dn1ccnn1&hops=3&hacMin=14&hacMax=18"
643+
```
644+
645+
Typical results:
646+
```
647+
[
648+
"NC(CCCNCCn1ccnn1)=NO",
649+
"NN=C(NCCn1ccnn1)NC1CC1",
650+
"CCC(NCCn1ccnn1)C(C)(C)C",
651+
"CC(CC(N)=O)NCCCn1ccnn1",
652+
"O=C(O)C(=O)C(=O)NCCn1ccnn1",
653+
"NN=C(NCCCn1ccnn1)NC1CC1",
654+
"CCOC(C)(C)CNCCn1ccnn1",
655+
"CCNC(C)CNC(=O)Cn1ccnn1",
656+
"CCC(CN)NC(=O)CCn1ccnn1",
657+
"CN(C(=O)Cn1ccnn1)C(C)(C)C#N",
658+
"O=C(O)C=CC(=O)NCCn1ccnn1",
659+
"COCCNC(=NN)NCCn1ccnn1",
660+
"CC(NC(=O)CCn1ccnn1)C(N)=O",
661+
"CC(C)N(CC#N)C(=O)Cn1ccnn1",
662+
"CCC(NCCCn1ccnn1)C(N)=NO",
663+
"CN(CCC(=O)O)C(=O)CCn1ccnn1",
664+
"NCC=CCNC(=O)CCn1ccnn1",
665+
"COC(=O)C=CNCCCn1ccnn1",
666+
"NC(CCCCNCCn1ccnn1)=NO"
667+
]
668+
```
669+
620670
## Authentication
621671

622672
### Linux or Mac
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
/*
2+
* Copyright (c) 2021 Informatics Matters Ltd.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package org.squonk.fragnet.search.queries.v2;
17+
18+
import org.neo4j.driver.v1.Record;
19+
import org.neo4j.driver.v1.Session;
20+
import org.neo4j.driver.v1.StatementResult;
21+
import org.neo4j.driver.v1.Value;
22+
import org.neo4j.driver.v1.types.Node;
23+
import org.neo4j.driver.v1.types.Relationship;
24+
import org.squonk.fragnet.Constants;
25+
import org.squonk.fragnet.chem.MolStandardize;
26+
import org.squonk.fragnet.search.queries.AbstractQuery;
27+
28+
import javax.validation.constraints.NotNull;
29+
import java.util.ArrayList;
30+
import java.util.HashSet;
31+
import java.util.List;
32+
import java.util.logging.Logger;
33+
34+
import static org.neo4j.driver.v1.Values.parameters;
35+
36+
public class SynthonExpandQuery extends AbstractQuery {
37+
38+
private static final Logger LOG = Logger.getLogger(SynthonExpandQuery.class.getName());
39+
40+
public SynthonExpandQuery(Session session) {
41+
super(session);
42+
}
43+
44+
private final String SYNTHON_QUERY = "MATCH (fa:F2 {smiles: $smiles})" +
45+
"-[:FRAG*0..%s]-(:F2)" +
46+
"<-[e:FRAG]-(c:Mol) WHERE %s" +
47+
" (split(e.label, '|')[1] = $synthon OR split(e.label, '|')[4] = $synthon)" +
48+
" RETURN DISTINCT c";
49+
50+
@Override
51+
protected String getQueryTemplate() {
52+
return SYNTHON_QUERY;
53+
}
54+
55+
private String expandTemplate(@NotNull Integer hops, Integer hacMin, Integer hacMax, Integer racMin, Integer racMax) {
56+
// all params are integers so no risk of cypher injection
57+
String queryTemplate = getQueryTemplate();
58+
List<String> filters = new ArrayList<>();
59+
if (hacMin != null) {
60+
filters.add("c.hac >= " + hacMin.toString());
61+
}
62+
if (hacMax != null) {
63+
filters.add("c.hac <= " + hacMax.toString());
64+
}
65+
if (racMin != null) {
66+
filters.add("c.chac >= " + racMin.toString());
67+
}
68+
if (racMax != null) {
69+
filters.add("c.chac <= " + racMax.toString());
70+
}
71+
72+
String filter = "";
73+
if (filters.size() > 0) {
74+
filter = String.join(" AND ", filters) + " AND";
75+
}
76+
77+
String q = String.format(queryTemplate, hops, filter);
78+
return q;
79+
}
80+
81+
public List<String> execute(@NotNull String mol, @NotNull String synthon, @NotNull Integer hops,
82+
Integer hacMin, Integer hacMax, Integer racMin, Integer racMax) {
83+
84+
// standardize the mol. It must be in smiles format
85+
String stdSmiles = MolStandardize.prepareNonisoMol(mol, Constants.MIME_TYPE_SMILES);
86+
String stdSynthon = MolStandardize.prepareNonisoMol(synthon, Constants.MIME_TYPE_SMILES);
87+
88+
final String query = expandTemplate(hops, hacMin, hacMax, racMin, racMax);
89+
90+
HashSet<String> values = getSession().writeTransaction((tx) -> {
91+
LOG.info("Executing Synthon Query: " + query);
92+
StatementResult result = tx.run(query, parameters(new Object[]{"smiles", stdSmiles, "synthon", stdSynthon}));
93+
HashSet<String> smiles = new HashSet<>();
94+
while (result.hasNext()) {
95+
Record rec = result.next();
96+
Value val = rec.get(0);
97+
Node node = val.asNode();
98+
String smi = node.get("smiles").asString();
99+
smiles.add(smi);
100+
}
101+
return smiles;
102+
});
103+
104+
return new ArrayList(values);
105+
}
106+
}

fragnet-search/src/main/java/org/squonk/fragnet/service/v2/FragnetSearchRouteBuilder.java

Lines changed: 105 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -59,11 +59,31 @@ public class FragnetSearchRouteBuilder extends AbstractFragnetSearchRouteBuilder
5959
.help("Total number of molecule search requests")
6060
.register();
6161

62+
private final Counter moleculeSearchNeo4jSearchDuration = Counter.build()
63+
.name("duration_molecule_neo4j_ns")
64+
.help("Total duration of molecule Neo4j cypher query")
65+
.register();
66+
6267
private final Counter fragmentSearchRequestsTotal = Counter.build()
6368
.name("requests_fragment_total")
6469
.help("Total number of fragment search requests")
6570
.register();
6671

72+
private final Counter synthonExpandRequestsTotal = Counter.build()
73+
.name("synthon_expand_total")
74+
.help("Total number of synthon expansion requests")
75+
.register();
76+
77+
private final Counter synthonExpandNeo4jSearchDuration = Counter.build()
78+
.name("duration_synthon_expand_neo4j_ns")
79+
.help("Total duration of synthon expansion Neo4j cypher query")
80+
.register();
81+
82+
private final Counter synthonExpandMoleculesTotal = Counter.build()
83+
.name("results_synthon_expand_molecules")
84+
.help("Total number of synthon expansion search fragments")
85+
.register();
86+
6787
private final Counter neighbourhoodSearchRequestsTotal = Counter.build()
6888
.name("requests_neighbourhood_total")
6989
.help("Total number of neighbourhood search requests")
@@ -84,11 +104,6 @@ public class FragnetSearchRouteBuilder extends AbstractFragnetSearchRouteBuilder
84104
.help("Total duration of calculations")
85105
.register();
86106

87-
private final Counter moleculeSearchNeo4jSearchDuration = Counter.build()
88-
.name("duration_molecule_neo4j_ns")
89-
.help("Total duration of molecule Neo4j cypher query")
90-
.register();
91-
92107
private final Counter fragmentSearchNeo4jSearchDuration = Counter.build()
93108
.name("duration_fragment_neo4j_ns")
94109
.help("Total duration of fragment Neo4j cypher query")
@@ -242,6 +257,23 @@ public void configure() throws Exception {
242257
})
243258
.marshal().json(JsonLibrary.Jackson)
244259
.endRest()
260+
// Fetch the expansions of a molecule that involve a specific synthon
261+
// example:
262+
// curl "$FRAGNET_SERVER/fragnet-search/rest/v2/search/synthon-expand/OC(Cn1ccnn1)C1CC1?synthon=%5BXe%5Dn1ccnn1&hops=3&hacMin=14&hacMax=18"
263+
.get("synthon-expand/{smiles}").description("Find expansions of a molecule using a particular synthon")
264+
.param().name("smiles").type(RestParamType.path).description("SMILES query").endParam()
265+
.param().name("synthon").type(RestParamType.query).description("SMILES synthon").endParam()
266+
.param().name("hacMin").type(RestParamType.query).description("Heavy atom count reduction ").endParam()
267+
.param().name("hacMax").type(RestParamType.query).description("Heavy atom count increase ").endParam()
268+
.param().name("racMin").type(RestParamType.query).description("Ring atom count reduction").endParam()
269+
.param().name("racMax").type(RestParamType.query).description("Ring atom count increase").endParam()
270+
.param().name("hops").type(RestParamType.query).description("Number of edge traversals").endParam()
271+
.produces("application/json")
272+
.route()
273+
.process((Exchange exch) -> {
274+
executeSynthonExpand(exch);
275+
})
276+
.endRest()
245277
// example:
246278
// curl "$FRAGNET_SERVER/fragnet-search/rest/v2/search/neighbourhood/c1ccc%28Nc2nc3ccccc3o2%29cc1?hac=3&rac=1&hops=2&calcs=LOGP,SIM_RDKIT_TANIMOTO"
247279
.get("neighbourhood/{smiles}").description("Neighbourhood search")
@@ -285,8 +317,10 @@ public void configure() throws Exception {
285317
.get("expand/{smiles}").description("Expansion search")
286318
.bindingMode(RestBindingMode.off)
287319
.param().name("smiles").type(RestParamType.path).description("SMILES query").endParam()
288-
.param().name("hac").type(RestParamType.query).description("Heavy atom count bounds").endParam()
289-
.param().name("rac").type(RestParamType.query).description("Ring atom count bounds").endParam()
320+
.param().name("hacMin").type(RestParamType.query).description("Heavy atom count reduction ").endParam()
321+
.param().name("hacMax").type(RestParamType.query).description("Heavy atom count increase ").endParam()
322+
.param().name("racMin").type(RestParamType.query).description("Ring atom count reduction").endParam()
323+
.param().name("racMax").type(RestParamType.query).description("Ring atom count increase").endParam()
290324
.param().name("hops").type(RestParamType.query).description("Number of edge traversals").endParam()
291325
.param().name("suppliers").type(RestParamType.query).description("Suppliers to include").endParam()
292326
.param().name("pathLimit").type(RestParamType.query).description("Limit for the number of paths to return from the graph query").endParam()
@@ -299,8 +333,10 @@ public void configure() throws Exception {
299333
.endRest()
300334
.post("expand").description("Expansion search")
301335
.bindingMode(RestBindingMode.off)
302-
.param().name("hac").type(RestParamType.query).description("Heavy atom count bounds").endParam()
303-
.param().name("rac").type(RestParamType.query).description("Ring atom count bounds").endParam()
336+
.param().name("hacMin").type(RestParamType.query).description("Heavy atom count reduction ").endParam()
337+
.param().name("hacMax").type(RestParamType.query).description("Heavy atom count increase ").endParam()
338+
.param().name("racMin").type(RestParamType.query).description("Ring atom count reduction").endParam()
339+
.param().name("racMax").type(RestParamType.query).description("Ring atom count increase").endParam()
304340
.param().name("hops").type(RestParamType.query).description("Number of edge traversals").endParam()
305341
.param().name("suppliers").type(RestParamType.query).description("Suppliers to include").endParam()
306342
.param().name("pathLimit").type(RestParamType.query).description("Limit for the number of paths to return from the graph query").endParam()
@@ -315,8 +351,10 @@ public void configure() throws Exception {
315351
.post("expand-multi").description("Expansion search (multiple inputs)")
316352
.bindingMode(RestBindingMode.off)
317353
.param().name("smiles").type(RestParamType.body).description("SMILES queries").endParam()
318-
.param().name("hac").type(RestParamType.query).description("Heavy atom count bounds").endParam()
319-
.param().name("rac").type(RestParamType.query).description("Ring atom count bounds").endParam()
354+
.param().name("hacMin").type(RestParamType.query).description("Heavy atom count reduction ").endParam()
355+
.param().name("hacMax").type(RestParamType.query).description("Heavy atom count increase ").endParam()
356+
.param().name("racMin").type(RestParamType.query).description("Ring atom count reduction").endParam()
357+
.param().name("racMax").type(RestParamType.query).description("Ring atom count increase").endParam()
320358
.param().name("hops").type(RestParamType.query).description("Number of edge traversals").endParam()
321359
.param().name("suppliers").type(RestParamType.query).description("Suppliers to include").endParam()
322360
.param().name("id_prop").type(RestParamType.query).description("Name of the property for the ID (use _Name for the mol name)").endParam()
@@ -623,7 +661,7 @@ private String[] fetchSmilesOrMolfile(Message message) {
623661
} else {
624662
throw new IllegalStateException("Only support SMILES using GET or molfile using POST");
625663
}
626-
return new String[] {queryMol, mimeType};
664+
return new String[]{queryMol, mimeType};
627665
}
628666

629667
void executeMoleculeQuery(Exchange exch) {
@@ -713,7 +751,7 @@ void executeFragmentQuery(Exchange exch) {
713751
message.setHeader(Exchange.HTTP_RESPONSE_CODE, 404);
714752
} else {
715753
int size = smiles.size();
716-
fragmentSearchMoleculesTotal.inc((double)size);
754+
fragmentSearchMoleculesTotal.inc((double) size);
717755
LOG.info(size + " fragments found");
718756
message.setBody(smiles);
719757
message.setHeader(Exchange.HTTP_RESPONSE_CODE, 200);
@@ -731,6 +769,60 @@ void executeFragmentQuery(Exchange exch) {
731769
}
732770
}
733771

772+
773+
void executeSynthonExpand(Exchange exch) {
774+
LOG.info("Executing executeMoleculeQuery");
775+
776+
synthonExpandRequestsTotal.inc();
777+
778+
Message message = exch.getIn();
779+
780+
long t0 = System.nanoTime();
781+
String username = getUsername(exch);
782+
783+
String queryMol = message.getHeader("smiles", String.class);
784+
String synthon = message.getHeader("synthon", String.class);
785+
Integer hops = message.getHeader("hops", Integer.class);
786+
Integer hacMin = message.getHeader("hacMin", Integer.class);
787+
Integer hacMax = message.getHeader("hacMax", Integer.class);
788+
Integer racMin = message.getHeader("racMin", Integer.class);
789+
Integer racMax = message.getHeader("racMax", Integer.class);
790+
791+
List<String> smiles;
792+
try (Session session = graphdb.getSession()) {
793+
// execute the query
794+
SynthonExpandQuery query = new SynthonExpandQuery(session);
795+
796+
long n0 = System.nanoTime();
797+
smiles = query.execute(queryMol, synthon, hops, hacMin, hacMax, racMin, racMax);
798+
long n1 = System.nanoTime();
799+
synthonExpandNeo4jSearchDuration.inc((double) (n1 - n0));
800+
if (smiles == null || smiles.isEmpty()) {
801+
fragmentSearchMissesTotal.inc(1.0d);
802+
// throw 404
803+
message.setBody("{\"error\": \"MoleculeQuery Failed\",\"message\": \"Molecule not found\"}");
804+
message.setHeader(Exchange.HTTP_RESPONSE_CODE, 404);
805+
} else {
806+
int size = smiles.size();
807+
synthonExpandMoleculesTotal.inc((double) size);
808+
LOG.info(size + " expansions found");
809+
message.setBody(smiles);
810+
message.setHeader(Exchange.HTTP_RESPONSE_CODE, 200);
811+
}
812+
813+
} catch (
814+
Exception ex) {
815+
LOG.log(Level.SEVERE, "MoleculeQuery Failed", ex);
816+
neighbourhoodSearchErrorsTotal.inc();
817+
message.setBody("{\"error\": \"MoleculeQuery Failed\",\"message\":\"" + ex.getLocalizedMessage() + "\"}");
818+
message.setHeader(Exchange.HTTP_RESPONSE_CODE, 500);
819+
820+
long t1 = System.nanoTime();
821+
writeErrorToQueryLog(username, "MoleculeQuery", t1 - t0, ex.getLocalizedMessage());
822+
}
823+
824+
}
825+
734826
void executeNeighbourhoodQuery(Exchange exch) {
735827

736828
neighbourhoodSearchRequestsTotal.inc();

0 commit comments

Comments
 (0)