Skip to content

Commit 19e40d8

Browse files
committed
#45 improvements following discussion with Ruben
1 parent 1e48727 commit 19e40d8

File tree

3 files changed

+84
-36
lines changed

3 files changed

+84
-36
lines changed

fragnet-search/README.md

Lines changed: 55 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -629,41 +629,73 @@ Parameters:
629629
|------------|-------|----------|-------------|
630630
| smiles | URL | Yes | The smiles string for the query molecule. |
631631
| synthon | Query | Yes | The smiles string for the synthon molecule. |
632+
| hops | Query | Yes | The number of graph edges to traverse from the query molecule. Typically use 3 or 4. |
632633
| hacMin | Query | No | The minimum heavy atom count of the resulting molecules. |
633634
| hacMax | Query | No | The maximum heavy atom count of the resulting molecules. |
634635
| racMin | Query | No | The minimum ring atom count of the resulting molecules. |
635636
| racMax | Query | No | The maximum ring atom count of the resulting molecules. |
636-
| hops | Query | Yes | The number of graph edges to traverse from the query molecule. Typically use 3 or 4. |
637+
| limit | Query | No | The maximum number of results to fetch. Default is 1000; values over 5000 are not allowed. |
637638

638-
Note that these searches can be quite slow. Start with a small number of hops, and increase if you have no results.
639+
Be careful with the query parameters as using a large number of hops may result in queries that take a long time
640+
to execute and return a large number of results. Start with a small number of hops, and increase if you get no results.
641+
To provide some control over this there is a limit to the number of results that will be returned.
642+
By default this is set to 1000. It can be adjusted using the `limit` query parameter, but the value cannot be set
643+
to a value greater than 5000.
639644

640-
Typcial execution:
645+
Typical execution:
641646
```
642647
curl "$FRAGNET_SERVER/fragnet-search/rest/v2/search/synthon-expand/OC(Cn1ccnn1)C1CC1?synthon=%5BXe%5Dn1ccnn1&hops=3&hacMin=14&hacMax=18"
643648
```
644649

645-
Typcial results:
650+
The results are in JSON format and comprise an array of molecule info objects (the same data object as is returned by the
651+
molecule search endpoint). For example:
646652
```
647653
[
648-
"NC(CCCNCCn1ccnn1)=NO",
649-
"NN=C(NCCn1ccnn1)NC1CC1",
650-
"CCC(NCCn1ccnn1)C(C)(C)C",
651-
"CC(CC(N)=O)NCCCn1ccnn1",
652-
"O=C(O)C(=O)C(=O)NCCn1ccnn1",
653-
"NN=C(NCCCn1ccnn1)NC1CC1",
654-
"CCOC(C)(C)CNCCn1ccnn1",
655-
"CCNC(C)CNC(=O)Cn1ccnn1",
656-
"CCC(CN)NC(=O)CCn1ccnn1",
657-
"CN(C(=O)Cn1ccnn1)C(C)(C)C#N",
658-
"O=C(O)C=CC(=O)NCCn1ccnn1",
659-
"COCCNC(=NN)NCCn1ccnn1",
660-
"CC(NC(=O)CCn1ccnn1)C(N)=O",
661-
"CC(C)N(CC#N)C(=O)Cn1ccnn1",
662-
"CCC(NCCCn1ccnn1)C(N)=NO",
663-
"CN(CCC(=O)O)C(=O)CCn1ccnn1",
664-
"NCC=CCNC(=O)CCn1ccnn1",
665-
"COC(=O)C=CNCCCn1ccnn1",
666-
"NC(CCCCNCCn1ccnn1)=NO"
654+
{
655+
"id": 3573384,
656+
"smiles": "NC(CCCNCCn1ccnn1)=NO",
657+
"molType": "NET_FRAG",
658+
"labels": [
659+
"CanSmi",
660+
"Mol",
661+
"F2",
662+
"V_CS_BB"
663+
],
664+
"props": {
665+
"inchik": "LMBOOOPIIROSSY-JSGPKCTENA-N",
666+
"osmiles": "NC(CCCNCCC1CCCC1)NO",
667+
"chac": 5,
668+
"neighbours": 2,
669+
"hac": 15,
670+
"inchis": "InChI=1/C8H16N6O/c9-8(12-15)2-1-3-10-4-6-14-7-5-11-13-14/h5,7,10,15H,1-4,6H2,(H2,9,12)/f/h9H2MA",
671+
"cmpd_ids": [
672+
"CHEMSPACE-BB:CSC033999226"
673+
]
674+
}
675+
},
676+
{
677+
"id": 3287068,
678+
"smiles": "OCCc1ccc(-n2ccnn2)cc1",
679+
"molType": "NET_FRAG",
680+
"labels": [
681+
"CanSmi",
682+
"Mol",
683+
"F2",
684+
"V_CS_BB"
685+
],
686+
"props": {
687+
"inchik": "PTDSMQOYRRMILK-UHFFFAOYNA-N",
688+
"osmiles": "OCCC1CCC(C2CCCC2)CC1",
689+
"chac": 11,
690+
"neighbours": 4,
691+
"hac": 14,
692+
"inchis": "InChI=1/C10H11N3O/c14-8-5-9-1-3-10(4-2-9)13-7-6-11-12-13/h1-4,6-7,14H,5,8H2MA",
693+
"cmpd_ids": [
694+
"CHEMSPACE-BB:CSC021068907"
695+
]
696+
}
697+
},
698+
...
667699
]
668700
```
669701

fragnet-search/src/main/java/org/squonk/fragnet/search/queries/v2/SynthonExpandQuery.java

Lines changed: 25 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,13 @@
2323
import org.neo4j.driver.v1.types.Relationship;
2424
import org.squonk.fragnet.Constants;
2525
import org.squonk.fragnet.chem.MolStandardize;
26+
import org.squonk.fragnet.search.model.v2.FragmentGraph;
27+
import org.squonk.fragnet.search.model.v2.MoleculeNode;
2628
import org.squonk.fragnet.search.queries.AbstractQuery;
2729

2830
import javax.validation.constraints.NotNull;
2931
import java.util.ArrayList;
32+
import java.util.HashMap;
3033
import java.util.HashSet;
3134
import java.util.List;
3235
import java.util.logging.Logger;
@@ -45,7 +48,7 @@ public SynthonExpandQuery(Session session) {
4548
"-[:FRAG*0..%s]-(:F2)" +
4649
"<-[e:FRAG]-(c:Mol) WHERE %s" +
4750
" (split(e.label, '|')[1] = $synthon OR split(e.label, '|')[4] = $synthon)" +
48-
" RETURN DISTINCT c";
51+
" RETURN DISTINCT c LIMIT $limit";
4952

5053
@Override
5154
protected String getQueryTemplate() {
@@ -79,28 +82,39 @@ private String expandTemplate(@NotNull Integer hops, Integer hacMin, Integer hac
7982
}
8083

8184
public List<String> execute(@NotNull String mol, @NotNull String synthon, @NotNull Integer hops,
82-
Integer hacMin, Integer hacMax, Integer racMin, Integer racMax) {
85+
Integer hacMin, Integer hacMax, Integer racMin, Integer racMax, Integer limit) {
8386

84-
// standardize the mol. It must be in smiles format
85-
String stdSmiles = MolStandardize.prepareNonisoMol(mol, Constants.MIME_TYPE_SMILES);
86-
String stdSynthon = MolStandardize.prepareNonisoMol(synthon, Constants.MIME_TYPE_SMILES);
87+
if (limit == null) {
88+
limit = 1000;
89+
}
90+
if (limit > 5000) {
91+
throw new IllegalArgumentException("Limits over 5000 are not supported");
92+
}
8793

94+
// standardize the mol. It must be in smiles format
95+
final String stdSmiles = MolStandardize.prepareNonisoMol(mol, Constants.MIME_TYPE_SMILES);
96+
final String stdSynthon = MolStandardize.prepareNonisoMol(synthon, Constants.MIME_TYPE_SMILES);
97+
final int limitf = limit;
8898
final String query = expandTemplate(hops, hacMin, hacMax, racMin, racMax);
8999

90-
HashSet<String> values = getSession().writeTransaction((tx) -> {
100+
HashMap<String, MoleculeNode> values = getSession().writeTransaction((tx) -> {
91101
LOG.info("Executing Synthon Query: " + query);
92-
StatementResult result = tx.run(query, parameters(new Object[]{"smiles", stdSmiles, "synthon", stdSynthon}));
93-
HashSet<String> smiles = new HashSet<>();
102+
StatementResult result = tx.run(query, parameters(new Object[]{
103+
"smiles", stdSmiles, "synthon", stdSynthon, "limit", limitf}));
104+
HashMap<String, MoleculeNode> molNodes = new HashMap<>();
94105
while (result.hasNext()) {
95106
Record rec = result.next();
96107
Value val = rec.get(0);
97108
Node node = val.asNode();
98109
String smi = node.get("smiles").asString();
99-
smiles.add(smi);
110+
if (!molNodes.containsKey(smi)) {
111+
molNodes.put(smi, FragmentGraph.generateMoleculeNode(node));
112+
}
113+
100114
}
101-
return smiles;
115+
return molNodes;
102116
});
103117

104-
return new ArrayList(values);
118+
return new ArrayList(values.values());
105119
}
106120
}

fragnet-search/src/main/java/org/squonk/fragnet/service/v2/FragnetSearchRouteBuilder.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -263,11 +263,12 @@ public void configure() throws Exception {
263263
.get("synthon-expand/{smiles}").description("Find expansions of a molecule using a particular synthon")
264264
.param().name("smiles").type(RestParamType.path).description("SMILES query").endParam()
265265
.param().name("synthon").type(RestParamType.query).description("SMILES synthon").endParam()
266+
.param().name("hops").type(RestParamType.query).description("Number of edge traversals").endParam()
266267
.param().name("hacMin").type(RestParamType.query).description("Heavy atom count reduction ").endParam()
267268
.param().name("hacMax").type(RestParamType.query).description("Heavy atom count increase ").endParam()
268269
.param().name("racMin").type(RestParamType.query).description("Ring atom count reduction").endParam()
269270
.param().name("racMax").type(RestParamType.query).description("Ring atom count increase").endParam()
270-
.param().name("hops").type(RestParamType.query).description("Number of edge traversals").endParam()
271+
.param().name("limit").type(RestParamType.query).description("Max number of results to be returned").endParam()
271272
.produces("application/json")
272273
.route()
273274
.process((Exchange exch) -> {
@@ -787,14 +788,15 @@ void executeSynthonExpand(Exchange exch) {
787788
Integer hacMax = message.getHeader("hacMax", Integer.class);
788789
Integer racMin = message.getHeader("racMin", Integer.class);
789790
Integer racMax = message.getHeader("racMax", Integer.class);
791+
Integer limit = message.getHeader("limit", Integer.class);
790792

791793
List<String> smiles;
792794
try (Session session = graphdb.getSession()) {
793795
// execute the query
794796
SynthonExpandQuery query = new SynthonExpandQuery(session);
795797

796798
long n0 = System.nanoTime();
797-
smiles = query.execute(queryMol, synthon, hops, hacMin, hacMax, racMin, racMax);
799+
smiles = query.execute(queryMol, synthon, hops, hacMin, hacMax, racMin, racMax, limit);
798800
long n1 = System.nanoTime();
799801
synthonExpandNeo4jSearchDuration.inc((double) (n1 - n0));
800802
if (smiles == null || smiles.isEmpty()) {

0 commit comments

Comments
 (0)