Skip to content

Commit da2515e

Browse files
authored
Merge pull request #752 from bserdar/in-mem-index
In mem index
2 parents 983bb16 + ad05e51 commit da2515e

26 files changed

+2138
-88
lines changed

crud/src/main/java/com/redhat/lightblue/assoc/ep/Assemble.java

Lines changed: 164 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import java.util.HashMap;
2323
import java.util.List;
2424
import java.util.ArrayList;
25+
import java.util.Set;
2526

2627
import java.util.stream.Collectors;
2728

@@ -38,6 +39,23 @@
3839
import com.redhat.lightblue.query.QueryExpression;
3940
import com.redhat.lightblue.query.NaryLogicalOperator;
4041

42+
import com.redhat.lightblue.metadata.CompositeMetadata;
43+
44+
import com.redhat.lightblue.assoc.BindQuery;
45+
import com.redhat.lightblue.assoc.QueryFieldInfo;
46+
import com.redhat.lightblue.assoc.AnalyzeQuery;
47+
48+
import com.redhat.lightblue.mindex.MemDocIndex;
49+
import com.redhat.lightblue.mindex.GetIndexKeySpec;
50+
import com.redhat.lightblue.mindex.GetIndexLookupSpec;
51+
import com.redhat.lightblue.mindex.KeySpec;
52+
import com.redhat.lightblue.mindex.LookupSpec;
53+
54+
import com.redhat.lightblue.eval.QueryEvaluator;
55+
56+
import com.redhat.lightblue.util.Path;
57+
import com.redhat.lightblue.util.JsonDoc;
58+
4159
/**
4260
* There are two sides to an Assemble step: Assemble gets results from the
4361
* source, and for each of those documents, it runs the associated queries on
@@ -48,6 +66,13 @@ public class Assemble extends Step<ResultDocument> {
4866

4967
private static final Logger LOGGER = LoggerFactory.getLogger(Assemble.class);
5068

69+
/**
70+
* This is used for testing. This is the threshold for the number
71+
* of slots above which we'll use the memory indexing. If we have
72+
* slots fewer than this, then we don't use in-memory index.
73+
*/
74+
public static int MEM_INDEX_THRESHOLD=16;
75+
5176
private final ExecutionBlock[] destinationBlocks;
5277
private final Source<ResultDocument> source;
5378
private Map<ExecutionBlock, Assemble> destinations;
@@ -95,6 +120,9 @@ public StepResult<ResultDocument> getResults(ExecutionContext ctx) {
95120
return StepResult.EMPTY;
96121
}
97122

123+
// Assemble results: retrieve results from associated
124+
// execution blocks, and insert them into sourceResults
125+
// documents
98126
List<Future> assemblers = new ArrayList<>();
99127
for (Map.Entry<ExecutionBlock, Assemble> destination : destinations.entrySet()) {
100128
AssociationQuery aq = destination.getKey().getAssociationQueryForEdge(block);
@@ -141,7 +169,7 @@ public DocAndQ(ResultDocument doc) {
141169
}
142170
}
143171

144-
private static class BatchAssembler {
172+
private class BatchAssembler {
145173
private List<DocAndQ> docs = new ArrayList<>();
146174
private List<QueryExpression> queries = new ArrayList<>();
147175
private final int batchSize;
@@ -181,8 +209,27 @@ public void commit() {
181209
combinedQuery = null;
182210
}
183211
List<ResultDocument> destResults = dest.getResultList(combinedQuery, ctx);
212+
int numSlots=0;
213+
for(ResultDocument doc:destResults) {
214+
List<ChildSlot> slots=doc.getSlots().get(aq.getReference());
215+
if(slots!=null)
216+
numSlots+=slots.size();
217+
}
218+
// Try to build an index from results
219+
MemDocIndex docIndex=null;
220+
if(aq.getQuery()!=null&&numSlots>MEM_INDEX_THRESHOLD) {
221+
KeySpec keySpec=aq.getIndexKeySpec();
222+
LOGGER.debug("In-memory index key spec:{}",keySpec);
223+
if(keySpec!=null) {
224+
// There is a key spec, meaning we can index the docs
225+
docIndex=new MemDocIndex(keySpec);
226+
for(ResultDocument child:destResults) {
227+
docIndex.add(child.getDoc());
228+
}
229+
}
230+
}
184231
for (DocAndQ parentDocAndQ : docs) {
185-
Searches.associateDocs(parentDocAndQ.doc, destResults, aq);
232+
associateDocs(parentDocAndQ.doc, destResults, aq,docIndex);
186233
}
187234
}
188235
docs = new ArrayList<>();
@@ -207,6 +254,121 @@ private JsonNode toJson(Step.ToJsonCb<Step> scb,Step.ToJsonCb<ExecutionBlock> bc
207254
}
208255
return o;
209256
}
257+
258+
/**
259+
* Associates child documents obtained from 'aq' to all the slots in the
260+
* parent document
261+
*/
262+
public void associateDocs(ResultDocument parentDoc,
263+
List<ResultDocument> childDocs,
264+
AssociationQuery aq,
265+
MemDocIndex childIndex) {
266+
if(!childDocs.isEmpty()) {
267+
CompositeMetadata childMetadata = childDocs.get(0).getBlock().getMetadata();
268+
List<ChildSlot> slots = parentDoc.getSlots().get(aq.getReference());
269+
for (ChildSlot slot : slots) {
270+
BindQuery binders = parentDoc.getBindersForSlot(slot, aq);
271+
// No binders means all child docs will be added to the parent
272+
// aq.always==true means query is always true, so add everything to the parent
273+
if (binders.getBindings().isEmpty()||(aq.getAlways()!=null && aq.getAlways()) ) {
274+
associateAllDocs(parentDoc,childDocs,slot.getSlotFieldName());
275+
} else if(aq.getAlways()==null||!aq.getAlways()) { // If query is not always false
276+
if(childIndex==null)
277+
associateDocs(childMetadata,parentDoc,slot.getSlotFieldName(),binders,childDocs,aq.getQuery());
278+
else
279+
associateDocsWithIndex(childMetadata,parentDoc,slot.getSlotFieldName(),binders,childDocs,aq,childIndex);
280+
}
281+
}
282+
}
283+
}
284+
285+
private static void associateAllDocs(ResultDocument parentDoc,List<ResultDocument> childDocs,Path fieldName) {
286+
ArrayNode destNode=ensureDestNodeExists(parentDoc,null,fieldName);
287+
for (ResultDocument d : childDocs) {
288+
destNode.add(d.getDoc().getRoot());
289+
}
290+
}
291+
292+
private static ArrayNode ensureDestNodeExists(ResultDocument doc,ArrayNode destNode,Path fieldName) {
293+
if (destNode == null) {
294+
destNode = JsonNodeFactory.instance.arrayNode();
295+
doc.getDoc().modify(fieldName, destNode, true);
296+
}
297+
return destNode;
298+
}
299+
300+
/**
301+
* Associate child documents with their parents. The association query is
302+
* for the association from the child to the parent, so caller must flip it
303+
* before sending it in if necessary. The caller also make sure parentDocs
304+
* is a unique stream.
305+
*
306+
* @param parentDoc The parent document
307+
* @param parentSlot The slot in parent docuemnt to which the results will
308+
* be attached
309+
* @param childDocs The child documents
310+
* @param aq The association query from parent to child. This may not be the
311+
* same association query between the blocks. If the child block is before
312+
* the parent block, a new aq must be constructed for the association from
313+
* the parent to the child
314+
*/
315+
public static void associateDocs(CompositeMetadata childMetadata,
316+
ResultDocument parentDoc,
317+
Path destFieldName,
318+
BindQuery binders,
319+
List<ResultDocument> childDocs,
320+
QueryExpression query) {
321+
LOGGER.debug("Associating docs");
322+
QueryExpression boundQuery = binders.iterate(query);
323+
LOGGER.debug("Association query:{}", boundQuery);
324+
QueryEvaluator qeval = QueryEvaluator.getInstance(boundQuery, childMetadata);
325+
ArrayNode destNode=null;
326+
for (ResultDocument childDoc : childDocs) {
327+
if (qeval.evaluate(childDoc.getDoc()).getResult()) {
328+
destNode=ensureDestNodeExists(parentDoc,destNode,destFieldName);
329+
destNode.add(childDoc.getDoc().getRoot());
330+
}
331+
}
332+
}
333+
334+
private void associateDocsWithIndex(CompositeMetadata childMetadata,
335+
ResultDocument parentDoc,
336+
Path destFieldName,
337+
BindQuery binders,
338+
List<ResultDocument> childDocs,
339+
AssociationQuery aq,
340+
MemDocIndex childIndex) {
341+
LOGGER.debug("Associating docs using index");
342+
QueryExpression boundQuery = binders.iterate(aq.getQuery());
343+
LOGGER.debug("Association query:{}", boundQuery);
344+
QueryEvaluator qeval = QueryEvaluator.getInstance(boundQuery, childMetadata);
345+
AnalyzeQuery analyzer=new AnalyzeQuery(block.rootMd,aq.getReference());
346+
analyzer.iterate(boundQuery);
347+
List<QueryFieldInfo> qfi=analyzer.getFieldInfo();
348+
GetIndexLookupSpec gils=new GetIndexLookupSpec(qfi);
349+
LookupSpec ls=gils.iterate(boundQuery);
350+
LOGGER.debug("Lookup spec:"+ls);
351+
List<ResultDocument> docs=reorder(childDocs,childIndex.find(ls));
352+
ArrayNode destNode=null;
353+
for (ResultDocument childDoc : childDocs) {
354+
if (qeval.evaluate(childDoc.getDoc()).getResult()) {
355+
destNode=ensureDestNodeExists(parentDoc,destNode,destFieldName);
356+
destNode.add(childDoc.getDoc().getRoot());
357+
}
358+
}
359+
}
360+
361+
/**
362+
* Returns the documents in foundList in the order of originalList
363+
*/
364+
private static List<ResultDocument> reorder(List<ResultDocument> originalList,Set<JsonDoc> foundList) {
365+
List<ResultDocument> ret=new ArrayList<>(foundList.size());
366+
for(ResultDocument d:originalList) {
367+
if(foundList.contains(d.getDoc()))
368+
ret.add(d);
369+
}
370+
return ret;
371+
}
210372

211373
@Override
212374
public JsonNode toJson() {

crud/src/main/java/com/redhat/lightblue/assoc/ep/AssociationQuery.java

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@
99
import com.redhat.lightblue.assoc.BoundObject;
1010
import com.redhat.lightblue.assoc.Conjunct;
1111
import com.redhat.lightblue.assoc.RewriteQuery;
12+
import com.redhat.lightblue.assoc.QueryFieldInfo;
13+
import com.redhat.lightblue.assoc.AnalyzeQuery;
14+
15+
import com.redhat.lightblue.mindex.GetIndexKeySpec;
16+
import com.redhat.lightblue.mindex.KeySpec;
1217

1318
import com.redhat.lightblue.query.QueryExpression;
1419

@@ -22,6 +27,9 @@ public class AssociationQuery {
2227
private final ResolvedReferenceField reference;
2328
// If non-null, query is either always true or always false
2429
private final Boolean always;
30+
private final List<QueryFieldInfo> qfi;
31+
// In-memory index key spec
32+
private final KeySpec keySpec;
2533

2634
public AssociationQuery(CompositeMetadata root,
2735
CompositeMetadata currentEntity,
@@ -32,6 +40,7 @@ public AssociationQuery(CompositeMetadata root,
3240
List<QueryExpression> queries = new ArrayList<>(conjuncts.size());
3341
int numTrue=0;
3442
int numFalse=0;
43+
qfi=new ArrayList<>();
3544
for (Conjunct c : conjuncts) {
3645
RewriteQuery.RewriteQueryResult result = rewriter.rewriteQuery(c.getClause(), c.getFieldInfo());
3746
if(result.query instanceof RewriteQuery.TruePH) {
@@ -41,7 +50,11 @@ public AssociationQuery(CompositeMetadata root,
4150
numFalse++;
4251
} else {
4352
queries.add(result.query);
44-
}
53+
// Analyze the query as if it is a root entity query
54+
AnalyzeQuery aq=new AnalyzeQuery(currentEntity,null);
55+
aq.iterate(result.query);
56+
qfi.addAll(aq.getFieldInfo());
57+
}
4558
fieldBindings.addAll(result.bindings);
4659
}
4760
if(queries.isEmpty()) {
@@ -53,12 +66,23 @@ public AssociationQuery(CompositeMetadata root,
5366
} else {
5467
always=null;
5568
}
69+
keySpec=null;
5670
} else {
5771
query = Searches.and(queries);
72+
GetIndexKeySpec giks=new GetIndexKeySpec(qfi);
73+
keySpec=giks.iterate(query);
5874
always=null;
5975
}
6076
}
6177

78+
/**
 * Returns the in-memory index key spec computed in the constructor from
 * the association query. This is null when the association query has no
 * query clauses; callers should check for null before building an index.
 */
public KeySpec getIndexKeySpec() {
79+
return keySpec;
80+
}
81+
82+
/**
 * Returns the query field information collected in the constructor while
 * analyzing the rewritten conjunct queries. The internal list itself is
 * returned, not a copy.
 */
public List<QueryFieldInfo> getQueryFieldInfo() {
83+
return qfi;
84+
}
85+
6286
public Boolean getAlways() {
6387
return always;
6488
}

crud/src/main/java/com/redhat/lightblue/assoc/ep/ExecutionBlock.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
*/
2424
public class ExecutionBlock {
2525

26-
private final CompositeMetadata rootMd;
26+
public final CompositeMetadata rootMd;
2727

2828
/**
2929
* The query plan node corresponding to this execution block

crud/src/main/java/com/redhat/lightblue/assoc/ep/Searches.java

Lines changed: 0 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,6 @@
3434
import com.fasterxml.jackson.databind.node.JsonNodeFactory;
3535

3636
import com.redhat.lightblue.assoc.BindQuery;
37-
38-
import com.redhat.lightblue.eval.QueryEvaluator;
39-
4037
import com.redhat.lightblue.query.QueryExpression;
4138
import com.redhat.lightblue.query.NaryLogicalExpression;
4239
import com.redhat.lightblue.query.NaryLogicalOperator;
@@ -116,69 +113,6 @@ public static List<QueryExpression> writeQueriesForJoinTuple(JoinTuple tuple, Ex
116113
return ret;
117114
}
118115

119-
/**
120-
* Associates child documents obtained from 'aq' to all the slots in the
121-
* parent document
122-
*/
123-
public static void associateDocs(ResultDocument parentDoc,
124-
List<ResultDocument> childDocs,
125-
AssociationQuery aq) {
126-
List<ChildSlot> slots = parentDoc.getSlots().get(aq.getReference());
127-
for (ChildSlot slot : slots) {
128-
associateDocs(parentDoc, slot, childDocs, aq);
129-
}
130-
}
131-
132-
/**
133-
* Associate child documents with their parents. The association query is
134-
* for the association from the child to the parent, so caller must flip it
135-
* before sending it in if necessary. The caller also make sure parentDocs
136-
* is a unique stream.
137-
*
138-
* @param parentDoc The parent document
139-
* @param parentSlot The slot in parent docuemnt to which the results will
140-
* be attached
141-
* @param childDocs The child documents
142-
* @param aq The association query from parent to child. This may not be the
143-
* same association query between the blocks. If the child block is before
144-
* the parent block, a new aq must be constructed for the association from
145-
* the parent to the child
146-
*/
147-
public static void associateDocs(ResultDocument parentDoc,
148-
ChildSlot parentSlot,
149-
List<ResultDocument> childDocs,
150-
AssociationQuery aq) {
151-
if (!childDocs.isEmpty()) {
152-
LOGGER.debug("Associating docs");
153-
ExecutionBlock childBlock = childDocs.get(0).getBlock();
154-
ArrayNode destNode = (ArrayNode) parentDoc.getDoc().get(parentSlot.getSlotFieldName());
155-
BindQuery binders = parentDoc.getBindersForSlot(parentSlot, aq);
156-
// No binders means all child docs will be added to the parent
157-
if (binders.getBindings().isEmpty()) {
158-
if (destNode == null) {
159-
destNode = JsonNodeFactory.instance.arrayNode();
160-
parentDoc.getDoc().modify(parentSlot.getSlotFieldName(), destNode, true);
161-
}
162-
for (ResultDocument d : childDocs) {
163-
destNode.add(d.getDoc().getRoot());
164-
}
165-
} else {
166-
QueryExpression boundQuery = binders.iterate(aq.getQuery());
167-
LOGGER.debug("Association query:{}", boundQuery);
168-
QueryEvaluator qeval = QueryEvaluator.getInstance(boundQuery, childBlock.getMetadata());
169-
for (ResultDocument childDoc : childDocs) {
170-
if (qeval.evaluate(childDoc.getDoc()).getResult()) {
171-
if (destNode == null) {
172-
destNode = JsonNodeFactory.instance.arrayNode();
173-
parentDoc.getDoc().modify(parentSlot.getSlotFieldName(), destNode, true);
174-
}
175-
destNode.add(childDoc.getDoc().getRoot());
176-
}
177-
}
178-
}
179-
}
180-
}
181-
182116
/**
183117
* Combines queries with AND. Queries can be null, but at least one of them
184118
* must be non-null

0 commit comments

Comments
 (0)