Skip to content

Commit 4af5183

Browse files
Speedup MultiTermsAggregator (#123220)
Creating (and, more importantly, eventually resizing) a fresh stream output makes up a large chunk of the runtime of this aggregation. Also, recursively calling an inline consumer makes this logic even more confusing and adds overhead, since escape analysis will not be able to remove the allocation of the consumer. Fix: call a method recursively instead and reuse a single output.
1 parent 67293ba commit 4af5183

File tree

1 file changed

+27
-39
lines changed

1 file changed

+27
-39
lines changed

x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/multiterms/MultiTermsAggregator.java

Lines changed: 27 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
import org.elasticsearch.common.util.LongArray;
2525
import org.elasticsearch.common.util.ObjectArray;
2626
import org.elasticsearch.common.util.ObjectArrayPriorityQueue;
27-
import org.elasticsearch.core.CheckedConsumer;
2827
import org.elasticsearch.core.Releasables;
2928
import org.elasticsearch.index.fielddata.FieldData;
3029
import org.elasticsearch.index.fielddata.NumericDoubleValues;
@@ -168,20 +167,6 @@ static List<List<Object>> docTerms(List<TermValues> termValuesList, int doc) thr
168167
return terms;
169168
}
170169

171-
/**
172-
* Packs a list of terms into ByteRef so we can use BytesKeyedBucketOrds
173-
*
174-
* TODO: this is a temporary solution, we should replace it with a more optimal mechanism instead of relying on BytesKeyedBucketOrds
175-
*/
176-
static BytesRef packKey(List<Object> terms) {
177-
try (BytesStreamOutput output = new BytesStreamOutput()) {
178-
output.writeCollection(terms, StreamOutput::writeGenericValue);
179-
return output.bytes().toBytesRef();
180-
} catch (IOException ex) {
181-
throw ExceptionsHelper.convertToRuntime(ex);
182-
}
183-
}
184-
185170
/**
186171
* Unpacks ByteRef back into a list of terms
187172
*
@@ -198,36 +183,39 @@ static List<Object> unpackTerms(BytesRef termsBytes) {
198183
@Override
199184
public LeafBucketCollector getLeafCollector(AggregationExecutionContext aggCtx, LeafBucketCollector sub) throws IOException {
200185
List<TermValues> termValuesList = termValuesList(aggCtx.getLeafReaderContext());
201-
186+
BytesStreamOutput output = new BytesStreamOutput();
202187
return new LeafBucketCollectorBase(sub, values) {
203188
@Override
204189
public void collect(int doc, long owningBucketOrd) throws IOException {
205190
List<List<Object>> terms = docTerms(termValuesList, doc);
206191
if (terms != null) {
207-
List<Object> path = new ArrayList<>(terms.size());
208-
new CheckedConsumer<Integer, IOException>() {
209-
@Override
210-
public void accept(Integer start) throws IOException {
211-
for (Object term : terms.get(start)) {
212-
if (start == path.size()) {
213-
path.add(term);
214-
} else {
215-
path.set(start, term);
216-
}
217-
if (start < terms.size() - 1) {
218-
this.accept(start + 1);
219-
} else {
220-
long bucketOrd = bucketOrds.add(owningBucketOrd, packKey(path));
221-
if (bucketOrd < 0) { // already seen
222-
bucketOrd = -1 - bucketOrd;
223-
collectExistingBucket(sub, doc, bucketOrd);
224-
} else {
225-
collectBucket(sub, doc, bucketOrd);
226-
}
227-
}
228-
}
192+
doCollect(terms, new ArrayList<>(terms.size()), owningBucketOrd, doc, 0);
193+
}
194+
}
195+
196+
private void doCollect(List<List<Object>> terms, List<Object> path, long owningBucketOrd, int doc, int start)
197+
throws IOException {
198+
for (Object term : terms.get(start)) {
199+
if (start == path.size()) {
200+
path.add(term);
201+
} else {
202+
path.set(start, term);
203+
}
204+
if (start < terms.size() - 1) {
205+
doCollect(terms, path, owningBucketOrd, doc, start + 1);
206+
} else {
207+
// TODO: this is a temporary solution, we should replace it with a more optimal mechanism instead of relying on
208+
// BytesKeyedBucketOrds
209+
output.seek(0L);
210+
output.writeCollection(path, StreamOutput::writeGenericValue);
211+
long bucketOrd = bucketOrds.add(owningBucketOrd, output.bytes().toBytesRef());
212+
if (bucketOrd < 0) { // already seen
213+
bucketOrd = -1 - bucketOrd;
214+
collectExistingBucket(sub, doc, bucketOrd);
215+
} else {
216+
collectBucket(sub, doc, bucketOrd);
229217
}
230-
}.accept(0);
218+
}
231219
}
232220
}
233221
};

0 commit comments

Comments (0)