
Commit fd7efa9

Merge branch 'main' into pkar/resolve-index-force-reconn
2 parents 2395fc4 + 256a390 commit fd7efa9

File tree: 913 files changed (+34615 −11211 lines)


.coderabbit.yml

Lines changed: 1 addition & 0 deletions
@@ -8,3 +8,4 @@ reviews:
   labels:
     - "Team:Delivery"
     - "Team:Search - Inference"
+    - "Team:Core/Infra"

AGENTS.md

Lines changed: 20 additions & 14 deletions
@@ -99,19 +99,25 @@ If you encounter any of the following methods, you must go and read their javadoc
 ## Backwards compatibility
 - For changes to a `Writeable` implementation (`writeTo` and constructor from `StreamInput`), add a new `public static final <UNIQUE_DESCRIPTIVE_NAME> = TransportVersion.fromName("<unique_descriptive_name>")` and use it in the new code paths. Confirm the backport branches and then generate a new version file with `./gradlew generateTransportVersion`.
 
-### CI failure triage with Buildkite and Gradle Enterprise build scans
-- Prefer Gradle Enterprise build scans (`https://gradle-enterprise.elastic.co/s/<id>`) over raw logs for root-cause analysis when available.
-- If given a Buildkite link, use the Buildkite MCP server first.
-- First call `buildkite-list_annotations` and inspect `context=gradle-build-scans-failed` (failed jobs only). If needed, inspect `context=gradle-build-scans` (all jobs).
-- If annotations are incomplete, call `buildkite-get_build` and map failed job IDs to `meta_data` keys: `build-scan-<job_id>` and `build-scan-id-<job_id>`.
-- Buildkite UI fallback (when MCP is unavailable): Build page -> `Jobs` -> `Failures`, then open/copy the Gradle Enterprise build scan links shown per failed job.
-- If given a Gradle Enterprise build scan link directly, start from that link instead of searching Buildkite logs first.
-- If `dvcli` is available, use it to extract failed tasks, exact failed tests, primary assertion/error, and reproduction details.
-- If `dvcli` is unavailable, do not block: continue with Buildkite MCP logs (`buildkite-search_logs`, `buildkite-tail_logs`, `buildkite-read_logs`), artifacts, and annotations.
-- If either tool is missing, suggest installation to the user for faster future triage:
-  - `dvcli` / `develocity-cli-client`: `https://github.com/breskeby/develocity-cli-client`
-  - Buildkite MCP setup for AI tools: `https://buildkite.com/docs/apis/mcp-server/remote/configuring-ai-tools`
-- For Buildkite URLs that include `#<job_id>`, prioritize that specific job and resolve its corresponding `build-scan-<job_id>` entry.
-- In reports, list exact failed tests first, then failed tasks and related build scan URLs.
+## CI failure triage with Buildkite and Gradle Enterprise build scans
+
+Prefer Gradle Enterprise build scans (`https://gradle-enterprise.elastic.co/s/<id>`) over raw logs for root-cause analysis when available.
+
+**Primary tool: `dvcli`.** Use it for root-cause analysis on Gradle Enterprise build scans (`https://gradle-enterprise.elastic.co/s/<id>`) whenever possible.
+It extracts failed tasks, exact failed tests, primary assertion/error, and reproduction details without requiring the agent to authenticate.
+
+1. If given a Gradle Enterprise build scan link directly, start from that link instead of searching Buildkite logs first.
+2. If given a Buildkite link, use the Buildkite MCP server to retrieve Gradle build scans.
+   - For Buildkite URLs that include `#<job_id>`, prioritize that specific job and resolve its corresponding `build-scan-<job_id>` entry.
+   - Otherwise call `buildkite-list_annotations` and inspect `context=gradle-build-scans-failed` (failed jobs only). If needed, inspect `context=gradle-build-scans` (all jobs).
+   - If annotations are incomplete, call `buildkite-get_build` and map failed job IDs to `meta_data` keys: `build-scan-<job_id>` and `build-scan-id-<job_id>`.
+   - Buildkite UI fallback (when MCP is unavailable): Build page -> `Jobs` -> `Failures`, then open/copy the Gradle Enterprise build scan links shown per failed job.
+3. Run `dvcli` against the resolved build scan link to extract failure details.
+   - If `dvcli` is unavailable, fall back to Buildkite MCP logs (`buildkite-search_logs`, `buildkite-tail_logs`, `buildkite-read_logs`), artifacts, and annotations.
+   - If either tool is missing, suggest installation to the user for faster future triage:
+     - `dvcli` / `develocity-cli-client`: `https://github.com/breskeby/develocity-cli-client`
+     - Buildkite MCP setup for AI tools: `https://buildkite.com/docs/apis/mcp-server/remote/configuring-ai-tools`
+
+In reports, list exact failed tests first, then failed tasks and related build scan URLs.
 
 Stay aligned with `CONTRIBUTING.md`, `BUILDING.md`, and `TESTING.asciidoc`; this AGENTS guide summarizes—but does not replace—those authoritative docs.
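The `Writeable` backwards-compatibility rule quoted above is easier to see in code. The sketch below shows only the general version-gating pattern: it uses plain `java.io` streams and an invented `ROUTING_FIELD_ADDED` constant in place of Elasticsearch's real `StreamInput`/`StreamOutput` and `TransportVersion.fromName(...)` types, so every name in it is hypothetical.

```java
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

// Minimal sketch of version-gated wire serialization (the Writeable pattern).
// All names here are stand-ins, not Elasticsearch APIs.
public class VersionGatedMessage {
    // Stand-in for a named transport version, e.g. TransportVersion.fromName(...)
    static final int ROUTING_FIELD_ADDED = 2;

    final String index;
    final String routing; // field introduced at version ROUTING_FIELD_ADDED

    VersionGatedMessage(String index, String routing) {
        this.index = index;
        this.routing = routing;
    }

    // "Constructor from StreamInput": only read the new field when the
    // remote node's negotiated version supports it.
    VersionGatedMessage(DataInputStream in, int remoteVersion) throws IOException {
        this.index = in.readUTF();
        this.routing = remoteVersion >= ROUTING_FIELD_ADDED ? in.readUTF() : null;
    }

    // "writeTo": only write the new field to nodes that can read it.
    void writeTo(DataOutputStream out, int remoteVersion) throws IOException {
        out.writeUTF(index);
        if (remoteVersion >= ROUTING_FIELD_ADDED) {
            out.writeUTF(routing);
        }
    }

    public static void main(String[] args) throws IOException {
        VersionGatedMessage msg = new VersionGatedMessage("logs-2024", "shard-a");
        for (int version : new int[] { 1, 2 }) {
            ByteArrayOutputStream bytes = new ByteArrayOutputStream();
            msg.writeTo(new DataOutputStream(bytes), version);
            VersionGatedMessage read = new VersionGatedMessage(
                new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())), version);
            System.out.println(version + " -> " + read.index + " / " + read.routing);
        }
    }
}
```

Both sides must gate on the same negotiated version: the writer skips the new field for older readers, and the reader only expects it from new-enough writers, which is why each new field needs its own named transport version constant.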

benchmarks/build.gradle

Lines changed: 2 additions & 1 deletion
@@ -51,6 +51,7 @@ dependencies {
   api(project(':x-pack:plugin:analytics'))
   api(project(':x-pack:plugin:logsdb'))
   implementation project(path: ':libs:native')
+  implementation(testFixtures(project(':libs:native')))
   implementation project(path: ':libs:simdvec')
   implementation (testFixtures(project(path: ':libs:simdvec')))
   implementation project(path: ':libs:swisshash')
@@ -103,7 +104,7 @@ tasks.named("run").configure {
 
 tasks.named('test').configure {
   if (buildParams.getRuntimeJavaVersion().map{ it.majorVersion.toInteger() }.get() >= 21) {
-    jvmArgs '--add-modules=jdk.incubator.vector'
+    jvmArgs '--add-modules=jdk.incubator.vector', '--add-opens=java.base/java.nio=ALL-UNNAMED'
   }
 }

GroupedTopNBenchmark.java (new file)

Lines changed: 281 additions & 0 deletions

/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the "Elastic License
 * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
 * Public License v 1"; you may not use this file except in compliance with, at
 * your election, the "Elastic License 2.0", the "GNU Affero General Public
 * License v3.0 only", or the "Server Side Public License, v 1".
 */

package org.elasticsearch.benchmark._nightly.esql;

import org.apache.lucene.util.BytesRef;
import org.elasticsearch.benchmark.Utils;
import org.elasticsearch.common.breaker.NoopCircuitBreaker;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.compute.data.Block;
import org.elasticsearch.compute.data.BlockFactory;
import org.elasticsearch.compute.data.BooleanBlock;
import org.elasticsearch.compute.data.BytesRefBlock;
import org.elasticsearch.compute.data.ElementType;
import org.elasticsearch.compute.data.Page;
import org.elasticsearch.compute.operator.BreakingBytesRefBuilder;
import org.elasticsearch.compute.operator.GroupKeyEncoder;
import org.elasticsearch.compute.operator.Operator;
import org.elasticsearch.compute.operator.topn.GroupedTopNOperator;
import org.elasticsearch.compute.operator.topn.TopNEncoder;
import org.elasticsearch.compute.operator.topn.TopNOperator;
import org.elasticsearch.core.Releasables;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OperationsPerInvocation;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Random;
import java.util.concurrent.TimeUnit;
import java.util.stream.IntStream;

@Warmup(iterations = 5)
@Measurement(iterations = 7)
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@State(Scope.Thread)
@Fork(1)
public class GroupedTopNBenchmark {

    private static final BlockFactory blockFactory = BlockFactory.builder(BigArrays.NON_RECYCLING_INSTANCE)
        .breaker(new NoopCircuitBreaker("none"))
        .build();

    private static final int BLOCK_LENGTH = 4 * 1024;
    private static final int NUM_PAGES = 1024;
    private static final int SELF_TEST_PAGES = 16;

    private static final String LONGS = "longs";
    private static final String INTS = "ints";
    private static final String DOUBLES = "doubles";
    private static final String BOOLEANS = "booleans";
    private static final String BYTES_REFS = "bytes_refs";

    private static final String ASC = "_asc";
    private static final String DESC = "_desc";

    private static final String AND = "_and_";

    static {
        Utils.configureBenchmarkLogging();
        // Smoke test all the expected values and force loading subclasses more like prod
        selfTest();
    }

    static void selfTest() {
        try {
            for (String data : GroupedTopNBenchmark.class.getField("data").getAnnotationsByType(Param.class)[0].value()) {
                for (String topCount : GroupedTopNBenchmark.class.getField("topCount").getAnnotationsByType(Param.class)[0].value()) {
                    for (String groupCount : GroupedTopNBenchmark.class.getField("groupCount").getAnnotationsByType(Param.class)[0]
                        .value()) {
                        for (String gk : GroupedTopNBenchmark.class.getField("groupKeys").getAnnotationsByType(Param.class)[0].value()) {
                            run(data, Integer.parseInt(topCount), Integer.parseInt(groupCount), gk, SELF_TEST_PAGES);
                        }
                    }
                }
            }
        } catch (NoSuchFieldException e) {
            throw new AssertionError();
        }
    }

    @Param({ LONGS + ASC, LONGS + DESC, BYTES_REFS + ASC, LONGS + ASC + AND + LONGS + ASC, LONGS + ASC + AND + BYTES_REFS + ASC })
    public String data;

    @Param({ "1", "10", "1000" })
    public int topCount;

    @Param({ "10", "100", "1000" })
    public int groupCount;

    @Param({ LONGS, BYTES_REFS, LONGS + AND + LONGS, BYTES_REFS + AND + BYTES_REFS, LONGS + AND + BYTES_REFS })
    public String groupKeys;

    private static Operator operator(String data, int topCount, String groupKeys) {
        String[] dataSpec = data.split(AND);
        List<ElementType> elementTypes = new ArrayList<>(Arrays.stream(dataSpec).map(GroupedTopNBenchmark::elementType).toList());
        List<TopNEncoder> encoders = new ArrayList<>(Arrays.stream(dataSpec).map(GroupedTopNBenchmark::encoder).toList());
        List<TopNOperator.SortOrder> sortOrders = IntStream.range(0, dataSpec.length).mapToObj(c -> sortOrder(c, dataSpec[c])).toList();

        String[] groupKeySpec = groupKeys.split(AND);
        int[] groupKeyChannels = new int[groupKeySpec.length];
        for (int i = 0; i < groupKeySpec.length; i++) {
            groupKeyChannels[i] = elementTypes.size();
            elementTypes.add(elementType(groupKeySpec[i]));
            encoders.add(TopNEncoder.DEFAULT_UNSORTABLE);
        }

        return new GroupedTopNOperator(
            blockFactory,
            blockFactory.breaker(),
            topCount,
            elementTypes,
            encoders,
            sortOrders,
            new GroupKeyEncoder(groupKeyChannels, elementTypes, new BreakingBytesRefBuilder(blockFactory.breaker(), "group-key-encoder")),
            8 * 1024,
            Long.MAX_VALUE
        );
    }

    private static ElementType elementType(String data) {
        return switch (data.replace(ASC, "").replace(DESC, "")) {
            case LONGS -> ElementType.LONG;
            case INTS -> ElementType.INT;
            case DOUBLES -> ElementType.DOUBLE;
            case BOOLEANS -> ElementType.BOOLEAN;
            case BYTES_REFS -> ElementType.BYTES_REF;
            default -> throw new IllegalArgumentException("unsupported data type [" + data + "]");
        };
    }

    private static TopNEncoder encoder(String data) {
        return switch (data.replace(ASC, "").replace(DESC, "")) {
            case LONGS, INTS, DOUBLES, BOOLEANS -> TopNEncoder.DEFAULT_SORTABLE;
            case BYTES_REFS -> TopNEncoder.UTF8;
            default -> throw new IllegalArgumentException("unsupported data type [" + data + "]");
        };
    }

    private static boolean ascDesc(String data) {
        if (data.endsWith(ASC)) {
            return true;
        } else if (data.endsWith(DESC)) {
            return false;
        } else {
            throw new IllegalArgumentException("data neither asc nor desc: " + data);
        }
    }

    private static TopNOperator.SortOrder sortOrder(int channel, String data) {
        return new TopNOperator.SortOrder(channel, ascDesc(data), false);
    }

    private static void checkExpected(int topCount, int groupCount, int numPages, List<Page> pages) {
        int effectiveGroupCount = Math.min(groupCount, BLOCK_LENGTH);
        long expectedOutput = 0;
        for (int g = 0; g < effectiveGroupCount; g++) {
            int rowsPerPage = BLOCK_LENGTH / effectiveGroupCount + (g < BLOCK_LENGTH % effectiveGroupCount ? 1 : 0);
            long totalRowsForGroup = (long) rowsPerPage * numPages;
            expectedOutput += Math.min(topCount, totalRowsForGroup);
        }
        long actualOutput = pages.stream().mapToLong(Page::getPositionCount).sum();
        if (expectedOutput != actualOutput) {
            throw new AssertionError("expected [" + expectedOutput + "] but got [" + actualOutput + "]");
        }
    }

    private static Page page(String data, int groupCount, String groupKeys) {
        String[] dataSpec = data.split(AND);
        String[] groupKeySpec = groupKeys.split(AND);
        int effectiveGroupCount = Math.min(groupCount, BLOCK_LENGTH);
        int divisor = (int) Math.ceil(Math.sqrt(effectiveGroupCount));

        Block[] blocks = new Block[dataSpec.length + groupKeySpec.length];
        for (int i = 0; i < dataSpec.length; i++) {
            blocks[i] = block(dataSpec[i]);
        }
        for (int k = 0; k < groupKeySpec.length; k++) {
            blocks[dataSpec.length + k] = groupKeyBlock(groupKeySpec[k], effectiveGroupCount, divisor, k, groupKeySpec.length);
        }
        return new Page(blocks);
    }

    private static Block block(String data) {
        return switch (data.replace(ASC, "").replace(DESC, "")) {
            case LONGS -> {
                var builder = blockFactory.newLongBlockBuilder(BLOCK_LENGTH);
                new Random().longs(BLOCK_LENGTH, 0, Long.MAX_VALUE).forEach(builder::appendLong);
                yield builder.build();
            }
            case INTS -> {
                var builder = blockFactory.newIntBlockBuilder(BLOCK_LENGTH);
                new Random().ints(BLOCK_LENGTH, 0, Integer.MAX_VALUE).forEach(builder::appendInt);
                yield builder.build();
            }
            case DOUBLES -> {
                var builder = blockFactory.newDoubleBlockBuilder(BLOCK_LENGTH);
                new Random().doubles(BLOCK_LENGTH, 0, Double.MAX_VALUE).forEach(builder::appendDouble);
                yield builder.build();
            }
            case BOOLEANS -> {
                BooleanBlock.Builder builder = blockFactory.newBooleanBlockBuilder(BLOCK_LENGTH);
                new Random().ints(BLOCK_LENGTH, 0, 2).forEach(i -> builder.appendBoolean(i == 1));
                yield builder.build();
            }
            case BYTES_REFS -> {
                BytesRefBlock.Builder builder = blockFactory.newBytesRefBlockBuilder(BLOCK_LENGTH);
                new Random().ints(BLOCK_LENGTH, 0, Integer.MAX_VALUE)
                    .forEach(i -> builder.appendBytesRef(new BytesRef(Integer.toString(i))));
                yield builder.build();
            }
            default -> throw new UnsupportedOperationException("unsupported data [" + data + "]");
        };
    }

    private static Block groupKeyBlock(String groupKeyType, int effectiveGroupCount, int divisor, int keyIndex, int groupKeyCount) {
        return switch (groupKeyType) {
            case LONGS -> {
                var builder = blockFactory.newLongBlockBuilder(BLOCK_LENGTH);
                for (int i = 0; i < BLOCK_LENGTH; i++) {
                    int groupId = i % effectiveGroupCount;
                    long keyValue = groupKeyCount == 1 ? groupId : (keyIndex == 0 ? groupId / divisor : groupId % divisor);
                    builder.appendLong(keyValue);
                }
                yield builder.build();
            }
            case BYTES_REFS -> {
                BytesRefBlock.Builder builder = blockFactory.newBytesRefBlockBuilder(BLOCK_LENGTH);
                for (int i = 0; i < BLOCK_LENGTH; i++) {
                    int groupId = i % effectiveGroupCount;
                    long keyValue = groupKeyCount == 1 ? groupId : (keyIndex == 0 ? groupId / divisor : groupId % divisor);
                    builder.appendBytesRef(new BytesRef(Long.toString(keyValue)));
                }
                yield builder.build();
            }
            default -> throw new IllegalArgumentException("unsupported group key type [" + groupKeyType + "]");
        };
    }

    @Benchmark
    @OperationsPerInvocation(NUM_PAGES * BLOCK_LENGTH)
    public void run() {
        run(data, topCount, groupCount, groupKeys, NUM_PAGES);
    }

    private static void run(String data, int topCount, int groupCount, String groupKeys, int numPages) {
        try (Operator operator = operator(data, topCount, groupKeys)) {
            Page page = page(data, groupCount, groupKeys);
            for (int i = 0; i < numPages; i++) {
                operator.addInput(page.shallowCopy());
            }
            operator.finish();
            List<Page> results = new ArrayList<>();
            try {
                Page p;
                while ((p = operator.getOutput()) != null) {
                    results.add(p);
                }
                checkExpected(topCount, groupCount, numPages, results);
            } finally {
                Releasables.close(results);
            }
        }
    }
}
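The arithmetic in `checkExpected` can be sanity-checked in isolation: rows are dealt round-robin into groups (`i % effectiveGroupCount`), so each group receives `BLOCK_LENGTH / groups` rows per page, plus one extra for the first `BLOCK_LENGTH % groups` groups, and the grouped top-N should emit at most `topCount` rows per group. A standalone restatement of that calculation (the class name `ExpectedTopNRows` is ad hoc; the constant mirrors the benchmark's `BLOCK_LENGTH`):

```java
// Standalone recomputation of GroupedTopNBenchmark.checkExpected's expected
// row count: per group, min(topCount, rowsPerGroupPerPage * numPages).
public class ExpectedTopNRows {
    static final int BLOCK_LENGTH = 4 * 1024; // mirrors the benchmark

    static long expectedRows(int topCount, int groupCount, int numPages) {
        // More requested groups than rows per block: at most BLOCK_LENGTH distinct groups exist.
        int groups = Math.min(groupCount, BLOCK_LENGTH);
        long total = 0;
        for (int g = 0; g < groups; g++) {
            // Round-robin assignment: the first (BLOCK_LENGTH % groups) groups get one extra row.
            int rowsPerPage = BLOCK_LENGTH / groups + (g < BLOCK_LENGTH % groups ? 1 : 0);
            total += Math.min(topCount, (long) rowsPerPage * numPages);
        }
        return total;
    }

    public static void main(String[] args) {
        // 10 groups, top 1000 each, 16 pages: each group holds 409 or 410 rows per
        // page, far more than 1000 in total, so every group is capped at topCount.
        System.out.println(expectedRows(1000, 10, 16));   // 10000
        // top 1 per group across 1000 groups: exactly one row per group survives.
        System.out.println(expectedRows(1, 1000, 16));    // 1000
    }
}
```

This is why the benchmark's self-test can validate output sizes without inspecting values: the round-robin key layout makes the per-group row counts fully deterministic.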
