From 27bc1bd0e06284e157d9d91f8e396c71cef60516 Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Fri, 9 May 2025 09:03:47 -0700 Subject: [PATCH] Ensure ordinal builder emit ordinal blocks (#127949) Currently, if a field has high cardinality, we may mistakenly disable emitting ordinal blocks. For example, with 10,000 `tsid` values, we never emit ordinal blocks during reads, even though we could emit blocks for 10 `tsid` values across 1,000 positions. This bug disables optimizations for value aggregation and block hashing. This change tracks the minimum and maximum seen ordinals and uses them as an estimate for the number of ordinals. However, if a page contains `ord=1` and `ord=9999`, ordinal blocks still won't be emitted. Allocating a bitset or an array for `value_count` could track this more accurately but would require additional memory. I need to think about this trade off more before opening another PR to fix this issue completely. This is a quick, contained fix that significantly speeds up time-series aggregation (and other queries too). The execution time of this query is reduced from 3.4s to 1.9s with 11M documents. ``` POST /_query { "profile": true, "query": "TS metrics-hostmetricsreceiver.otel-default | STATS cpu = avg(avg_over_time(`metrics.system.cpu.load_average.1m`)) BY host.name, BUCKET(@timestamp, 5 minute)" } ``` ``` "took": 3475, "is_partial": false, "documents_found": 11368089, "values_loaded": 34248167 ``` ``` "took": 1965, "is_partial": false, "documents_found": 11368089, "values_loaded": 34248167 ``` --- docs/changelog/127949.yaml | 5 +++ .../compute/data/OrdinalBytesRefBlock.java | 19 +++++++++- .../data/SingletonOrdinalsBuilder.java | 24 ++++++++---- .../data/SingletonOrdinalsBuilderTests.java | 37 +++++++++++++++++++ 4 files changed, 76 insertions(+), 9 deletions(-) create mode 100644 docs/changelog/127949.yaml diff --git a/docs/changelog/127949.yaml b/docs/changelog/127949.yaml new file mode 100644 index 0000000000000..82a8b65fc9ec4 --- /dev/null +++ b/docs/changelog/127949.yaml @@ -0,0 +1,5 @@ +pr: 127949 +summary: Ensure ordinal builder emit ordinal blocks +area: ES|QL +type: bug +issues: [] diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/OrdinalBytesRefBlock.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/OrdinalBytesRefBlock.java index 87b33b3b0893d..9dfc0055415d7 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/OrdinalBytesRefBlock.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/OrdinalBytesRefBlock.java @@ -54,7 +54,11 @@ void writeOrdinalBlock(StreamOutput out) throws IOException { * Returns true if this ordinal block is dense enough to enable optimizations using its ordinals */ public boolean isDense() { - return ordinals.getTotalValueCount() * 2 / 3 >= bytes.getPositionCount(); + return isDense(ordinals.getTotalValueCount(), bytes.getPositionCount()); + } + + public static boolean isDense(long totalPositions, long dictionarySize) { + return totalPositions >= 10 && totalPositions >= dictionarySize * 2L; } public IntBlock getOrdinalsBlock() { @@ -251,4 +255,17 @@ public long ramBytesUsed() { public String toString() { return getClass().getSimpleName() + "[ordinals=" + ordinals + ", bytes=" + bytes + "]"; } + + @Override + public boolean equals(Object o) { + if (o instanceof BytesRefBlock that) { + return BytesRefBlock.equals(this, that); + } + return false; + } + + @Override + public int hashCode() { + return BytesRefBlock.hash(this); + } } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/SingletonOrdinalsBuilder.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/SingletonOrdinalsBuilder.java index 576bde5cdf676..cfcc75c7c396a 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/SingletonOrdinalsBuilder.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/SingletonOrdinalsBuilder.java @@ -22,6 +22,8 @@ public class SingletonOrdinalsBuilder implements BlockLoader.SingletonOrdinalsBuilder, Releasable, Block.Builder { private final BlockFactory blockFactory; private final SortedDocValues docValues; + private int minOrd = Integer.MAX_VALUE; + private int maxOrd = Integer.MIN_VALUE; private final int[] ords; private int count; @@ -39,8 +41,10 @@ public SingletonOrdinalsBuilder appendNull() { } @Override - public SingletonOrdinalsBuilder appendOrd(int value) { - ords[count++] = value; + public SingletonOrdinalsBuilder appendOrd(int ord) { + ords[count++] = ord; + minOrd = Math.min(minOrd, ord); + maxOrd = Math.max(maxOrd, ord); return this; } @@ -55,7 +59,7 @@ public SingletonOrdinalsBuilder endPositionEntry() { } BytesRefBlock buildOrdinal() { - int valueCount = docValues.getValueCount(); + int valueCount = maxOrd - minOrd + 1; long breakerSize = ordsSize(valueCount); blockFactory.adjustBreaker(breakerSize); BytesRefVector bytesVector = null; @@ -65,7 +69,7 @@ BytesRefBlock buildOrdinal() { Arrays.fill(newOrds, -1); for (int ord : ords) { if (ord != -1) { - newOrds[ord] = 0; + newOrds[ord - minOrd] = 0; } } // resolve the ordinals and remaps the ordinals @@ -74,7 +78,7 @@ BytesRefBlock buildOrdinal() { for (int i = 0; i < newOrds.length; i++) { if (newOrds[i] != -1) { newOrds[i] = ++nextOrd; - bytesBuilder.appendBytesRef(docValues.lookupOrd(i)); + bytesBuilder.appendBytesRef(docValues.lookupOrd(i + minOrd)); } } bytesVector = bytesBuilder.build(); @@ -86,7 +90,7 @@ BytesRefBlock buildOrdinal() { if (ord == -1) { ordinalsBuilder.appendNull(); } else { - ordinalsBuilder.appendInt(newOrds[ord]); + ordinalsBuilder.appendInt(newOrds[ord - minOrd]); } } ordinalBlock = ordinalsBuilder.build(); @@ -107,7 +111,6 @@ BytesRefBlock buildRegularBlock() { blockFactory.adjustBreaker(breakerSize); try { int[] sortedOrds = ords.clone(); - Arrays.sort(sortedOrds); int uniqueCount = compactToUnique(sortedOrds); try (BreakingBytesRefBuilder copies = new BreakingBytesRefBuilder(blockFactory.breaker(), "ords")) { @@ -167,7 +170,12 @@ public BytesRefBlock build() { } boolean shouldBuildOrdinalsBlock() { - return ords.length >= 2 * docValues.getValueCount() && ords.length >= 32; + if (minOrd <= maxOrd) { + int numOrds = maxOrd - minOrd + 1; + return OrdinalBytesRefBlock.isDense(ords.length, numOrds); + } else { + return false; + } } @Override diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/SingletonOrdinalsBuilderTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/SingletonOrdinalsBuilderTests.java index 87f3210dc12df..9a338506c00d1 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/SingletonOrdinalsBuilderTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/SingletonOrdinalsBuilderTests.java @@ -8,7 +8,10 @@ package org.elasticsearch.compute.data; import org.apache.lucene.document.SortedDocValuesField; +import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.store.Directory; @@ -37,6 +40,7 @@ import static org.elasticsearch.test.MapMatcher.assertMap; import static org.elasticsearch.test.MapMatcher.matchesMap; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; public class SingletonOrdinalsBuilderTests extends ESTestCase { public void testReader() throws IOException { @@ -154,6 +158,39 @@ public void testAllNull() throws IOException { } } + public void testEmitOrdinalForHighCardinality() throws IOException { + BlockFactory factory = breakingDriverContext().blockFactory(); + int numOrds = between(50, 100); + try (Directory directory = newDirectory(); IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig())) { + for (int o = 0; o < numOrds; o++) { + int docPerOrds = between(10, 15); + for (int d = 0; d < docPerOrds; d++) { + indexWriter.addDocument(List.of(new SortedDocValuesField("f", new BytesRef("value-" + o)))); + } + } + try (IndexReader reader = DirectoryReader.open(indexWriter)) { + assertThat(reader.leaves(), hasSize(1)); + for (LeafReaderContext ctx : reader.leaves()) { + int batchSize = between(40, 100); + int ord = random().nextInt(numOrds); + try ( + var b1 = new SingletonOrdinalsBuilder(factory, ctx.reader().getSortedDocValues("f"), batchSize); + var b2 = new SingletonOrdinalsBuilder(factory, ctx.reader().getSortedDocValues("f"), batchSize) + ) { + for (int i = 0; i < batchSize; i++) { + b1.appendOrd(ord); + b2.appendOrd(ord); + } + try (BytesRefBlock block1 = b1.build(); BytesRefBlock block2 = b2.buildRegularBlock()) { + assertThat(block1, equalTo(block2)); + assertNotNull(block1.asOrdinals()); + } + } + } + } + } + } + static BytesRefBlock buildOrdinalsBuilder(SingletonOrdinalsBuilder builder) { if (randomBoolean()) { return builder.buildRegularBlock();