diff --git a/docs/changelog/136444.yaml b/docs/changelog/136444.yaml new file mode 100644 index 0000000000000..21c6ea9416fa1 --- /dev/null +++ b/docs/changelog/136444.yaml @@ -0,0 +1,6 @@ +pr: 136444 +summary: Create new block when filter `OrdinalBytesRefBlock` +area: ES|QL +type: bug +issues: + - 136423 diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/OrdinalBytesRefBlock.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/OrdinalBytesRefBlock.java index efdabb0e2883d..b726300664b58 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/OrdinalBytesRefBlock.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/OrdinalBytesRefBlock.java @@ -91,41 +91,33 @@ public OrdinalBytesRefBlock asOrdinals() { @Override public BytesRefBlock filter(int... positions) { - if (positions.length * ordinals.getTotalValueCount() >= bytes.getPositionCount() * ordinals.getPositionCount()) { - OrdinalBytesRefBlock result = null; - IntBlock filteredOrdinals = ordinals.filter(positions); - try { - result = new OrdinalBytesRefBlock(filteredOrdinals, bytes); - bytes.incRef(); - } finally { - if (result == null) { - filteredOrdinals.close(); + // Do not build a filtered block using the same dictionary, because dictionary entries that are not referenced + // may reappear when hashing the dictionary in BlockHash. + // TODO: merge this BytesRefArrayBlock#filter + final OrdinalBytesRefVector vector = asVector(); + if (vector != null) { + return vector.filter(positions).asBlock(); + } + BytesRef scratch = new BytesRef(); + try (BytesRefBlock.Builder builder = blockFactory().newBytesRefBlockBuilder(positions.length)) { + for (int pos : positions) { + if (isNull(pos)) { + builder.appendNull(); + continue; } - } - return result; - } else { - // TODO: merge this BytesRefArrayBlock#filter - BytesRef scratch = new BytesRef(); - try (BytesRefBlock.Builder builder = blockFactory().newBytesRefBlockBuilder(positions.length)) { - for (int pos : positions) { - if (isNull(pos)) { - builder.appendNull(); - continue; - } - int valueCount = getValueCount(pos); - int first = getFirstValueIndex(pos); - if (valueCount == 1) { - builder.appendBytesRef(getBytesRef(getFirstValueIndex(pos), scratch)); - } else { - builder.beginPositionEntry(); - for (int c = 0; c < valueCount; c++) { - builder.appendBytesRef(getBytesRef(first + c, scratch)); - } - builder.endPositionEntry(); + int valueCount = getValueCount(pos); + int first = getFirstValueIndex(pos); + if (valueCount == 1) { + builder.appendBytesRef(getBytesRef(getFirstValueIndex(pos), scratch)); + } else { + builder.beginPositionEntry(); + for (int c = 0; c < valueCount; c++) { + builder.appendBytesRef(getBytesRef(first + c, scratch)); } + builder.endPositionEntry(); } - return builder.mvOrdering(mvOrdering()).build(); } + return builder.mvOrdering(mvOrdering()).build(); } } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/OrdinalBytesRefVector.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/OrdinalBytesRefVector.java index 4f3737f0aa7f2..d34dbf676ab23 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/OrdinalBytesRefVector.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/OrdinalBytesRefVector.java @@ -99,26 +99,14 @@ public BytesRefVector getDictionaryVector() { @Override public BytesRefVector filter(int... positions) { - if (positions.length >= ordinals.getPositionCount()) { - OrdinalBytesRefVector result = null; - IntVector filteredOrdinals = ordinals.filter(positions); - try { - result = new OrdinalBytesRefVector(filteredOrdinals, bytes); - bytes.incRef(); - } finally { - if (result == null) { - filteredOrdinals.close(); - } - } - return result; - } else { - final BytesRef scratch = new BytesRef(); - try (BytesRefVector.Builder builder = blockFactory().newBytesRefVectorBuilder(positions.length)) { - for (int p : positions) { - builder.appendBytesRef(getBytesRef(p, scratch)); - } - return builder.build(); + // Do not build a filtered block using the same dictionary, because dictionary entries that are not referenced + // may reappear when hashing the dictionary in BlockHash. + final BytesRef scratch = new BytesRef(); + try (BytesRefVector.Builder builder = blockFactory().newBytesRefVectorBuilder(positions.length)) { + for (int p : positions) { + builder.appendBytesRef(getBytesRef(p, scratch)); } + return builder.build(); } } diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BasicBlockTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BasicBlockTests.java index e0a18c79307c5..0ab6f8239de41 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BasicBlockTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BasicBlockTests.java @@ -51,6 +51,7 @@ import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.not; import static org.hamcrest.Matchers.notNullValue; import static org.hamcrest.Matchers.nullValue; import static org.hamcrest.Matchers.sameInstance; @@ -1512,6 +1513,67 @@ public void testRefCountingDocVector() { assertThat(breaker.getUsed(), is(0L)); } + public void testFilterOrdinalBytesRefBlock() { + int dictSize = between(1, 10); + int positionCount = between(1, 100); + try ( + var dictBuilder = blockFactory.newBytesRefVectorBuilder(dictSize); + var ordinalBuilder = blockFactory.newIntBlockBuilder(positionCount) + ) { + for (int i = 0; i < dictSize; i++) { + dictBuilder.appendBytesRef(new BytesRef("value" + i)); + } + for (int i = 0; i < positionCount; i++) { + int valueCount = randomIntBetween(0, 2); + switch (valueCount) { + case 0 -> ordinalBuilder.appendNull(); + case 1 -> ordinalBuilder.appendInt(randomIntBetween(0, dictSize - 1)); + default -> { + ordinalBuilder.beginPositionEntry(); + for (int v = 0; v < valueCount; v++) { + ordinalBuilder.appendInt(randomIntBetween(0, dictSize - 1)); + } + ordinalBuilder.endPositionEntry(); + } + } + } + try (var block = new OrdinalBytesRefBlock(ordinalBuilder.build(), dictBuilder.build())) { + int[] masks = new int[between(1, 100)]; + for (int i = 0; i < masks.length; i++) { + masks[i] = randomIntBetween(0, positionCount - 1); + } + try (var filtered = block.filter(masks)) { + assertThat(filtered, not(instanceOf(OrdinalBytesRefBlock.class))); + } + } + } + } + + public void testFilterOrdinalBytesRefVector() { + int dictSize = between(1, 10); + int positionCount = between(1, 100); + try ( + var dictBuilder = blockFactory.newBytesRefVectorBuilder(dictSize); + var ordinalBuilder = blockFactory.newIntVectorBuilder(positionCount) + ) { + for (int i = 0; i < dictSize; i++) { + dictBuilder.appendBytesRef(new BytesRef("value" + i)); + } + for (int i = 0; i < positionCount; i++) { + ordinalBuilder.appendInt(randomIntBetween(0, dictSize - 1)); + } + try (var vector = new OrdinalBytesRefVector(ordinalBuilder.build(), dictBuilder.build())) { + int[] masks = new int[between(1, 100)]; + for (int i = 0; i < masks.length; i++) { + masks[i] = randomIntBetween(0, positionCount - 1); + } + try (var filtered = vector.filter(masks)) { + assertThat(filtered, not(instanceOf(OrdinalBytesRefVector.class))); + } + } + } + } + /** * Take an object with exactly 1 reference and assert that ref counting works fine. * Assumes that {@link Releasable#close()} and {@link RefCounted#decRef()} are equivalent. diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec index 418e98da010a9..61ca316a39dbc 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec @@ -3381,3 +3381,31 @@ VALUES(color):keyword | color:keyword yellow | yellow // end::mv-group-values-expand-result[] ; + +filterOrdinalValues +required_capability: fix_filter_ordinals +from employees +| MV_EXPAND job_positions +| WHERE job_positions != "Purchase Manager" +| STATS employees=count(*) BY job_positions +| SORT job_positions +| LIMIT 1000 +; + +employees:long | job_positions:keyword +18 | Accountant +13 | Architect +11 | Business Analyst +13 | Data Scientist +10 | Head Human Resources +15 | Internship +14 | Junior Developer +20 | Principal Support Engineer +13 | Python Developer +15 | Reporting Analyst +20 | Senior Python Developer +15 | Senior Team Lead +11 | Support Engineer +15 | Tech Lead +; + diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index 36b61e127888e..7116c86d69d14 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -1499,7 +1499,14 @@ public enum Cap { /** * Support for dots in FUSE attributes */ - DOTS_IN_FUSE; + DOTS_IN_FUSE, + + /** + * Create new block when filtering OrdinalBytesRefBlock + */ + FIX_FILTER_ORDINALS, + + ; private final boolean enabled;