Skip to content

Commit bbede47

Browse files
committed
Create new block when filter OrdinalBytesRefBlock
1 parent 71d9d17 commit bbede47

File tree

5 files changed

+128
-51
lines changed

5 files changed

+128
-51
lines changed

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/OrdinalBytesRefBlock.java

Lines changed: 23 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -91,41 +91,33 @@ public OrdinalBytesRefBlock asOrdinals() {
9191

9292
@Override
9393
public BytesRefBlock filter(int... positions) {
94-
if (positions.length * ordinals.getTotalValueCount() >= bytes.getPositionCount() * ordinals.getPositionCount()) {
95-
OrdinalBytesRefBlock result = null;
96-
IntBlock filteredOrdinals = ordinals.filter(positions);
97-
try {
98-
result = new OrdinalBytesRefBlock(filteredOrdinals, bytes);
99-
bytes.incRef();
100-
} finally {
101-
if (result == null) {
102-
filteredOrdinals.close();
94+
// Do not build a filtered block using the same dictionary, because dictionary entries that are not referenced
95+
// may reappear when hashing the dictionary in BlockHash.
96+
// TODO: merge this BytesRefArrayBlock#filter
97+
final OrdinalBytesRefVector vector = asVector();
98+
if (vector != null) {
99+
return vector.filter(positions).asBlock();
100+
}
101+
BytesRef scratch = new BytesRef();
102+
try (BytesRefBlock.Builder builder = blockFactory().newBytesRefBlockBuilder(positions.length)) {
103+
for (int pos : positions) {
104+
if (isNull(pos)) {
105+
builder.appendNull();
106+
continue;
103107
}
104-
}
105-
return result;
106-
} else {
107-
// TODO: merge this BytesRefArrayBlock#filter
108-
BytesRef scratch = new BytesRef();
109-
try (BytesRefBlock.Builder builder = blockFactory().newBytesRefBlockBuilder(positions.length)) {
110-
for (int pos : positions) {
111-
if (isNull(pos)) {
112-
builder.appendNull();
113-
continue;
114-
}
115-
int valueCount = getValueCount(pos);
116-
int first = getFirstValueIndex(pos);
117-
if (valueCount == 1) {
118-
builder.appendBytesRef(getBytesRef(getFirstValueIndex(pos), scratch));
119-
} else {
120-
builder.beginPositionEntry();
121-
for (int c = 0; c < valueCount; c++) {
122-
builder.appendBytesRef(getBytesRef(first + c, scratch));
123-
}
124-
builder.endPositionEntry();
108+
int valueCount = getValueCount(pos);
109+
int first = getFirstValueIndex(pos);
110+
if (valueCount == 1) {
111+
builder.appendBytesRef(getBytesRef(getFirstValueIndex(pos), scratch));
112+
} else {
113+
builder.beginPositionEntry();
114+
for (int c = 0; c < valueCount; c++) {
115+
builder.appendBytesRef(getBytesRef(first + c, scratch));
125116
}
117+
builder.endPositionEntry();
126118
}
127-
return builder.mvOrdering(mvOrdering()).build();
128119
}
120+
return builder.mvOrdering(mvOrdering()).build();
129121
}
130122
}
131123

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/OrdinalBytesRefVector.java

Lines changed: 7 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -99,26 +99,14 @@ public BytesRefVector getDictionaryVector() {
9999

100100
@Override
101101
public BytesRefVector filter(int... positions) {
102-
if (positions.length >= ordinals.getPositionCount()) {
103-
OrdinalBytesRefVector result = null;
104-
IntVector filteredOrdinals = ordinals.filter(positions);
105-
try {
106-
result = new OrdinalBytesRefVector(filteredOrdinals, bytes);
107-
bytes.incRef();
108-
} finally {
109-
if (result == null) {
110-
filteredOrdinals.close();
111-
}
112-
}
113-
return result;
114-
} else {
115-
final BytesRef scratch = new BytesRef();
116-
try (BytesRefVector.Builder builder = blockFactory().newBytesRefVectorBuilder(positions.length)) {
117-
for (int p : positions) {
118-
builder.appendBytesRef(getBytesRef(p, scratch));
119-
}
120-
return builder.build();
102+
// Do not build a filtered block using the same dictionary, because dictionary entries that are not referenced
103+
// may reappear when hashing the dictionary in BlockHash.
104+
final BytesRef scratch = new BytesRef();
105+
try (BytesRefVector.Builder builder = blockFactory().newBytesRefVectorBuilder(positions.length)) {
106+
for (int p : positions) {
107+
builder.appendBytesRef(getBytesRef(p, scratch));
121108
}
109+
return builder.build();
122110
}
123111
}
124112

x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BasicBlockTests.java

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
import static org.hamcrest.Matchers.greaterThan;
5252
import static org.hamcrest.Matchers.instanceOf;
5353
import static org.hamcrest.Matchers.is;
54+
import static org.hamcrest.Matchers.not;
5455
import static org.hamcrest.Matchers.notNullValue;
5556
import static org.hamcrest.Matchers.nullValue;
5657
import static org.hamcrest.Matchers.sameInstance;
@@ -1512,6 +1513,67 @@ public void testRefCountingDocVector() {
15121513
assertThat(breaker.getUsed(), is(0L));
15131514
}
15141515

1516+
public void testFilterOrdinalBytesRefBlock() {
1517+
int dictSize = between(1, 10);
1518+
int positionCount = between(1, 100);
1519+
try (
1520+
var dictBuilder = blockFactory.newBytesRefVectorBuilder(dictSize);
1521+
var ordinalBuilder = blockFactory.newIntBlockBuilder(positionCount)
1522+
) {
1523+
for (int i = 0; i < dictSize; i++) {
1524+
dictBuilder.appendBytesRef(new BytesRef("value" + i));
1525+
}
1526+
for (int i = 0; i < positionCount; i++) {
1527+
int valueCount = randomIntBetween(0, 2);
1528+
switch (valueCount) {
1529+
case 0 -> ordinalBuilder.appendNull();
1530+
case 1 -> ordinalBuilder.appendInt(randomIntBetween(0, dictSize - 1));
1531+
default -> {
1532+
ordinalBuilder.beginPositionEntry();
1533+
for (int v = 0; v < valueCount; v++) {
1534+
ordinalBuilder.appendInt(randomIntBetween(0, dictSize - 1));
1535+
}
1536+
ordinalBuilder.endPositionEntry();
1537+
}
1538+
}
1539+
}
1540+
try (var block = new OrdinalBytesRefBlock(ordinalBuilder.build(), dictBuilder.build())) {
1541+
int[] masks = new int[between(1, 100)];
1542+
for (int i = 0; i < masks.length; i++) {
1543+
masks[i] = randomIntBetween(0, positionCount - 1);
1544+
}
1545+
try (var filtered = block.filter(masks)) {
1546+
assertThat(filtered, not(instanceOf(OrdinalBytesRefBlock.class)));
1547+
}
1548+
}
1549+
}
1550+
}
1551+
1552+
public void testFilterOrdinalBytesRefVector() {
1553+
int dictSize = between(1, 10);
1554+
int positionCount = between(1, 100);
1555+
try (
1556+
var dictBuilder = blockFactory.newBytesRefVectorBuilder(dictSize);
1557+
var ordinalBuilder = blockFactory.newIntVectorBuilder(positionCount)
1558+
) {
1559+
for (int i = 0; i < dictSize; i++) {
1560+
dictBuilder.appendBytesRef(new BytesRef("value" + i));
1561+
}
1562+
for (int i = 0; i < positionCount; i++) {
1563+
ordinalBuilder.appendInt(randomIntBetween(0, dictSize - 1));
1564+
}
1565+
try (var vector = new OrdinalBytesRefVector(ordinalBuilder.build(), dictBuilder.build())) {
1566+
int[] masks = new int[between(1, 100)];
1567+
for (int i = 0; i < masks.length; i++) {
1568+
masks[i] = randomIntBetween(0, positionCount - 1);
1569+
}
1570+
try (var filtered = vector.filter(masks)) {
1571+
assertThat(filtered, not(instanceOf(OrdinalBytesRefVector.class)));
1572+
}
1573+
}
1574+
}
1575+
}
1576+
15151577
/**
15161578
* Take an object with exactly 1 reference and assert that ref counting works fine.
15171579
* Assumes that {@link Releasable#close()} and {@link RefCounted#decRef()} are equivalent.

x-pack/plugin/esql/qa/testFixtures/src/main/resources/stats.csv-spec

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3381,3 +3381,31 @@ VALUES(color):keyword | color:keyword
33813381
yellow | yellow
33823382
// end::mv-group-values-expand-result[]
33833383
;
3384+
3385+
filterOrdinalValues
3386+
required_capability: fix_filter_ordinals
3387+
from employees
3388+
| MV_EXPAND job_positions
3389+
| WHERE job_positions != "Purchase Manager"
3390+
| STATS employees=count(*) BY job_positions
3391+
| SORT job_positions
3392+
| LIMIT 1000
3393+
;
3394+
3395+
employees:long | job_positions:keyword
3396+
18 | Accountant
3397+
13 | Architect
3398+
11 | Business Analyst
3399+
13 | Data Scientist
3400+
10 | Head Human Resources
3401+
15 | Internship
3402+
14 | Junior Developer
3403+
20 | Principal Support Engineer
3404+
13 | Python Developer
3405+
15 | Reporting Analyst
3406+
20 | Senior Python Developer
3407+
15 | Senior Team Lead
3408+
11 | Support Engineer
3409+
15 | Tech Lead
3410+
;
3411+

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1499,7 +1499,14 @@ public enum Cap {
14991499
/**
15001500
* Support for dots in FUSE attributes
15011501
*/
1502-
DOTS_IN_FUSE;
1502+
DOTS_IN_FUSE,
1503+
1504+
/**
1505+
* Create new block when filtering OrdinalBytesRefBlock
1506+
*/
1507+
FIX_FILTER_ORDINALS,
1508+
1509+
;
15031510

15041511
private final boolean enabled;
15051512

0 commit comments

Comments
 (0)