Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/changelog/136444.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 136444
summary: Create new block when filter `OrdinalBytesRefBlock`
area: ES|QL
type: bug
issues:
- 136423
Original file line number Diff line number Diff line change
Expand Up @@ -91,41 +91,33 @@ public OrdinalBytesRefBlock asOrdinals() {

@Override
public BytesRefBlock filter(int... positions) {
if (positions.length * ordinals.getTotalValueCount() >= bytes.getPositionCount() * ordinals.getPositionCount()) {
OrdinalBytesRefBlock result = null;
IntBlock filteredOrdinals = ordinals.filter(positions);
try {
result = new OrdinalBytesRefBlock(filteredOrdinals, bytes);
bytes.incRef();
} finally {
if (result == null) {
filteredOrdinals.close();
// Do not build a filtered block using the same dictionary, because dictionary entries that are not referenced
// may reappear when hashing the dictionary in BlockHash.
// TODO: merge this BytesRefArrayBlock#filter
final OrdinalBytesRefVector vector = asVector();
if (vector != null) {
return vector.filter(positions).asBlock();
}
BytesRef scratch = new BytesRef();
try (BytesRefBlock.Builder builder = blockFactory().newBytesRefBlockBuilder(positions.length)) {
for (int pos : positions) {
if (isNull(pos)) {
builder.appendNull();
continue;
}
}
return result;
} else {
// TODO: merge this BytesRefArrayBlock#filter
BytesRef scratch = new BytesRef();
try (BytesRefBlock.Builder builder = blockFactory().newBytesRefBlockBuilder(positions.length)) {
for (int pos : positions) {
if (isNull(pos)) {
builder.appendNull();
continue;
}
int valueCount = getValueCount(pos);
int first = getFirstValueIndex(pos);
if (valueCount == 1) {
builder.appendBytesRef(getBytesRef(getFirstValueIndex(pos), scratch));
} else {
builder.beginPositionEntry();
for (int c = 0; c < valueCount; c++) {
builder.appendBytesRef(getBytesRef(first + c, scratch));
}
builder.endPositionEntry();
int valueCount = getValueCount(pos);
int first = getFirstValueIndex(pos);
if (valueCount == 1) {
builder.appendBytesRef(getBytesRef(getFirstValueIndex(pos), scratch));
} else {
builder.beginPositionEntry();
for (int c = 0; c < valueCount; c++) {
builder.appendBytesRef(getBytesRef(first + c, scratch));
}
builder.endPositionEntry();
}
return builder.mvOrdering(mvOrdering()).build();
}
return builder.mvOrdering(mvOrdering()).build();
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,26 +99,14 @@ public BytesRefVector getDictionaryVector() {

@Override
public BytesRefVector filter(int... positions) {
if (positions.length >= ordinals.getPositionCount()) {
OrdinalBytesRefVector result = null;
IntVector filteredOrdinals = ordinals.filter(positions);
try {
result = new OrdinalBytesRefVector(filteredOrdinals, bytes);
bytes.incRef();
} finally {
if (result == null) {
filteredOrdinals.close();
}
}
return result;
} else {
final BytesRef scratch = new BytesRef();
try (BytesRefVector.Builder builder = blockFactory().newBytesRefVectorBuilder(positions.length)) {
for (int p : positions) {
builder.appendBytesRef(getBytesRef(p, scratch));
}
return builder.build();
// Do not build a filtered block using the same dictionary, because dictionary entries that are not referenced
// may reappear when hashing the dictionary in BlockHash.
final BytesRef scratch = new BytesRef();
try (BytesRefVector.Builder builder = blockFactory().newBytesRefVectorBuilder(positions.length)) {
for (int p : positions) {
builder.appendBytesRef(getBytesRef(p, scratch));
}
return builder.build();
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
import static org.hamcrest.Matchers.greaterThan;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.not;
import static org.hamcrest.Matchers.notNullValue;
import static org.hamcrest.Matchers.nullValue;
import static org.hamcrest.Matchers.sameInstance;
Expand Down Expand Up @@ -1512,6 +1513,67 @@ public void testRefCountingDocVector() {
assertThat(breaker.getUsed(), is(0L));
}

public void testFilterOrdinalBytesRefBlock() {
int dictSize = between(1, 10);
int positionCount = between(1, 100);
try (
var dictBuilder = blockFactory.newBytesRefVectorBuilder(dictSize);
var ordinalBuilder = blockFactory.newIntBlockBuilder(positionCount)
) {
for (int i = 0; i < dictSize; i++) {
dictBuilder.appendBytesRef(new BytesRef("value" + i));
}
for (int i = 0; i < positionCount; i++) {
int valueCount = randomIntBetween(0, 2);
switch (valueCount) {
case 0 -> ordinalBuilder.appendNull();
case 1 -> ordinalBuilder.appendInt(randomIntBetween(0, dictSize - 1));
default -> {
ordinalBuilder.beginPositionEntry();
for (int v = 0; v < valueCount; v++) {
ordinalBuilder.appendInt(randomIntBetween(0, dictSize - 1));
}
ordinalBuilder.endPositionEntry();
}
}
}
try (var block = new OrdinalBytesRefBlock(ordinalBuilder.build(), dictBuilder.build())) {
int[] masks = new int[between(1, 100)];
for (int i = 0; i < masks.length; i++) {
masks[i] = randomIntBetween(0, positionCount - 1);
}
try (var filtered = block.filter(masks)) {
assertThat(filtered, not(instanceOf(OrdinalBytesRefBlock.class)));
}
}
}
}

public void testFilterOrdinalBytesRefVector() {
int dictSize = between(1, 10);
int positionCount = between(1, 100);
try (
var dictBuilder = blockFactory.newBytesRefVectorBuilder(dictSize);
var ordinalBuilder = blockFactory.newIntVectorBuilder(positionCount)
) {
for (int i = 0; i < dictSize; i++) {
dictBuilder.appendBytesRef(new BytesRef("value" + i));
}
for (int i = 0; i < positionCount; i++) {
ordinalBuilder.appendInt(randomIntBetween(0, dictSize - 1));
}
try (var vector = new OrdinalBytesRefVector(ordinalBuilder.build(), dictBuilder.build())) {
int[] masks = new int[between(1, 100)];
for (int i = 0; i < masks.length; i++) {
masks[i] = randomIntBetween(0, positionCount - 1);
}
try (var filtered = vector.filter(masks)) {
assertThat(filtered, not(instanceOf(OrdinalBytesRefVector.class)));
}
}
}
}

/**
* Take an object with exactly 1 reference and assert that ref counting works fine.
* Assumes that {@link Releasable#close()} and {@link RefCounted#decRef()} are equivalent.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3381,3 +3381,31 @@ VALUES(color):keyword | color:keyword
yellow | yellow
// end::mv-group-values-expand-result[]
;

filterOrdinalValues
required_capability: fix_filter_ordinals
from employees
| MV_EXPAND job_positions
| WHERE job_positions != "Purchase Manager"
| STATS employees=count(*) BY job_positions
| SORT job_positions
| LIMIT 1000
;

employees:long | job_positions:keyword
18 | Accountant
13 | Architect
11 | Business Analyst
13 | Data Scientist
10 | Head Human Resources
15 | Internship
14 | Junior Developer
20 | Principal Support Engineer
13 | Python Developer
15 | Reporting Analyst
20 | Senior Python Developer
15 | Senior Team Lead
11 | Support Engineer
15 | Tech Lead
;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Without the fix, Purchase Manager would re-appear in the result.

employees:long | job_positions:keyword
18             | Accountant
13             | Architect
11             | Business Analyst
13             | Data Scientist
10             | Head Human Resources
15             | Internship
14             | Junior Developer
20             | Principal Support Engineer
0              | Purchase Manager
13             | Python Developer
15             | Reporting Analyst
20             | Senior Python Developer
15             | Senior Team Lead
11             | Support Engineer
15             | Tech Lead


Original file line number Diff line number Diff line change
Expand Up @@ -1499,7 +1499,14 @@ public enum Cap {
/**
* Support for dots in FUSE attributes
*/
DOTS_IN_FUSE;
DOTS_IN_FUSE,

/**
* Create new block when filtering OrdinalBytesRefBlock
*/
FIX_FILTER_ORDINALS,

;

private final boolean enabled;

Expand Down