Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/changelog/136444.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 136444
summary: Create new block when filter `OrdinalBytesRefBlock`
area: ES|QL
type: bug
issues:
- 136423
Original file line number Diff line number Diff line change
Expand Up @@ -91,41 +91,33 @@ public OrdinalBytesRefBlock asOrdinals() {

@Override
public BytesRefBlock filter(int... positions) {
if (positions.length * ordinals.getTotalValueCount() >= bytes.getPositionCount() * ordinals.getPositionCount()) {
OrdinalBytesRefBlock result = null;
IntBlock filteredOrdinals = ordinals.filter(positions);
try {
result = new OrdinalBytesRefBlock(filteredOrdinals, bytes);
bytes.incRef();
} finally {
if (result == null) {
filteredOrdinals.close();
// Do not build a filtered block using the same dictionary, because dictionary entries that are not referenced
// may reappear when hashing the dictionary in BlockHash.
// TODO: merge this BytesRefArrayBlock#filter
final OrdinalBytesRefVector vector = asVector();
if (vector != null) {
return vector.filter(positions).asBlock();
}
BytesRef scratch = new BytesRef();
try (BytesRefBlock.Builder builder = blockFactory().newBytesRefBlockBuilder(positions.length)) {
for (int pos : positions) {
if (isNull(pos)) {
builder.appendNull();
continue;
}
}
return result;
} else {
// TODO: merge this BytesRefArrayBlock#filter
BytesRef scratch = new BytesRef();
try (BytesRefBlock.Builder builder = blockFactory().newBytesRefBlockBuilder(positions.length)) {
for (int pos : positions) {
if (isNull(pos)) {
builder.appendNull();
continue;
}
int valueCount = getValueCount(pos);
int first = getFirstValueIndex(pos);
if (valueCount == 1) {
builder.appendBytesRef(getBytesRef(getFirstValueIndex(pos), scratch));
} else {
builder.beginPositionEntry();
for (int c = 0; c < valueCount; c++) {
builder.appendBytesRef(getBytesRef(first + c, scratch));
}
builder.endPositionEntry();
int valueCount = getValueCount(pos);
int first = getFirstValueIndex(pos);
if (valueCount == 1) {
builder.appendBytesRef(getBytesRef(getFirstValueIndex(pos), scratch));
} else {
builder.beginPositionEntry();
for (int c = 0; c < valueCount; c++) {
builder.appendBytesRef(getBytesRef(first + c, scratch));
}
builder.endPositionEntry();
}
return builder.mvOrdering(mvOrdering()).build();
}
return builder.mvOrdering(mvOrdering()).build();
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,26 +99,14 @@ public BytesRefVector getDictionaryVector() {

@Override
public BytesRefVector filter(int... positions) {
if (positions.length >= ordinals.getPositionCount()) {
OrdinalBytesRefVector result = null;
IntVector filteredOrdinals = ordinals.filter(positions);
try {
result = new OrdinalBytesRefVector(filteredOrdinals, bytes);
bytes.incRef();
} finally {
if (result == null) {
filteredOrdinals.close();
}
}
return result;
} else {
final BytesRef scratch = new BytesRef();
try (BytesRefVector.Builder builder = blockFactory().newBytesRefVectorBuilder(positions.length)) {
for (int p : positions) {
builder.appendBytesRef(getBytesRef(p, scratch));
}
return builder.build();
// Do not build a filtered block using the same dictionary, because dictionary entries that are not referenced
// may reappear when hashing the dictionary in BlockHash.
final BytesRef scratch = new BytesRef();
try (BytesRefVector.Builder builder = blockFactory().newBytesRefVectorBuilder(positions.length)) {
for (int p : positions) {
builder.appendBytesRef(getBytesRef(p, scratch));
}
return builder.build();
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
import static org.hamcrest.Matchers.greaterThan;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.not;
import static org.hamcrest.Matchers.notNullValue;
import static org.hamcrest.Matchers.nullValue;
import static org.hamcrest.Matchers.sameInstance;
Expand Down Expand Up @@ -1512,6 +1513,67 @@ public void testRefCountingDocVector() {
assertThat(breaker.getUsed(), is(0L));
}

public void testFilterOrdinalBytesRefBlock() {
int dictSize = between(1, 10);
int positionCount = between(1, 100);
try (
var dictBuilder = blockFactory.newBytesRefVectorBuilder(dictSize);
var ordinalBuilder = blockFactory.newIntBlockBuilder(positionCount)
) {
for (int i = 0; i < dictSize; i++) {
dictBuilder.appendBytesRef(new BytesRef("value" + i));
}
for (int i = 0; i < positionCount; i++) {
int valueCount = randomIntBetween(0, 2);
switch (valueCount) {
case 0 -> ordinalBuilder.appendNull();
case 1 -> ordinalBuilder.appendInt(randomIntBetween(0, dictSize - 1));
default -> {
ordinalBuilder.beginPositionEntry();
for (int v = 0; v < valueCount; v++) {
ordinalBuilder.appendInt(randomIntBetween(0, dictSize - 1));
}
ordinalBuilder.endPositionEntry();
}
}
}
try (var block = new OrdinalBytesRefBlock(ordinalBuilder.build(), dictBuilder.build())) {
int[] masks = new int[between(1, 100)];
for (int i = 0; i < masks.length; i++) {
masks[i] = randomIntBetween(0, positionCount - 1);
}
try (var filtered = block.filter(masks)) {
assertThat(filtered, not(instanceOf(OrdinalBytesRefBlock.class)));
}
}
}
}

public void testFilterOrdinalBytesRefVector() {
int dictSize = between(1, 10);
int positionCount = between(1, 100);
try (
var dictBuilder = blockFactory.newBytesRefVectorBuilder(dictSize);
var ordinalBuilder = blockFactory.newIntVectorBuilder(positionCount)
) {
for (int i = 0; i < dictSize; i++) {
dictBuilder.appendBytesRef(new BytesRef("value" + i));
}
for (int i = 0; i < positionCount; i++) {
ordinalBuilder.appendInt(randomIntBetween(0, dictSize - 1));
}
try (var vector = new OrdinalBytesRefVector(ordinalBuilder.build(), dictBuilder.build())) {
int[] masks = new int[between(1, 100)];
for (int i = 0; i < masks.length; i++) {
masks[i] = randomIntBetween(0, positionCount - 1);
}
try (var filtered = vector.filter(masks)) {
assertThat(filtered, not(instanceOf(OrdinalBytesRefVector.class)));
}
}
}
}

/**
* Take an object with exactly 1 reference and assert that ref counting works fine.
* Assumes that {@link Releasable#close()} and {@link RefCounted#decRef()} are equivalent.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3381,3 +3381,31 @@ VALUES(color):keyword | color:keyword
yellow | yellow
// end::mv-group-values-expand-result[]
;

filterOrdinalValues
required_capability: fix_filter_ordinals
from employees
| MV_EXPAND job_positions
| WHERE job_positions != "Purchase Manager"
| STATS employees=count(*) BY job_positions
| SORT job_positions
| LIMIT 1000
;

employees:long | job_positions:keyword
18 | Accountant
13 | Architect
11 | Business Analyst
13 | Data Scientist
10 | Head Human Resources
15 | Internship
14 | Junior Developer
20 | Principal Support Engineer
13 | Python Developer
15 | Reporting Analyst
20 | Senior Python Developer
15 | Senior Team Lead
11 | Support Engineer
15 | Tech Lead
;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Without the fix, Purchase Manager would re-appear in the result.

employees:long | job_positions:keyword
18             | Accountant
13             | Architect
11             | Business Analyst
13             | Data Scientist
10             | Head Human Resources
15             | Internship
14             | Junior Developer
20             | Principal Support Engineer
0              | Purchase Manager
13             | Python Developer
15             | Reporting Analyst
20             | Senior Python Developer
15             | Senior Team Lead
11             | Support Engineer
15             | Tech Lead


Original file line number Diff line number Diff line change
Expand Up @@ -1499,7 +1499,14 @@ public enum Cap {
/**
* Support for dots in FUSE attributes
*/
DOTS_IN_FUSE;
DOTS_IN_FUSE,

/**
* Create new block when filtering OrdinalBytesRefBlock
*/
FIX_FILTER_ORDINALS,

;

private final boolean enabled;

Expand Down