Skip to content

Commit a40e38c

Browse files
authored
[8.19] Fix empty VALUES with ordinals grouping (elastic#130861) (elastic#130866)
* Fix empty VALUES with ordinals grouping (elastic#130861) We should not build the sorted structure for the ordinal grouping operator if the requested position is larger than maxGroupId. This situation occurs with nulls. We should benchmark the ordinal blocks and consider removing the ordinal grouping operator if performance is similar; otherwise, we need to integrate this operator with GroupingAggregatorFunctionTestCase. Relates elastic#130576 * Fix test
1 parent 8e8f4b0 commit a40e38c

File tree

7 files changed

+114
-6
lines changed

7 files changed

+114
-6
lines changed

x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/ValuesBytesRefAggregator.java

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/ValuesDoubleAggregator.java

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/ValuesFloatAggregator.java

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/ValuesIntAggregator.java

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

x-pack/plugin/esql/compute/src/main/generated-src/org/elasticsearch/compute/aggregation/ValuesLongAggregator.java

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/X-ValuesAggregator.java.st

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,10 +125,10 @@ $endif$
125125
}
126126

127127
public static void combineStates(GroupingState current, int currentGroupId, GroupingState state, int statePosition) {
128-
var sorted = state.sortedForOrdinalMerging(current);
129128
if (statePosition > state.maxGroupId) {
130129
return;
131130
}
131+
var sorted = state.sortedForOrdinalMerging(current);
132132
var start = statePosition > 0 ? sorted.counts[statePosition - 1] : 0;
133133
var end = sorted.counts[statePosition];
134134
for (int i = start; i < end; i++) {

x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/OperatorTests.java

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import org.apache.lucene.document.Field;
1212
import org.apache.lucene.document.LongField;
1313
import org.apache.lucene.document.LongPoint;
14+
import org.apache.lucene.document.SortedNumericDocValuesField;
1415
import org.apache.lucene.document.SortedSetDocValuesField;
1516
import org.apache.lucene.index.DirectoryReader;
1617
import org.apache.lucene.index.IndexReader;
@@ -35,6 +36,7 @@
3536
import org.elasticsearch.common.util.MockPageCacheRecycler;
3637
import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
3738
import org.elasticsearch.compute.aggregation.CountAggregatorFunction;
39+
import org.elasticsearch.compute.aggregation.ValuesLongAggregatorFunctionSupplier;
3840
import org.elasticsearch.compute.aggregation.blockhash.BlockHash;
3941
import org.elasticsearch.compute.data.Block;
4042
import org.elasticsearch.compute.data.BlockFactory;
@@ -252,6 +254,112 @@ public String toString() {
252254
assertThat(blockFactory.breaker().getUsed(), equalTo(0L));
253255
}
254256

257+
// TODO: Remove ordinals grouping operator or enable it GroupingAggregatorFunctionTestCase
258+
public void testValuesWithOrdinalGrouping() throws Exception {
259+
DriverContext driverContext = driverContext();
260+
BlockFactory blockFactory = driverContext.blockFactory();
261+
262+
final int numDocs = between(100, 1000);
263+
Map<BytesRef, Set<Long>> expectedValues = new HashMap<>();
264+
try (BaseDirectoryWrapper dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), dir)) {
265+
String VAL_NAME = "val";
266+
String KEY_NAME = "key";
267+
for (int i = 0; i < numDocs; i++) {
268+
Document doc = new Document();
269+
BytesRef key = new BytesRef(Integer.toString(between(1, 100)));
270+
SortedSetDocValuesField keyField = new SortedSetDocValuesField(KEY_NAME, key);
271+
doc.add(keyField);
272+
if (randomBoolean()) {
273+
int numValues = between(0, 2);
274+
for (int v = 0; v < numValues; v++) {
275+
long val = between(1, 1000);
276+
var valuesField = new SortedNumericDocValuesField(VAL_NAME, val);
277+
doc.add(valuesField);
278+
expectedValues.computeIfAbsent(key, k -> new HashSet<>()).add(val);
279+
}
280+
}
281+
writer.addDocument(doc);
282+
}
283+
writer.commit();
284+
try (DirectoryReader reader = writer.getReader()) {
285+
List<Operator> operators = new ArrayList<>();
286+
if (randomBoolean()) {
287+
operators.add(new ShuffleDocsOperator(blockFactory));
288+
}
289+
operators.add(
290+
new ValuesSourceReaderOperator(
291+
blockFactory,
292+
List.of(
293+
new ValuesSourceReaderOperator.FieldInfo(
294+
VAL_NAME,
295+
ElementType.LONG,
296+
unused -> new BlockDocValuesReader.LongsBlockLoader(VAL_NAME)
297+
)
298+
),
299+
List.of(new ValuesSourceReaderOperator.ShardContext(reader, () -> {
300+
throw new UnsupportedOperationException();
301+
}, 0.2)),
302+
0
303+
)
304+
);
305+
operators.add(
306+
new OrdinalsGroupingOperator(
307+
shardIdx -> new KeywordFieldMapper.KeywordFieldType(KEY_NAME).blockLoader(null),
308+
List.of(new ValuesSourceReaderOperator.ShardContext(reader, () -> SourceLoader.FROM_STORED_SOURCE, 0.2)),
309+
ElementType.BYTES_REF,
310+
0,
311+
KEY_NAME,
312+
List.of(new ValuesLongAggregatorFunctionSupplier().groupingAggregatorFactory(INITIAL, List.of(1))),
313+
randomPageSize(),
314+
driverContext
315+
)
316+
);
317+
operators.add(
318+
new HashAggregationOperator(
319+
List.of(new ValuesLongAggregatorFunctionSupplier().groupingAggregatorFactory(FINAL, List.of(1))),
320+
() -> BlockHash.build(
321+
List.of(new BlockHash.GroupSpec(0, ElementType.BYTES_REF)),
322+
driverContext.blockFactory(),
323+
randomPageSize(),
324+
false
325+
),
326+
driverContext
327+
)
328+
);
329+
Map<BytesRef, Set<Long>> actualValues = new HashMap<>();
330+
Driver driver = TestDriverFactory.create(
331+
driverContext,
332+
luceneOperatorFactory(
333+
reader,
334+
List.of(new LuceneSliceQueue.QueryAndTags(new MatchAllDocsQuery(), List.of())),
335+
LuceneOperator.NO_LIMIT
336+
).get(driverContext),
337+
operators,
338+
new PageConsumerOperator(page -> {
339+
BytesRefBlock keyBlock = page.getBlock(0);
340+
LongBlock valueBlock = page.getBlock(1);
341+
BytesRef spare = new BytesRef();
342+
for (int p = 0; p < page.getPositionCount(); p++) {
343+
var key = keyBlock.getBytesRef(p, spare);
344+
int valueCount = valueBlock.getValueCount(p);
345+
for (int i = 0; i < valueCount; i++) {
346+
long val = valueBlock.getLong(valueBlock.getFirstValueIndex(p) + i);
347+
boolean added = actualValues.computeIfAbsent(BytesRef.deepCopyOf(key), k -> new HashSet<>()).add(val);
348+
assertTrue(actualValues.toString(), added);
349+
}
350+
}
351+
page.releaseBlocks();
352+
})
353+
);
354+
OperatorTestCase.runDriver(driver);
355+
assertDriverContext(driverContext);
356+
assertThat(actualValues, equalTo(expectedValues));
357+
org.elasticsearch.common.util.MockBigArrays.ensureAllArraysAreReleased();
358+
}
359+
}
360+
assertThat(blockFactory.breaker().getUsed(), equalTo(0L));
361+
}
362+
255363
public void testPushRoundToToQuery() throws IOException {
256364
long firstGroupMax = randomLong();
257365
long secondGroupMax = randomLong();

0 commit comments

Comments
 (0)