Skip to content

Commit 516990c

Browse files
authored
Remove ordinal grouping operator (#131133)
The ordinals grouping operator was introduced to speed up aggregation before ordinal blocks and related optimizations in block hashes were available. However, this operator has several issues: 1. It only supports single grouping with the `keyword` type and requires `doc_values`. 2. It needs a separate aggregation implementation, which currently lacks test coverage. We had performance issues with the `VALUES` aggregation using this operator (see #130576). 3. It can be slower and use more memory when the target documents have sparse cardinality (see #98963). 4. Ad-hoc planning, although this can now be addressed with local plans. Although the ordinals grouping operator is slightly faster than the hash operator with ordinal blocks, its complexity now outweighs the benefits. This PR proposes removing the operator. Below is the NYC_taxis benchmark. Closes #98963
1 parent 04ae527 commit 516990c

File tree

15 files changed

+29
-1135
lines changed

15 files changed

+29
-1135
lines changed

benchmarks/src/main/java/org/elasticsearch/benchmark/compute/operator/ValuesAggregatorBenchmark.java

Lines changed: 5 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -95,8 +95,7 @@ static void selfTest() {
9595
try {
9696
for (String groups : ValuesAggregatorBenchmark.class.getField("groups").getAnnotationsByType(Param.class)[0].value()) {
9797
for (String dataType : ValuesAggregatorBenchmark.class.getField("dataType").getAnnotationsByType(Param.class)[0].value()) {
98-
run(Integer.parseInt(groups), dataType, 10, 0);
99-
run(Integer.parseInt(groups), dataType, 10, 1);
98+
run(Integer.parseInt(groups), dataType, 10);
10099
}
101100
}
102101
} catch (NoSuchFieldException e) {
@@ -114,10 +113,7 @@ static void selfTest() {
114113
@Param({ BYTES_REF, INT, LONG })
115114
public String dataType;
116115

117-
@Param({ "0", "1" })
118-
public int numOrdinalMerges;
119-
120-
private static Operator operator(DriverContext driverContext, int groups, String dataType, int numOrdinalMerges) {
116+
private static Operator operator(DriverContext driverContext, int groups, String dataType) {
121117
if (groups == 1) {
122118
return new AggregationOperator(
123119
List.of(supplier(dataType).aggregatorFactory(AggregatorMode.SINGLE, List.of(0)).apply(driverContext)),
@@ -132,20 +128,8 @@ private static Operator operator(DriverContext driverContext, int groups, String
132128
) {
133129
@Override
134130
public Page getOutput() {
135-
mergeOrdinal();
136131
return super.getOutput();
137132
}
138-
139-
// simulate OrdinalsGroupingOperator
140-
void mergeOrdinal() {
141-
var merged = supplier(dataType).groupingAggregatorFactory(AggregatorMode.SINGLE, List.of(1)).apply(driverContext);
142-
for (int i = 0; i < numOrdinalMerges; i++) {
143-
for (int p = 0; p < groups; p++) {
144-
merged.addIntermediateRow(p, aggregators.getFirst(), p);
145-
}
146-
}
147-
aggregators.set(0, merged);
148-
}
149133
};
150134
}
151135

@@ -352,12 +336,12 @@ private static Block groupingBlock(int groups) {
352336

353337
@Benchmark
354338
public void run() {
355-
run(groups, dataType, OP_COUNT, numOrdinalMerges);
339+
run(groups, dataType, OP_COUNT);
356340
}
357341

358-
private static void run(int groups, String dataType, int opCount, int numOrdinalMerges) {
342+
private static void run(int groups, String dataType, int opCount) {
359343
DriverContext driverContext = driverContext();
360-
try (Operator operator = operator(driverContext, groups, dataType, numOrdinalMerges)) {
344+
try (Operator operator = operator(driverContext, groups, dataType)) {
361345
Page page = page(groups, dataType);
362346
for (int i = 0; i < opCount; i++) {
363347
operator.addInput(page.shallowCopy());

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/GroupingAggregator.java

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -70,13 +70,6 @@ public void close() {}
7070
}
7171
}
7272

73-
/**
74-
* Add the position-th row from the intermediate output of the given aggregator to this aggregator at the groupId position
75-
*/
76-
public void addIntermediateRow(int groupId, GroupingAggregator input, int position) {
77-
aggregatorFunction.addIntermediateRowInput(groupId, input.aggregatorFunction, position);
78-
}
79-
8073
/**
8174
* Build the results for this aggregation.
8275
* @param selected the groupIds that have been selected to be included in

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/aggregation/GroupingAggregatorFunction.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@ default void add(int positionOffset, IntBlock groupIds) {
130130

131131
/**
132132
* Add the position-th row from the intermediate output of the given aggregator function to the groupId
133+
* TODO: Remove this method as the grouping operator has been removed
133134
*/
134135
void addIntermediateRowInput(int groupId, GroupingAggregatorFunction input, int position);
135136

0 commit comments

Comments
 (0)