Skip to content

Commit d5bccfe

Browse files
Drier and faster SumAggregator and AvgAggregator (#120436)
Dried up (and moved to the much faster inline logic) for the summation here for both implementations. Obviously this could have been done even drier but it didn't seem like that was possible without a performance hit (we really don't want to sub-class the leaf-collector I think). Benchmarks suggest this variant is ~10% faster than the previous iteration of `SumAggregator` (probably from making the grow method smaller) and a bigger than that improvement for the `AvgAggregator`.
1 parent ebed385 commit d5bccfe

File tree

2 files changed

+76
-77
lines changed

2 files changed

+76
-77
lines changed

server/src/main/java/org/elasticsearch/search/aggregations/metrics/AvgAggregator.java

Lines changed: 10 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,10 @@
99
package org.elasticsearch.search.aggregations.metrics;
1010

1111
import org.elasticsearch.common.util.BigArrays;
12-
import org.elasticsearch.common.util.DoubleArray;
1312
import org.elasticsearch.common.util.LongArray;
1413
import org.elasticsearch.core.Releasables;
1514
import org.elasticsearch.index.fielddata.NumericDoubleValues;
1615
import org.elasticsearch.index.fielddata.SortedNumericDoubleValues;
17-
import org.elasticsearch.search.DocValueFormat;
1816
import org.elasticsearch.search.aggregations.Aggregator;
1917
import org.elasticsearch.search.aggregations.InternalAggregation;
2018
import org.elasticsearch.search.aggregations.LeafBucketCollector;
@@ -25,12 +23,9 @@
2523
import java.io.IOException;
2624
import java.util.Map;
2725

28-
class AvgAggregator extends NumericMetricsAggregator.SingleDoubleValue {
26+
class AvgAggregator extends SumAggregator {
2927

3028
LongArray counts;
31-
DoubleArray sums;
32-
DoubleArray compensations;
33-
DocValueFormat format;
3429

3530
AvgAggregator(
3631
String name,
@@ -40,63 +35,40 @@ class AvgAggregator extends NumericMetricsAggregator.SingleDoubleValue {
4035
Map<String, Object> metadata
4136
) throws IOException {
4237
super(name, valuesSourceConfig, context, parent, metadata);
43-
assert valuesSourceConfig.hasValues();
44-
this.format = valuesSourceConfig.format();
45-
final BigArrays bigArrays = context.bigArrays();
46-
counts = bigArrays.newLongArray(1, true);
47-
sums = bigArrays.newDoubleArray(1, true);
48-
compensations = bigArrays.newDoubleArray(1, true);
38+
counts = context.bigArrays().newLongArray(1, true);
4939
}
5040

5141
@Override
5242
protected LeafBucketCollector getLeafCollector(SortedNumericDoubleValues values, final LeafBucketCollector sub) {
53-
final CompensatedSum kahanSummation = new CompensatedSum(0, 0);
5443
return new LeafBucketCollectorBase(sub, values) {
5544
@Override
5645
public void collect(int doc, long bucket) throws IOException {
5746
if (values.advanceExact(doc)) {
5847
maybeGrow(bucket);
59-
final int valueCount = values.docValueCount();
60-
counts.increment(bucket, valueCount);
61-
// Compute the sum of double values with Kahan summation algorithm which is more
62-
// accurate than naive summation.
63-
kahanSummation.reset(sums.get(bucket), compensations.get(bucket));
64-
for (int i = 0; i < valueCount; i++) {
65-
kahanSummation.add(values.nextValue());
66-
}
67-
sums.set(bucket, kahanSummation.value());
68-
compensations.set(bucket, kahanSummation.delta());
48+
counts.increment(bucket, sumSortedDoubles(bucket, values, sums, compensations));
6949
}
7050
}
7151
};
7252
}
7353

7454
@Override
7555
protected LeafBucketCollector getLeafCollector(NumericDoubleValues values, final LeafBucketCollector sub) {
76-
final CompensatedSum kahanSummation = new CompensatedSum(0, 0);
7756
return new LeafBucketCollectorBase(sub, values) {
7857
@Override
7958
public void collect(int doc, long bucket) throws IOException {
8059
if (values.advanceExact(doc)) {
8160
maybeGrow(bucket);
61+
computeSum(bucket, values, sums, compensations);
8262
counts.increment(bucket, 1L);
83-
// Compute the sum of double values with Kahan summation algorithm which is more
84-
// accurate than naive summation.
85-
kahanSummation.reset(sums.get(bucket), compensations.get(bucket));
86-
kahanSummation.add(values.doubleValue());
87-
sums.set(bucket, kahanSummation.value());
88-
compensations.set(bucket, kahanSummation.delta());
8963
}
9064
}
9165
};
9266
}
9367

94-
private void maybeGrow(long bucket) {
95-
if (bucket >= counts.size()) {
96-
counts = bigArrays().grow(counts, bucket + 1);
97-
sums = bigArrays().grow(sums, bucket + 1);
98-
compensations = bigArrays().grow(compensations, bucket + 1);
99-
}
68+
@Override
69+
protected void doGrow(long bucket, BigArrays bigArrays) {
70+
super.doGrow(bucket, bigArrays);
71+
counts = bigArrays.grow(counts, bucket + 1);
10072
}
10173

10274
@Override
@@ -122,7 +94,8 @@ public InternalAggregation buildEmptyAggregation() {
12294

12395
@Override
12496
public void doClose() {
125-
Releasables.close(counts, sums, compensations);
97+
super.doClose();
98+
Releasables.close(counts);
12699
}
127100

128101
}

server/src/main/java/org/elasticsearch/search/aggregations/metrics/SumAggregator.java

Lines changed: 66 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
*/
99
package org.elasticsearch.search.aggregations.metrics;
1010

11+
import org.elasticsearch.common.util.BigArrays;
1112
import org.elasticsearch.common.util.DoubleArray;
1213
import org.elasticsearch.core.Releasables;
1314
import org.elasticsearch.index.fielddata.NumericDoubleValues;
@@ -25,10 +26,9 @@
2526

2627
public class SumAggregator extends NumericMetricsAggregator.SingleDoubleValue {
2728

28-
private final DocValueFormat format;
29-
30-
private DoubleArray sums;
31-
private DoubleArray compensations;
29+
protected final DocValueFormat format;
30+
protected DoubleArray sums;
31+
protected DoubleArray compensations;
3232

3333
SumAggregator(
3434
String name,
@@ -40,72 +40,98 @@ public class SumAggregator extends NumericMetricsAggregator.SingleDoubleValue {
4040
super(name, valuesSourceConfig, context, parent, metadata);
4141
assert valuesSourceConfig.hasValues();
4242
this.format = valuesSourceConfig.format();
43-
sums = bigArrays().newDoubleArray(1, true);
44-
compensations = bigArrays().newDoubleArray(1, true);
43+
var bigArrays = context.bigArrays();
44+
sums = bigArrays.newDoubleArray(1, true);
45+
compensations = bigArrays.newDoubleArray(1, true);
4546
}
4647

4748
@Override
4849
protected LeafBucketCollector getLeafCollector(SortedNumericDoubleValues values, final LeafBucketCollector sub) {
49-
final CompensatedSum kahanSummation = new CompensatedSum(0, 0);
5050
return new LeafBucketCollectorBase(sub, values) {
5151
@Override
5252
public void collect(int doc, long bucket) throws IOException {
5353
if (values.advanceExact(doc)) {
5454
maybeGrow(bucket);
55-
// Compute the sum of double values with Kahan summation algorithm which is more
56-
// accurate than naive summation.
57-
kahanSummation.reset(sums.get(bucket), compensations.get(bucket));
58-
for (int i = 0; i < values.docValueCount(); i++) {
59-
kahanSummation.add(values.nextValue());
60-
}
61-
compensations.set(bucket, kahanSummation.delta());
62-
sums.set(bucket, kahanSummation.value());
55+
sumSortedDoubles(bucket, values, sums, compensations);
6356
}
6457
}
6558
};
6659
}
6760

61+
// returns number of values added
62+
static int sumSortedDoubles(long bucket, SortedNumericDoubleValues values, DoubleArray sums, DoubleArray compensations)
63+
throws IOException {
64+
final int valueCount = values.docValueCount();
65+
// Compute the sum of double values with Kahan summation algorithm which is more
66+
// accurate than naive summation.
67+
double value = sums.get(bucket);
68+
double delta = compensations.get(bucket);
69+
for (int i = 0; i < valueCount; i++) {
70+
double added = values.nextValue();
71+
value = addIfNonOrInf(added, value);
72+
if (Double.isFinite(value)) {
73+
double correctedSum = added + delta;
74+
double updatedValue = value + correctedSum;
75+
delta = correctedSum - (updatedValue - value);
76+
value = updatedValue;
77+
}
78+
}
79+
compensations.set(bucket, delta);
80+
sums.set(bucket, value);
81+
return valueCount;
82+
}
83+
84+
private static double addIfNonOrInf(double added, double value) {
85+
// If the value is Inf or NaN, just add it to the running tally to "convert" to
86+
// Inf/NaN. This keeps the behavior bwc from before kahan summing
87+
if (Double.isFinite(added)) {
88+
return value;
89+
}
90+
return added + value;
91+
}
92+
6893
@Override
6994
protected LeafBucketCollector getLeafCollector(NumericDoubleValues values, final LeafBucketCollector sub) {
7095
return new LeafBucketCollectorBase(sub, values) {
7196
@Override
7297
public void collect(int doc, long bucket) throws IOException {
7398
if (values.advanceExact(doc)) {
7499
maybeGrow(bucket);
75-
var sums = SumAggregator.this.sums;
76-
// Compute the sum of double values with Kahan summation algorithm which is more
77-
// accurate than naive summation.
78-
double value = sums.get(bucket);
79-
// If the value is Inf or NaN, just add it to the running tally to "convert" to
80-
// Inf/NaN. This keeps the behavior bwc from before kahan summing
81-
double v = values.doubleValue();
82-
if (Double.isFinite(v) == false) {
83-
value = v + value;
84-
}
85-
86-
if (Double.isFinite(value)) {
87-
var compensations = SumAggregator.this.compensations;
88-
double delta = compensations.get(bucket);
89-
double correctedSum = v + delta;
90-
double updatedValue = value + correctedSum;
91-
delta = correctedSum - (updatedValue - value);
92-
value = updatedValue;
93-
compensations.set(bucket, delta);
94-
}
95-
96-
sums.set(bucket, value);
100+
computeSum(bucket, values, sums, compensations);
97101
}
98102
}
99103
};
100104
}
101105

102-
private void maybeGrow(long bucket) {
106+
static void computeSum(long bucket, NumericDoubleValues values, DoubleArray sums, DoubleArray compensations) throws IOException {
107+
// Compute the sum of double values with Kahan summation algorithm which is more
108+
// accurate than naive summation.
109+
double added = values.doubleValue();
110+
double value = addIfNonOrInf(added, sums.get(bucket));
111+
if (Double.isFinite(value)) {
112+
double delta = compensations.get(bucket);
113+
double correctedSum = added + delta;
114+
double updatedValue = value + correctedSum;
115+
delta = correctedSum - (updatedValue - value);
116+
value = updatedValue;
117+
compensations.set(bucket, delta);
118+
}
119+
120+
sums.set(bucket, value);
121+
}
122+
123+
protected final void maybeGrow(long bucket) {
103124
if (bucket >= sums.size()) {
104-
sums = bigArrays().grow(sums, bucket + 1);
105-
compensations = bigArrays().grow(compensations, bucket + 1);
125+
var bigArrays = bigArrays();
126+
doGrow(bucket, bigArrays);
106127
}
107128
}
108129

130+
protected void doGrow(long bucket, BigArrays bigArrays) {
131+
sums = bigArrays.grow(sums, bucket + 1);
132+
compensations = bigArrays.grow(compensations, bucket + 1);
133+
}
134+
109135
@Override
110136
public double metric(long owningBucketOrd) {
111137
if (owningBucketOrd >= sums.size()) {

0 commit comments

Comments
 (0)