Skip to content

Commit f7752a0

Browse files
committed
Treat sum of empty histograms as null instead of 0.0
1 parent 740ef90 commit f7752a0

File tree

6 files changed

+47
-29
lines changed

6 files changed

+47
-29
lines changed

test/framework/src/main/java/org/elasticsearch/index/mapper/TestBlock.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -797,12 +797,13 @@ public static TestBlock parseHistogramsToBlock(
797797
}
798798
CompressedExponentialHistogram result = new CompressedExponentialHistogram();
799799
try {
800+
Double sum = (Double) sums.get(i);
800801
Double min = (Double) minima.get(i);
801802
Double max = (Double) maxima.get(i);
802803
result.reset(
803804
(Double) zeroThresholds.get(i),
804805
(Long) valueCounts.get(i),
805-
(Double) sums.get(i),
806+
sum == null ? 0.0 : sum,
806807
min == null ? Double.NaN : min,
807808
max == null ? Double.NaN : max,
808809
(BytesRef) encodedHistograms.get(i)

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ExponentialHistogramArrayBlock.java

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -85,10 +85,10 @@ private boolean assertInvariants() {
8585
assert b.isNull(i)
8686
: "ExponentialHistogramArrayBlock sub-block [" + b + "] should be null at position " + i + ", but was not";
8787
} else {
88-
if (b == minima || b == maxima) {
89-
// minima / maxima should be null exactly when value count is 0 or the histogram is null
88+
if (b == sums || b == minima || b == maxima) {
89+
// sums / minima / maxima should be null exactly when value count is 0 or the histogram is null
9090
assert b.isNull(i) == (valueCounts.getLong(valueCounts.getFirstValueIndex(i)) == 0)
91-
: "ExponentialHistogramArrayBlock minima/maxima sub-block [" + b + "] has wrong nullity at position " + i;
91+
: "ExponentialHistogramArrayBlock sums/minima/maxima sub-block [" + b + "] has wrong nullity at position " + i;
9292
} else {
9393
assert b.isNull(i) == false
9494
: "ExponentialHistogramArrayBlock sub-block [" + b + "] should be non-null at position " + i + ", but was not";
@@ -108,7 +108,7 @@ public ExponentialHistogram getExponentialHistogram(int valueIndex, ExponentialH
108108
BytesRef bytes = encodedHistograms.getBytesRef(encodedHistograms.getFirstValueIndex(valueIndex), scratch.bytesRefScratch);
109109
double zeroThreshold = zeroThresholds.getDouble(zeroThresholds.getFirstValueIndex(valueIndex));
110110
long valueCount = valueCounts.getLong(valueCounts.getFirstValueIndex(valueIndex));
111-
double sum = sums.getDouble(sums.getFirstValueIndex(valueIndex));
111+
double sum = valueCount == 0 ? 0.0 : sums.getDouble(sums.getFirstValueIndex(valueIndex));
112112
double min = valueCount == 0 ? Double.NaN : minima.getDouble(minima.getFirstValueIndex(valueIndex));
113113
double max = valueCount == 0 ? Double.NaN : maxima.getDouble(maxima.getFirstValueIndex(valueIndex));
114114
try {
@@ -140,10 +140,10 @@ public void serializeExponentialHistogram(int valueIndex, SerializedOutput out,
140140
// this value count represents the number of individual samples the histogram was computed for
141141
long valueCount = valueCounts.getLong(valueCounts.getFirstValueIndex(valueIndex));
142142
out.appendLong(valueCounts.getLong(valueCounts.getFirstValueIndex(valueIndex)));
143-
out.appendDouble(sums.getDouble(sums.getFirstValueIndex(valueIndex)));
144143
out.appendDouble(zeroThresholds.getDouble(zeroThresholds.getFirstValueIndex(valueIndex)));
145144
if (valueCount > 0) {
146-
// min / max are only non-null for non-empty histograms
145+
// sum / min / max are only non-null for non-empty histograms
146+
out.appendDouble(sums.getDouble(sums.getFirstValueIndex(valueIndex)));
147147
out.appendDouble(minima.getDouble(minima.getFirstValueIndex(valueIndex)));
148148
out.appendDouble(maxima.getDouble(maxima.getFirstValueIndex(valueIndex)));
149149
}

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ExponentialHistogramBlockBuilder.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,12 @@ public ExponentialHistogramBlockBuilder append(ExponentialHistogram histogram) {
125125
} else {
126126
maximaBuilder.appendDouble(histogram.max());
127127
}
128-
sumsBuilder.appendDouble(histogram.sum());
128+
if (histogram.valueCount() == 0) {
129+
assert histogram.sum() == 0.0 : "Empty histogram should have sum 0.0 but was " + histogram.sum();
130+
sumsBuilder.appendNull();
131+
} else {
132+
sumsBuilder.appendDouble(histogram.sum());
133+
}
129134
valueCountsBuilder.appendLong(histogram.valueCount());
130135
zeroThresholdsBuilder.appendDouble(zeroBucket.zeroThreshold());
131136
encodedHistogramsBuilder.appendBytesRef(encodedBytes.bytes().toBytesRef());
@@ -141,12 +146,13 @@ public ExponentialHistogramBlockBuilder append(ExponentialHistogram histogram) {
141146
public void deserializeAndAppend(ExponentialHistogramBlock.SerializedInput input) {
142147
long valueCount = input.readLong();
143148
valueCountsBuilder.appendLong(valueCount);
144-
sumsBuilder.appendDouble(input.readDouble());
145149
zeroThresholdsBuilder.appendDouble(input.readDouble());
146150
if (valueCount > 0) {
151+
sumsBuilder.appendDouble(input.readDouble());
147152
minimaBuilder.appendDouble(input.readDouble());
148153
maximaBuilder.appendDouble(input.readDouble());
149154
} else {
155+
sumsBuilder.appendNull();
150156
minimaBuilder.appendNull();
151157
maximaBuilder.appendNull();
152158
}

x-pack/plugin/esql/qa/testFixtures/src/main/resources/exponential_histogram.csv-spec

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -46,17 +46,15 @@ FROM exp_histo_sample
4646
| SORT instance
4747
| Limit 15
4848
;
49-
warningRegex: Line 2:132: evaluation of \[AVG\(responseTime\)\] failed, treating result as null. Only first 20 failures recorded
50-
warningRegex: Line 2:132: java.lang.ArithmeticException: / by zero
5149

5250
instance:keyword | min:double | max:double | p75:double | sum:double | avg:double
53-
dummy-empty | null | null | null | 0.0 | null
51+
dummy-empty | null | null | null | null | null
5452
dummy-full | -100.0 | 50.0 | 10.6666667 | -3775.0 | -25.0
5553
dummy-negative_only | -50.0 | -1.0 | -12.8729318 | -1275.0 | -25.5
5654
dummy-no_zero_bucket | -100.0 | 50.0 | 10.6666667 | -3775.0 | -25.166666666666668
5755
dummy-positive_only | 1.0 | 50.0 | 34.7656715 | 1275.0 | 25.5
5856
dummy-zero_count_only | 0.0 | 0.0 | 0.0 | 0.0 | 0.0
59-
dummy-zero_threshold_only | null | null | null | 0.0 | null
57+
dummy-zero_threshold_only | null | null | null | null | null
6058
instance-0 | 2.4E-4 | 6.786232 | 0.2608237 | 1472.744209 | 0.1665811796176903
6159
instance-0 | 2.4E-4 | 6.786232 | 0.2608237 | 1472.744209 | 0.1665811796176903
6260
instance-0 | 2.4E-4 | 6.786232 | 0.2608237 | 1472.744209 | 0.1665811796176903
@@ -208,10 +206,8 @@ required_capability: exponential_histogram_sum_avg_support
208206

209207
FROM exp_histo_sample | WHERE instance == "dummy-empty"
210208
| STATS avg = AVG(responseTime)
211-
| KEEP avg //TODO handle without triggering warnings, but this currently triggers a planner issue
209+
| KEEP avg
212210
;
213-
warningRegex: Line 2:16: evaluation of \[AVG\(responseTime\)\] failed, treating result as null. Only first 20 failures recorded
214-
warningRegex: Line 2:16: java.lang.ArithmeticException: / by zero
215211

216212
avg:double
217213
NULL

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/aggregate/Avg.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,6 @@ public Expression surrogate() {
142142
window(),
143143
summationMode
144144
);
145-
// TODO handle empty histograms ( => 0.0 / 0.0) gracefully without triggering a warning
146145
return new Div(s, valuesSum, totalCount);
147146
}
148147
if (field.foldable()) {

x-pack/plugin/mapper-exponential-histogram/src/main/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapper.java

Lines changed: 28 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -586,10 +586,14 @@ public static HistogramDocValueFields buildDocValueFields(
586586
long thresholdAsLong = NumericUtils.doubleToSortableLong(zeroThreshold);
587587
NumericDocValuesField zeroThresholdField = new NumericDocValuesField(zeroThresholdSubFieldName(fieldName), thresholdAsLong);
588588
NumericDocValuesField valuesCountField = new NumericDocValuesField(valuesCountSubFieldName(fieldName), totalValueCount);
589-
NumericDocValuesField sumField = new NumericDocValuesField(
590-
valuesSumSubFieldName(fieldName),
591-
NumericUtils.doubleToSortableLong(sum)
592-
);
589+
// for empty histograms, we store null as sum so that SUM() / COUNT() in ESQL yields NULL without warnings
590+
NumericDocValuesField sumField = null;
591+
if (totalValueCount > 0) {
592+
sumField = new NumericDocValuesField(valuesSumSubFieldName(fieldName), NumericUtils.doubleToSortableLong(sum));
593+
} else {
594+
// empty histogram must have a sum of 0.0
595+
assert sum == 0.0;
596+
}
593597
NumericDocValuesField minField = null;
594598
if (Double.isNaN(min) == false) {
595599
minField = new NumericDocValuesField(valuesMinSubFieldName(fieldName), NumericUtils.doubleToSortableLong(min));
@@ -614,7 +618,7 @@ public record HistogramDocValueFields(
614618
BinaryDocValuesField histo,
615619
NumericDocValuesField zeroThreshold,
616620
NumericDocValuesField valuesCount,
617-
NumericDocValuesField sumField,
621+
@Nullable NumericDocValuesField sumField,
618622
@Nullable NumericDocValuesField minField,
619623
@Nullable NumericDocValuesField maxField
620624
) {
@@ -623,7 +627,9 @@ public void addToDoc(LuceneDocument doc) {
623627
doc.addWithKey(histo.name(), histo);
624628
doc.add(zeroThreshold);
625629
doc.add(valuesCount);
626-
doc.add(sumField);
630+
if (sumField != null) {
631+
doc.add(sumField);
632+
}
627633
if (minField != null) {
628634
doc.add(minField);
629635
}
@@ -637,7 +643,9 @@ public List<IndexableField> fieldsAsList() {
637643
fields.add(histo);
638644
fields.add(zeroThreshold);
639645
fields.add(valuesCount);
640-
fields.add(sumField);
646+
if (sumField != null) {
647+
fields.add(sumField);
648+
}
641649
if (minField != null) {
642650
fields.add(minField);
643651
}
@@ -766,13 +774,19 @@ public ExponentialHistogram histogramValue() throws IOException {
766774
}
767775
boolean histoPresent = histoDocValues.advanceExact(currentDocId);
768776
boolean zeroThresholdPresent = zeroThresholds.advanceExact(currentDocId);
769-
boolean valueSumsPresent = valueSums.advanceExact(currentDocId);
770-
assert zeroThresholdPresent && histoPresent && valueSumsPresent;
777+
assert zeroThresholdPresent && histoPresent;
771778

772779
BytesRef encodedHistogram = histoDocValues.binaryValue();
773780
double zeroThreshold = NumericUtils.sortableLongToDouble(zeroThresholds.longValue());
774781
long valueCount = valueCounts.longValue();
775-
double valueSum = NumericUtils.sortableLongToDouble(valueSums.longValue());
782+
double valueSum;
783+
if (valueCount > 0) {
784+
boolean valueSumsPresent = valueSums.advanceExact(currentDocId);
785+
assert valueSumsPresent;
786+
valueSum = NumericUtils.sortableLongToDouble(valueSums.longValue());
787+
} else {
788+
valueSum = 0.0; // empty histogram has sum of 0.0, but we store null in the doc values
789+
}
776790
double valueMin;
777791
if (valueMinima != null && valueMinima.advanceExact(currentDocId)) {
778792
valueMin = NumericUtils.sortableLongToDouble(valueMinima.longValue());
@@ -799,8 +813,10 @@ public double sumValue() throws IOException {
799813
if (currentDocId == -1) {
800814
throw new IllegalStateException("No histogram present for current document");
801815
}
802-
boolean valueSumsPresent = valueSums.advanceExact(currentDocId);
803-
assert valueSumsPresent;
816+
if (valueSums == null || valueSums.advanceExact(currentDocId) == false) {
817+
// empty histogram, must have sum of 0.0
818+
return 0.0;
819+
}
804820
return NumericUtils.sortableLongToDouble(valueSums.longValue());
805821
}
806822
}

0 commit comments

Comments
 (0)