Commit cdf5112

ESQL: Exponential histogram pre-techpreview cleanups (#138654)
1 parent a57d5c3 commit cdf5112

4 files changed: +100 -43 lines changed

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/BlockFactory.java

Lines changed: 1 addition & 6 deletions
@@ -497,12 +497,7 @@ public ExponentialHistogramBlockBuilder newExponentialHistogramBlockBuilder(int
     }

     public final ExponentialHistogramBlock newConstantExponentialHistogramBlock(ExponentialHistogram value, int positionCount) {
-        try (ExponentialHistogramBlockBuilder builder = newExponentialHistogramBlockBuilder(positionCount)) {
-            for (int i = 0; i < positionCount; i++) {
-                builder.append(value);
-            }
-            return builder.build();
-        }
+        return ExponentialHistogramArrayBlock.createConstant(value, positionCount, this);
     }

     public BlockLoader.Block newExponentialHistogramBlockFromDocValues(
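
Usage note (not part of the commit): a minimal sketch of how a caller might obtain such a constant block from the factory, assuming ExponentialHistogramBlock is releasable like other ESQL blocks; the helper name emitConstant is made up for illustration.

    // Sketch: build a constant block for an already-computed histogram and release it when done.
    void emitConstant(BlockFactory blockFactory, ExponentialHistogram histogram, int positionCount) {
        // The factory now delegates to ExponentialHistogramArrayBlock.createConstant(...) instead of
        // appending the same value positionCount times through a builder.
        try (ExponentialHistogramBlock block = blockFactory.newConstantExponentialHistogramBlock(histogram, positionCount)) {
            assert block.getPositionCount() == positionCount;
        }
    }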

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ExponentialHistogramArrayBlock.java

Lines changed: 81 additions & 0 deletions
@@ -8,11 +8,14 @@
 package org.elasticsearch.compute.data;

 import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.io.stream.BytesStreamOutput;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.core.ReleasableIterator;
 import org.elasticsearch.core.Releasables;
+import org.elasticsearch.exponentialhistogram.CompressedExponentialHistogram;
 import org.elasticsearch.exponentialhistogram.ExponentialHistogram;
+import org.elasticsearch.exponentialhistogram.ZeroBucket;

 import java.io.IOException;
 import java.util.List;
@@ -106,6 +109,45 @@ private List<Block> getSubBlocks() {
         return List.of(sums, valueCounts, zeroThresholds, encodedHistograms, minima, maxima);
     }

+    public static EncodedHistogramData encode(ExponentialHistogram histogram) {
+        assert histogram != null;
+        // TODO: check and potentially improve performance and correctness before moving out of tech-preview.
+        // The current implementation encodes the histogram into the format we use for storage on disk.
+        // This format is optimized for minimal memory usage at the cost of encoding speed.
+        // In addition, it only supports storing the zero threshold as a double value, which is lossy when merging histograms.
+        // In practice this is rarely an issue, as the zero threshold is usually 0.0 and not impacted by merges,
+        // and even when it is, the error is usually tiny.
+        // We should add a dedicated encoding when building a block from computed histograms which do not originate from doc values.
+        // That encoding should be optimized for speed and support storing the zero threshold as a (scale, index) pair.
+        ZeroBucket zeroBucket = histogram.zeroBucket();
+        BytesStreamOutput encodedBytes = new BytesStreamOutput();
+        try {
+            CompressedExponentialHistogram.writeHistogramBytes(
+                encodedBytes,
+                histogram.scale(),
+                histogram.negativeBuckets().iterator(),
+                histogram.positiveBuckets().iterator()
+            );
+        } catch (IOException e) {
+            throw new RuntimeException("Failed to encode histogram", e);
+        }
+        double sum;
+        if (histogram.valueCount() == 0) {
+            assert histogram.sum() == 0.0 : "Empty histogram should have sum 0.0 but was " + histogram.sum();
+            sum = Double.NaN; // we store null/NaN for empty histograms so that avg becomes null (null / 0.0) instead of 0.0 / 0.0
+        } else {
+            sum = histogram.sum();
+        }
+        return new EncodedHistogramData(
+            histogram.valueCount(),
+            sum,
+            histogram.min(),
+            histogram.max(),
+            zeroBucket.zeroThreshold(),
+            encodedBytes.bytes().toBytesRef()
+        );
+    }
+
     @Override
     public ExponentialHistogram getExponentialHistogram(int valueIndex, ExponentialHistogramScratch scratch) {
         assert isNull(valueIndex) == false : "tried to get histogram at null position " + valueIndex;
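
For clarity on the NaN sentinel used by encode(...): storing the sum of an empty histogram as NaN makes the sums sub-block null at that position, so a downstream average comes out as null rather than 0.0 / 0.0. A hypothetical consumer-side helper (not part of the commit) would look like this:

    // Hypothetical illustration only: how the sentinel affects an average computed from the record.
    static Double averageOf(ExponentialHistogramArrayBlock.EncodedHistogramData data) {
        if (Double.isNaN(data.sum())) {
            return null; // empty histogram: count is 0, average is undefined rather than 0.0
        }
        return data.sum() / data.count();
    }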
@@ -125,6 +167,43 @@ public ExponentialHistogram getExponentialHistogram(int valueIndex, ExponentialH
         }
     }

+    public static ExponentialHistogramBlock createConstant(ExponentialHistogram histogram, int positionCount, BlockFactory blockFactory) {
+        EncodedHistogramData data = encode(histogram);
+        DoubleBlock minBlock = null;
+        DoubleBlock maxBlock = null;
+        DoubleBlock sumBlock = null;
+        DoubleBlock countBlock = null;
+        DoubleBlock zeroThresholdBlock = null;
+        BytesRefBlock encodedHistogramBlock = null;
+        boolean success = false;
+        try {
+            countBlock = blockFactory.newConstantDoubleBlockWith(data.count(), positionCount);
+            if (Double.isNaN(data.min())) {
+                minBlock = (DoubleBlock) blockFactory.newConstantNullBlock(positionCount);
+            } else {
+                minBlock = blockFactory.newConstantDoubleBlockWith(data.min(), positionCount);
+            }
+            if (Double.isNaN(data.max())) {
+                maxBlock = (DoubleBlock) blockFactory.newConstantNullBlock(positionCount);
+            } else {
+                maxBlock = blockFactory.newConstantDoubleBlockWith(data.max(), positionCount);
+            }
+            if (Double.isNaN(data.sum())) {
+                sumBlock = (DoubleBlock) blockFactory.newConstantNullBlock(positionCount);
+            } else {
+                sumBlock = blockFactory.newConstantDoubleBlockWith(data.sum(), positionCount);
+            }
+            zeroThresholdBlock = blockFactory.newConstantDoubleBlockWith(data.zeroThreshold(), positionCount);
+            encodedHistogramBlock = blockFactory.newConstantBytesRefBlockWith(data.encodedHistogram(), positionCount);
+            success = true;
+            return new ExponentialHistogramArrayBlock(minBlock, maxBlock, sumBlock, countBlock, zeroThresholdBlock, encodedHistogramBlock);
+        } finally {
+            if (success == false) {
+                Releasables.close(minBlock, maxBlock, sumBlock, countBlock, zeroThresholdBlock, encodedHistogramBlock);
+            }
+        }
+    }
+
     @Override
     public Block buildExponentialHistogramComponentBlock(Component component) {
         // as soon as we support multi-values, we need to implement this differently,
@@ -438,4 +517,6 @@ public int hashCode() {
         // this ensures proper equality with null blocks and should be unique enough for practical purposes
         return encodedHistograms.hashCode();
     }
+
+    record EncodedHistogramData(double count, double sum, double min, double max, double zeroThreshold, BytesRef encodedHistogram) {}
 }

x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/data/ExponentialHistogramBlockBuilder.java

Lines changed: 10 additions & 35 deletions
@@ -8,15 +8,10 @@
 package org.elasticsearch.compute.data;

 import org.apache.lucene.util.BytesRef;
-import org.elasticsearch.common.io.stream.BytesStreamOutput;
 import org.elasticsearch.core.Releasables;
-import org.elasticsearch.exponentialhistogram.CompressedExponentialHistogram;
 import org.elasticsearch.exponentialhistogram.ExponentialHistogram;
-import org.elasticsearch.exponentialhistogram.ZeroBucket;
 import org.elasticsearch.index.mapper.BlockLoader;

-import java.io.IOException;
-
 public final class ExponentialHistogramBlockBuilder implements ExponentialHistogramBlock.Builder {

     private final DoubleBlock.Builder minimaBuilder;
@@ -95,45 +90,25 @@ public BlockLoader.BytesRefBuilder encodedHistograms() {
     }

     public ExponentialHistogramBlockBuilder append(ExponentialHistogram histogram) {
-        assert histogram != null;
-        // TODO: fix performance and correctness before using in production code
-        // The current implementation encodes the histogram into the format we use for storage on disk
-        // This format is optimized for minimal memory usage at the cost of encoding speed
-        // In addition, it only support storing the zero threshold as a double value, which is lossy when merging histograms
-        // We should add a dedicated encoding when building a block from computed histograms which do not originate from doc values
-        // That encoding should be optimized for speed and support storing the zero threshold as (scale, index) pair
-        ZeroBucket zeroBucket = histogram.zeroBucket();
-
-        BytesStreamOutput encodedBytes = new BytesStreamOutput();
-        try {
-            CompressedExponentialHistogram.writeHistogramBytes(
-                encodedBytes,
-                histogram.scale(),
-                histogram.negativeBuckets().iterator(),
-                histogram.positiveBuckets().iterator()
-            );
-        } catch (IOException e) {
-            throw new RuntimeException("Failed to encode histogram", e);
-        }
-        if (Double.isNaN(histogram.min())) {
+        ExponentialHistogramArrayBlock.EncodedHistogramData data = ExponentialHistogramArrayBlock.encode(histogram);
+        valueCountsBuilder.appendDouble(data.count());
+        if (Double.isNaN(data.min())) {
             minimaBuilder.appendNull();
         } else {
-            minimaBuilder.appendDouble(histogram.min());
+            minimaBuilder.appendDouble(data.min());
         }
-        if (Double.isNaN(histogram.max())) {
+        if (Double.isNaN(data.max())) {
             maximaBuilder.appendNull();
         } else {
-            maximaBuilder.appendDouble(histogram.max());
+            maximaBuilder.appendDouble(data.max());
         }
-        if (histogram.valueCount() == 0) {
-            assert histogram.sum() == 0.0 : "Empty histogram should have sum 0.0 but was " + histogram.sum();
+        if (Double.isNaN(data.sum())) {
             sumsBuilder.appendNull();
         } else {
-            sumsBuilder.appendDouble(histogram.sum());
+            sumsBuilder.appendDouble(data.sum());
         }
-        valueCountsBuilder.appendDouble(histogram.valueCount());
-        zeroThresholdsBuilder.appendDouble(zeroBucket.zeroThreshold());
-        encodedHistogramsBuilder.appendBytesRef(encodedBytes.bytes().toBytesRef());
+        zeroThresholdsBuilder.appendDouble(data.zeroThreshold());
+        encodedHistogramsBuilder.appendBytesRef(data.encodedHistogram());
         return this;
     }
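
For context, a short usage sketch of the non-constant path that now shares the same encoding helper; buildBlock and its parameters are illustrative and not part of the commit.

    // Sketch: append several histograms through the builder; each append(...) delegates to
    // ExponentialHistogramArrayBlock.encode(...), so constant and non-constant blocks store
    // identically encoded sub-blocks (counts, sums, minima, maxima, zero thresholds, bytes).
    ExponentialHistogramBlock buildBlock(BlockFactory blockFactory, List<ExponentialHistogram> histograms) {
        try (ExponentialHistogramBlockBuilder builder = blockFactory.newExponentialHistogramBlockBuilder(histograms.size())) {
            for (ExponentialHistogram histogram : histograms) {
                builder.append(histogram);
            }
            return builder.build();
        }
    }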

x-pack/plugin/esql/qa/testFixtures/src/main/java/org/elasticsearch/xpack/esql/WriteableExponentialHistogram.java

Lines changed: 8 additions & 2 deletions
@@ -28,8 +28,14 @@
  */
 public class WriteableExponentialHistogram extends AbstractExponentialHistogram implements GenericNamedWriteable {

-    // TODO(b/133393): as it turns out, this is also required in production. Therefore we have to properly register this class,
-    // like in https://github.com/elastic/elasticsearch/pull/135054
+    // TODO: This is currently only used for serializing literals in tests.
+    // At the time of writing, it is impossible for ExponentialHistogram literals to appear outside of tests,
+    // as there is no conversion function or other means for constructing them except from a block loader.
+    // For this reason, this class is currently only available and registered for deserialization in tests,
+    // so that we don't have to care about backwards compatibility.
+    // If we eventually have to support ExponentialHistogram literals in production code,
+    // proper testing for this class needs to be added, similar to the following PR:
+    // https://github.com/elastic/elasticsearch/pull/135054

     private static final String WRITEABLE_NAME = "test_exponential_histogram";

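For illustration only (the exact wiring is an assumption, not shown in this commit): a test-only registration of this writeable might look roughly like the following, with WriteableExponentialHistogram::new standing in for whatever stream constructor the class actually exposes.

    // Hypothetical sketch of a test-only NamedWriteable registration; production code deliberately
    // omits this, per the comment above.
    static List<NamedWriteableRegistry.Entry> testNamedWriteables() {
        return List.of(
            new NamedWriteableRegistry.Entry(GenericNamedWriteable.class, "test_exponential_histogram", WriteableExponentialHistogram::new)
        );
    }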
