 package org.elasticsearch.compute.data;
 
 import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.io.stream.BytesStreamOutput;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.core.ReleasableIterator;
 import org.elasticsearch.core.Releasables;
+import org.elasticsearch.exponentialhistogram.CompressedExponentialHistogram;
 import org.elasticsearch.exponentialhistogram.ExponentialHistogram;
+import org.elasticsearch.exponentialhistogram.ZeroBucket;
 
 import java.io.IOException;
 import java.util.List;
@@ -106,6 +109,45 @@ private List<Block> getSubBlocks() {
         return List.of(sums, valueCounts, zeroThresholds, encodedHistograms, minima, maxima);
     }
 
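+    // Illustrative use of encode() (a sketch, assuming an ExponentialHistogram instance `histo`):
+    //   EncodedHistogramData data = encode(histo);
+    //   // data.sum() is NaN for an empty histogram, which createConstant() below surfaces as a null block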
+    public static EncodedHistogramData encode(ExponentialHistogram histogram) {
+        assert histogram != null;
+        // TODO: check and potentially improve performance and correctness before moving out of tech-preview.
+        // The current implementation encodes the histogram into the format we use for storage on disk.
+        // This format is optimized for minimal memory usage at the cost of encoding speed.
+        // In addition, it only supports storing the zero threshold as a double value, which is lossy when merging histograms.
+        // In practice this rarely occurs, as the zero threshold is usually 0.0 and not impacted by merges,
+        // and even when it does occur, the error is usually tiny.
+        // We should add a dedicated encoding for building a block from computed histograms which do not originate from doc values.
+        // That encoding should be optimized for speed and should support storing the zero threshold as a (scale, index) pair.
+        ZeroBucket zeroBucket = histogram.zeroBucket();
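+        // write the bucket data in the compressed on-disk (doc-value) format described in the TODO above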
+        BytesStreamOutput encodedBytes = new BytesStreamOutput();
+        try {
+            CompressedExponentialHistogram.writeHistogramBytes(
+                encodedBytes,
+                histogram.scale(),
+                histogram.negativeBuckets().iterator(),
+                histogram.positiveBuckets().iterator()
+            );
+        } catch (IOException e) {
+            throw new RuntimeException("Failed to encode histogram", e);
+        }
+        double sum;
+        if (histogram.valueCount() == 0) {
+            assert histogram.sum() == 0.0 : "Empty histogram should have sum 0.0 but was " + histogram.sum();
+            sum = Double.NaN; // store null/NaN for empty histograms so avg is computed as null / 0.0 (null) rather than 0.0 / 0.0
+        } else {
+            sum = histogram.sum();
+        }
+        return new EncodedHistogramData(
+            histogram.valueCount(),
+            sum,
+            histogram.min(),
+            histogram.max(),
+            zeroBucket.zeroThreshold(),
+            encodedBytes.bytes().toBytesRef()
+        );
+    }
+
     @Override
     public ExponentialHistogram getExponentialHistogram(int valueIndex, ExponentialHistogramScratch scratch) {
         assert isNull(valueIndex) == false : "tried to get histogram at null position " + valueIndex;
@@ -125,6 +167,43 @@ public ExponentialHistogram getExponentialHistogram(int valueIndex, ExponentialH
         }
     }
 
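+    // Builds a constant block that repeats one histogram at every position by encoding it once and
+    // broadcasting each component as a constant sub-block. Illustrative call (variable names assumed):
+    //   ExponentialHistogramBlock block = createConstant(histo, positionCount, blockFactory);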
+    public static ExponentialHistogramBlock createConstant(ExponentialHistogram histogram, int positionCount, BlockFactory blockFactory) {
+        EncodedHistogramData data = encode(histogram);
+        DoubleBlock minBlock = null;
+        DoubleBlock maxBlock = null;
+        DoubleBlock sumBlock = null;
+        DoubleBlock countBlock = null;
+        DoubleBlock zeroThresholdBlock = null;
+        BytesRefBlock encodedHistogramBlock = null;
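+        // track success so that any sub-blocks already allocated are released if a later allocation throws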
+        boolean success = false;
+        try {
+            countBlock = blockFactory.newConstantDoubleBlockWith(data.count, positionCount);
+            if (Double.isNaN(data.min)) {
+                minBlock = (DoubleBlock) blockFactory.newConstantNullBlock(positionCount);
+            } else {
+                minBlock = blockFactory.newConstantDoubleBlockWith(data.min, positionCount);
+            }
+            if (Double.isNaN(data.max)) {
+                maxBlock = (DoubleBlock) blockFactory.newConstantNullBlock(positionCount);
+            } else {
+                maxBlock = blockFactory.newConstantDoubleBlockWith(data.max, positionCount);
+            }
+            if (Double.isNaN(data.sum)) {
+                sumBlock = (DoubleBlock) blockFactory.newConstantNullBlock(positionCount);
+            } else {
+                sumBlock = blockFactory.newConstantDoubleBlockWith(data.sum, positionCount);
+            }
+            zeroThresholdBlock = blockFactory.newConstantDoubleBlockWith(data.zeroThreshold, positionCount);
+            encodedHistogramBlock = blockFactory.newConstantBytesRefBlockWith(data.encodedHistogram, positionCount);
+            success = true;
+            return new ExponentialHistogramArrayBlock(minBlock, maxBlock, sumBlock, countBlock, zeroThresholdBlock, encodedHistogramBlock);
+        } finally {
+            if (success == false) {
+                Releasables.close(minBlock, maxBlock, sumBlock, countBlock, zeroThresholdBlock, encodedHistogramBlock);
+            }
+        }
+    }
+
     @Override
     public Block buildExponentialHistogramComponentBlock(Component component) {
         // as soon as we support multi-values, we need to implement this differently,
@@ -438,4 +517,6 @@ public int hashCode() {
         // this ensures proper equality with null blocks and should be unique enough for practical purposes
         return encodedHistograms.hashCode();
     }
+
+    record EncodedHistogramData(double count, double sum, double min, double max, double zeroThreshold, BytesRef encodedHistogram) {}
 }