|
19 | 19 |
|
20 | 20 | final class ExponentialHistogramArrayBlock extends AbstractNonThreadSafeRefCounted implements ExponentialHistogramBlock { |
21 | 21 |
|
| 22 | + // Exponential histograms consist of several components that we store in separate blocks |
| 23 | + // due to (a) better compression in the field mapper for disk storage and (b) faster computations if only one sub-component is needed. |
| 24 | + // What are the semantics of positions, multi-value counts and nulls in the exponential histogram block and |
| 25 | + // how do they relate to the sub-blocks? |
| 26 | + // ExponentialHistogramBlock needs to adhere to the contract of Blocks for the access patterns: |
| 27 | + // |
| 28 | + // for (int position = 0; position < block.getPositionCount(); position++) { |
| 29 | + // ...int valueCount = block.getValueCount(position); |
| 30 | + // ...for (int valueIndex = 0; valueIndex < valueCount; valueIndex++) { |
| 31 | + // ......ExponentialHistogram histo = block.getExponentialHistogram(valueIndex, scratch); |
| 32 | + // ...} |
| 33 | + // } |
| 34 | + // |
| 35 | + // That implies that given only a value-index, we need to be able to retrieve all components of the histogram. |
| 36 | + // Because we can't make any assumptions on how value indices are laid out in the sub-blocks for multi-values, |
| 37 | + // we enforce that the sub-blocks have at most one value per position (i.e., no multi-values). |
| 38 | + // Based on this, we can define the valueIndex for ExponentialHistogramArrayBlock to correspond to positions in the sub-blocks. |
| 39 | + // So basically the sub-blocks are the "flattened" components of the histograms. |
| 40 | + // If we later add multi-value support to ExponentialHistogramArrayBlock, |
| 41 | + // we can't use the multi-value support of the sub-blocks to implement that. |
| 42 | + // Instead, we need to maintain a firstValueIndex array ourselves in ExponentialHistogramArrayBlock. |
| 43 | + |
22 | 44 | private final DoubleBlock minima; |
23 | 45 | private final DoubleBlock maxima; |
24 | 46 | private final DoubleBlock sums; |
| 47 | + /** |
| 48 | + Holds the number of values in each histogram. Note that this is a different concept from getValueCount(position)! |
| 49 | + */ |
25 | 50 | private final LongBlock valueCounts; |
26 | 51 | private final DoubleBlock zeroThresholds; |
27 | 52 | private final BytesRefBlock encodedHistograms; |
@@ -96,6 +121,8 @@ public ExponentialHistogram getExponentialHistogram(int valueIndex, ExponentialH |
96 | 121 |
|
97 | 122 | @Override |
98 | 123 | public void serializeExponentialHistogram(int valueIndex, SerializedOutput out, BytesRef scratch) { |
| 124 | + // Note that this value count is different from getValueCount(position)! |
| 125 | + // this value count represents the number of individual samples the histogram was computed for |
99 | 126 | long valueCount = valueCounts.getLong(valueCounts.getFirstValueIndex(valueIndex)); |
100 | 127 | out.appendLong(valueCounts.getLong(valueCounts.getFirstValueIndex(valueIndex))); |
101 | 128 | out.appendDouble(sums.getDouble(sums.getFirstValueIndex(valueIndex))); |
|
0 commit comments