3232public class CompressedExponentialHistogram implements ExponentialHistogram {
3333
3434 private static final int SCALE_OFFSET = 11 ;
35- private static final int HAS_NEGATIVE_BUCKETS_FLAG = 1 << 6 ;
36- private static final int SCALE_MASK = 0x3F ;
35+ private static final int HAS_NEGATIVE_BUCKETS_FLAG = 1 << 6 ; // = 64
36+ private static final int SCALE_MASK = 0x3F ; // = 63
3737 static {
3838 // protection against changes to MIN_SCALE and MAX_SCALE messing with our encoding
3939 assert MIN_SCALE + SCALE_OFFSET >= 0 ;
@@ -271,6 +271,7 @@ public static void writeHistogramBytes(
271271 assert scale >= MIN_SCALE && scale <= MAX_SCALE : "scale must be in range [" + MIN_SCALE + ", " + MAX_SCALE + "]" ;
272272 boolean hasNegativeBuckets = negativeBuckets .isEmpty () == false ;
273273 int scaleWithFlags = (scale + SCALE_OFFSET );
274+ assert scale >= 0 && scale <= SCALE_MASK ;
274275 if (hasNegativeBuckets ) {
275276 scaleWithFlags |= HAS_NEGATIVE_BUCKETS_FLAG ;
276277 }
@@ -285,32 +286,22 @@ public static void writeHistogramBytes(
285286 serializeBuckets (output , positiveBuckets );
286287 }
287288
288- /**
289- * Encodes the given bucket indices and counts as bytes into the given output.
290- * The following scheme is used to maximize compression:
291- * <ul>
292- * <li>if there are no buckets, the result is an empty array ({@code byte[0]})</li>
293- * <li> write the index of the first bucket as ZigZag-VLong</li>
294- * <li> write the count of the first bucket as ZigZag-VLong</li>
295- * <li> for each remaining bucket:
296- * <ul>
297- * <li>if the index of the bucket is exactly {@code previousBucketIndex+1}, write the count for the bucket as ZigZag-VLong</li>
298- * <li>Otherwise there is at least one empty bucket between this one and the previous one.
299- * We compute that number as {@code n=currentBucketIndex-previousIndex-1} and then write {@code -n} out as
300- * ZigZag-VLong followed by the count for the bucket as ZigZag-VLong. The negation is performed to allow to
301- * distinguish the cases when decoding.</li>
302- * </ul>
303- * </li>
304- * </ul>
305- *
306- * This encoding provides a compact storage for both dense and sparse histograms:
307- * For dense histograms it effectively results in encoding the index of the first bucket, followed by just an array of counts.
308- * For sparse histograms it corresponds to an interleaved encoding of the bucket indices with delta compression and the bucket counts.
309- * Even partially dense histograms profit from this encoding.
310- *
311- * @param out the output to write the encoded buckets to
312- * @param buckets the indices and counts of the buckets to encode, must be provided sorted based on the indices.
313- */
289+ // Encodes the given bucket indices and counts as bytes into the given output.
290+ // The following scheme is used to maximize compression:
291+ // - if there are no buckets, the result is an empty array (byte[0])
292+ // - write the index of the first bucket as ZigZag-VLong
293+ // - write the count of the first bucket as ZigZag-VLong
294+ // - for each remaining bucket:
295+ // - if the index of the bucket is exactly previousBucketIndex+1, write the count for the bucket as ZigZag-VLong
296+ // - Otherwise there is at least one empty bucket between this one and the previous one.
297+ // We compute that number as n=currentBucketIndex-previousBucketIndex-1 and then write -n out as
298+ // ZigZag-VLong, followed by the count for the bucket as ZigZag-VLong. The value is negated so that
299+ // the decoder can distinguish the two cases.
300+ //
301+ // This encoding provides a compact storage for both dense and sparse histograms:
302+ // For dense histograms it effectively results in encoding the index of the first bucket, followed by just an array of counts.
303+ // For sparse histograms it corresponds to an interleaved encoding of the bucket indices with delta compression and the bucket counts.
304+ // Even partially dense histograms profit from this encoding.
314305 private static void serializeBuckets (StreamOutput out , List <IndexWithCount > buckets ) throws IOException {
315306 if (buckets .isEmpty ()) {
316307 return ; // no buckets, therefore nothing to write
0 commit comments