Skip to content

Commit ff78a50 — "Review fixes"

Browse files

1 parent: 69cd05b

File tree

1 file changed: +19 additions, -28 deletions

x-pack/plugin/mapper-exponential-histogram/src/main/java/org/elasticsearch/xpack/exponentialhistogram/CompressedExponentialHistogram.java

Lines changed: 19 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,8 @@
3232
public class CompressedExponentialHistogram implements ExponentialHistogram {
3333

3434
private static final int SCALE_OFFSET = 11;
35-
private static final int HAS_NEGATIVE_BUCKETS_FLAG = 1 << 6;
36-
private static final int SCALE_MASK = 0x3F;
35+
private static final int HAS_NEGATIVE_BUCKETS_FLAG = 1 << 6; // = 64
36+
private static final int SCALE_MASK = 0x3F; // = 63
3737
static {
3838
// protection against changes to MIN_SCALE and MAX_SCALE messing with our encoding
3939
assert MIN_SCALE + SCALE_OFFSET >= 0;
@@ -271,6 +271,7 @@ public static void writeHistogramBytes(
271271
assert scale >= MIN_SCALE && scale <= MAX_SCALE : "scale must be in range [" + MIN_SCALE + ", " + MAX_SCALE + "]";
272272
boolean hasNegativeBuckets = negativeBuckets.isEmpty() == false;
273273
int scaleWithFlags = (scale + SCALE_OFFSET);
274+
assert scale >= 0 && scale <= SCALE_MASK;
274275
if (hasNegativeBuckets) {
275276
scaleWithFlags |= HAS_NEGATIVE_BUCKETS_FLAG;
276277
}
@@ -285,32 +286,22 @@ public static void writeHistogramBytes(
285286
serializeBuckets(output, positiveBuckets);
286287
}
287288

288-
/**
289-
* Encodes the given bucket indices and counts as bytes into the given output.
290-
* The following scheme is used to maximize compression:
291-
* <ul>
292-
* <li>if there are no buckets, the result is an empty array ({@code byte[0]})</li>
293-
* <li> write the index of the first bucket as ZigZag-VLong</li>
294-
* <li> write the count of the first bucket as ZigZag-VLong</li>
295-
* <li> for each remaining bucket:
296-
* <ul>
297-
* <li>if the index of the bucket is exactly {@code previousBucketIndex+1}, write the count for the bucket as ZigZag-VLong</li>
298-
* <li>Otherwise there is at least one empty bucket between this one and the previous one.
299-
* We compute that number as {@code n=currentBucketIndex-previousIndex-1} and then write {@code -n} out as
300-
* ZigZag-VLong followed by the count for the bucket as ZigZag-VLong. The negation is performed to allow to
301-
* distinguish the cases when decoding.</li>
302-
* </ul>
303-
* </li>
304-
* </ul>
305-
*
306-
* This encoding provides a compact storage for both dense and sparse histograms:
307-
* For dense histograms it effectively results in encoding the index of the first bucket, followed by just an array of counts.
308-
* For sparse histograms it corresponds to an interleaved encoding of the bucket indices with delta compression and the bucket counts.
309-
* Even partially dense histograms profit from this encoding.
310-
*
311-
* @param out the output to write the encoded buckets to
312-
* @param buckets the indices and counts of the buckets to encode, must be provided sorted based on the indices.
313-
*/
289+
// Encodes the given bucket indices and counts as bytes into the given output.
290+
// The following scheme is used to maximize compression:
291+
// - if there are no buckets, the result is an empty array (byte[0])
292+
// - write the index of the first bucket as ZigZag-VLong
293+
// - write the count of the first bucket as ZigZag-VLong
294+
// - for each remaining bucket:
295+
// - if the index of the bucket is exactly previousBucketIndex+1, write the count for the bucket as ZigZag-VLong
296+
// - Otherwise there is at least one empty bucket between this one and the previous one.
297+
// We compute that number as n=currentBucketIndex-previousIndex-1 and then write -n out as
298+
// ZigZag-VLong followed by the count for the bucket as ZigZag-VLong. The negation is performed to allow to
299+
// distinguish the cases when decoding.
300+
//
301+
// This encoding provides a compact storage for both dense and sparse histograms:
302+
// For dense histograms it effectively results in encoding the index of the first bucket, followed by just an array of counts.
303+
// For sparse histograms it corresponds to an interleaved encoding of the bucket indices with delta compression and the bucket counts.
304+
// Even partially dense histograms profit from this encoding.
314305
private static void serializeBuckets(StreamOutput out, List<IndexWithCount> buckets) throws IOException {
315306
if (buckets.isEmpty()) {
316307
return; // no buckets, therefore nothing to write

Comments (0)