From 23e80843569f326c1f2284e5706c390619bf512f Mon Sep 17 00:00:00 2001 From: Jonas Kunz Date: Wed, 27 Aug 2025 11:06:23 +0200 Subject: [PATCH 01/11] Implement exponential histogram equality --- .../ExponentialHistogram.java | 52 ++++++++++++ .../ExponentialScaleUtils.java | 11 +++ .../exponentialhistogram/ZeroBucket.java | 80 +++++++++++++++---- .../ExponentialHistogramMergerTests.java | 8 +- .../ExponentialHistogramXContentTests.java | 6 +- .../exponentialhistogram/ZeroBucketTests.java | 33 +++++++- .../CompressedExponentialHistogram.java | 2 +- 7 files changed, 164 insertions(+), 28 deletions(-) diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java index f4603f3fe679c..2a631b052b107 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java @@ -120,6 +120,58 @@ static ExponentialHistogram empty() { return EmptyExponentialHistogram.INSTANCE; } + static boolean equals(ExponentialHistogram a, ExponentialHistogram b) { + if (a == b) { + return true; + } + if (a == null || b == null) { + return false; + } + if (a.scale() != b.scale()) { + return false; + } + if (a.zeroBucket().count() != b.zeroBucket().count()) { + return false; + } + if (Double.compare(a.zeroBucket().zeroThreshold(), b.zeroBucket().zeroThreshold()) != 0) { + return false; + } + if (a.positiveBuckets().valueCount() != b.positiveBuckets().valueCount()) { + return false; + } + if (a.negativeBuckets().valueCount() != b.negativeBuckets().valueCount()) { + return false; + } + BucketIterator itA = a.positiveBuckets().iterator(); + BucketIterator itB = b.positiveBuckets().iterator(); + while (itA.hasNext() && itB.hasNext()) { + if (itA.peekIndex() != itB.peekIndex()) { + return false; + } + if (itA.peekCount() != itB.peekCount()) { + return false; + } + itA.advance(); + itB.advance(); + } + if (itA.hasNext() || itB.hasNext()) { + return false; + } + itA = a.negativeBuckets().iterator(); + itB = b.negativeBuckets().iterator(); + while (itA.hasNext() && itB.hasNext()) { + if (itA.peekIndex() != itB.peekIndex()) { + return false; + } + if (itA.peekCount() != itB.peekCount()) { + return false; + } + itA.advance(); + itB.advance(); + } + return itA.hasNext() == false && itB.hasNext() == false; + } + /** * Creates a histogram representing the distribution of the given values with at most the given number of buckets. * If the given {@code maxBucketCount} is greater than or equal to the number of values, the resulting histogram will have a diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtils.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtils.java index a9a93cf023369..b2df7a28bafcf 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtils.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialScaleUtils.java @@ -185,6 +185,17 @@ public static int getMaximumScaleIncrease(long index) { return Long.numberOfLeadingZeros(index) - (64 - MAX_INDEX_BITS); } + + /** + * Returns a scale at to which the given index can be scaled down without changing the exponentially scaled number it represents. + * @param index the index of the number + * @param scale the current scale of the number + * @return the new scale + */ + static int normalizeScale(long index, int scale) { + return Math.max(MIN_SCALE, scale - Long.numberOfTrailingZeros(index)); + } + /** * Returns the upper boundary of the bucket with the given index and scale. * diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java index 6a9d24e87c0e1..86d95573769a3 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java @@ -27,9 +27,11 @@ import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_SCALE; import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_INDEX; import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_SCALE; +import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.adjustScale; import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.compareExponentiallyScaledValues; import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.computeIndex; import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.exponentiallyScaledToDoubleValue; +import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.normalizeScale; /** * Represents the bucket for values around zero in an exponential histogram. @@ -62,13 +64,7 @@ public final class ZeroBucket { // A singleton for an empty zero bucket with the smallest possible threshold. private static final ZeroBucket MINIMAL_EMPTY = new ZeroBucket(MIN_INDEX, MIN_SCALE, 0); - /** - * Creates a new zero bucket with a specific threshold and count. - * - * @param zeroThreshold The threshold defining the bucket's range [-zeroThreshold, +zeroThreshold]. - * @param count The number of values in the bucket. - */ - public ZeroBucket(double zeroThreshold, long count) { + private ZeroBucket(double zeroThreshold, long count) { assert zeroThreshold >= 0.0 : "zeroThreshold must not be negative"; this.index = Long.MAX_VALUE; // compute lazily when needed this.scale = MAX_SCALE; @@ -85,11 +81,11 @@ private ZeroBucket(long index, int scale, long count) { this.count = count; } - private ZeroBucket(double realThreshold, long index, int scale, long count) { - this.realThreshold = realThreshold; - this.index = index; - this.scale = scale; - this.count = count; + private ZeroBucket(ZeroBucket toCopy, long newCount) { + this.realThreshold = toCopy.realThreshold; + this.index = toCopy.index; + this.scale = toCopy.scale; + this.count = newCount; } /** @@ -109,8 +105,37 @@ public static ZeroBucket minimalWithCount(long count) { if (count == 0) { return MINIMAL_EMPTY; } else { - return new ZeroBucket(MINIMAL_EMPTY.zeroThreshold(), MINIMAL_EMPTY.index(), MINIMAL_EMPTY.scale(), count); + return new ZeroBucket(MINIMAL_EMPTY, count); + } + } + + /** + * Creates a zero bucket from the given threshold represented as double. + * + * @param zeroThreshold the zero threshold defining the bucket range [-zeroThreshold, +zeroThreshold], must be non-negative + * @param count the number of values in the bucket + * @return the new {@link ZeroBucket} + */ + public static ZeroBucket create(double zeroThreshold, long count) { + if (zeroThreshold == 0) { + return minimalWithCount(count); } + return new ZeroBucket(zeroThreshold, count); + } + + /** + * Creates a zero bucket from the given threshold represented as exponentially scaled number. + * + * @param index the index of the exponentially scaled number defining the zero threshold + * @param scale the corresponding scale for the index + * @param count the number of values in the bucket + * @return the new {@link ZeroBucket} + */ + public static ZeroBucket create(long index, int scale, long count) { + if (index == MINIMAL_EMPTY.index && scale == MINIMAL_EMPTY.scale) { + return minimalWithCount(count); + } + return new ZeroBucket(index, scale, count); } /** @@ -158,9 +183,9 @@ public ZeroBucket merge(ZeroBucket other) { long totalCount = count + other.count; // Both are populated, so we need to use the higher zero-threshold. if (this.compareZeroThreshold(other) >= 0) { - return new ZeroBucket(realThreshold, index, scale, totalCount); + return new ZeroBucket(this, totalCount); } else { - return new ZeroBucket(other.realThreshold, other.index, other.scale, totalCount); + return new ZeroBucket(other, totalCount); } } } @@ -219,10 +244,33 @@ public ZeroBucket collapseOverlappingBuckets(BucketIterator buckets) { long collapsedUpperBoundIndex = highestCollapsedIndex + 1; if (compareExponentiallyScaledValues(index(), scale(), collapsedUpperBoundIndex, buckets.scale()) >= 0) { // Our current zero-threshold is larger than the upper boundary of the largest collapsed bucket, so we keep it. - return new ZeroBucket(realThreshold, index, scale, newZeroCount); + return new ZeroBucket(this, newZeroCount); } else { return new ZeroBucket(collapsedUpperBoundIndex, buckets.scale(), newZeroCount); } } } + + @Override + public boolean equals(Object o) { + if (o == null || getClass() != o.getClass()) return false; + ZeroBucket that = (ZeroBucket) o; + if (count() != that.count()) return false; + if (Double.compare(zeroThreshold(), that.zeroThreshold()) != 0) return false; + if (compareExponentiallyScaledValues(index(), scale(), that.index(), that.scale()) != 0) return false; + return true; + } + + @Override + public int hashCode() { + int normalizedScale = normalizeScale(index(), scale); + int scaleAdjustment = normalizedScale - scale; + long normalizedIndex = adjustScale(index(), scale, scaleAdjustment); + + int result = normalizedScale; + result = 31 * result + Long.hashCode(normalizedIndex); + result = 31 * result + Double.hashCode(zeroThreshold()); + result = 31 * result + Long.hashCode(count); + return result; + } } diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java index 98eca3fe94100..efd6652d6d5b7 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java @@ -43,7 +43,7 @@ public class ExponentialHistogramMergerTests extends ExponentialHistogramTestCas public void testZeroThresholdCollapsesOverlappingBuckets() { FixedCapacityExponentialHistogram first = createAutoReleasedHistogram(100); - first.setZeroBucket(new ZeroBucket(2.0001, 10)); + first.setZeroBucket(ZeroBucket.create(2.0001, 10)); FixedCapacityExponentialHistogram second = createAutoReleasedHistogram(100); first.resetBuckets(0); // scale 0 means base 2 @@ -76,7 +76,7 @@ public void testZeroThresholdCollapsesOverlappingBuckets() { // ensure buckets of the accumulated histogram are collapsed too if needed FixedCapacityExponentialHistogram third = createAutoReleasedHistogram(100); - third.setZeroBucket(new ZeroBucket(45.0, 1)); + third.setZeroBucket(ZeroBucket.create(45.0, 1)); mergeResult = mergeWithMinimumScale(100, 0, mergeResult, third); assertThat(mergeResult.zeroBucket().zeroThreshold(), closeTo(45.0, 0.000001)); @@ -87,12 +87,12 @@ public void testZeroThresholdCollapsesOverlappingBuckets() { public void testEmptyZeroBucketIgnored() { FixedCapacityExponentialHistogram first = createAutoReleasedHistogram(100); - first.setZeroBucket(new ZeroBucket(2.0, 10)); + first.setZeroBucket(ZeroBucket.create(2.0, 10)); first.resetBuckets(0); // scale 0 means base 2 first.tryAddBucket(2, 42L, true); // bucket (4, 8] FixedCapacityExponentialHistogram second = createAutoReleasedHistogram(100); - second.setZeroBucket(new ZeroBucket(100.0, 0)); + second.setZeroBucket(ZeroBucket.create(100.0, 0)); ExponentialHistogram mergeResult = mergeWithMinimumScale(100, 0, first, second); diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramXContentTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramXContentTests.java index f0ccfba098e20..8731c52a49e04 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramXContentTests.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramXContentTests.java @@ -38,7 +38,7 @@ public void testEmptyHistogram() { public void testFullHistogram() { FixedCapacityExponentialHistogram histo = createAutoReleasedHistogram(100); - histo.setZeroBucket(new ZeroBucket(0.1234, 42)); + histo.setZeroBucket(ZeroBucket.create(0.1234, 42)); histo.resetBuckets(7); histo.tryAddBucket(-10, 15, false); histo.tryAddBucket(10, 5, false); @@ -59,14 +59,14 @@ public void testFullHistogram() { public void testOnlyZeroThreshold() { FixedCapacityExponentialHistogram histo = createAutoReleasedHistogram(10); - histo.setZeroBucket(new ZeroBucket(5.0, 0)); + histo.setZeroBucket(ZeroBucket.create(5.0, 0)); histo.resetBuckets(3); assertThat(toJson(histo), equalTo("{\"scale\":3,\"zero\":{\"threshold\":5.0}}")); } public void testOnlyZeroCount() { FixedCapacityExponentialHistogram histo = createAutoReleasedHistogram(10); - histo.setZeroBucket(new ZeroBucket(0.0, 7)); + histo.setZeroBucket(ZeroBucket.create(0.0, 7)); histo.resetBuckets(2); assertThat(toJson(histo), equalTo("{\"scale\":2,\"zero\":{\"count\":7}}")); } diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ZeroBucketTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ZeroBucketTests.java index 1d2cbd57604ab..361a53badb94d 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ZeroBucketTests.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ZeroBucketTests.java @@ -22,6 +22,7 @@ package org.elasticsearch.exponentialhistogram; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.not; public class ZeroBucketTests extends ExponentialHistogramTestCase { @@ -30,13 +31,13 @@ public void testMinimalBucketHasZeroThreshold() { } public void testExactThresholdPreserved() { - ZeroBucket bucket = new ZeroBucket(3.0, 10); + ZeroBucket bucket = ZeroBucket.create(3.0, 10); assertThat(bucket.zeroThreshold(), equalTo(3.0)); } public void testMergingPreservesExactThreshold() { - ZeroBucket bucketA = new ZeroBucket(3.0, 10); - ZeroBucket bucketB = new ZeroBucket(3.5, 20); + ZeroBucket bucketA = ZeroBucket.create(3.0, 10); + ZeroBucket bucketB = ZeroBucket.create(3.5, 20); ZeroBucket merged = bucketA.merge(bucketB); assertThat(merged.zeroThreshold(), equalTo(3.5)); assertThat(merged.count(), equalTo(30L)); @@ -47,7 +48,7 @@ public void testBucketCollapsingPreservesExactThreshold() { histo.resetBuckets(0); histo.tryAddBucket(0, 42, true); // bucket [1,2] - ZeroBucket bucketA = new ZeroBucket(3.0, 10); + ZeroBucket bucketA = ZeroBucket.create(3.0, 10); CopyableBucketIterator iterator = histo.positiveBuckets().iterator(); ZeroBucket merged = bucketA.collapseOverlappingBuckets(iterator); @@ -57,4 +58,28 @@ public void testBucketCollapsingPreservesExactThreshold() { assertThat(merged.count(), equalTo(52L)); } + public void testHashCodeEquality() { + assertEqualsContract(ZeroBucket.minimalEmpty(), ZeroBucket.create(0.0, 0)); + assertThat(ZeroBucket.minimalEmpty(), not(equalTo(ZeroBucket.create(0.0, 1)))); + assertThat(ZeroBucket.minimalEmpty(), not(equalTo(ZeroBucket.create(0.1, 0)))); + + assertEqualsContract(ZeroBucket.minimalWithCount(42), ZeroBucket.create(0.0, 42)); + assertThat(ZeroBucket.minimalWithCount(42), not(equalTo(ZeroBucket.create(0.0, 12)))); + assertThat(ZeroBucket.minimalWithCount(42), not(equalTo(ZeroBucket.create(0.1, 42)))); + + ZeroBucket minimalEmpty = ZeroBucket.minimalEmpty(); + assertEqualsContract(ZeroBucket.minimalWithCount(42), ZeroBucket.create(minimalEmpty.index(), minimalEmpty.scale(), 42)); + assertThat(ZeroBucket.minimalWithCount(42), not(equalTo(ZeroBucket.create(minimalEmpty.index(), minimalEmpty.scale(), 41)))); + assertThat(ZeroBucket.minimalWithCount(42), not(equalTo(ZeroBucket.create(minimalEmpty.index() + 1, minimalEmpty.scale(), 42)))); + + assertEqualsContract(ZeroBucket.create(123.56, 123), ZeroBucket.create(123.56, 123)); + assertThat(ZeroBucket.create(123.56, 123), not(equalTo(ZeroBucket.create(123.57, 123)))); + assertThat(ZeroBucket.create(123.56, 123), not(equalTo(ZeroBucket.create(123.56, 12)))); + } + + void assertEqualsContract(ZeroBucket a, ZeroBucket b) { + assertThat(a, equalTo(b)); + assertThat(a.hashCode(), equalTo(b.hashCode())); + } + } diff --git a/x-pack/plugin/mapper-exponential-histogram/src/main/java/org/elasticsearch/xpack/exponentialhistogram/CompressedExponentialHistogram.java b/x-pack/plugin/mapper-exponential-histogram/src/main/java/org/elasticsearch/xpack/exponentialhistogram/CompressedExponentialHistogram.java index 8a2e348568bbc..38144cad1fe25 100644 --- a/x-pack/plugin/mapper-exponential-histogram/src/main/java/org/elasticsearch/xpack/exponentialhistogram/CompressedExponentialHistogram.java +++ b/x-pack/plugin/mapper-exponential-histogram/src/main/java/org/elasticsearch/xpack/exponentialhistogram/CompressedExponentialHistogram.java @@ -48,7 +48,7 @@ public int scale() { public ZeroBucket zeroBucket() { if (lazyZeroBucket == null) { long zeroCount = valueCount - negativeBuckets.valueCount() - positiveBuckets.valueCount(); - lazyZeroBucket = new ZeroBucket(zeroThreshold, zeroCount); + lazyZeroBucket = ZeroBucket.create(zeroThreshold, zeroCount); } return lazyZeroBucket; } From 0723de6ecba91cc54ecb466b10c7e7a2fe5a66b8 Mon Sep 17 00:00:00 2001 From: Jonas Kunz Date: Wed, 27 Aug 2025 16:10:59 +0200 Subject: [PATCH 02/11] Add tests, fix implementation --- .../EmptyExponentialHistogram.java | 2 +- .../ExponentialHistogram.java | 116 +++++------- .../ExponentialHistogramUtils.java | 24 +++ .../ExponentialScaleUtils.java | 1 - .../FixedCapacityExponentialHistogram.java | 2 +- .../ReleasableExponentialHistogram.java | 4 +- .../ExponentialHistogramEqualityTests.java | 172 ++++++++++++++++++ .../CompressedExponentialHistogram.java | 2 +- 8 files changed, 249 insertions(+), 74 deletions(-) create mode 100644 libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramEqualityTests.java diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/EmptyExponentialHistogram.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/EmptyExponentialHistogram.java index eb0be8bc9262d..40d016504c7c2 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/EmptyExponentialHistogram.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/EmptyExponentialHistogram.java @@ -23,7 +23,7 @@ import java.util.OptionalLong; -class EmptyExponentialHistogram implements ReleasableExponentialHistogram { +class EmptyExponentialHistogram extends ReleasableExponentialHistogram { static final EmptyExponentialHistogram INSTANCE = new EmptyExponentialHistogram(); diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java index eafed3a827e67..221972b3263c4 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java @@ -27,10 +27,13 @@ import java.util.List; import java.util.OptionalLong; +import static org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils.bucketIteratorHash; +import static org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils.bucketIteratorsEqual; + /** - * Interface for implementations of exponential histograms adhering to the + * Base class for implementations of exponential histograms adhering to the * OpenTelemetry definition. - * This interface supports sparse implementations, allowing iteration over buckets without requiring direct index access.
+ * This class supports sparse implementations, allowing iteration over buckets without requiring direct index access.
* The most important properties are: *