diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramMergeBench.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramMergeBench.java
index 0f7ee6a2c763e..d1fcf0fb28464 100644
--- a/benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramMergeBench.java
+++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramMergeBench.java
@@ -130,7 +130,7 @@ private ExponentialHistogram asCompressedHistogram(ExponentialHistogram histogra
             CompressedExponentialHistogram.writeHistogramBytes(histoBytes, histogram.scale(), negativeBuckets, positiveBuckets);
             CompressedExponentialHistogram result = new CompressedExponentialHistogram();
             BytesRef data = histoBytes.bytes().toBytesRef();
-            result.reset(histogram.zeroBucket().zeroThreshold(), totalCount, data);
+            result.reset(histogram.zeroBucket().zeroThreshold(), totalCount, histogram.sum(), data);
             return result;
         } catch (IOException e) {
             throw new RuntimeException(e);
diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/EmptyExponentialHistogram.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/EmptyExponentialHistogram.java
index f2d21f0861065..eb0be8bc9262d 100644
--- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/EmptyExponentialHistogram.java
+++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/EmptyExponentialHistogram.java
@@ -77,6 +77,11 @@ public Buckets negativeBuckets() {
         return EmptyBuckets.INSTANCE;
     }
 
+    @Override
+    public double sum() {
+        return 0;
+    }
+
     @Override
     public long ramBytesUsed() {
         return 0;
diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java
index f4603f3fe679c..3abfdddcf3f71 100644
--- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java
+++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java
@@ -47,7 +47,7 @@
  */
 public interface ExponentialHistogram extends Accountable {
 
-    // TODO(b/128622): support min/max/sum/count storage and merging.
+    // TODO(b/128622): support min/max storage and merging.
     // TODO(b/128622): Add special positive and negative infinity buckets
     // to allow representation of explicit bucket histograms with open boundaries.
 
@@ -93,6 +93,15 @@ public interface ExponentialHistogram extends Accountable {
      */
     Buckets negativeBuckets();
 
+    /**
+     * Returns the sum of all values represented by this histogram.
+     * Note that even if histograms are cumulative, the sum is not guaranteed to be monotonically increasing,
+     * because histograms support negative values.
+     *
+     * @return the sum, guaranteed to be zero for empty histograms
+     */
+    double sum();
+
     /**
      * Represents a bucket range of an {@link ExponentialHistogram}, either the positive or the negative range.
      */
diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGenerator.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGenerator.java
index ef4e7f57dba44..29a28db64175d 100644
--- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGenerator.java
+++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGenerator.java
@@ -123,6 +123,7 @@ private void mergeValuesToHistogram() {
         }
 
         valueBuffer.reset();
+        valueBuffer.setSum(rawValuesSum());
         int scale = valueBuffer.scale();
 
         // Buckets must be provided with their indices in ascending order.
@@ -161,6 +162,14 @@ private void mergeValuesToHistogram() {
         valueCount = 0;
     }
 
+    private double rawValuesSum() {
+        double sum = 0;
+        for (int i = 0; i < valueCount; i++) {
+            sum += rawValueBuffer[i];
+        }
+        return sum;
+    }
+
     private static long estimateBaseSize(int numBuckets) {
         return SHALLOW_SIZE + RamEstimationUtil.estimateDoubleArray(numBuckets);
     };
diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java
index a92fb376abc9a..ba71f90bf923e 100644
--- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java
+++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java
@@ -150,6 +150,7 @@ public void add(ExponentialHistogram toAdd) {
             buffer = FixedCapacityExponentialHistogram.create(bucketLimit, circuitBreaker);
         }
         buffer.setZeroBucket(zeroBucket);
+        buffer.setSum(a.sum() + b.sum());
 
         // We attempt to bring everything to the scale of A.
         // This might involve increasing the scale for B, which would increase its indices.
diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java
new file mode 100644
index 0000000000000..5359b3c41da15
--- /dev/null
+++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtils.java
@@ -0,0 +1,62 @@
+/*
+ * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V.
+ * under one or more license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License.
+ */
+
+package org.elasticsearch.exponentialhistogram;
+
+public class ExponentialHistogramUtils {
+
+    /**
+     * Estimates the sum of all values of a histogram just based on the populated buckets.
+     * Will never return NaN, but might return +/-Infinity if the histogram is too big.
+     *
+     * @param negativeBuckets the negative buckets of the histogram
+     * @param positiveBuckets the positive buckets of the histogram
+     * @return the estimated sum of all values in the histogram, guaranteed to be zero if there are no buckets.
+     */
+    public static double estimateSum(BucketIterator negativeBuckets, BucketIterator positiveBuckets) {
+        assert negativeBuckets.scale() == positiveBuckets.scale();
+
+        // for each bucket index, sum up the counts, but account for the positive/negative sign
+        BucketIterator it = new MergingBucketIterator(
+            positiveBuckets,
+            negativeBuckets,
+            positiveBuckets.scale(),
+            (positiveCount, negativeCount) -> positiveCount - negativeCount
+        );
+        double sum = 0.0;
+        while (it.hasNext()) {
+            long countWithSign = it.peekCount();
+            double bucketMidPoint = ExponentialScaleUtils.getPointOfLeastRelativeError(it.peekIndex(), it.scale());
+            if (countWithSign != 0) { // avoid 0 * INFINITY = NaN
+                double toAdd = bucketMidPoint * countWithSign;
+                if (Double.isFinite(toAdd)) {
+                    sum += toAdd;
+                } else {
+                    // Avoid NaN in case we end up with e.g. -Infinity+Infinity
+                    // we consider the bucket with the bigger index the winner for the sign
+                    sum = toAdd;
+                }
+            }
+            it.advance();
+        }
+        return sum;
+    }
+}
diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramXContent.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramXContent.java
index 7fb066bc325f5..8f0995dca6e9c 100644
--- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramXContent.java
+++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramXContent.java
@@ -31,6 +31,7 @@ public class ExponentialHistogramXContent {
 
     public static final String SCALE_FIELD = "scale";
+    public static final String SUM_FIELD = "sum";
     public static final String ZERO_FIELD = "zero";
     public static final String ZERO_COUNT_FIELD = "count";
     public static final String ZERO_THRESHOLD_FIELD = "threshold";
@@ -49,6 +50,7 @@ public static void serialize(XContentBuilder builder, ExponentialHistogram histo
         builder.startObject();
 
         builder.field(SCALE_FIELD, histogram.scale());
+        builder.field(SUM_FIELD, histogram.sum());
 
         double zeroThreshold = histogram.zeroBucket().zeroThreshold();
         long zeroCount = histogram.zeroBucket().count();
diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogram.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogram.java
index 255206cad8e81..29fd8f01520a2 100644
--- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogram.java
+++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogram.java
@@ -53,6 +53,8 @@ final class FixedCapacityExponentialHistogram implements ReleasableExponentialHi
 
     private final Buckets positiveBuckets = new Buckets(true);
 
+    private double sum;
+
     private final ExponentialHistogramCircuitBreaker circuitBreaker;
 
     private boolean closed = false;
@@ -78,6 +80,7 @@ private FixedCapacityExponentialHistogram(int bucketCapacity, ExponentialHistogr
      * Resets this histogram to the same state as a newly constructed one with the same capacity.
      */
     void reset() {
+        sum = 0;
         setZeroBucket(ZeroBucket.minimalEmpty());
         resetBuckets(MAX_SCALE);
     }
@@ -110,6 +113,15 @@ void setZeroBucket(ZeroBucket zeroBucket) {
         this.zeroBucket = zeroBucket;
     }
 
+    @Override
+    public double sum() {
+        return sum;
+    }
+
+    void setSum(double sum) {
+        this.sum = sum;
+    }
+
     /**
      * Attempts to add a bucket to the positive or negative range of this histogram.
      *
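A note on the estimation logic introduced above: ExponentialHistogramUtils.estimateSum works purely from bucket indices. At scale s, the bucket with index i covers the range (base^i, base^(i+1)] with base = 2^(2^-s), so multiplying a representative point of that range by the signed bucket count approximates the bucket's contribution to the sum. The following standalone sketch (not Elasticsearch code) illustrates that arithmetic; it uses a plain arithmetic midpoint, whereas the library uses ExponentialScaleUtils.getPointOfLeastRelativeError, so its numbers differ slightly from what the library produces.

// Standalone sketch of per-bucket sum estimation; the bucket layout and the
// midpoint choice are illustrative assumptions, not library behavior.
public class SumEstimationSketch {

    public static void main(String[] args) {
        int scale = 1;
        // A bucket at index i covers (base^i, base^(i+1)] where base = 2^(2^-scale).
        double base = Math.pow(2.0, Math.pow(2.0, -scale)); // ~1.41421 for scale 1

        long[] indices = { 0, 1, 8 };
        long[] counts = { 1, 5, 1 };

        double estimate = 0.0;
        for (int i = 0; i < indices.length; i++) {
            double lowerBound = Math.pow(base, indices[i]);
            double upperBound = Math.pow(base, indices[i] + 1);
            double representative = (lowerBound + upperBound) / 2.0; // simple midpoint
            estimate += representative * counts[i];
        }

        System.out.println("estimated sum: " + estimate);
        System.out.println("relative error bound per value: " + (base - 1));
    }
}

The same geometry yields the error bound checked in ExponentialHistogramUtilsTests: as long as all values share one sign, each value differs from its bucket's representative point by a relative error of at most base - 1, i.e. 2^(2^-scale) - 1.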
diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java index ca13901cd95e9..6ba293d2b11cb 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/MergingBucketIterator.java @@ -21,6 +21,8 @@ package org.elasticsearch.exponentialhistogram; +import java.util.function.LongBinaryOperator; + /** * An iterator that merges two bucket iterators, aligning them to a common scale and combining buckets with the same index. */ @@ -33,6 +35,8 @@ final class MergingBucketIterator implements BucketIterator { private long currentIndex; private long currentCount; + private final LongBinaryOperator countMergeOperator; + /** * Creates a new merging iterator. * @@ -41,8 +45,22 @@ final class MergingBucketIterator implements BucketIterator { * @param targetScale the histogram scale to which both iterators should be aligned */ MergingBucketIterator(BucketIterator itA, BucketIterator itB, int targetScale) { + this(itA, itB, targetScale, Long::sum); + } + + /** + * Creates a new merging iterator, using the provided operator to merge the counts. + * Note that the resulting count can be negative if the operator produces negative results. + * + * @param itA the first iterator to merge + * @param itB the second iterator to merge + * @param countMergeOperator the operator to use to merge counts of buckets with the same index + * @param targetScale the histogram scale to which both iterators should be aligned + */ + MergingBucketIterator(BucketIterator itA, BucketIterator itB, int targetScale, LongBinaryOperator countMergeOperator) { this.itA = new ScaleAdjustingBucketIterator(itA, targetScale); this.itB = new ScaleAdjustingBucketIterator(itB, targetScale); + this.countMergeOperator = countMergeOperator; endReached = false; advance(); } @@ -64,19 +82,21 @@ public void advance() { idxB = itB.peekIndex(); } - currentCount = 0; boolean advanceA = hasNextA && (hasNextB == false || idxA <= idxB); boolean advanceB = hasNextB && (hasNextA == false || idxB <= idxA); + long countA = 0; + long countB = 0; if (advanceA) { currentIndex = idxA; - currentCount += itA.peekCount(); + countA = itA.peekCount(); itA.advance(); } if (advanceB) { currentIndex = idxB; - currentCount += itB.peekCount(); + countB = itB.peekCount(); itB.advance(); } + currentCount = countMergeOperator.applyAsLong(countA, countB); } @Override @@ -106,4 +126,5 @@ private void assertEndNotReached() { throw new IllegalStateException("Iterator has no more buckets"); } } + } diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java index 98eca3fe94100..7800270811054 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java @@ -106,6 +106,22 @@ public void testEmptyZeroBucketIgnored() { assertThat(posBuckets.hasNext(), equalTo(false)); } + public void testSumCorrectness() { + double[] firstValues = randomDoubles(100).map(val -> val * 2 - 1).toArray(); + double[] 
secondValues = randomDoubles(50).map(val -> val * 2 - 1).toArray(); + double correctSum = Arrays.stream(firstValues).sum() + Arrays.stream(secondValues).sum(); + try ( + ReleasableExponentialHistogram merged = ExponentialHistogram.merge( + 2, + breaker(), + createAutoReleasedHistogram(10, firstValues), + createAutoReleasedHistogram(20, secondValues) + ) + ) { + assertThat(merged.sum(), closeTo(correctSum, 0.000001)); + } + } + public void testUpscalingDoesNotExceedIndexLimits() { for (int i = 0; i < 4; i++) { diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtilsTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtilsTests.java new file mode 100644 index 0000000000000..96d9d40a37208 --- /dev/null +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramUtilsTests.java @@ -0,0 +1,86 @@ +/* + * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V. + * under one or more license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License. + */ + +package org.elasticsearch.exponentialhistogram; + +import static org.hamcrest.Matchers.closeTo; +import static org.hamcrest.Matchers.equalTo; + +public class ExponentialHistogramUtilsTests extends ExponentialHistogramTestCase { + + public void testRandomDataSumEstimation() { + for (int i = 0; i < 100; i++) { + int valueCount = randomIntBetween(100, 10_000); + int bucketCount = randomIntBetween(2, 500); + + double correctSum = 0; + double sign = randomBoolean() ? 
1 : -1; + double[] values = new double[valueCount]; + for (int j = 0; j < valueCount; j++) { + values[j] = sign * Math.pow(10, randomIntBetween(1, 9)) * randomDouble(); + correctSum += values[j]; + } + + ExponentialHistogram histo = createAutoReleasedHistogram(bucketCount, values); + + double estimatedSum = ExponentialHistogramUtils.estimateSum( + histo.negativeBuckets().iterator(), + histo.positiveBuckets().iterator() + ); + + double correctAverage = correctSum / valueCount; + double estimatedAverage = estimatedSum / valueCount; + + // If the histogram does not contain mixed sign values, we have a guaranteed relative error bound of 2^(2^-scale) - 1 + double histogramBase = Math.pow(2, Math.pow(2, -histo.scale())); + double allowedError = Math.abs(correctAverage * (histogramBase - 1)); + + assertThat(estimatedAverage, closeTo(correctAverage, allowedError)); + } + } + + public void testInfinityHandling() { + FixedCapacityExponentialHistogram morePositiveValues = createAutoReleasedHistogram(100); + morePositiveValues.resetBuckets(0); + morePositiveValues.tryAddBucket(1999, 1, false); + morePositiveValues.tryAddBucket(2000, 2, false); + morePositiveValues.tryAddBucket(1999, 2, true); + morePositiveValues.tryAddBucket(2000, 2, true); + + double sum = ExponentialHistogramUtils.estimateSum( + morePositiveValues.negativeBuckets().iterator(), + morePositiveValues.positiveBuckets().iterator() + ); + assertThat(sum, equalTo(Double.POSITIVE_INFINITY)); + FixedCapacityExponentialHistogram moreNegativeValues = createAutoReleasedHistogram(100); + moreNegativeValues.resetBuckets(0); + moreNegativeValues.tryAddBucket(1999, 2, false); + moreNegativeValues.tryAddBucket(2000, 2, false); + moreNegativeValues.tryAddBucket(1999, 1, true); + moreNegativeValues.tryAddBucket(2000, 2, true); + + sum = ExponentialHistogramUtils.estimateSum( + moreNegativeValues.negativeBuckets().iterator(), + moreNegativeValues.positiveBuckets().iterator() + ); + assertThat(sum, equalTo(Double.NEGATIVE_INFINITY)); + } +} diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramXContentTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramXContentTests.java index f0ccfba098e20..11c226eac433a 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramXContentTests.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramXContentTests.java @@ -33,13 +33,14 @@ public class ExponentialHistogramXContentTests extends ExponentialHistogramTestC public void testEmptyHistogram() { ExponentialHistogram emptyHistogram = ExponentialHistogram.empty(); - assertThat(toJson(emptyHistogram), equalTo("{\"scale\":" + emptyHistogram.scale() + "}")); + assertThat(toJson(emptyHistogram), equalTo("{\"scale\":" + emptyHistogram.scale() + ",\"sum\":0.0}")); } public void testFullHistogram() { FixedCapacityExponentialHistogram histo = createAutoReleasedHistogram(100); histo.setZeroBucket(new ZeroBucket(0.1234, 42)); histo.resetBuckets(7); + histo.setSum(1234.56); histo.tryAddBucket(-10, 15, false); histo.tryAddBucket(10, 5, false); histo.tryAddBucket(-11, 10, true); @@ -49,6 +50,7 @@ public void testFullHistogram() { equalTo( "{" + "\"scale\":7," + + "\"sum\":1234.56," + "\"zero\":{\"count\":42,\"threshold\":0.1234}," + "\"positive\":{\"indices\":[-11,11],\"counts\":[10,20]}," + "\"negative\":{\"indices\":[-10,10],\"counts\":[15,5]}" @@ 
-61,30 +63,34 @@ public void testOnlyZeroThreshold() { FixedCapacityExponentialHistogram histo = createAutoReleasedHistogram(10); histo.setZeroBucket(new ZeroBucket(5.0, 0)); histo.resetBuckets(3); - assertThat(toJson(histo), equalTo("{\"scale\":3,\"zero\":{\"threshold\":5.0}}")); + histo.setSum(1.1); + assertThat(toJson(histo), equalTo("{\"scale\":3,\"sum\":1.1,\"zero\":{\"threshold\":5.0}}")); } public void testOnlyZeroCount() { FixedCapacityExponentialHistogram histo = createAutoReleasedHistogram(10); histo.setZeroBucket(new ZeroBucket(0.0, 7)); histo.resetBuckets(2); - assertThat(toJson(histo), equalTo("{\"scale\":2,\"zero\":{\"count\":7}}")); + histo.setSum(1.1); + assertThat(toJson(histo), equalTo("{\"scale\":2,\"sum\":1.1,\"zero\":{\"count\":7}}")); } public void testOnlyPositiveBuckets() { FixedCapacityExponentialHistogram histo = createAutoReleasedHistogram(10); histo.resetBuckets(4); + histo.setSum(1.1); histo.tryAddBucket(-1, 3, true); histo.tryAddBucket(2, 5, true); - assertThat(toJson(histo), equalTo("{\"scale\":4,\"positive\":{\"indices\":[-1,2],\"counts\":[3,5]}}")); + assertThat(toJson(histo), equalTo("{\"scale\":4,\"sum\":1.1,\"positive\":{\"indices\":[-1,2],\"counts\":[3,5]}}")); } public void testOnlyNegativeBuckets() { FixedCapacityExponentialHistogram histo = createAutoReleasedHistogram(10); histo.resetBuckets(5); + histo.setSum(1.1); histo.tryAddBucket(-1, 4, false); histo.tryAddBucket(2, 6, false); - assertThat(toJson(histo), equalTo("{\"scale\":5,\"negative\":{\"indices\":[-1,2],\"counts\":[4,6]}}")); + assertThat(toJson(histo), equalTo("{\"scale\":5,\"sum\":1.1,\"negative\":{\"indices\":[-1,2],\"counts\":[4,6]}}")); } private static String toJson(ExponentialHistogram histo) { diff --git a/x-pack/plugin/mapper-exponential-histogram/src/main/java/org/elasticsearch/xpack/exponentialhistogram/CompressedExponentialHistogram.java b/x-pack/plugin/mapper-exponential-histogram/src/main/java/org/elasticsearch/xpack/exponentialhistogram/CompressedExponentialHistogram.java index 8a2e348568bbc..78d3afaecf2e4 100644 --- a/x-pack/plugin/mapper-exponential-histogram/src/main/java/org/elasticsearch/xpack/exponentialhistogram/CompressedExponentialHistogram.java +++ b/x-pack/plugin/mapper-exponential-histogram/src/main/java/org/elasticsearch/xpack/exponentialhistogram/CompressedExponentialHistogram.java @@ -33,6 +33,7 @@ public class CompressedExponentialHistogram implements ExponentialHistogram { private double zeroThreshold; private long valueCount; + private double sum; private ZeroBucket lazyZeroBucket; private final EncodedHistogramData encodedData = new EncodedHistogramData(); @@ -53,6 +54,11 @@ public ZeroBucket zeroBucket() { return lazyZeroBucket; } + @Override + public double sum() { + return sum; + } + @Override public ExponentialHistogram.Buckets positiveBuckets() { return positiveBuckets; @@ -68,20 +74,23 @@ public ExponentialHistogram.Buckets negativeBuckets() { * * @param zeroThreshold the zeroThreshold for the histogram, which needs to be stored externally * @param valueCount the total number of values the histogram contains, needs to be stored externally + * @param sum the total sum of the values the histogram contains, needs to be stored externally * @param encodedHistogramData the encoded histogram bytes which previously where generated via * {@link #writeHistogramBytes(StreamOutput, int, List, List)}. 
*/ - public void reset(double zeroThreshold, long valueCount, BytesRef encodedHistogramData) throws IOException { + public void reset(double zeroThreshold, long valueCount, double sum, BytesRef encodedHistogramData) throws IOException { lazyZeroBucket = null; this.zeroThreshold = zeroThreshold; this.valueCount = valueCount; + this.sum = sum; encodedData.decode(encodedHistogramData); negativeBuckets.resetCachedData(); positiveBuckets.resetCachedData(); } /** - * Serializes the given histogram, so that exactly the same data can be reconstructed via {@link #reset(double, long, BytesRef)}. + * Serializes the given histogram, so that exactly the same data can be reconstructed via + * {@link #reset(double, long, double, BytesRef)}. * * @param output the output to write the serialized bytes to * @param scale the scale of the histogram diff --git a/x-pack/plugin/mapper-exponential-histogram/src/main/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapper.java b/x-pack/plugin/mapper-exponential-histogram/src/main/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapper.java index 9f137aa03ef73..30715b8411304 100644 --- a/x-pack/plugin/mapper-exponential-histogram/src/main/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapper.java +++ b/x-pack/plugin/mapper-exponential-histogram/src/main/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapper.java @@ -20,6 +20,7 @@ import org.elasticsearch.common.io.stream.BytesStreamOutput; import org.elasticsearch.common.util.FeatureFlag; import org.elasticsearch.exponentialhistogram.ExponentialHistogram; +import org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils; import org.elasticsearch.exponentialhistogram.ExponentialHistogramXContent; import org.elasticsearch.index.fielddata.FieldDataContext; import org.elasticsearch.index.fielddata.IndexFieldData; @@ -93,6 +94,7 @@ public class ExponentialHistogramFieldMapper extends FieldMapper { public static final String CONTENT_TYPE = "exponential_histogram"; public static final ParseField SCALE_FIELD = new ParseField(ExponentialHistogramXContent.SCALE_FIELD); + public static final ParseField SUM_FIELD = new ParseField(ExponentialHistogramXContent.SUM_FIELD); public static final ParseField ZERO_FIELD = new ParseField(ExponentialHistogramXContent.ZERO_FIELD); public static final ParseField ZERO_COUNT_FIELD = new ParseField(ExponentialHistogramXContent.ZERO_COUNT_FIELD); public static final ParseField ZERO_THRESHOLD_FIELD = new ParseField(ExponentialHistogramXContent.ZERO_THRESHOLD_FIELD); @@ -134,6 +136,10 @@ private static String valuesCountSubFieldName(String fullPath) { return fullPath + "._values_count"; } + private static String valuesSumSubFieldName(String fullPath) { + return fullPath + "._values_sum"; + } + static class Builder extends FieldMapper.Builder { private final FieldMapper.Parameter> meta = FieldMapper.Parameter.metaParam(); @@ -259,6 +265,7 @@ public void parse(DocumentParserContext context) throws IOException { return; } + Double sum = null; Integer scale = null; ParsedZeroBucket zeroBucket = ParsedZeroBucket.DEFAULT; List negativeBuckets = Collections.emptyList(); @@ -296,6 +303,8 @@ public void parse(DocumentParserContext context) throws IOException { + scale ); } + } else if (fieldName.equals(SUM_FIELD.getPreferredName())) { + sum = parseDoubleAllowingInfinity(subParser); } else if (fieldName.equals(ZERO_FIELD.getPreferredName())) { zeroBucket = parseZeroBucket(subParser); } else if 
(fieldName.equals(POSITIVE_FIELD.getPreferredName())) { @@ -336,6 +345,20 @@ public void parse(DocumentParserContext context) throws IOException { ); } + if (sum == null) { + sum = ExponentialHistogramUtils.estimateSum( + IndexWithCount.asBucketIterator(scale, negativeBuckets), + IndexWithCount.asBucketIterator(scale, positiveBuckets) + ); + } else { + if (totalValueCount == 0 && sum != 0.0) { + throw new DocumentParsingException( + subParser.getTokenLocation(), + "error parsing field [" + fullPath() + "], sum field must be zero if the histogram is empty, but got " + sum + ); + } + } + BytesStreamOutput histogramBytesOutput = new BytesStreamOutput(); CompressedExponentialHistogram.writeHistogramBytes(histogramBytesOutput, scale, negativeBuckets, positiveBuckets); BytesRef histoBytes = histogramBytesOutput.bytes().toBytesRef(); @@ -344,10 +367,15 @@ public void parse(DocumentParserContext context) throws IOException { long thresholdAsLong = NumericUtils.doubleToSortableLong(zeroBucket.threshold()); NumericDocValuesField zeroThresholdField = new NumericDocValuesField(zeroThresholdSubFieldName(fullPath()), thresholdAsLong); NumericDocValuesField valuesCountField = new NumericDocValuesField(valuesCountSubFieldName(fullPath()), totalValueCount); + NumericDocValuesField sumField = new NumericDocValuesField( + valuesSumSubFieldName(fullPath()), + NumericUtils.doubleToSortableLong(sum) + ); context.doc().addWithKey(fieldType().name(), histoField); context.doc().add(zeroThresholdField); context.doc().add(valuesCountField); + context.doc().add(sumField); } catch (Exception ex) { if (ignoreMalformed.value() == false) { @@ -377,6 +405,24 @@ public void parse(DocumentParserContext context) throws IOException { context.path().remove(); } + private double parseDoubleAllowingInfinity(XContentParser parser) throws IOException { + XContentParser.Token token = parser.nextToken(); + boolean isValidNumber = token == XContentParser.Token.VALUE_NUMBER; + if (token == XContentParser.Token.VALUE_STRING) { + String text = parser.text(); + if (text.equals("-Infinity") || text.equals("Infinity")) { + isValidNumber = true; + } + } + if (isValidNumber) { + return parser.doubleValue(); + } + throw new DocumentParsingException( + parser.getTokenLocation(), + "error parsing field [" + fullPath() + "], expected a number but got " + token + ); + } + private static long getTotalValueCount( ParsedZeroBucket zeroBucket, List positiveBuckets, @@ -582,6 +628,7 @@ private class ExponentialHistogramSyntheticFieldLoader implements CompositeSynth private BytesRef binaryValue; private double zeroThreshold; private long valueCount; + private double valueSum; @Override public SourceLoader.SyntheticFieldLoader.DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) @@ -594,18 +641,22 @@ public SourceLoader.SyntheticFieldLoader.DocValuesLoader docValuesLoader(LeafRea } NumericDocValues zeroThresholds = leafReader.getNumericDocValues(zeroThresholdSubFieldName(fullPath())); NumericDocValues valueCounts = leafReader.getNumericDocValues(valuesCountSubFieldName(fullPath())); + NumericDocValues valueSums = leafReader.getNumericDocValues(valuesSumSubFieldName(fullPath())); assert zeroThresholds != null; assert valueCounts != null; + assert valueSums != null; return docId -> { if (histoDocValues.advanceExact(docId)) { boolean zeroThresholdPresent = zeroThresholds.advanceExact(docId); boolean valueCountsPresent = valueCounts.advanceExact(docId); - assert zeroThresholdPresent && valueCountsPresent; + boolean valueSumsPresent = 
valueSums.advanceExact(docId); + assert zeroThresholdPresent && valueCountsPresent && valueSumsPresent; binaryValue = histoDocValues.binaryValue(); zeroThreshold = NumericUtils.sortableLongToDouble(zeroThresholds.longValue()); valueCount = valueCounts.longValue(); + valueSum = NumericUtils.sortableLongToDouble(valueSums.longValue()); return true; } binaryValue = null; @@ -624,7 +675,7 @@ public void write(XContentBuilder b) throws IOException { return; } - histogram.reset(zeroThreshold, valueCount, binaryValue); + histogram.reset(zeroThreshold, valueCount, valueSum, binaryValue); ExponentialHistogramXContent.serialize(b, histogram); } diff --git a/x-pack/plugin/mapper-exponential-histogram/src/main/java/org/elasticsearch/xpack/exponentialhistogram/IndexWithCount.java b/x-pack/plugin/mapper-exponential-histogram/src/main/java/org/elasticsearch/xpack/exponentialhistogram/IndexWithCount.java index 6a6d0254ea1bd..54c23508f220a 100644 --- a/x-pack/plugin/mapper-exponential-histogram/src/main/java/org/elasticsearch/xpack/exponentialhistogram/IndexWithCount.java +++ b/x-pack/plugin/mapper-exponential-histogram/src/main/java/org/elasticsearch/xpack/exponentialhistogram/IndexWithCount.java @@ -7,9 +7,44 @@ package org.elasticsearch.xpack.exponentialhistogram; +import org.elasticsearch.exponentialhistogram.BucketIterator; + +import java.util.List; + /** * An exponential histogram bucket represented by its index and associated bucket count. * @param index the index of the bucket * @param count the number of values in that bucket. */ -public record IndexWithCount(long index, long count) {} +public record IndexWithCount(long index, long count) { + public static BucketIterator asBucketIterator(int scale, List buckets) { + return new BucketIterator() { + int position = 0; + + @Override + public boolean hasNext() { + return position < buckets.size(); + } + + @Override + public long peekCount() { + return buckets.get(position).count; + } + + @Override + public long peekIndex() { + return buckets.get(position).index; + } + + @Override + public void advance() { + position++; + } + + @Override + public int scale() { + return scale; + } + }; + } +} diff --git a/x-pack/plugin/mapper-exponential-histogram/src/test/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapperTests.java b/x-pack/plugin/mapper-exponential-histogram/src/test/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapperTests.java index 6bf9ba8d0336d..583e420358c22 100644 --- a/x-pack/plugin/mapper-exponential-histogram/src/test/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapperTests.java +++ b/x-pack/plugin/mapper-exponential-histogram/src/test/java/org/elasticsearch/xpack/exponentialhistogram/ExponentialHistogramFieldMapperTests.java @@ -8,6 +8,7 @@ package org.elasticsearch.xpack.exponentialhistogram; import org.elasticsearch.core.Types; +import org.elasticsearch.exponentialhistogram.ExponentialHistogramUtils; import org.elasticsearch.index.mapper.DocumentMapper; import org.elasticsearch.index.mapper.DocumentParsingException; import org.elasticsearch.index.mapper.MappedFieldType; @@ -23,14 +24,13 @@ import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import java.util.NavigableMap; import java.util.Set; -import java.util.TreeMap; import static 
org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX; import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_SCALE; @@ -117,16 +117,22 @@ private static Map createRandomHistogramValue(int maxBucketCount fillBucketsRandomly(negativeIndices, negativeCounts, maxBucketCount / 2); } - return Map.of( - "scale", - scale, - "zero", - Map.of("count", zeroCount, "threshold", zeroThreshold), - "positive", - Map.of("indices", positiveIndices, "counts", positiveCounts), - "negative", - Map.of("indices", negativeIndices, "counts", negativeCounts) + Map result = new HashMap<>( + Map.of( + "scale", + scale, + "zero", + Map.of("count", zeroCount, "threshold", zeroThreshold), + "positive", + Map.of("indices", positiveIndices, "counts", positiveCounts), + "negative", + Map.of("indices", negativeIndices, "counts", negativeCounts) + ) ); + if (randomBoolean() && (positiveIndices.isEmpty() == false || negativeIndices.isEmpty() == false)) { + result.put("sum", randomDoubleBetween(-1000, 1000, true)); + } + return result; } private static void fillBucketsRandomly(List indices, List counts, int maxBucketCount) { @@ -383,7 +389,12 @@ protected List exampleMalformedValues() { .endArray() .endObject() .endObject() - ).errorMatches("has a total value count exceeding the allowed maximum value of " + Long.MAX_VALUE) + ).errorMatches("has a total value count exceeding the allowed maximum value of " + Long.MAX_VALUE), + + // Non-Zero sum for empty histogram + exampleMalformedValue(b -> b.startObject().field("scale", 0).field("sum", 42.0).endObject()).errorMatches( + "sum field must be zero if the histogram is empty, but got 42.0" + ) ); } @@ -425,47 +436,60 @@ public SyntheticSourceExample example(int maxValues) { private Map convertHistogramToCanonicalForm(Map histogram) { Map result = new LinkedHashMap<>(); - result.put("scale", histogram.get("scale")); + int scale = (Integer) histogram.get("scale"); + result.put("scale", scale); + + List positive = parseBuckets(Types.forciblyCast(histogram.get("positive"))); + List negative = parseBuckets(Types.forciblyCast(histogram.get("negative"))); + + Object sum = histogram.get("sum"); + if (sum == null) { + sum = ExponentialHistogramUtils.estimateSum( + IndexWithCount.asBucketIterator(scale, negative), + IndexWithCount.asBucketIterator(scale, positive) + ); + } + result.put("sum", sum); Map zeroBucket = convertZeroBucketToCanonicalForm(Types.forciblyCast(histogram.get("zero"))); if (zeroBucket != null) { result.put("zero", zeroBucket); } - Map positive = convertBucketListToCanonicalForm(Types.forciblyCast(histogram.get("positive"))); - if (positive != null) { - result.put("positive", positive); + if (positive.isEmpty() == false) { + result.put("positive", writeBucketsInCanonicalForm(positive)); } - - Map negative = convertBucketListToCanonicalForm(Types.forciblyCast(histogram.get("negative"))); - if (negative != null) { - result.put("negative", negative); + if (negative.isEmpty() == false) { + result.put("negative", writeBucketsInCanonicalForm(negative)); } return result; } - private Map convertBucketListToCanonicalForm(Map buckets) { + private List parseBuckets(Map buckets) { if (buckets == null) { - return null; + return List.of(); } List indices = Types.forciblyCast(buckets.get("indices")); List counts = Types.forciblyCast(buckets.get("counts")); if (indices == null || indices.isEmpty()) { - return null; + return List.of(); } - NavigableMap indicesToCountsSorted = new TreeMap<>(); + List indexWithCounts = new ArrayList<>(); for (int i = 
0; i < indices.size(); i++) { - indicesToCountsSorted.put(indices.get(i).longValue(), counts.get(i).longValue()); + indexWithCounts.add(new IndexWithCount(indices.get(i).longValue(), counts.get(i).longValue())); } + indexWithCounts.sort(Comparator.comparing(IndexWithCount::index)); + return indexWithCounts; + } + private Map writeBucketsInCanonicalForm(List buckets) { List resultIndices = new ArrayList<>(); List resultCounts = new ArrayList<>(); - indicesToCountsSorted.forEach((index, count) -> { - resultIndices.add(index); - resultCounts.add(count); - }); - + for (IndexWithCount indexWithCount : buckets) { + resultIndices.add(indexWithCount.index()); + resultCounts.add(indexWithCount.count()); + } LinkedHashMap result = new LinkedHashMap<>(); result.put("indices", resultIndices); result.put("counts", resultCounts); diff --git a/x-pack/plugin/mapper-exponential-histogram/src/yamlRestTest/resources/rest-api-spec/test/10_synthetic_source.yml b/x-pack/plugin/mapper-exponential-histogram/src/yamlRestTest/resources/rest-api-spec/test/10_synthetic_source.yml index 461748b552421..c95db7bf8587d 100644 --- a/x-pack/plugin/mapper-exponential-histogram/src/yamlRestTest/resources/rest-api-spec/test/10_synthetic_source.yml +++ b/x-pack/plugin/mapper-exponential-histogram/src/yamlRestTest/resources/rest-api-spec/test/10_synthetic_source.yml @@ -28,6 +28,7 @@ setup: body: my_histo: scale: 12 + sum: 1234.56 zero: threshold: 0.123456 count: 42 @@ -45,6 +46,7 @@ setup: - match: _source.my_histo: scale: 12 + sum: 1234.56 zero: threshold: 0.123456 count: 42 @@ -65,6 +67,7 @@ setup: body: my_histo: scale: -10 + sum: 1234.56 positive: indices: [1,2,3,4,5] counts: [6,7,8,9,10] @@ -79,6 +82,7 @@ setup: - match: _source.my_histo: scale: -10 + sum: 1234.56 positive: indices: [1,2,3,4,5] counts: [6,7,8,9,10] @@ -96,6 +100,7 @@ setup: body: my_histo: scale: 0 + sum: 1234.56 positive: indices: [-100, 10, 20] counts: [3, 2, 1] @@ -107,6 +112,7 @@ setup: - match: _source.my_histo: scale: 0 + sum: 1234.56 positive: indices: [-100, 10, 20] counts: [3, 2, 1] @@ -121,6 +127,7 @@ setup: body: my_histo: scale: 0 + sum: 1234.56 negative: indices: [-100, 10, 20] counts: [3, 2, 1] @@ -132,6 +139,7 @@ setup: - match: _source.my_histo: scale: 0 + sum: 1234.56 negative: indices: [-100, 10, 20] counts: [3, 2, 1] @@ -145,12 +153,14 @@ setup: body: my_histo: scale: -7 + sum: 0.0 - do: get: index: test_exponential_histogram id: "1" - match: _source.my_histo: + sum: 0.0 scale: -7 --- @@ -172,6 +182,7 @@ setup: - match: _source.my_histo: scale: 0 + sum: 0.0 zero: threshold: 42.7 @@ -194,6 +205,7 @@ setup: - match: _source.my_histo: scale: 0 + sum: 0.0 zero: count: 101 @@ -208,6 +220,7 @@ setup: body: my_histo: scale: 38 + sum: 1E300 zero: count: 2305843009213693952 # 2^61 to not cause overflows for the total value count sum threshold: 1E-300 @@ -224,6 +237,7 @@ setup: - match: _source.my_histo: scale: 38 + sum: 1E300 zero: count: 2305843009213693952 threshold: 1E-300 @@ -233,3 +247,87 @@ setup: negative: indices: [-4611686018427387903, 4611686018427387903] counts: [2305843009213693952, 1] + +--- +"Sum estimation": + - do: + index: + index: test_exponential_histogram + id: "1" + refresh: true + body: + my_histo: + scale: 1 + positive: + indices: [8] + counts: [1] + negative: + indices: [0, 1] + counts: [1, 5] + + - do: + get: + index: test_exponential_histogram + id: "1" + - match: + _source.my_histo: + scale: 1 + sum: 9.289321881345247 + positive: + indices: [8] + counts: [1] + negative: + indices: [0, 1] + counts: [1, 5] + +--- 
+"Positive infinity sum": + - do: + index: + index: test_exponential_histogram + id: "1" + refresh: true + body: + my_histo: + scale: 0 + positive: + indices: [2000] + counts: [1] + + - do: + get: + index: test_exponential_histogram + id: "1" + - match: + _source.my_histo: + scale: 0 + sum: Infinity + positive: + indices: [2000] + counts: [1] + +--- +"negative infinity sum": + - do: + index: + index: test_exponential_histogram + id: "1" + refresh: true + body: + my_histo: + scale: 0 + negative: + indices: [2000] + counts: [1] + + - do: + get: + index: test_exponential_histogram + id: "1" + - match: + _source.my_histo: + scale: 0 + sum: -Infinity + negative: + indices: [2000] + counts: [1]