2323
2424import java .util .Arrays ;
2525import java .util .Random ;
26+ import java .util .stream .DoubleStream ;
27+ import java .util .stream .IntStream ;
2628
2729import static org .hamcrest .Matchers .closeTo ;
2830import static org .hamcrest .Matchers .lessThan ;
2931
30- public class PercentileAccuracyTests extends ESTestCase {
32+ public class QuantileAccuracyTests extends ESTestCase {
3133
32- public static final double [] PERCENTILES_TO_TEST = { 0 , 0.01 , 0.1 , 0.25 , 0.5 , 0.75 , 0.9 , 0.95 , 0.99 , 1.0 };
34+ public static final double [] QUANTILES_TO_TEST = { 0 , 0.01 , 0.1 , 0.25 , 0.5 , 0.75 , 0.9 , 0.95 , 0.99 , 1.0 };
35+
36+ public void testBasicSmall () {
37+ DoubleStream values = IntStream .range (1 ,10 ).mapToDouble (Double ::valueOf );
38+ testQuantileAccuracy (values .toArray (), 100 );
39+ }
3340
34- /**
35- * Test percentile accuracy with uniform distribution
36- */
3741 public void testUniformDistribution () {
38- testDistributionPercentileAccuracy (new UniformRealDistribution (new Well19937c (42 ), 0 , 100 ), 50000 , 500 );
42+ testDistributionQuantileAccuracy (new UniformRealDistribution (new Well19937c (42 ), 0 , 100 ), 50000 , 500 );
3943 }
4044
41- /**
42- * Test percentile accuracy with normal distribution
43- */
4445 public void testNormalDistribution () {
45- testDistributionPercentileAccuracy (new NormalDistribution (new Well19937c (42 ), 100 , 15 ), 50000 , 500 );
46+ testDistributionQuantileAccuracy (new NormalDistribution (new Well19937c (42 ), 100 , 15 ), 50000 , 500 );
4647 }
4748
48- /**
49- * Test percentile accuracy with exponential distribution
50- */
5149 public void testExponentialDistribution () {
52- testDistributionPercentileAccuracy (new ExponentialDistribution (new Well19937c (42 ), 10 ), 50000 , 500 );
50+ testDistributionQuantileAccuracy (new ExponentialDistribution (new Well19937c (42 ), 10 ), 50000 , 500 );
5351 }
5452
55- /**
56- * Test percentile accuracy with log-normal distribution
57- */
5853 public void testLogNormalDistribution () {
59- testDistributionPercentileAccuracy (new LogNormalDistribution (new Well19937c (42 ), 0 , 1 ), 50000 , 500 );
54+ testDistributionQuantileAccuracy (new LogNormalDistribution (new Well19937c (42 ), 0 , 1 ), 50000 , 500 );
6055 }
6156
62- /**
63- * Test percentile accuracy with gamma distribution
64- */
6557 public void testGammaDistribution () {
66- testDistributionPercentileAccuracy (new GammaDistribution (new Well19937c (42 ), 2 , 5 ), 50000 , 500 );
58+ testDistributionQuantileAccuracy (new GammaDistribution (new Well19937c (42 ), 2 , 5 ), 50000 , 500 );
6759 }
6860
69- /**
70- * Test percentile accuracy with beta distribution
71- */
7261 public void testBetaDistribution () {
73- testDistributionPercentileAccuracy (new BetaDistribution (new Well19937c (42 ), 2 , 5 ), 50000 , 500 );
62+ testDistributionQuantileAccuracy (new BetaDistribution (new Well19937c (42 ), 2 , 5 ), 50000 , 500 );
7463 }
7564
76- /**
77- * Test percentile accuracy with Weibull distribution
78- */
7965 public void testWeibullDistribution () {
80- testDistributionPercentileAccuracy (new WeibullDistribution (new Well19937c (42 ), 2 , 5 ), 50000 , 500 );
66+ testDistributionQuantileAccuracy (new WeibullDistribution (new Well19937c (42 ), 2 , 5 ), 50000 , 500 );
67+ }
68+
69+ public void testBigJump () {
70+ double [] values = DoubleStream .concat (
71+ IntStream .range (0 ,18 ).mapToDouble (Double ::valueOf ),
72+ DoubleStream .of (1_000_000.0 )
73+ ).toArray ();
74+
75+ testQuantileAccuracy (values , 500 );
8176 }
8277
83- /**
84- * Test how bucket count affects percentile accuracy
85- */
8678 public void testBucketCountImpact () {
8779 RealDistribution distribution = new LogNormalDistribution (new Well19937c (42 ), 0 , 1 );
8880 int sampleSize = 50000 ;
@@ -91,32 +83,26 @@ public void testBucketCountImpact() {
9183 // Test with different bucket counts
9284 int [] bucketCounts = { 10 , 50 , 100 , 200 , 500 };
9385 for (int bucketCount : bucketCounts ) {
94- double maxError = testPercentileAccuracy (values , bucketCount );
86+ double maxError = testQuantileAccuracy (values , bucketCount );
9587 logger .info ("Bucket count: " + bucketCount + ", Max relative error: " + maxError );
9688 }
9789
9890 // Verify that more buckets generally means better accuracy
99- double errorWithFewBuckets = testPercentileAccuracy (values , 20 );
100- double errorWithManyBuckets = testPercentileAccuracy (values , 200 );
91+ double errorWithFewBuckets = testQuantileAccuracy (values , 20 );
92+ double errorWithManyBuckets = testQuantileAccuracy (values , 200 );
10193 assertThat ("More buckets should improve accuracy" , errorWithManyBuckets , lessThan (errorWithFewBuckets ));
10294 }
10395
104- /**
105- * Test percentile accuracy with mixed positive and negative values
106- */
10796 public void testMixedSignValues () {
10897 Random random = new Random (42 );
10998 double [] values = new double [10000 ];
11099 for (int i = 0 ; i < values .length ; i ++) {
111100 values [i ] = (random .nextDouble () * 200 ) - 100 ; // Range from -100 to 100
112101 }
113102
114- testPercentileAccuracy (values , 100 );
103+ testQuantileAccuracy (values , 100 );
115104 }
116105
117- /**
118- * Test percentile accuracy with skewed data
119- */
120106 public void testSkewedData () {
121107 // Create a highly skewed dataset
122108 Random random = new Random (42 );
@@ -131,12 +117,9 @@ public void testSkewedData() {
131117 }
132118 }
133119
134- testPercentileAccuracy (values , 100 );
120+ testQuantileAccuracy (values , 100 );
135121 }
136122
137- /**
138- * Test percentile accuracy with data containing zeros
139- */
140123 public void testDataWithZeros () {
141124 Random random = new Random (42 );
142125 double [] values = new double [10000 ];
@@ -149,32 +132,23 @@ public void testDataWithZeros() {
149132 }
150133 }
151134
152- testPercentileAccuracy (values , 100 );
135+ testQuantileAccuracy (values , 100 );
153136 }
154137
155- /**
156- * Helper method to test percentile accuracy for a given distribution
157- */
158- private void testDistributionPercentileAccuracy (RealDistribution distribution , int sampleSize , int bucketCount ) {
138+ private void testDistributionQuantileAccuracy (RealDistribution distribution , int sampleSize , int bucketCount ) {
159139 double [] values = generateSamples (distribution , sampleSize );
160- testPercentileAccuracy (values , bucketCount );
140+ testQuantileAccuracy (values , bucketCount );
161141 }
162142
163- /**
164- * Helper method to generate samples from a distribution
165- */
166- private double [] generateSamples (RealDistribution distribution , int sampleSize ) {
143+ private static double [] generateSamples (RealDistribution distribution , int sampleSize ) {
167144 double [] values = new double [sampleSize ];
168145 for (int i = 0 ; i < sampleSize ; i ++) {
169146 values [i ] = distribution .sample ();
170147 }
171148 return values ;
172149 }
173150
174- /**
175- * Helper method to test percentile accuracy for a given dataset
176- */
177- private double testPercentileAccuracy (double [] values , int bucketCount ) {
151+ private double testQuantileAccuracy (double [] values , int bucketCount ) {
178152 // Create histogram
179153 ExponentialHistogram histogram = ExponentialHistogramGenerator .createFor (bucketCount , Arrays .stream (values ));
180154
@@ -185,17 +159,17 @@ private double testPercentileAccuracy(double[] values, int bucketCount) {
185159 double allowedError = getMaximumRelativeError (values , bucketCount );
186160 double maxError = 0 ;
187161
188- // Compare histogram percentiles with exact percentiles
189- for (double p : PERCENTILES_TO_TEST ) {
162+ // Compare histogram quantiles with exact quantiles
163+ for (double q : QUANTILES_TO_TEST ) {
190164 double exactValue ;
191- if (p == 0 ) {
165+ if (q == 0 ) {
192166 exactValue = Arrays .stream (values ).min ().getAsDouble ();
193- } else if (p == 1 ) {
167+ } else if (q == 1 ) {
194168 exactValue = Arrays .stream (values ).max ().getAsDouble ();
195169 } else {
196- exactValue = exactPercentile .evaluate (p * 100 );
170+ exactValue = exactPercentile .evaluate (q * 100 );
197171 }
198- double histoValue = ExpHistoPercentiles . getPercentile (histogram , p );
172+ double histoValue = ExponentialHistogramQuantile . getQuantile (histogram , q );
199173
200174 // Skip comparison if exact value is zero to avoid division by zero
201175 if (Math .abs (exactValue ) < 1e-10 ) {
@@ -207,8 +181,8 @@ private double testPercentileAccuracy(double[] values, int bucketCount) {
207181
208182 logger .info (
209183 String .format (
210- "Percentile %.2f: Exact=%.6f, Histogram=%.6f, Relative Error=%.8f, Allowed Relative Error=%.8f" ,
211- p ,
184+ "Quantile %.2f: Exact=%.6f, Histogram=%.6f, Relative Error=%.8f, Allowed Relative Error=%.8f" ,
185+ q ,
212186 exactValue ,
213187 histoValue ,
214188 relativeError ,
@@ -217,7 +191,7 @@ private double testPercentileAccuracy(double[] values, int bucketCount) {
217191 );
218192
219193 assertThat (
220- String .format ("Percentile %.2f should be accurate within %.6f%% relative error" , p , allowedError * 100 ),
194+ String .format ("Quantile %.2f should be accurate within %.6f%% relative error" , q , allowedError * 100 ),
221195 histoValue ,
222196 closeTo (exactValue , Math .abs (exactValue * allowedError ))
223197 );
@@ -231,7 +205,7 @@ private double testPercentileAccuracy(double[] values, int bucketCount) {
231205 * The error depends on the raw values put into the histogram and the number of buckets allowed.
232206 * This is an implementation of the error bound computation proven by Theorem 3 in the <a href="https://arxiv.org/pdf/2004.08604">UDDSketch paper</a>
233207 */
234- private double getMaximumRelativeError (double [] values , int bucketCount ) {
208+ private static double getMaximumRelativeError (double [] values , int bucketCount ) {
235209 double smallestAbsNegative = Double .MAX_VALUE ;
236210 double largestAbsNegative = 0 ;
237211 double smallestPositive = Double .MAX_VALUE ;
0 commit comments