
Commit c1b1ef2

JonasKunz, elasticsearchmachine, and kkrik-es authored and committed
Add memory accounting to exponential histogram library. (elastic#132580)
Co-authored-by: elasticsearchmachine <[email protected]>
Co-authored-by: Kostas Krikellas <[email protected]>
1 parent c989fd0 commit c1b1ef2

24 files changed: +1,444 −168 lines

benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramGenerationBench.java

Lines changed: 2 additions & 1 deletion
@@ -9,6 +9,7 @@
 
 package org.elasticsearch.benchmark.exponentialhistogram;
 
+import org.elasticsearch.exponentialhistogram.ExponentialHistogramCircuitBreaker;
 import org.elasticsearch.exponentialhistogram.ExponentialHistogramGenerator;
 import org.openjdk.jmh.annotations.Benchmark;
 import org.openjdk.jmh.annotations.BenchmarkMode;
@@ -59,7 +60,7 @@ public class ExponentialHistogramGenerationBench {
     @Setup
     public void setUp() {
         random = ThreadLocalRandom.current();
-        histoGenerator = new ExponentialHistogramGenerator(bucketCount);
+        histoGenerator = ExponentialHistogramGenerator.create(bucketCount, ExponentialHistogramCircuitBreaker.noop());
 
         DoubleSupplier nextRandom = () -> distribution.equals("GAUSSIAN") ? random.nextGaussian() : random.nextDouble();
 
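The change above captures the new construction pattern: histogram generators are now obtained from a factory method that also takes an ExponentialHistogramCircuitBreaker, so the library can account for the memory it allocates. A minimal usage sketch, using only the calls visible in this diff (create, add, getAndClear) together with the no-op breaker; wiring in a real, limit-enforcing breaker is outside this benchmark's scope:

import org.elasticsearch.exponentialhistogram.ExponentialHistogram;
import org.elasticsearch.exponentialhistogram.ExponentialHistogramCircuitBreaker;
import org.elasticsearch.exponentialhistogram.ExponentialHistogramGenerator;

public class GeneratorUsageSketch {
    public static void main(String[] args) {
        // No-op breaker, as used by the benchmarks: memory is accounted for but never limited.
        ExponentialHistogramCircuitBreaker breaker = ExponentialHistogramCircuitBreaker.noop();

        // The factory method replaces the old `new ExponentialHistogramGenerator(bucketCount)` constructor.
        ExponentialHistogramGenerator generator = ExponentialHistogramGenerator.create(100, breaker);

        for (int i = 0; i < 10_000; i++) {
            generator.add(Math.random());
        }

        // `get()` is now `getAndClear()`, which presumably also resets the generator for reuse.
        ExponentialHistogram histogram = generator.getAndClear();
        System.out.println(histogram);
    }
}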

benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramMergeBench.java

Lines changed: 8 additions & 6 deletions
@@ -11,6 +11,7 @@
 
 import org.elasticsearch.exponentialhistogram.BucketIterator;
 import org.elasticsearch.exponentialhistogram.ExponentialHistogram;
+import org.elasticsearch.exponentialhistogram.ExponentialHistogramCircuitBreaker;
 import org.elasticsearch.exponentialhistogram.ExponentialHistogramGenerator;
 import org.elasticsearch.exponentialhistogram.ExponentialHistogramMerger;
 import org.openjdk.jmh.annotations.Benchmark;
@@ -56,13 +57,14 @@ public class ExponentialHistogramMergeBench {
     @Setup
     public void setUp() {
         random = ThreadLocalRandom.current();
-        histoMerger = new ExponentialHistogramMerger(bucketCount);
+        ExponentialHistogramCircuitBreaker breaker = ExponentialHistogramCircuitBreaker.noop();
+        histoMerger = ExponentialHistogramMerger.create(bucketCount, breaker);
 
-        ExponentialHistogramGenerator initial = new ExponentialHistogramGenerator(bucketCount);
+        ExponentialHistogramGenerator initialGenerator = ExponentialHistogramGenerator.create(bucketCount, breaker);
         for (int j = 0; j < bucketCount; j++) {
-            initial.add(Math.pow(1.001, j));
+            initialGenerator.add(Math.pow(1.001, j));
         }
-        ExponentialHistogram initialHisto = initial.get();
+        ExponentialHistogram initialHisto = initialGenerator.getAndClear();
         int cnt = getBucketCount(initialHisto);
         if (cnt < bucketCount) {
             throw new IllegalArgumentException("Expected bucket count to be " + bucketCount + ", but was " + cnt);
@@ -72,14 +74,14 @@ public void setUp() {
         int dataPointSize = (int) Math.round(bucketCount * mergedHistoSizeFactor);
 
         for (int i = 0; i < toMerge.length; i++) {
-            ExponentialHistogramGenerator generator = new ExponentialHistogramGenerator(dataPointSize);
+            ExponentialHistogramGenerator generator = ExponentialHistogramGenerator.create(dataPointSize, breaker);
 
             int bucketIndex = 0;
             for (int j = 0; j < dataPointSize; j++) {
                 bucketIndex += 1 + random.nextInt(bucketCount) % (Math.max(1, bucketCount / dataPointSize));
                 generator.add(Math.pow(1.001, bucketIndex));
             }
-            toMerge[i] = generator.get();
+            toMerge[i] = generator.getAndClear();
             cnt = getBucketCount(toMerge[i]);
             if (cnt < dataPointSize) {
                 throw new IllegalArgumentException("Expected bucket count to be " + dataPointSize + ", but was " + cnt);
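For the merging side, only ExponentialHistogramMerger.create(bucketCount, breaker) is visible in this diff; the benchmark's actual merge loop lies outside the changed lines. The sketch below therefore treats the merger's add(...) and get() calls as assumed method names, shown purely for illustration:

import org.elasticsearch.exponentialhistogram.ExponentialHistogram;
import org.elasticsearch.exponentialhistogram.ExponentialHistogramCircuitBreaker;
import org.elasticsearch.exponentialhistogram.ExponentialHistogramGenerator;
import org.elasticsearch.exponentialhistogram.ExponentialHistogramMerger;

public class MergerUsageSketch {
    public static void main(String[] args) {
        ExponentialHistogramCircuitBreaker breaker = ExponentialHistogramCircuitBreaker.noop();

        // Build two input histograms with the factory API shown in the diff.
        ExponentialHistogramGenerator first = ExponentialHistogramGenerator.create(50, breaker);
        ExponentialHistogramGenerator second = ExponentialHistogramGenerator.create(50, breaker);
        for (int i = 0; i < 1_000; i++) {
            first.add(Math.pow(1.001, i));
            second.add(Math.random());
        }

        // create(...) appears in the diff; add(...) and get() below are assumed names.
        ExponentialHistogramMerger merger = ExponentialHistogramMerger.create(50, breaker);
        merger.add(first.getAndClear());
        merger.add(second.getAndClear());
        ExponentialHistogram merged = merger.get();
        System.out.println(merged);
    }
}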

libs/exponential-histogram/build.gradle

Lines changed: 3 additions & 0 deletions
@@ -12,6 +12,9 @@
 apply plugin: 'elasticsearch.build'
 
 dependencies {
+    api project(':libs:core')
+    api "org.apache.lucene:lucene-core:${versions.lucene}"
+
     testImplementation(project(":test:framework"))
     testImplementation('ch.obermuhlner:big-math:2.3.2')
     testImplementation('org.apache.commons:commons-math3:3.6.1')

libs/exponential-histogram/licenses/lucene-core-LICENSE.txt

Lines changed: 475 additions & 0 deletions
Large diffs are not rendered by default.
libs/exponential-histogram/licenses/lucene-core-NOTICE.txt

Lines changed: 192 additions & 0 deletions
@@ -0,0 +1,192 @@
Apache Lucene
Copyright 2014 The Apache Software Foundation

This product includes software developed at
The Apache Software Foundation (http://www.apache.org/).

Includes software from other Apache Software Foundation projects,
including, but not limited to:
 - Apache Ant
 - Apache Jakarta Regexp
 - Apache Commons
 - Apache Xerces

ICU4J, (under analysis/icu) is licensed under an MIT styles license
and Copyright (c) 1995-2008 International Business Machines Corporation and others

Some data files (under analysis/icu/src/data) are derived from Unicode data such
as the Unicode Character Database. See http://unicode.org/copyright.html for more
details.

Brics Automaton (under core/src/java/org/apache/lucene/util/automaton) is
BSD-licensed, created by Anders Møller. See http://www.brics.dk/automaton/

The levenshtein automata tables (under core/src/java/org/apache/lucene/util/automaton) were
automatically generated with the moman/finenight FSA library, created by
Jean-Philippe Barrette-LaPierre. This library is available under an MIT license,
see http://sites.google.com/site/rrettesite/moman and
http://bitbucket.org/jpbarrette/moman/overview/

The class org.apache.lucene.util.WeakIdentityMap was derived from
the Apache CXF project and is Apache License 2.0.

The Google Code Prettify is Apache License 2.0.
See http://code.google.com/p/google-code-prettify/

JUnit (junit-4.10) is licensed under the Common Public License v. 1.0
See http://junit.sourceforge.net/cpl-v10.html

This product includes code (JaspellTernarySearchTrie) from Java Spelling
Checking Package (jaspell): http://jaspell.sourceforge.net/
License: The BSD License (http://www.opensource.org/licenses/bsd-license.php)

The snowball stemmers in
  analysis/common/src/java/net/sf/snowball
were developed by Martin Porter and Richard Boulton.
The snowball stopword lists in
  analysis/common/src/resources/org/apache/lucene/analysis/snowball
were developed by Martin Porter and Richard Boulton.
The full snowball package is available from
  http://snowball.tartarus.org/

The KStem stemmer in
  analysis/common/src/org/apache/lucene/analysis/en
was developed by Bob Krovetz and Sergio Guzman-Lara (CIIR-UMass Amherst)
under the BSD-license.

The Arabic, Persian, Romanian, Bulgarian, Hindi and Bengali analyzers (common) come with a default
stopword list that is BSD-licensed created by Jacques Savoy. These files reside in:
  analysis/common/src/resources/org/apache/lucene/analysis/ar/stopwords.txt,
  analysis/common/src/resources/org/apache/lucene/analysis/fa/stopwords.txt,
  analysis/common/src/resources/org/apache/lucene/analysis/ro/stopwords.txt,
  analysis/common/src/resources/org/apache/lucene/analysis/bg/stopwords.txt,
  analysis/common/src/resources/org/apache/lucene/analysis/hi/stopwords.txt,
  analysis/common/src/resources/org/apache/lucene/analysis/bn/stopwords.txt
See http://members.unine.ch/jacques.savoy/clef/index.html.

The German, Spanish, Finnish, French, Hungarian, Italian, Portuguese, Russian and Swedish light stemmers
(common) are based on BSD-licensed reference implementations created by Jacques Savoy and
Ljiljana Dolamic. These files reside in:
  analysis/common/src/java/org/apache/lucene/analysis/de/GermanLightStemmer.java
  analysis/common/src/java/org/apache/lucene/analysis/de/GermanMinimalStemmer.java
  analysis/common/src/java/org/apache/lucene/analysis/es/SpanishLightStemmer.java
  analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishLightStemmer.java
  analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemmer.java
  analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchMinimalStemmer.java
  analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemmer.java
  analysis/common/src/java/org/apache/lucene/analysis/it/ItalianLightStemmer.java
  analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemmer.java
  analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemmer.java
  analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemmer.java

The Stempel analyzer (stempel) includes BSD-licensed software developed
by the Egothor project http://egothor.sf.net/, created by Leo Galambos, Martin Kvapil,
and Edmond Nolan.

The Polish analyzer (stempel) comes with a default
stopword list that is BSD-licensed created by the Carrot2 project. The file resides
in stempel/src/resources/org/apache/lucene/analysis/pl/stopwords.txt.
See http://project.carrot2.org/license.html.

The SmartChineseAnalyzer source code (smartcn) was
provided by Xiaoping Gao and copyright 2009 by www.imdict.net.

WordBreakTestUnicode_*.java (under modules/analysis/common/src/test/)
is derived from Unicode data such as the Unicode Character Database.
See http://unicode.org/copyright.html for more details.

The Morfologik analyzer (morfologik) includes BSD-licensed software
developed by Dawid Weiss and Marcin Miłkowski (http://morfologik.blogspot.com/).

Morfologik uses data from Polish ispell/myspell dictionary
(http://www.sjp.pl/slownik/en/) licenced on the terms of (inter alia)
LGPL and Creative Commons ShareAlike.

Morfologic includes data from BSD-licensed dictionary of Polish (SGJP)
(http://sgjp.pl/morfeusz/)

Servlet-api.jar and javax.servlet-*.jar are under the CDDL license, the original
source code for this can be found at http://www.eclipse.org/jetty/downloads.php

===========================================================================
Kuromoji Japanese Morphological Analyzer - Apache Lucene Integration
===========================================================================

This software includes a binary and/or source version of data from

  mecab-ipadic-2.7.0-20070801

which can be obtained from

  http://atilika.com/releases/mecab-ipadic/mecab-ipadic-2.7.0-20070801.tar.gz

or

  http://jaist.dl.sourceforge.net/project/mecab/mecab-ipadic/2.7.0-20070801/mecab-ipadic-2.7.0-20070801.tar.gz

===========================================================================
mecab-ipadic-2.7.0-20070801 Notice
===========================================================================

Nara Institute of Science and Technology (NAIST),
the copyright holders, disclaims all warranties with regard to this
software, including all implied warranties of merchantability and
fitness, in no event shall NAIST be liable for
any special, indirect or consequential damages or any damages
whatsoever resulting from loss of use, data or profits, whether in an
action of contract, negligence or other tortuous action, arising out
of or in connection with the use or performance of this software.

A large portion of the dictionary entries
originate from ICOT Free Software. The following conditions for ICOT
Free Software applies to the current dictionary as well.

Each User may also freely distribute the Program, whether in its
original form or modified, to any third party or parties, PROVIDED
that the provisions of Section 3 ("NO WARRANTY") will ALWAYS appear
on, or be attached to, the Program, which is distributed substantially
in the same form as set out herein and that such intended
distribution, if actually made, will neither violate or otherwise
contravene any of the laws and regulations of the countries having
jurisdiction over the User or the intended distribution itself.

NO WARRANTY

The program was produced on an experimental basis in the course of the
research and development conducted during the project and is provided
to users as so produced on an experimental basis. Accordingly, the
program is provided without any warranty whatsoever, whether express,
implied, statutory or otherwise. The term "warranty" used herein
includes, but is not limited to, any warranty of the quality,
performance, merchantability and fitness for a particular purpose of
the program and the nonexistence of any infringement or violation of
any right of any third party.

Each user of the program will agree and understand, and be deemed to
have agreed and understood, that there is no warranty whatsoever for
the program and, accordingly, the entire risk arising from or
otherwise connected with the program is assumed by the user.

Therefore, neither ICOT, the copyright holder, or any other
organization that participated in or was otherwise related to the
development of the program and their respective officials, directors,
officers and other employees shall be held liable for any and all
damages, including, without limitation, general, special, incidental
and consequential damages, arising out of or otherwise in connection
with the use or inability to use the program or any product, material
or result produced or otherwise obtained by using the program,
regardless of whether they have been advised of, or otherwise had
knowledge of, the possibility of such damages at any time during the
project or thereafter. Each user will be deemed to have agreed to the
foregoing by his or her commencement of use of the program. The term
"use" as used herein includes, but is not limited to, the use,
modification, copying and distribution of the program and the
production of secondary products from the program.

In the case where the program, whether in its original form or
modified, was distributed or delivered to or received by a user from
any person, organization or entity other than ICOT, unless it makes or
grants independently of ICOT any specific warranty to the user in
writing, such person, organization or entity, will also be exempted
from and not be held liable to the user for any such damages as noted
above as far as the program is concerned.
libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/BucketArrayIterator.java

Lines changed: 79 additions & 0 deletions
@@ -0,0 +1,79 @@
/*
 * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V.
 * under one or more license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch B.V. licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License.
 */

package org.elasticsearch.exponentialhistogram;

class BucketArrayIterator implements CopyableBucketIterator {

    private final int scale;
    private final long[] bucketCounts;
    private final long[] bucketIndices;

    private int currentSlot;
    private final int limit;

    BucketArrayIterator(int scale, long[] bucketCounts, long[] bucketIndices, int startSlot, int limit) {
        this.scale = scale;
        this.bucketCounts = bucketCounts;
        this.bucketIndices = bucketIndices;
        this.currentSlot = startSlot;
        this.limit = limit;
    }

    @Override
    public boolean hasNext() {
        return currentSlot < limit;
    }

    @Override
    public long peekCount() {
        ensureEndNotReached();
        return bucketCounts[currentSlot];
    }

    @Override
    public long peekIndex() {
        ensureEndNotReached();
        return bucketIndices[currentSlot];
    }

    @Override
    public void advance() {
        ensureEndNotReached();
        currentSlot++;
    }

    @Override
    public int scale() {
        return scale;
    }

    @Override
    public CopyableBucketIterator copy() {
        return new BucketArrayIterator(scale, bucketCounts, bucketIndices, currentSlot, limit);
    }

    private void ensureEndNotReached() {
        if (hasNext() == false) {
            throw new IllegalStateException("Iterator has no more buckets");
        }
    }
}
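BucketArrayIterator walks histogram buckets stored as two parallel arrays (indices and counts) between a start slot and a limit. A small consumption sketch follows; it constructs the iterator directly with made-up arrays, whereas callers inside the library would normally obtain it from a histogram's bucket view, and it sits in the same package because the class is package-private:

package org.elasticsearch.exponentialhistogram;

class BucketIterationSketch {

    static void printBuckets() {
        long[] counts = { 5, 1, 12, 2 };   // per-bucket counts
        long[] indices = { -2, 0, 3, 7 };  // bucket indices at the given scale

        // Constructor order: scale, counts, indices, start slot, limit.
        CopyableBucketIterator it = new BucketArrayIterator(4, counts, indices, 0, counts.length);
        while (it.hasNext()) {
            System.out.println("index=" + it.peekIndex() + " count=" + it.peekCount() + " scale=" + it.scale());
            it.advance();
        }
        // copy() returns an independent iterator positioned at the current slot,
        // so a second pass can resume from wherever this one stands.
    }
}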

libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/DownscaleStats.java

Lines changed: 4 additions & 0 deletions
@@ -21,6 +21,8 @@
 
 package org.elasticsearch.exponentialhistogram;
 
+import org.apache.lucene.util.RamUsageEstimator;
+
 import java.util.Arrays;
 
 import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX;
@@ -33,6 +35,8 @@
  */
 class DownscaleStats {
 
+    static final long SIZE = RamUsageEstimator.shallowSizeOf(DownscaleStats.class) + RamEstimationUtil.estimateIntArray(MAX_INDEX_BITS);
+
     // collapsedBucketCount[i] stores the number of additional
     // collapsed buckets when increasing the scale by (i+1) instead of just by (i)
     int[] collapsedBucketCount = new int[MAX_INDEX_BITS];
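The new SIZE constant is a per-instance memory estimate for DownscaleStats: the shallow object size plus the retained int[MAX_INDEX_BITS] array, computed once via Lucene's RamUsageEstimator. A rough sketch of the accounting pattern this enables follows; the MemoryBreaker interface and adjustBreaker name are hypothetical stand-ins (only ExponentialHistogramCircuitBreaker.noop() is visible in this commit), and the array estimate approximates what RamEstimationUtil.estimateIntArray presumably computes:

import org.apache.lucene.util.RamUsageEstimator;

public class MemoryAccountingSketch {

    // Hypothetical stand-in for the library's circuit breaker; the real interface is not shown in this diff.
    interface MemoryBreaker {
        void adjustBreaker(long bytes); // positive = reserve, negative = release
    }

    static class TrackedIntBuffer {
        // Fixed shallow cost of one instance, computed once, analogous to DownscaleStats.SIZE.
        static final long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(TrackedIntBuffer.class);

        private final int[] values;
        private final MemoryBreaker breaker;
        private final long accountedBytes;

        TrackedIntBuffer(int length, MemoryBreaker breaker) {
            this.breaker = breaker;
            // Shallow size plus an aligned estimate of the int[] payload.
            this.accountedBytes = SHALLOW_SIZE
                + RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + (long) Integer.BYTES * length);
            breaker.adjustBreaker(accountedBytes);   // reserve before allocating
            this.values = new int[length];
        }

        void close() {
            breaker.adjustBreaker(-accountedBytes);  // release when the structure is discarded
        }
    }

    public static void main(String[] args) {
        MemoryBreaker noop = bytes -> {};            // mirrors the behavior of ExponentialHistogramCircuitBreaker.noop()
        TrackedIntBuffer buffer = new TrackedIntBuffer(64, noop);
        buffer.close();
    }
}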
