Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

package org.elasticsearch.benchmark.exponentialhistogram;

import org.elasticsearch.exponentialhistogram.ExponentialHistogramCircuitBreaker;
import org.elasticsearch.exponentialhistogram.ExponentialHistogramGenerator;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
Expand Down Expand Up @@ -59,7 +60,7 @@ public class ExponentialHistogramGenerationBench {
@Setup
public void setUp() {
random = ThreadLocalRandom.current();
histoGenerator = new ExponentialHistogramGenerator(bucketCount);
histoGenerator = ExponentialHistogramGenerator.create(bucketCount, ExponentialHistogramCircuitBreaker.noop());

DoubleSupplier nextRandom = () -> distribution.equals("GAUSSIAN") ? random.nextGaussian() : random.nextDouble();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

import org.elasticsearch.exponentialhistogram.BucketIterator;
import org.elasticsearch.exponentialhistogram.ExponentialHistogram;
import org.elasticsearch.exponentialhistogram.ExponentialHistogramCircuitBreaker;
import org.elasticsearch.exponentialhistogram.ExponentialHistogramGenerator;
import org.elasticsearch.exponentialhistogram.ExponentialHistogramMerger;
import org.openjdk.jmh.annotations.Benchmark;
Expand Down Expand Up @@ -56,13 +57,14 @@ public class ExponentialHistogramMergeBench {
@Setup
public void setUp() {
random = ThreadLocalRandom.current();
histoMerger = new ExponentialHistogramMerger(bucketCount);
ExponentialHistogramCircuitBreaker breaker = ExponentialHistogramCircuitBreaker.noop();
histoMerger = ExponentialHistogramMerger.create(bucketCount, breaker);

ExponentialHistogramGenerator initial = new ExponentialHistogramGenerator(bucketCount);
ExponentialHistogramGenerator initialGenerator = ExponentialHistogramGenerator.create(bucketCount, breaker);
for (int j = 0; j < bucketCount; j++) {
initial.add(Math.pow(1.001, j));
initialGenerator.add(Math.pow(1.001, j));
}
ExponentialHistogram initialHisto = initial.get();
ExponentialHistogram initialHisto = initialGenerator.getAndClear();
int cnt = getBucketCount(initialHisto);
if (cnt < bucketCount) {
throw new IllegalArgumentException("Expected bucket count to be " + bucketCount + ", but was " + cnt);
Expand All @@ -72,14 +74,14 @@ public void setUp() {
int dataPointSize = (int) Math.round(bucketCount * mergedHistoSizeFactor);

for (int i = 0; i < toMerge.length; i++) {
ExponentialHistogramGenerator generator = new ExponentialHistogramGenerator(dataPointSize);
ExponentialHistogramGenerator generator = ExponentialHistogramGenerator.create(dataPointSize, breaker);

int bucketIndex = 0;
for (int j = 0; j < dataPointSize; j++) {
bucketIndex += 1 + random.nextInt(bucketCount) % (Math.max(1, bucketCount / dataPointSize));
generator.add(Math.pow(1.001, bucketIndex));
}
toMerge[i] = generator.get();
toMerge[i] = generator.getAndClear();
cnt = getBucketCount(toMerge[i]);
if (cnt < dataPointSize) {
throw new IllegalArgumentException("Expected bucket count to be " + dataPointSize + ", but was " + cnt);
Expand Down
3 changes: 3 additions & 0 deletions libs/exponential-histogram/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
apply plugin: 'elasticsearch.build'

dependencies {
api project(':libs:core')
api "org.apache.lucene:lucene-core:${versions.lucene}"

testImplementation(project(":test:framework"))
testImplementation('ch.obermuhlner:big-math:2.3.2')
testImplementation('org.apache.commons:commons-math3:3.6.1')
Expand Down
475 changes: 475 additions & 0 deletions libs/exponential-histogram/licenses/lucene-core-LICENSE.txt

Large diffs are not rendered by default.

192 changes: 192 additions & 0 deletions libs/exponential-histogram/licenses/lucene-core-NOTICE.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
Apache Lucene
Copyright 2014 The Apache Software Foundation

This product includes software developed at
The Apache Software Foundation (http://www.apache.org/).

Includes software from other Apache Software Foundation projects,
including, but not limited to:
- Apache Ant
- Apache Jakarta Regexp
- Apache Commons
- Apache Xerces

ICU4J, (under analysis/icu) is licensed under an MIT styles license
and Copyright (c) 1995-2008 International Business Machines Corporation and others

Some data files (under analysis/icu/src/data) are derived from Unicode data such
as the Unicode Character Database. See http://unicode.org/copyright.html for more
details.

Brics Automaton (under core/src/java/org/apache/lucene/util/automaton) is
BSD-licensed, created by Anders Møller. See http://www.brics.dk/automaton/

The levenshtein automata tables (under core/src/java/org/apache/lucene/util/automaton) were
automatically generated with the moman/finenight FSA library, created by
Jean-Philippe Barrette-LaPierre. This library is available under an MIT license,
see http://sites.google.com/site/rrettesite/moman and
http://bitbucket.org/jpbarrette/moman/overview/

The class org.apache.lucene.util.WeakIdentityMap was derived from
the Apache CXF project and is Apache License 2.0.

The Google Code Prettify is Apache License 2.0.
See http://code.google.com/p/google-code-prettify/

JUnit (junit-4.10) is licensed under the Common Public License v. 1.0
See http://junit.sourceforge.net/cpl-v10.html

This product includes code (JaspellTernarySearchTrie) from Java Spelling Checking
Package (jaspell): http://jaspell.sourceforge.net/
License: The BSD License (http://www.opensource.org/licenses/bsd-license.php)

The snowball stemmers in
analysis/common/src/java/net/sf/snowball
were developed by Martin Porter and Richard Boulton.
The snowball stopword lists in
analysis/common/src/resources/org/apache/lucene/analysis/snowball
were developed by Martin Porter and Richard Boulton.
The full snowball package is available from
http://snowball.tartarus.org/

The KStem stemmer in
analysis/common/src/org/apache/lucene/analysis/en
was developed by Bob Krovetz and Sergio Guzman-Lara (CIIR-UMass Amherst)
under the BSD-license.

The Arabic,Persian,Romanian,Bulgarian, Hindi and Bengali analyzers (common) come with a default
stopword list that is BSD-licensed created by Jacques Savoy. These files reside in:
analysis/common/src/resources/org/apache/lucene/analysis/ar/stopwords.txt,
analysis/common/src/resources/org/apache/lucene/analysis/fa/stopwords.txt,
analysis/common/src/resources/org/apache/lucene/analysis/ro/stopwords.txt,
analysis/common/src/resources/org/apache/lucene/analysis/bg/stopwords.txt,
analysis/common/src/resources/org/apache/lucene/analysis/hi/stopwords.txt,
analysis/common/src/resources/org/apache/lucene/analysis/bn/stopwords.txt
See http://members.unine.ch/jacques.savoy/clef/index.html.

The German,Spanish,Finnish,French,Hungarian,Italian,Portuguese,Russian and Swedish light stemmers
(common) are based on BSD-licensed reference implementations created by Jacques Savoy and
Ljiljana Dolamic. These files reside in:
analysis/common/src/java/org/apache/lucene/analysis/de/GermanLightStemmer.java
analysis/common/src/java/org/apache/lucene/analysis/de/GermanMinimalStemmer.java
analysis/common/src/java/org/apache/lucene/analysis/es/SpanishLightStemmer.java
analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishLightStemmer.java
analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemmer.java
analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchMinimalStemmer.java
analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemmer.java
analysis/common/src/java/org/apache/lucene/analysis/it/ItalianLightStemmer.java
analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemmer.java
analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemmer.java
analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemmer.java

The Stempel analyzer (stempel) includes BSD-licensed software developed
by the Egothor project http://egothor.sf.net/, created by Leo Galambos, Martin Kvapil,
and Edmond Nolan.

The Polish analyzer (stempel) comes with a default
stopword list that is BSD-licensed created by the Carrot2 project. The file resides
in stempel/src/resources/org/apache/lucene/analysis/pl/stopwords.txt.
See http://project.carrot2.org/license.html.

The SmartChineseAnalyzer source code (smartcn) was
provided by Xiaoping Gao and copyright 2009 by www.imdict.net.

WordBreakTestUnicode_*.java (under modules/analysis/common/src/test/)
is derived from Unicode data such as the Unicode Character Database.
See http://unicode.org/copyright.html for more details.

The Morfologik analyzer (morfologik) includes BSD-licensed software
developed by Dawid Weiss and Marcin Miłkowski (http://morfologik.blogspot.com/).

Morfologik uses data from Polish ispell/myspell dictionary
(http://www.sjp.pl/slownik/en/) licenced on the terms of (inter alia)
LGPL and Creative Commons ShareAlike.

Morfologic includes data from BSD-licensed dictionary of Polish (SGJP)
(http://sgjp.pl/morfeusz/)

Servlet-api.jar and javax.servlet-*.jar are under the CDDL license, the original
source code for this can be found at http://www.eclipse.org/jetty/downloads.php

===========================================================================
Kuromoji Japanese Morphological Analyzer - Apache Lucene Integration
===========================================================================

This software includes a binary and/or source version of data from

mecab-ipadic-2.7.0-20070801

which can be obtained from

http://atilika.com/releases/mecab-ipadic/mecab-ipadic-2.7.0-20070801.tar.gz

or

http://jaist.dl.sourceforge.net/project/mecab/mecab-ipadic/2.7.0-20070801/mecab-ipadic-2.7.0-20070801.tar.gz

===========================================================================
mecab-ipadic-2.7.0-20070801 Notice
===========================================================================

Nara Institute of Science and Technology (NAIST),
the copyright holders, disclaims all warranties with regard to this
software, including all implied warranties of merchantability and
fitness, in no event shall NAIST be liable for
any special, indirect or consequential damages or any damages
whatsoever resulting from loss of use, data or profits, whether in an
action of contract, negligence or other tortuous action, arising out
of or in connection with the use or performance of this software.

A large portion of the dictionary entries
originate from ICOT Free Software. The following conditions for ICOT
Free Software applies to the current dictionary as well.

Each User may also freely distribute the Program, whether in its
original form or modified, to any third party or parties, PROVIDED
that the provisions of Section 3 ("NO WARRANTY") will ALWAYS appear
on, or be attached to, the Program, which is distributed substantially
in the same form as set out herein and that such intended
distribution, if actually made, will neither violate or otherwise
contravene any of the laws and regulations of the countries having
jurisdiction over the User or the intended distribution itself.

NO WARRANTY

The program was produced on an experimental basis in the course of the
research and development conducted during the project and is provided
to users as so produced on an experimental basis. Accordingly, the
program is provided without any warranty whatsoever, whether express,
implied, statutory or otherwise. The term "warranty" used herein
includes, but is not limited to, any warranty of the quality,
performance, merchantability and fitness for a particular purpose of
the program and the nonexistence of any infringement or violation of
any right of any third party.

Each user of the program will agree and understand, and be deemed to
have agreed and understood, that there is no warranty whatsoever for
the program and, accordingly, the entire risk arising from or
otherwise connected with the program is assumed by the user.

Therefore, neither ICOT, the copyright holder, or any other
organization that participated in or was otherwise related to the
development of the program and their respective officials, directors,
officers and other employees shall be held liable for any and all
damages, including, without limitation, general, special, incidental
and consequential damages, arising out of or otherwise in connection
with the use or inability to use the program or any product, material
or result produced or otherwise obtained by using the program,
regardless of whether they have been advised of, or otherwise had
knowledge of, the possibility of such damages at any time during the
project or thereafter. Each user will be deemed to have agreed to the
foregoing by his or her commencement of use of the program. The term
"use" as used herein includes, but is not limited to, the use,
modification, copying and distribution of the program and the
production of secondary products from the program.

In the case where the program, whether in its original form or
modified, was distributed or delivered to or received by a user from
any person, organization or entity other than ICOT, unless it makes or
grants independently of ICOT any specific warranty to the user in
writing, such person, organization or entity, will also be exempted
from and not be held liable to the user for any such damages as noted
above as far as the program is concerned.
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*
* Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V.
* under one or more license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch B.V. licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
* This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License.
*/

package org.elasticsearch.exponentialhistogram;

/**
 * A {@link CopyableBucketIterator} backed by two arrays that are read at the same slot position:
 * one holding the bucket indices and one holding the bucket counts.
 * Iteration starts at the slot passed to the constructor and ends once the current slot reaches {@code limit}.
 */
class BucketArrayIterator implements CopyableBucketIterator {

    private final int scale;
    private final long[] bucketIndices;
    private final long[] bucketCounts;
    private final int limit;

    // Position of the bucket that peekIndex()/peekCount() currently expose.
    private int currentSlot;

    /**
     * @param scale the value reported by {@link #scale()}
     * @param bucketCounts array from which {@link #peekCount()} reads
     * @param bucketIndices array from which {@link #peekIndex()} reads
     * @param startSlot the first slot to expose
     * @param limit the exclusive upper bound for the slot position
     */
    BucketArrayIterator(int scale, long[] bucketCounts, long[] bucketIndices, int startSlot, int limit) {
        this.scale = scale;
        this.bucketIndices = bucketIndices;
        this.bucketCounts = bucketCounts;
        this.limit = limit;
        this.currentSlot = startSlot;
    }

    @Override
    public int scale() {
        return scale;
    }

    @Override
    public boolean hasNext() {
        return limit > currentSlot;
    }

    /**
     * @return the index stored at the current slot
     * @throws IllegalStateException if the iterator is exhausted
     */
    @Override
    public long peekIndex() {
        requireCurrentBucket();
        return bucketIndices[currentSlot];
    }

    /**
     * @return the count stored at the current slot
     * @throws IllegalStateException if the iterator is exhausted
     */
    @Override
    public long peekCount() {
        requireCurrentBucket();
        return bucketCounts[currentSlot];
    }

    /**
     * Moves on to the next slot.
     *
     * @throws IllegalStateException if the iterator is already exhausted
     */
    @Override
    public void advance() {
        requireCurrentBucket();
        currentSlot += 1;
    }

    /**
     * Creates a new iterator positioned at this iterator's current slot.
     * The copy references the same backing arrays (they are not cloned) but tracks its own position.
     */
    @Override
    public CopyableBucketIterator copy() {
        return new BucketArrayIterator(scale, bucketCounts, bucketIndices, currentSlot, limit);
    }

    // Guard shared by the peek/advance operations: they are only legal while a bucket is available.
    private void requireCurrentBucket() {
        if (hasNext()) {
            return;
        }
        throw new IllegalStateException("Iterator has no more buckets");
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@

package org.elasticsearch.exponentialhistogram;

import org.apache.lucene.util.RamUsageEstimator;

import java.util.Arrays;

import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX;
Expand All @@ -33,6 +35,8 @@
*/
class DownscaleStats {

static final long SIZE = RamUsageEstimator.shallowSizeOf(DownscaleStats.class) + RamEstimationUtil.estimateIntArray(MAX_INDEX_BITS);

// collapsedBucketCount[i] stores the number of additional
// collapsed buckets when increasing the scale by (i+1) instead of just by (i)
int[] collapsedBucketCount = new int[MAX_INDEX_BITS];
Expand Down
Loading