Skip to content

Commit 41dc1da

Browse files
Merge branch 'main' into bk-cost-opt-spot
2 parents 2cced9e + aedc07d commit 41dc1da

File tree

239 files changed

+12258
-2160
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

239 files changed

+12258
-2160
lines changed

benchmarks/build.gradle

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ apply plugin: org.elasticsearch.gradle.internal.ElasticsearchJavaBasePlugin
1212
apply plugin: 'java-library'
1313
apply plugin: 'application'
1414

15+
var os = org.gradle.internal.os.OperatingSystem.current()
16+
1517
application {
1618
mainClass = 'org.openjdk.jmh.Main'
1719
}
@@ -39,6 +41,7 @@ dependencies {
3941
api(project(':x-pack:plugin:ql'))
4042
api(project(':x-pack:plugin:esql'))
4143
api(project(':x-pack:plugin:esql:compute'))
44+
implementation project(path: ':libs:elasticsearch-vec')
4245
expression(project(path: ':modules:lang-expression', configuration: 'zip'))
4346
painless(project(path: ':modules:lang-painless', configuration: 'zip'))
4447
api "org.openjdk.jmh:jmh-core:$versions.jmh"
@@ -73,6 +76,16 @@ tasks.named("run").configure {
7376
executable = "${BuildParams.runtimeJavaHome}/bin/java"
7477
args << "-Dplugins.dir=${buildDir}/plugins" << "-Dtests.index=${buildDir}/index"
7578
dependsOn "copyExpression", "copyPainless"
79+
systemProperty 'java.library.path', file("../libs/native/libraries/build/platform/${platformName()}-${os.arch}")
80+
}
81+
82+
// Returns the platform part of the native library directory name that the "run" task
// puts on java.library.path: "darwin" when the os.name system property starts with "Mac",
// otherwise os.name lower-cased with the ROOT locale.
// NOTE(review): os.name may contain spaces or a version (e.g. on Windows); confirm the
// lower-cased value matches an existing platform directory on every OS this runs on.
String platformName() {
83+
String name = System.getProperty("os.name");
84+
if (name.startsWith("Mac")) {
85+
return "darwin";
86+
} else {
87+
return name.toLowerCase(Locale.ROOT);
88+
}
7689
}
7790

7891
spotless {
Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0 and the Server Side Public License, v 1; you may not use this file except
5+
* in compliance with, at your election, the Elastic License 2.0 or the Server
6+
* Side Public License, v 1.
7+
*/
8+
9+
package org.elasticsearch.benchmark.vector;
10+
11+
import org.apache.lucene.index.VectorSimilarityFunction;
12+
import org.apache.lucene.store.Directory;
13+
import org.apache.lucene.store.IOContext;
14+
import org.apache.lucene.store.IndexInput;
15+
import org.apache.lucene.store.IndexOutput;
16+
import org.apache.lucene.store.MMapDirectory;
17+
import org.apache.lucene.util.quantization.ScalarQuantizedVectorSimilarity;
18+
import org.elasticsearch.common.logging.LogConfigurator;
19+
import org.elasticsearch.core.IOUtils;
20+
import org.elasticsearch.vec.VectorScorer;
21+
import org.elasticsearch.vec.VectorScorerFactory;
22+
import org.openjdk.jmh.annotations.Benchmark;
23+
import org.openjdk.jmh.annotations.BenchmarkMode;
24+
import org.openjdk.jmh.annotations.Fork;
25+
import org.openjdk.jmh.annotations.Measurement;
26+
import org.openjdk.jmh.annotations.Mode;
27+
import org.openjdk.jmh.annotations.OutputTimeUnit;
28+
import org.openjdk.jmh.annotations.Param;
29+
import org.openjdk.jmh.annotations.Scope;
30+
import org.openjdk.jmh.annotations.Setup;
31+
import org.openjdk.jmh.annotations.State;
32+
import org.openjdk.jmh.annotations.TearDown;
33+
import org.openjdk.jmh.annotations.Warmup;
34+
35+
import java.io.IOException;
36+
import java.nio.file.Files;
37+
import java.util.concurrent.ThreadLocalRandom;
38+
import java.util.concurrent.TimeUnit;
39+
40+
import static org.elasticsearch.vec.VectorSimilarityType.DOT_PRODUCT;
41+
import static org.elasticsearch.vec.VectorSimilarityType.EUCLIDEAN;
42+
43+
@Fork(value = 1, jvmArgsPrepend = { "--add-modules=jdk.incubator.vector" })
44+
@Warmup(iterations = 3, time = 3)
45+
@Measurement(iterations = 5, time = 3)
46+
@BenchmarkMode(Mode.Throughput)
47+
@OutputTimeUnit(TimeUnit.MICROSECONDS)
48+
@State(Scope.Thread)
49+
/**
50+
* Benchmark that compares various scalar quantized vector similarity function
51+
* implementations: scalar, Lucene's panama-ized, and Elasticsearch's native.
52+
* Run with ./gradlew -p benchmarks run --args 'VectorScorerBenchmark'
53+
*/
54+
public class VectorScorerBenchmark {
55+
56+
static {
57+
LogConfigurator.configureESLogging(); // native access requires logging to be initialized
58+
}
59+
60+
@Param({ "96", "768", "1024" })
61+
int dims;
62+
int size = 2; // there are only two vectors to compare
63+
64+
Directory dir;
65+
IndexInput in;
66+
VectorScorerFactory factory;
67+
68+
byte[] vec1;
69+
byte[] vec2;
70+
float vec1Offset;
71+
float vec2Offset;
72+
float scoreCorrectionConstant;
73+
74+
ScalarQuantizedVectorSimilarity luceneDotScorer;
75+
ScalarQuantizedVectorSimilarity luceneSqrScorer;
76+
VectorScorer nativeDotScorer;
77+
VectorScorer nativeSqrScorer;
78+
79+
/**
 * Prepares the benchmark state: obtains the vector scorer factory, generates two random
 * byte vectors of {@code dims} length with random float offsets, writes them to a file in
 * a temporary {@link MMapDirectory}, creates the Lucene and native scorers, and finally
 * checks that the Lucene, native and scalar implementations return the same score.
 *
 * @throws IOException if creating, writing or opening the vector data file fails
 * @throws AssertionError if no vector scorer factory is available or the implementations disagree
 */
@Setup
80+
public void setup() throws IOException {
81+
var optionalVectorScorerFactory = VectorScorerFactory.instance();
82+
if (optionalVectorScorerFactory.isEmpty()) {
83+
// include the runtime details in the failure so an unsupported environment is easy to spot
String msg = "JDK=["
84+
+ Runtime.version()
85+
+ "], os.name=["
86+
+ System.getProperty("os.name")
87+
+ "], os.arch=["
88+
+ System.getProperty("os.arch")
89+
+ "]";
90+
throw new AssertionError("Vector scorer factory not present. Cannot run the benchmark. " + msg);
91+
}
92+
factory = optionalVectorScorerFactory.get();
93+
scoreCorrectionConstant = 1f;
94+
vec1 = new byte[dims];
95+
vec2 = new byte[dims];
96+
97+
ThreadLocalRandom.current().nextBytes(vec1);
98+
ThreadLocalRandom.current().nextBytes(vec2);
99+
vec1Offset = ThreadLocalRandom.current().nextFloat();
100+
vec2Offset = ThreadLocalRandom.current().nextFloat();
101+
102+
dir = new MMapDirectory(Files.createTempDirectory("nativeScalarQuantBench"));
103+
// file layout: for each vector, its dims bytes followed by its float offset written as int bits
try (IndexOutput out = dir.createOutput("vector.data", IOContext.DEFAULT)) {
104+
out.writeBytes(vec1, 0, vec1.length);
105+
out.writeInt(Float.floatToIntBits(vec1Offset));
106+
out.writeBytes(vec2, 0, vec2.length);
107+
out.writeInt(Float.floatToIntBits(vec2Offset));
108+
}
109+
in = dir.openInput("vector.data", IOContext.DEFAULT);
110+
111+
luceneDotScorer = ScalarQuantizedVectorSimilarity.fromVectorSimilarity(
112+
VectorSimilarityFunction.DOT_PRODUCT,
113+
scoreCorrectionConstant
114+
);
115+
luceneSqrScorer = ScalarQuantizedVectorSimilarity.fromVectorSimilarity(VectorSimilarityFunction.EUCLIDEAN, scoreCorrectionConstant);
116+
// NOTE(review): get() is called on the returned value without a presence check - confirm a scorer is always returned here
nativeDotScorer = factory.getScalarQuantizedVectorScorer(dims, size, scoreCorrectionConstant, DOT_PRODUCT, in).get();
117+
nativeSqrScorer = factory.getScalarQuantizedVectorScorer(dims, size, scoreCorrectionConstant, EUCLIDEAN, in).get();
118+
119+
// sanity: all three dot product implementations must return exactly the same float
120+
var f1 = dotProductLucene();
121+
var f2 = dotProductNative();
122+
var f3 = dotProductScalar();
123+
if (f1 != f2) {
124+
throw new AssertionError("lucene[" + f1 + "] != " + "native[" + f2 + "]");
125+
}
126+
if (f1 != f3) {
127+
throw new AssertionError("lucene[" + f1 + "] != " + "scalar[" + f3 + "]");
128+
}
129+
// square distance: same exact-equality check across the three implementations
130+
f1 = squareDistanceLucene();
131+
f2 = squareDistanceNative();
132+
f3 = squareDistanceScalar();
133+
if (f1 != f2) {
134+
throw new AssertionError("lucene[" + f1 + "] != " + "native[" + f2 + "]");
135+
}
136+
if (f1 != f3) {
137+
throw new AssertionError("lucene[" + f1 + "] != " + "scalar[" + f3 + "]");
138+
}
139+
}
140+
141+
/**
 * Releases the resources opened by the setup method. The index input is closed before
 * the directory it was opened from, i.e. in reverse order of acquisition.
 *
 * @throws IOException propagated from closing the input or the directory
 */
@TearDown
142+
public void teardown() throws IOException {
143+
IOUtils.close(in, dir);
144+
}
145+
146+
/**
 * Benchmarks the Lucene scalar quantized dot product scorer on the two byte arrays
 * and their offsets.
 *
 * @return the score computed by {@code luceneDotScorer}
 */
@Benchmark
147+
public float dotProductLucene() {
148+
return luceneDotScorer.score(vec1, vec1Offset, vec2, vec2Offset);
149+
}
150+
151+
/**
 * Benchmarks the native dot product scorer created in setup over the "vector.data" input.
 *
 * @return the score computed by {@code nativeDotScorer}
 * @throws IOException propagated from the scorer
 */
@Benchmark
152+
public float dotProductNative() throws IOException {
153+
// presumably 0 and 1 are the ordinals of the first and second vector written in setup - TODO confirm
return nativeDotScorer.score(0, 1);
154+
}
155+
156+
/**
 * Baseline implementation: a plain loop that sums the element-wise products of
 * {@code vec1} and {@code vec2}, scales the sum by the score correction constant, adds
 * both vector offsets and maps the result to {@code (1 + adjusted) / 2}. The setup
 * method asserts that this returns the same value as the Lucene and native scorers.
 *
 * @return the dot product score of the two vectors
 */
@Benchmark
157+
public float dotProductScalar() {
158+
int dotProduct = 0;
159+
for (int i = 0; i < vec1.length; i++) {
160+
// byte operands are promoted to int, so the product is accumulated as an int
dotProduct += vec1[i] * vec2[i];
161+
}
162+
float adjustedDistance = dotProduct * scoreCorrectionConstant + vec1Offset + vec2Offset;
163+
return (1 + adjustedDistance) / 2;
164+
}
165+
166+
// -- square distance
167+
168+
/**
 * Benchmarks the Lucene scalar quantized scorer created for the EUCLIDEAN similarity
 * on the two byte arrays and their offsets.
 *
 * @return the score computed by {@code luceneSqrScorer}
 */
@Benchmark
169+
public float squareDistanceLucene() {
170+
return luceneSqrScorer.score(vec1, vec1Offset, vec2, vec2Offset);
171+
}
172+
173+
/**
 * Benchmarks the native scorer created in setup for the EUCLIDEAN similarity type over
 * the "vector.data" input.
 *
 * @return the score computed by {@code nativeSqrScorer}
 * @throws IOException propagated from the scorer
 */
@Benchmark
174+
public float squareDistanceNative() throws IOException {
175+
// presumably 0 and 1 are the ordinals of the first and second vector written in setup - TODO confirm
return nativeSqrScorer.score(0, 1);
176+
}
177+
178+
/**
 * Baseline implementation: a plain loop that sums the squared element-wise differences
 * of {@code vec1} and {@code vec2}, scales the sum by the score correction constant and
 * maps it to {@code 1 / (1 + adjusted)}. The vector offsets are not used here. The setup
 * method asserts that this returns the same value as the Lucene and native scorers.
 *
 * @return the square distance score of the two vectors
 */
@Benchmark
179+
public float squareDistanceScalar() {
180+
int squareDistance = 0;
181+
for (int i = 0; i < vec1.length; i++) {
182+
// byte operands are promoted to int before subtracting
int diff = vec1[i] - vec2[i];
183+
squareDistance += diff * diff;
184+
}
185+
float adjustedDistance = squareDistance * scoreCorrectionConstant;
186+
return 1 / (1f + adjustedDistance);
187+
}
188+
}

build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/InternalDistributionModuleCheckTaskProvider.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ public class InternalDistributionModuleCheckTaskProvider {
6363
"org.elasticsearch.securesm",
6464
"org.elasticsearch.server",
6565
"org.elasticsearch.tdigest",
66+
"org.elasticsearch.vec",
6667
"org.elasticsearch.xcontent"
6768
);
6869

build-tools-internal/src/test/groovy/org/elasticsearch/gradle/internal/doc/DocSnippetTaskSpec.groovy

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -535,7 +535,7 @@ GET /_analyze
535535
]
536536
}
537537
],
538-
"text": "My license plate is ٢٥٠١٥"
538+
"text": "My license plate is empty"
539539
}
540540
----
541541
"""
@@ -557,7 +557,7 @@ GET /_analyze
557557
]
558558
}
559559
],
560-
"text": "My license plate is ٢٥٠١٥"
560+
"text": "My license plate is empty"
561561
}"""
562562
}
563563

dev-tools/publish_zstd_binaries.sh

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
#!/usr/bin/env bash
2+
#
3+
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
4+
# or more contributor license agreements. Licensed under the Elastic License
5+
# 2.0 and the Server Side Public License, v 1; you may not use this file except
6+
# in compliance with, at your election, the Elastic License 2.0 or the Server
7+
# Side Public License, v 1.
8+
#
9+
10+
set -e
11+
12+
# Exactly one argument (the zstd version) is required. Anything else is a usage error:
# print the usage text on stderr and exit non-zero so callers can detect the failure.
if [ "$#" -ne 1 ]; then
13+
>&2 printf 'Usage: %s <version>\n' "$(basename "$0")"
14+
exit 1;
15+
fi
16+
17+
if [ $(docker buildx inspect --bootstrap | grep -c 'Platforms:.*linux/arm64') -ne 1 ]; then
18+
echo 'Error: No Docker support for linux/arm64 detected'
19+
echo 'For more information see https://docs.docker.com/build/building/multi-platform'
20+
exit 1;
21+
fi
22+
23+
if [ -z "$ARTIFACTORY_API_KEY" ]; then
24+
echo 'Error: The ARTIFACTORY_API_KEY environment variable must be set.'
25+
exit 1;
26+
fi
27+
28+
VERSION="$1"
29+
ARTIFACTORY_REPOSITORY="${ARTIFACTORY_REPOSITORY:-https://artifactory.elastic.dev/artifactory/elasticsearch-native/}"
30+
TEMP=$(mktemp -d)
31+
32+
# Downloads the Homebrew zstd bottle for macOS 13 and the given CPU architecture from
# ghcr.io into $TEMP and prints the path of the downloaded tarball on stdout.
#   $1 - architecture as named in the image index (called with 'arm64' and 'amd64')
# Reads the global $VERSION and $TEMP. The bottle digest is looked up in the image index
# for $VERSION first, then the blob with that sha256 digest is downloaded.
fetch_homebrew_artifact() {
33+
DIGEST=$(curl -sS --retry 3 -H "Accept: application/vnd.oci.image.index.v1+json" -H "Authorization: Bearer QQ==" \
34+
--location "https://ghcr.io/v2/homebrew/core/zstd/manifests/$VERSION" | jq -r \
35+
".manifests[] | select(.platform.os == \"darwin\" and .platform.architecture == \"$1\" and .platform.\"os.version\" == \"macOS 13\") | .annotations.\"sh.brew.bottle.digest\"")
36+
37+
OUTPUT_FILE="$TEMP/zstd-$VERSION-darwin-$1.tar.gz"
38+
curl -sS --retry 3 -H "Authorization: Bearer QQ==" --output "$OUTPUT_FILE" --location "https://ghcr.io/v2/homebrew/core/zstd/blobs/sha256:$DIGEST"
39+
# the only stdout output, so callers can capture the path with $(...)
echo $OUTPUT_FILE
40+
}
41+
42+
# Downloads the LICENSE file of the zstd tag v$VERSION from the facebook/zstd GitHub
# repository.
#   $1 - path the license is written to
# NOTE(review): curl is run without --fail, so an HTTP error page would presumably be
# saved as the license file - confirm whether that needs guarding.
download_license() {
43+
curl -sS --retry 3 --location https://raw.githubusercontent.com/facebook/zstd/v${VERSION}/LICENSE --output $1
44+
}
45+
46+
echo 'Downloading MacOS zstd binaries...'
47+
DARWIN_ARM_BREW=$(fetch_homebrew_artifact 'arm64')
48+
DARWIN_X86_BREW=$(fetch_homebrew_artifact 'amd64')
49+
50+
# Repackages a downloaded Homebrew bottle as a jar containing a darwin-<arch> directory
# with LICENSE and libzstd.dylib, and prints the jar path on stdout.
#   $1 - path of the bottle tarball (removed after extraction)
#   $2 - architecture suffix used in the directory and jar name (called with "aarch64" and "x86-64")
# Reads the global $VERSION and $TEMP.
build_darwin_jar() {
51+
ARTIFACT="$TEMP/zstd-$VERSION-darwin-$2.jar"
52+
TAR_DIR="$TEMP/darwin-$2"
53+
mkdir $TAR_DIR
54+
# extract only the license and the versioned dylib
# NOTE(review): --include looks like a bsdtar option - confirm this only runs where tar is bsdtar (e.g. macOS)
tar zxf $1 --strip-components=2 --include="*/LICENSE" --include="*/libzstd.$VERSION.dylib" -C $TAR_DIR && rm $1
55+
# drop the version from the library name and flatten the lib/ directory
mv $TAR_DIR/lib/libzstd.$VERSION.dylib $TAR_DIR/libzstd.dylib && rm -rf $TAR_DIR/lib
56+
# presumably xargs is only used to trim whitespace around the count printed by wc
FILE_COUNT=$(ls -1 $TAR_DIR | wc -l | xargs)
57+
# exactly two entries are expected: LICENSE and libzstd.dylib
if [ "$FILE_COUNT" -ne 2 ]; then
58+
>&2 echo "ERROR: Expected 2 files in $TAR_DIR but found $FILE_COUNT"
59+
exit 1
60+
fi
61+
# zip from the parent directory so the archive contains the darwin-<arch> directory;
# the archive is written to stdout ("-") and redirected into the jar file
(cd $TAR_DIR/../ && zip -rq - $(basename $TAR_DIR)) > $ARTIFACT && rm -rf $TAR_DIR
62+
echo $ARTIFACT
63+
}
64+
65+
echo 'Building MacOS jars...'
66+
DARWIN_ARM_JAR=$(build_darwin_jar $DARWIN_ARM_BREW "aarch64")
67+
DARWIN_X86_JAR=$(build_darwin_jar $DARWIN_X86_BREW "x86-64")
68+
69+
# Builds libzstd for Linux inside Docker and packages it, together with the zstd
# LICENSE, as a jar containing a linux-<arch> directory. Prints the jar path on stdout.
#   $1 - Docker platform to build and run for (called with "linux/amd64" and "linux/arm64")
#   $2 - architecture suffix used in the directory and jar name
# Reads the global $VERSION and $TEMP and expects zstd.Dockerfile in the current directory.
build_linux_jar() {
70+
ARTIFACT="$TEMP/zstd-$VERSION-linux-$2.jar"
71+
OUTPUT_DIR="$TEMP/linux-$2"
72+
mkdir $OUTPUT_DIR
73+
# build the requested version: the jar name and the license already use $VERSION,
# so the compiled library must come from the same release
DOCKER_IMAGE=$(docker build --build-arg="ZSTD_VERSION=$VERSION" --file zstd.Dockerfile --platform $1 --quiet .)
74+
# the image's default command writes the shared library to stdout
docker run --platform $1 $DOCKER_IMAGE > $OUTPUT_DIR/libzstd.so
75+
download_license $OUTPUT_DIR/LICENSE
76+
# zip from the parent directory so the archive contains the linux-<arch> directory
(cd $OUTPUT_DIR/../ && zip -rq - $(basename $OUTPUT_DIR)) > $ARTIFACT && rm -rf $OUTPUT_DIR
77+
echo $ARTIFACT
78+
}
79+
80+
# Each variable is named after the architecture it actually holds:
# the linux/amd64 build is the x86 jar and the linux/arm64 build is the ARM jar.
echo 'Building Linux jars...'
81+
LINUX_X86_JAR=$(build_linux_jar "linux/amd64" "x86-64")
82+
LINUX_ARM_JAR=$(build_linux_jar "linux/arm64" "aarch64")
83+
84+
# Downloads the official win64 zstd release archive for $VERSION, extracts the DLL and
# packages it, together with the zstd LICENSE, as a jar containing a win32-x86-64
# directory. Prints the jar path on stdout. Reads the global $VERSION and $TEMP.
build_windows_jar() {
85+
ARTIFACT="$TEMP/zstd-$VERSION-windows-x86-64.jar"
86+
OUTPUT_DIR="$TEMP/win32-x86-64"
87+
mkdir $OUTPUT_DIR
88+
curl -sS --retry 3 --location https://github.com/facebook/zstd/releases/download/v${VERSION}/zstd-v${VERSION}-win64.zip --output $OUTPUT_DIR/zstd.zip
89+
# -j drops the archive's directory structure so the DLL lands directly in $OUTPUT_DIR
unzip -jq $OUTPUT_DIR/zstd.zip zstd-v${VERSION}-win64/dll/libzstd.dll -d $OUTPUT_DIR && rm $OUTPUT_DIR/zstd.zip
90+
# the packaged library is named zstd.dll rather than libzstd.dll
mv $OUTPUT_DIR/libzstd.dll $OUTPUT_DIR/zstd.dll
91+
download_license $OUTPUT_DIR/LICENSE
92+
# zip from the parent directory so the archive contains the win32-x86-64 directory
(cd $OUTPUT_DIR/../ && zip -rq - $(basename $OUTPUT_DIR)) > $ARTIFACT && rm -rf $OUTPUT_DIR
93+
echo $ARTIFACT
94+
}
95+
96+
echo 'Building Windows jar...'
97+
WINDOWS_X86_JAR=$(build_windows_jar)
98+
99+
# Uploads one artifact to Artifactory with an HTTP PUT under
# org/elasticsearch/zstd/$VERSION/<file name>.
#   $1 - path of the file to upload
# Reads $ARTIFACTORY_API_KEY, $ARTIFACTORY_REPOSITORY and $VERSION.
upload_artifact() {
100+
# --fail makes curl return non-zero on an HTTP error response, so a rejected upload
# stops the script (set -e) instead of passing silently; ${ARTIFACTORY_REPOSITORY%/}
# drops a trailing slash so the URL does not contain "//org".
curl -sS --fail -X PUT -H "X-JFrog-Art-Api: ${ARTIFACTORY_API_KEY}" --data-binary "@$1" --location "${ARTIFACTORY_REPOSITORY%/}/org/elasticsearch/zstd/${VERSION}/$(basename "$1")"
101+
}
102+
103+
echo 'Uploading artifacts...'
104+
upload_artifact ${DARWIN_ARM_JAR}
105+
upload_artifact ${DARWIN_X86_JAR}
106+
upload_artifact ${LINUX_ARM_JAR}
107+
upload_artifact ${LINUX_X86_JAR}
108+
upload_artifact ${WINDOWS_X86_JAR}
109+
110+
rm -rf $TEMP

dev-tools/zstd.Dockerfile

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# Builds libzstd from the zstd git tag v<ZSTD_VERSION> and, when a container is run
# from the image, writes the stripped shared library to stdout.
FROM centos:7
2+
# zstd release to build, passed with --build-arg
ARG ZSTD_VERSION
3+
4+
RUN yum install -y git gcc gcc-c++ make
5+
# shallow clone of just the requested release tag
RUN git clone --depth 1 --branch v${ZSTD_VERSION} https://github.com/facebook/zstd.git
6+
WORKDIR zstd
7+
RUN make lib-release && strip --strip-unneeded lib/libzstd.so.${ZSTD_VERSION}
8+
9+
# build args are not available when the container runs, so keep the version as an
# environment variable for the CMD below
ENV ZSTD_VERSION=${ZSTD_VERSION}
10+
11+
# shell-form CMD so ${ZSTD_VERSION} is expanded at run time
CMD cat lib/libzstd.so.${ZSTD_VERSION}

distribution/tools/server-cli/src/main/java/org/elasticsearch/server/cli/SystemJvmOptions.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ static List<String> systemJvmOptions(Settings nodeSettings, final Map<String, St
7373
* explore alternatives. See org.elasticsearch.xpack.searchablesnapshots.preallocate.Preallocate.
7474
*/
7575
"--add-opens=java.base/java.io=org.elasticsearch.preallocate",
76+
"--add-opens=org.apache.lucene.core/org.apache.lucene.store=org.elasticsearch.vec",
7677
maybeEnableNativeAccess(),
7778
maybeOverrideDockerCgroup(distroType),
7879
maybeSetActiveProcessorCount(nodeSettings),

docs/changelog/106133.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 106133
2+
summary: Add an optimised vector distance function for aarch64
3+
area: Search
4+
type: enhancement
5+
issues: []

docs/changelog/106796.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 106796
2+
summary: Bulk loading enrich fields in ESQL
3+
area: ES|QL
4+
type: enhancement
5+
issues: []

docs/changelog/106851.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 106851
2+
summary: Catching `StackOverflowErrors` from bad regexes in `GsubProcessor`
3+
area: Ingest Node
4+
type: bug
5+
issues: []

0 commit comments

Comments
 (0)