Skip to content

Commit d4c5c11

Browse files
committed
Merge branch 'main' into lru
2 parents 020ae6e + 5f59477 commit d4c5c11

File tree

1,065 files changed

+26412
-7991
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,065 files changed

+26412
-7991
lines changed

benchmarks/build.gradle

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ apply plugin: org.elasticsearch.gradle.internal.ElasticsearchJavaBasePlugin
1212
apply plugin: 'java-library'
1313
apply plugin: 'application'
1414

15+
var os = org.gradle.internal.os.OperatingSystem.current()
16+
1517
application {
1618
mainClass = 'org.openjdk.jmh.Main'
1719
}
@@ -39,6 +41,7 @@ dependencies {
3941
api(project(':x-pack:plugin:ql'))
4042
api(project(':x-pack:plugin:esql'))
4143
api(project(':x-pack:plugin:esql:compute'))
44+
implementation project(path: ':libs:elasticsearch-vec')
4245
expression(project(path: ':modules:lang-expression', configuration: 'zip'))
4346
painless(project(path: ':modules:lang-painless', configuration: 'zip'))
4447
api "org.openjdk.jmh:jmh-core:$versions.jmh"
@@ -73,6 +76,16 @@ tasks.named("run").configure {
7376
executable = "${BuildParams.runtimeJavaHome}/bin/java"
7477
args << "-Dplugins.dir=${buildDir}/plugins" << "-Dtests.index=${buildDir}/index"
7578
dependsOn "copyExpression", "copyPainless"
79+
systemProperty 'java.library.path', file("../libs/native/libraries/build/platform/${platformName()}-${os.arch}")
80+
}
81+
82+
// Derives the platform segment of the native library directory from the JVM's
// "os.name" system property: macOS (any name starting with "Mac") is reported
// as "darwin"; every other OS name is returned lower-cased with Locale.ROOT.
String platformName() {
  String osName = System.getProperty("os.name")
  return osName.startsWith("Mac") ? "darwin" : osName.toLowerCase(Locale.ROOT)
}
7790

7891
spotless {

benchmarks/src/main/java/org/elasticsearch/benchmark/vector/VectorScorerBenchmark.java

Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0 and the Server Side Public License, v 1; you may not use this file except
5+
* in compliance with, at your election, the Elastic License 2.0 or the Server
6+
* Side Public License, v 1.
7+
*/
8+
9+
package org.elasticsearch.benchmark.vector;
10+
11+
import org.apache.lucene.index.VectorSimilarityFunction;
12+
import org.apache.lucene.store.Directory;
13+
import org.apache.lucene.store.IOContext;
14+
import org.apache.lucene.store.IndexInput;
15+
import org.apache.lucene.store.IndexOutput;
16+
import org.apache.lucene.store.MMapDirectory;
17+
import org.apache.lucene.util.quantization.ScalarQuantizedVectorSimilarity;
18+
import org.elasticsearch.common.logging.LogConfigurator;
19+
import org.elasticsearch.core.IOUtils;
20+
import org.elasticsearch.vec.VectorScorer;
21+
import org.elasticsearch.vec.VectorScorerFactory;
22+
import org.openjdk.jmh.annotations.Benchmark;
23+
import org.openjdk.jmh.annotations.BenchmarkMode;
24+
import org.openjdk.jmh.annotations.Fork;
25+
import org.openjdk.jmh.annotations.Measurement;
26+
import org.openjdk.jmh.annotations.Mode;
27+
import org.openjdk.jmh.annotations.OutputTimeUnit;
28+
import org.openjdk.jmh.annotations.Param;
29+
import org.openjdk.jmh.annotations.Scope;
30+
import org.openjdk.jmh.annotations.Setup;
31+
import org.openjdk.jmh.annotations.State;
32+
import org.openjdk.jmh.annotations.TearDown;
33+
import org.openjdk.jmh.annotations.Warmup;
34+
35+
import java.io.IOException;
36+
import java.nio.file.Files;
37+
import java.util.concurrent.ThreadLocalRandom;
38+
import java.util.concurrent.TimeUnit;
39+
40+
import static org.elasticsearch.vec.VectorSimilarityType.DOT_PRODUCT;
41+
import static org.elasticsearch.vec.VectorSimilarityType.EUCLIDEAN;
42+
43+
@Fork(value = 1, jvmArgsPrepend = { "--add-modules=jdk.incubator.vector" })
44+
@Warmup(iterations = 3, time = 3)
45+
@Measurement(iterations = 5, time = 3)
46+
@BenchmarkMode(Mode.Throughput)
47+
@OutputTimeUnit(TimeUnit.MICROSECONDS)
48+
@State(Scope.Thread)
49+
/**
50+
* Benchmark that compares various scalar quantized vector similarity function
51+
* implementations;: scalar, lucene's panama-ized, and Elasticsearch's native.
52+
* Run with ./gradlew -p benchmarks run --args 'VectorScorerBenchmark'
53+
*/
54+
public class VectorScorerBenchmark {
55+
56+
static {
57+
LogConfigurator.configureESLogging(); // native access requires logging to be initialized
58+
}
59+
60+
@Param({ "96", "768", "1024" })
61+
int dims;
62+
int size = 2; // there are only two vectors to compare
63+
64+
Directory dir;
65+
IndexInput in;
66+
VectorScorerFactory factory;
67+
68+
byte[] vec1;
69+
byte[] vec2;
70+
float vec1Offset;
71+
float vec2Offset;
72+
float scoreCorrectionConstant;
73+
74+
ScalarQuantizedVectorSimilarity luceneDotScorer;
75+
ScalarQuantizedVectorSimilarity luceneSqrScorer;
76+
VectorScorer nativeDotScorer;
77+
VectorScorer nativeSqrScorer;
78+
79+
@Setup
80+
public void setup() throws IOException {
81+
var optionalVectorScorerFactory = VectorScorerFactory.instance();
82+
if (optionalVectorScorerFactory.isEmpty()) {
83+
String msg = "JDK=["
84+
+ Runtime.version()
85+
+ "], os.name=["
86+
+ System.getProperty("os.name")
87+
+ "], os.arch=["
88+
+ System.getProperty("os.arch")
89+
+ "]";
90+
throw new AssertionError("Vector scorer factory not present. Cannot run the benchmark. " + msg);
91+
}
92+
factory = optionalVectorScorerFactory.get();
93+
scoreCorrectionConstant = 1f;
94+
vec1 = new byte[dims];
95+
vec2 = new byte[dims];
96+
97+
ThreadLocalRandom.current().nextBytes(vec1);
98+
ThreadLocalRandom.current().nextBytes(vec2);
99+
vec1Offset = ThreadLocalRandom.current().nextFloat();
100+
vec2Offset = ThreadLocalRandom.current().nextFloat();
101+
102+
dir = new MMapDirectory(Files.createTempDirectory("nativeScalarQuantBench"));
103+
try (IndexOutput out = dir.createOutput("vector.data", IOContext.DEFAULT)) {
104+
out.writeBytes(vec1, 0, vec1.length);
105+
out.writeInt(Float.floatToIntBits(vec1Offset));
106+
out.writeBytes(vec2, 0, vec2.length);
107+
out.writeInt(Float.floatToIntBits(vec2Offset));
108+
}
109+
in = dir.openInput("vector.data", IOContext.DEFAULT);
110+
111+
luceneDotScorer = ScalarQuantizedVectorSimilarity.fromVectorSimilarity(
112+
VectorSimilarityFunction.DOT_PRODUCT,
113+
scoreCorrectionConstant
114+
);
115+
luceneSqrScorer = ScalarQuantizedVectorSimilarity.fromVectorSimilarity(VectorSimilarityFunction.EUCLIDEAN, scoreCorrectionConstant);
116+
nativeDotScorer = factory.getScalarQuantizedVectorScorer(dims, size, scoreCorrectionConstant, DOT_PRODUCT, in).get();
117+
nativeSqrScorer = factory.getScalarQuantizedVectorScorer(dims, size, scoreCorrectionConstant, EUCLIDEAN, in).get();
118+
119+
// sanity
120+
var f1 = dotProductLucene();
121+
var f2 = dotProductNative();
122+
var f3 = dotProductScalar();
123+
if (f1 != f2) {
124+
throw new AssertionError("lucene[" + f1 + "] != " + "native[" + f2 + "]");
125+
}
126+
if (f1 != f3) {
127+
throw new AssertionError("lucene[" + f1 + "] != " + "scalar[" + f3 + "]");
128+
}
129+
// square distance
130+
f1 = squareDistanceLucene();
131+
f2 = squareDistanceNative();
132+
f3 = squareDistanceScalar();
133+
if (f1 != f2) {
134+
throw new AssertionError("lucene[" + f1 + "] != " + "native[" + f2 + "]");
135+
}
136+
if (f1 != f3) {
137+
throw new AssertionError("lucene[" + f1 + "] != " + "scalar[" + f3 + "]");
138+
}
139+
}
140+
141+
@TearDown
142+
public void teardown() throws IOException {
143+
IOUtils.close(dir, in);
144+
}
145+
146+
@Benchmark
147+
public float dotProductLucene() {
148+
return luceneDotScorer.score(vec1, vec1Offset, vec2, vec2Offset);
149+
}
150+
151+
@Benchmark
152+
public float dotProductNative() throws IOException {
153+
return nativeDotScorer.score(0, 1);
154+
}
155+
156+
@Benchmark
157+
public float dotProductScalar() {
158+
int dotProduct = 0;
159+
for (int i = 0; i < vec1.length; i++) {
160+
dotProduct += vec1[i] * vec2[i];
161+
}
162+
float adjustedDistance = dotProduct * scoreCorrectionConstant + vec1Offset + vec2Offset;
163+
return (1 + adjustedDistance) / 2;
164+
}
165+
166+
// -- square distance
167+
168+
@Benchmark
169+
public float squareDistanceLucene() {
170+
return luceneSqrScorer.score(vec1, vec1Offset, vec2, vec2Offset);
171+
}
172+
173+
@Benchmark
174+
public float squareDistanceNative() throws IOException {
175+
return nativeSqrScorer.score(0, 1);
176+
}
177+
178+
@Benchmark
179+
public float squareDistanceScalar() {
180+
int squareDistance = 0;
181+
for (int i = 0; i < vec1.length; i++) {
182+
int diff = vec1[i] - vec2[i];
183+
squareDistance += diff * diff;
184+
}
185+
float adjustedDistance = squareDistance * scoreCorrectionConstant;
186+
return 1 / (1f + adjustedDistance);
187+
}
188+
}

build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/InternalDistributionModuleCheckTaskProvider.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ public class InternalDistributionModuleCheckTaskProvider {
6363
"org.elasticsearch.securesm",
6464
"org.elasticsearch.server",
6565
"org.elasticsearch.tdigest",
66+
"org.elasticsearch.vec",
6667
"org.elasticsearch.xcontent"
6768
);
6869

build-tools-internal/src/test/groovy/org/elasticsearch/gradle/internal/doc/DocSnippetTaskSpec.groovy

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -529,13 +529,15 @@ GET /_analyze
529529
{
530530
"type": "mapping",
531531
"mappings": [
532-
"٠ => 0",
533-
"١ => 1",
534-
"٢ => 2"
532+
"e => 0",
533+
"m => 1",
534+
"p => 2",
535+
"t => 3",
536+
"y => 4"
535537
]
536538
}
537539
],
538-
"text": "My license plate is ٢٥٠١٥"
540+
"text": "My license plate is empty"
539541
}
540542
----
541543
"""
@@ -551,13 +553,15 @@ GET /_analyze
551553
{
552554
"type": "mapping",
553555
"mappings": [
554-
"٠ => 0",
555-
"١ => 1",
556-
"٢ => 2"
556+
"e => 0",
557+
"m => 1",
558+
"p => 2",
559+
"t => 3",
560+
"y => 4"
557561
]
558562
}
559563
],
560-
"text": "My license plate is ٢٥٠١٥"
564+
"text": "My license plate is empty"
561565
}"""
562566
}
563567

build-tools-internal/src/test/groovy/org/elasticsearch/gradle/internal/doc/RestTestsFromDocSnippetTaskSpec.groovy

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -454,11 +454,11 @@ Requires the `manage_ml` cluster privilege. This privilege is included in the
454454
455455
`<job_id>`::
456456
(Required, string)
457-
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection]
457+
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=job-id-anomaly-detection]
458458
459459
`<snapshot_id>`::
460460
(Required, string)
461-
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=snapshot-id]
461+
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=snapshot-id]
462462
463463
[[ml-update-snapshot-request-body]]
464464
== {api-request-body-title}
@@ -470,7 +470,7 @@ The following properties can be updated after the model snapshot is created:
470470
471471
`retain`::
472472
(Optional, Boolean)
473-
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=retain]
473+
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=retain]
474474
475475
476476
[[ml-update-snapshot-example]]

0 commit comments

Comments
 (0)