Skip to content

Commit b99f44a

Browse files
authored
Merge branch 'main' into esql-exphisto-median-tests
2 parents 4f034fc + 59a8bb2 commit b99f44a

File tree

69 files changed

+1004
-343
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

69 files changed

+1004
-343
lines changed

.buildkite/pipelines/periodic-java-ea.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -385,8 +385,8 @@ steps:
385385
- signal_reason: agent_stop
386386
limit: 3
387387

388-
- label: 9.0.7 / bwc
389-
command: .ci/scripts/run-gradle.sh -Druntime.java=$$JAVA_EA_VERSION -Dbwc.checkout.align=true v9.0.7#bwcTest
388+
- label: 9.0.8 / bwc
389+
command: .ci/scripts/run-gradle.sh -Druntime.java=$$JAVA_EA_VERSION -Dbwc.checkout.align=true v9.0.8#bwcTest
390390
timeout_in_minutes: 300
391391
agents:
392392
provider: gcp
@@ -395,7 +395,7 @@ steps:
395395
buildDirectory: /dev/shm/bk
396396
preemptible: true
397397
env:
398-
BWC_VERSION: 9.0.7
398+
BWC_VERSION: 9.0.8
399399
retry:
400400
automatic:
401401
- exit_status: "-1"

.buildkite/pipelines/periodic-packaging.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -352,8 +352,8 @@ steps:
352352
env:
353353
BWC_VERSION: 8.19.8
354354

355-
- label: "{{matrix.image}} / 9.0.7 / packaging-tests-upgrade"
356-
command: ./.ci/scripts/packaging-test.sh -Dbwc.checkout.align=true destructiveDistroUpgradeTest.v9.0.7
355+
- label: "{{matrix.image}} / 9.0.8 / packaging-tests-upgrade"
356+
command: ./.ci/scripts/packaging-test.sh -Dbwc.checkout.align=true destructiveDistroUpgradeTest.v9.0.8
357357
timeout_in_minutes: 300
358358
matrix:
359359
setup:
@@ -366,7 +366,7 @@ steps:
366366
machineType: custom-16-32768
367367
buildDirectory: /dev/shm/bk
368368
env:
369-
BWC_VERSION: 9.0.7
369+
BWC_VERSION: 9.0.8
370370

371371
- label: "{{matrix.image}} / 9.1.8 / packaging-tests-upgrade"
372372
command: ./.ci/scripts/packaging-test.sh -Dbwc.checkout.align=true destructiveDistroUpgradeTest.v9.1.8

.buildkite/pipelines/periodic.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -382,8 +382,8 @@ steps:
382382
- signal_reason: agent_stop
383383
limit: 3
384384

385-
- label: 9.0.7 / bwc
386-
command: .ci/scripts/run-gradle.sh --continue -Dbwc.checkout.align=true v9.0.7#bwcTest
385+
- label: 9.0.8 / bwc
386+
command: .ci/scripts/run-gradle.sh --continue -Dbwc.checkout.align=true v9.0.8#bwcTest
387387
timeout_in_minutes: 300
388388
agents:
389389
provider: gcp
@@ -392,7 +392,7 @@ steps:
392392
buildDirectory: /dev/shm/bk
393393
preemptible: true
394394
env:
395-
BWC_VERSION: 9.0.7
395+
BWC_VERSION: 9.0.8
396396
retry:
397397
automatic:
398398
- exit_status: "-1"

.ci/bwcVersions

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ BWC_VERSION:
1919
- "8.17.10"
2020
- "8.18.8"
2121
- "8.19.8"
22-
- "9.0.7"
22+
- "9.0.8"
2323
- "9.1.8"
2424
- "9.2.2"
2525
- "9.3.0"
benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/BenchmarkUtils.java

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.benchmark.vector.scorer;
11+
12+
import org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorScorer;
13+
import org.apache.lucene.codecs.lucene99.OffHeapQuantizedByteVectorValues;
14+
import org.apache.lucene.index.VectorSimilarityFunction;
15+
import org.apache.lucene.store.Directory;
16+
import org.apache.lucene.store.IOContext;
17+
import org.apache.lucene.store.IndexInput;
18+
import org.apache.lucene.store.IndexOutput;
19+
import org.apache.lucene.store.MemorySegmentAccessInput;
20+
import org.apache.lucene.util.hnsw.RandomVectorScorer;
21+
import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier;
22+
import org.apache.lucene.util.quantization.QuantizedByteVectorValues;
23+
import org.apache.lucene.util.quantization.ScalarQuantizer;
24+
import org.elasticsearch.simdvec.VectorScorerFactory;
25+
26+
import java.io.IOException;
27+
import java.util.concurrent.ThreadLocalRandom;
28+
29+
/**
 * Package-private collection of static helpers for the vector scorer benchmarks:
 * random int7 vector data generation, lookup of the {@code VectorScorerFactory},
 * and construction of Lucene quantized vector values and scorers over an {@code IndexInput}.
 */
class BenchmarkUtils {
30+
// Unsigned int7 byte vectors have values in the range of 0 to 127 (inclusive).
31+
static final byte MIN_INT7_VALUE = 0;
32+
static final byte MAX_INT7_VALUE = 127;
33+
/**
 * Overwrites every element of {@code bytes} with a random value between
 * {@link #MIN_INT7_VALUE} and {@link #MAX_INT7_VALUE}, both inclusive.
 * Values are drawn from {@code ThreadLocalRandom.current()}.
 */
34+
static void randomInt7BytesBetween(byte[] bytes) {
35+
var random = ThreadLocalRandom.current();
36+
for (int i = 0, len = bytes.length; i < len;) {
37+
// nextInt's upper bound is exclusive, hence the + 1 to make MAX_INT7_VALUE reachable.
bytes[i++] = (byte) random.nextInt(MIN_INT7_VALUE, MAX_INT7_VALUE + 1);
38+
}
39+
}
40+
/**
 * Creates the file {@code "vector.data"} in {@code dir} and writes {@code numVectors} entries to it.
 * Each entry is {@code dims} random int7 bytes followed by one random float, stored as its
 * int bits via {@code writeInt}. {@link #readNodeCorrectionConstant} reads that float back.
 * <p>
 * Note: {@code random} is used only for the trailing float; the vector bytes come from
 * {@link #randomInt7BytesBetween}, which obtains its own {@code ThreadLocalRandom.current()}.
 *
 * @throws IOException if creating or writing the output fails
 */
41+
static void createRandomInt7VectorData(ThreadLocalRandom random, Directory dir, int dims, int numVectors) throws IOException {
42+
try (IndexOutput out = dir.createOutput("vector.data", IOContext.DEFAULT)) {
43+
// A single buffer is reused; it is refilled with fresh random bytes for every vector.
var vec = new byte[dims];
44+
for (int v = 0; v < numVectors; v++) {
45+
randomInt7BytesBetween(vec);
46+
var vecOffset = random.nextFloat();
47+
out.writeBytes(vec, 0, vec.length);
48+
out.writeInt(Float.floatToIntBits(vecOffset));
49+
}
50+
}
51+
}
52+
/**
 * Returns the factory held by {@code VectorScorerFactory.instance()}.
 *
 * @throws AssertionError if no factory is present; the message includes the JDK version
 *         and the {@code os.name} and {@code os.arch} system properties
 */
53+
static VectorScorerFactory getScorerFactoryOrDie() {
54+
var optionalVectorScorerFactory = VectorScorerFactory.instance();
55+
if (optionalVectorScorerFactory.isEmpty()) {
56+
String msg = "JDK=["
57+
+ Runtime.version()
58+
+ "], os.name=["
59+
+ System.getProperty("os.name")
60+
+ "], os.arch=["
61+
+ System.getProperty("os.arch")
62+
+ "]";
63+
throw new AssertionError("Vector scorer factory not present. Cannot run the benchmark. " + msg);
64+
}
65+
return optionalVectorScorerFactory.get();
66+
}
67+
/**
 * Returns {@code true} when the running JDK's feature version is 22 or greater.
 * NOTE(review): presumably the first release on which the scorers can work with
 * heap-backed memory segments - confirm against the simdvec implementation.
 */
68+
static boolean supportsHeapSegments() {
69+
return Runtime.version().feature() >= 22;
70+
}
71+
/**
 * Builds dense off-heap quantized vector values of {@code size} vectors with {@code dims}
 * dimensions, backed by a slice covering all of {@code in} (offset 0, length {@code in.length()}).
 * The {@code ScalarQuantizer} is constructed with the fixed arguments {@code 0.1f}, {@code 0.9f}
 * and {@code (byte) 7}.
 *
 * @throws IOException if slicing {@code in} fails
 */
72+
static QuantizedByteVectorValues vectorValues(int dims, int size, IndexInput in, VectorSimilarityFunction sim) throws IOException {
73+
var sq = new ScalarQuantizer(0.1f, 0.9f, (byte) 7);
74+
var slice = in.slice("values", 0, in.length());
75+
return new OffHeapQuantizedByteVectorValues.DenseOffHeapVectorValues(dims, size, sq, false, sim, null, slice);
76+
}
77+
/**
 * Returns the scorer supplier that a new {@code Lucene99ScalarQuantizedVectorScorer}
 * (constructed with a {@code null} argument) produces for {@code values} under {@code sim}.
 */
78+
static RandomVectorScorerSupplier luceneScoreSupplier(QuantizedByteVectorValues values, VectorSimilarityFunction sim)
79+
throws IOException {
80+
return new Lucene99ScalarQuantizedVectorScorer(null).getRandomVectorScorerSupplier(sim, values);
81+
}
82+
/**
 * Returns the scorer that a new {@code Lucene99ScalarQuantizedVectorScorer}
 * (constructed with a {@code null} argument) produces for scoring {@code values}
 * against the float query vector {@code queryVec} under {@code sim}.
 */
83+
static RandomVectorScorer luceneScorer(QuantizedByteVectorValues values, VectorSimilarityFunction sim, float[] queryVec)
84+
throws IOException {
85+
return new Lucene99ScalarQuantizedVectorScorer(null).getRandomVectorScorer(sim, values, queryVec);
86+
}
87+
/**
 * Reads the float stored directly after the vector bytes of ordinal {@code targetOrd}.
 * Each entry is taken to occupy {@code getVectorByteLength() + Float.BYTES} bytes, which is
 * the layout written by {@link #createRandomInt7VectorData} provided the vector byte length
 * equals {@code dims} (TODO confirm). The slice returned by {@code values.getSlice()} must be
 * a {@code MemorySegmentAccessInput}; the cast below fails otherwise.
 */
88+
static float readNodeCorrectionConstant(QuantizedByteVectorValues values, int targetOrd) throws IOException {
89+
var vectorByteSize = values.getVectorByteLength();
90+
var input = (MemorySegmentAccessInput) values.getSlice();
91+
// Widen to long before multiplying so the byte offset is computed in 64-bit arithmetic.
long byteOffset = (long) targetOrd * (vectorByteSize + Float.BYTES);
92+
return Float.intBitsToFloat(input.readInt(byteOffset + vectorByteSize));
93+
}
94+
}

benchmarks/src/main/java/org/elasticsearch/benchmark/vector/scorer/VectorScorerInt7uBenchmark.java

Lines changed: 50 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -9,19 +9,13 @@
99

1010
package org.elasticsearch.benchmark.vector.scorer;
1111

12-
import org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorScorer;
13-
import org.apache.lucene.codecs.lucene99.OffHeapQuantizedByteVectorValues;
1412
import org.apache.lucene.index.VectorSimilarityFunction;
1513
import org.apache.lucene.store.Directory;
1614
import org.apache.lucene.store.IOContext;
1715
import org.apache.lucene.store.IndexInput;
18-
import org.apache.lucene.store.IndexOutput;
1916
import org.apache.lucene.store.MMapDirectory;
2017
import org.apache.lucene.util.hnsw.RandomVectorScorer;
21-
import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier;
2218
import org.apache.lucene.util.hnsw.UpdateableRandomVectorScorer;
23-
import org.apache.lucene.util.quantization.QuantizedByteVectorValues;
24-
import org.apache.lucene.util.quantization.ScalarQuantizer;
2519
import org.elasticsearch.common.logging.LogConfigurator;
2620
import org.elasticsearch.core.IOUtils;
2721
import org.elasticsearch.logging.LogManager;
@@ -42,23 +36,31 @@
4236

4337
import java.io.IOException;
4438
import java.nio.file.Files;
39+
import java.nio.file.Path;
4540
import java.util.concurrent.ThreadLocalRandom;
4641
import java.util.concurrent.TimeUnit;
4742

43+
import static org.elasticsearch.benchmark.vector.scorer.BenchmarkUtils.createRandomInt7VectorData;
44+
import static org.elasticsearch.benchmark.vector.scorer.BenchmarkUtils.getScorerFactoryOrDie;
45+
import static org.elasticsearch.benchmark.vector.scorer.BenchmarkUtils.luceneScoreSupplier;
46+
import static org.elasticsearch.benchmark.vector.scorer.BenchmarkUtils.luceneScorer;
47+
import static org.elasticsearch.benchmark.vector.scorer.BenchmarkUtils.readNodeCorrectionConstant;
48+
import static org.elasticsearch.benchmark.vector.scorer.BenchmarkUtils.supportsHeapSegments;
49+
import static org.elasticsearch.benchmark.vector.scorer.BenchmarkUtils.vectorValues;
4850
import static org.elasticsearch.simdvec.VectorSimilarityType.DOT_PRODUCT;
4951
import static org.elasticsearch.simdvec.VectorSimilarityType.EUCLIDEAN;
5052

53+
/**
54+
* Benchmark that compares various scalar quantized vector similarity function
55+
* implementations: scalar, lucene's panama-ized, and Elasticsearch's native.
56+
* Run with ./gradlew -p benchmarks run --args 'VectorScorerInt7uBenchmark'
57+
*/
5158
@Fork(value = 1, jvmArgsPrepend = { "--add-modules=jdk.incubator.vector" })
5259
@Warmup(iterations = 3, time = 3)
5360
@Measurement(iterations = 5, time = 3)
5461
@BenchmarkMode(Mode.Throughput)
5562
@OutputTimeUnit(TimeUnit.MICROSECONDS)
5663
@State(Scope.Thread)
57-
/**
58-
* Benchmark that compares various scalar quantized vector similarity function
59-
* implementations;: scalar, lucene's panama-ized, and Elasticsearch's native.
60-
* Run with ./gradlew -p benchmarks run --args 'Int7uScorerBenchmark'
61-
*/
6264
public class VectorScorerInt7uBenchmark {
6365

6466
static {
@@ -71,15 +73,16 @@ public class VectorScorerInt7uBenchmark {
7173

7274
@Param({ "96", "768", "1024" })
7375
public int dims;
74-
final int size = 2; // there are only two vectors to compare
76+
public int numVectors = 2; // there are only two vectors to compare
7577

78+
Path path;
7679
Directory dir;
7780
IndexInput in;
7881
VectorScorerFactory factory;
7982

8083
byte[] vec1, vec2;
81-
float vec1Offset;
82-
float vec2Offset;
84+
float vec1CorrectionConstant;
85+
float vec2CorrectionConstant;
8386
float scoreCorrectionConstant;
8487

8588
UpdateableRandomVectorScorer luceneDotScorer;
@@ -94,45 +97,34 @@ public class VectorScorerInt7uBenchmark {
9497

9598
@Setup
9699
public void setup() throws IOException {
97-
var optionalVectorScorerFactory = VectorScorerFactory.instance();
98-
if (optionalVectorScorerFactory.isEmpty()) {
99-
String msg = "JDK=["
100-
+ Runtime.version()
101-
+ "], os.name=["
102-
+ System.getProperty("os.name")
103-
+ "], os.arch=["
104-
+ System.getProperty("os.arch")
105-
+ "]";
106-
throw new AssertionError("Vector scorer factory not present. Cannot run the benchmark. " + msg);
107-
}
108-
factory = optionalVectorScorerFactory.get();
109-
vec1 = new byte[dims];
110-
vec2 = new byte[dims];
111-
112-
randomInt7BytesBetween(vec1);
113-
randomInt7BytesBetween(vec2);
114-
vec1Offset = ThreadLocalRandom.current().nextFloat();
115-
vec2Offset = ThreadLocalRandom.current().nextFloat();
116-
117-
dir = new MMapDirectory(Files.createTempDirectory("nativeScalarQuantBench"));
118-
try (IndexOutput out = dir.createOutput("vector.data", IOContext.DEFAULT)) {
119-
out.writeBytes(vec1, 0, vec1.length);
120-
out.writeInt(Float.floatToIntBits(vec1Offset));
121-
out.writeBytes(vec2, 0, vec2.length);
122-
out.writeInt(Float.floatToIntBits(vec2Offset));
123-
}
100+
factory = getScorerFactoryOrDie();
101+
102+
var random = ThreadLocalRandom.current();
103+
path = Files.createTempDirectory("Int7uScorerBenchmark");
104+
dir = new MMapDirectory(path);
105+
createRandomInt7VectorData(random, dir, dims, numVectors);
106+
124107
in = dir.openInput("vector.data", IOContext.DEFAULT);
125-
var values = vectorValues(dims, 2, in, VectorSimilarityFunction.DOT_PRODUCT);
126-
scoreCorrectionConstant = values.getScalarQuantizer().getConstantMultiplier();
127-
luceneDotScorer = luceneScoreSupplier(values, VectorSimilarityFunction.DOT_PRODUCT).scorer();
108+
final var dotProductValues = vectorValues(dims, numVectors, in, VectorSimilarityFunction.DOT_PRODUCT);
109+
scoreCorrectionConstant = dotProductValues.getScalarQuantizer().getConstantMultiplier();
110+
luceneDotScorer = luceneScoreSupplier(dotProductValues, VectorSimilarityFunction.DOT_PRODUCT).scorer();
128111
luceneDotScorer.setScoringOrdinal(0);
129-
values = vectorValues(dims, 2, in, VectorSimilarityFunction.EUCLIDEAN);
130-
luceneSqrScorer = luceneScoreSupplier(values, VectorSimilarityFunction.EUCLIDEAN).scorer();
131-
luceneSqrScorer.setScoringOrdinal(0);
132-
133-
nativeDotScorer = factory.getInt7SQVectorScorerSupplier(DOT_PRODUCT, in, values, scoreCorrectionConstant).get().scorer();
112+
nativeDotScorer = factory.getInt7SQVectorScorerSupplier(DOT_PRODUCT, in, dotProductValues, scoreCorrectionConstant)
113+
.orElseThrow()
114+
.scorer();
134115
nativeDotScorer.setScoringOrdinal(0);
135-
nativeSqrScorer = factory.getInt7SQVectorScorerSupplier(EUCLIDEAN, in, values, scoreCorrectionConstant).get().scorer();
116+
117+
vec1 = dotProductValues.vectorValue(0).clone();
118+
vec1CorrectionConstant = readNodeCorrectionConstant(dotProductValues, 0);
119+
vec2 = dotProductValues.vectorValue(1).clone();
120+
vec2CorrectionConstant = readNodeCorrectionConstant(dotProductValues, 1);
121+
122+
final var euclideanValues = vectorValues(dims, numVectors, in, VectorSimilarityFunction.EUCLIDEAN);
123+
luceneSqrScorer = luceneScoreSupplier(euclideanValues, VectorSimilarityFunction.EUCLIDEAN).scorer();
124+
luceneSqrScorer.setScoringOrdinal(0);
125+
nativeSqrScorer = factory.getInt7SQVectorScorerSupplier(EUCLIDEAN, in, euclideanValues, scoreCorrectionConstant)
126+
.orElseThrow()
127+
.scorer();
136128
nativeSqrScorer.setScoringOrdinal(0);
137129

138130
if (supportsHeapSegments()) {
@@ -141,16 +133,19 @@ public void setup() throws IOException {
141133
for (int i = 0; i < dims; i++) {
142134
queryVec[i] = ThreadLocalRandom.current().nextFloat();
143135
}
144-
luceneDotScorerQuery = luceneScorer(values, VectorSimilarityFunction.DOT_PRODUCT, queryVec);
145-
nativeDotScorerQuery = factory.getInt7SQVectorScorer(VectorSimilarityFunction.DOT_PRODUCT, values, queryVec).get();
146-
luceneSqrScorerQuery = luceneScorer(values, VectorSimilarityFunction.EUCLIDEAN, queryVec);
147-
nativeSqrScorerQuery = factory.getInt7SQVectorScorer(VectorSimilarityFunction.EUCLIDEAN, values, queryVec).get();
136+
luceneDotScorerQuery = luceneScorer(dotProductValues, VectorSimilarityFunction.DOT_PRODUCT, queryVec);
137+
nativeDotScorerQuery = factory.getInt7SQVectorScorer(VectorSimilarityFunction.DOT_PRODUCT, dotProductValues, queryVec)
138+
.orElseThrow();
139+
luceneSqrScorerQuery = luceneScorer(euclideanValues, VectorSimilarityFunction.EUCLIDEAN, queryVec);
140+
nativeSqrScorerQuery = factory.getInt7SQVectorScorer(VectorSimilarityFunction.EUCLIDEAN, euclideanValues, queryVec)
141+
.orElseThrow();
148142
}
149143
}
150144

151145
// Releases what setup() opened: closes the directory and the index input, then
// removes the temporary directory referenced by path via IOUtils.rm.
@TearDown
152146
public void teardown() throws IOException {
153147
IOUtils.close(dir, in);
148+
IOUtils.rm(path);
154149
}
155150

156151
@Benchmark
@@ -169,7 +164,7 @@ public float dotProductScalar() {
169164
for (int i = 0; i < vec1.length; i++) {
170165
dotProduct += vec1[i] * vec2[i];
171166
}
172-
float adjustedDistance = dotProduct * scoreCorrectionConstant + vec1Offset + vec2Offset;
167+
float adjustedDistance = dotProduct * scoreCorrectionConstant + vec1CorrectionConstant + vec2CorrectionConstant;
173168
return (1 + adjustedDistance) / 2;
174169
}
175170

@@ -215,33 +210,4 @@ public float squareDistanceLuceneQuery() throws IOException {
215210
public float squareDistanceNativeQuery() throws IOException {
216211
return nativeSqrScorerQuery.score(1);
217212
}
218-
219-
static boolean supportsHeapSegments() {
220-
return Runtime.version().feature() >= 22;
221-
}
222-
223-
QuantizedByteVectorValues vectorValues(int dims, int size, IndexInput in, VectorSimilarityFunction sim) throws IOException {
224-
var sq = new ScalarQuantizer(0.1f, 0.9f, (byte) 7);
225-
var slice = in.slice("values", 0, in.length());
226-
return new OffHeapQuantizedByteVectorValues.DenseOffHeapVectorValues(dims, size, sq, false, sim, null, slice);
227-
}
228-
229-
RandomVectorScorerSupplier luceneScoreSupplier(QuantizedByteVectorValues values, VectorSimilarityFunction sim) throws IOException {
230-
return new Lucene99ScalarQuantizedVectorScorer(null).getRandomVectorScorerSupplier(sim, values);
231-
}
232-
233-
RandomVectorScorer luceneScorer(QuantizedByteVectorValues values, VectorSimilarityFunction sim, float[] queryVec) throws IOException {
234-
return new Lucene99ScalarQuantizedVectorScorer(null).getRandomVectorScorer(sim, values, queryVec);
235-
}
236-
237-
// Unsigned int7 byte vectors have values in the range of 0 to 127 (inclusive).
238-
static final byte MIN_INT7_VALUE = 0;
239-
static final byte MAX_INT7_VALUE = 127;
240-
241-
static void randomInt7BytesBetween(byte[] bytes) {
242-
var random = ThreadLocalRandom.current();
243-
for (int i = 0, len = bytes.length; i < len;) {
244-
bytes[i++] = (byte) random.nextInt(MIN_INT7_VALUE, MAX_INT7_VALUE + 1);
245-
}
246-
}
247213
}

0 commit comments

Comments
 (0)