Skip to content

Commit a9189e3

Browse files
committed
Merge branch 'main' into lucene_snapshot
2 parents 124bf4b + 6edc76d commit a9189e3

File tree

184 files changed

+9928
-923
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

184 files changed

+9928
-923
lines changed

BUILDING.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ To wire this registered cluster into a `TestClusterAware` task (e.g. `RestIntegT
144144
Additional integration tests for a certain Elasticsearch modules that are specific to certain cluster configuration can be declared in a separate so called `qa` subproject of your module.
145145

146146
The benefit of a dedicated project for these tests are:
147-
- `qa` projects are dedicated two specific use-cases and easier to maintain
147+
- `qa` projects are dedicated to specific use-cases and easier to maintain
148148
- It keeps the specific test logic separated from the common test logic.
149149
- You can run those tests in parallel to other projects of the build.
150150

benchmarks/src/main/java/org/elasticsearch/benchmark/compute/operator/ValuesSourceReaderBenchmark.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import org.apache.lucene.util.NumericUtils;
2626
import org.elasticsearch.common.breaker.NoopCircuitBreaker;
2727
import org.elasticsearch.common.lucene.Lucene;
28+
import org.elasticsearch.common.settings.Settings;
2829
import org.elasticsearch.common.util.BigArrays;
2930
import org.elasticsearch.compute.data.BlockFactory;
3031
import org.elasticsearch.compute.data.BytesRefBlock;
@@ -50,6 +51,7 @@
5051
import org.elasticsearch.index.mapper.MappedFieldType;
5152
import org.elasticsearch.index.mapper.NumberFieldMapper;
5253
import org.elasticsearch.search.lookup.SearchLookup;
54+
import org.elasticsearch.xpack.esql.plugin.EsqlPlugin;
5355
import org.openjdk.jmh.annotations.Benchmark;
5456
import org.openjdk.jmh.annotations.BenchmarkMode;
5557
import org.openjdk.jmh.annotations.Fork;
@@ -335,7 +337,7 @@ public void benchmark() {
335337
fields(name),
336338
List.of(new ValuesSourceReaderOperator.ShardContext(reader, () -> {
337339
throw new UnsupportedOperationException("can't load _source here");
338-
})),
340+
}, EsqlPlugin.STORED_FIELDS_SEQUENTIAL_PROPORTION.getDefault(Settings.EMPTY))),
339341
0
340342
);
341343
long sum = 0;
Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
package org.elasticsearch.benchmark.vector;
10+
11+
import org.apache.lucene.index.VectorSimilarityFunction;
12+
import org.apache.lucene.store.Directory;
13+
import org.apache.lucene.store.IOContext;
14+
import org.apache.lucene.store.IndexInput;
15+
import org.apache.lucene.store.IndexOutput;
16+
import org.apache.lucene.store.MMapDirectory;
17+
import org.apache.lucene.util.VectorUtil;
18+
import org.apache.lucene.util.quantization.OptimizedScalarQuantizer;
19+
import org.elasticsearch.common.logging.LogConfigurator;
20+
import org.elasticsearch.simdvec.internal.vectorization.ES91OSQVectorsScorer;
21+
import org.elasticsearch.simdvec.internal.vectorization.ESVectorizationProvider;
22+
import org.openjdk.jmh.annotations.Benchmark;
23+
import org.openjdk.jmh.annotations.BenchmarkMode;
24+
import org.openjdk.jmh.annotations.Fork;
25+
import org.openjdk.jmh.annotations.Measurement;
26+
import org.openjdk.jmh.annotations.Mode;
27+
import org.openjdk.jmh.annotations.OutputTimeUnit;
28+
import org.openjdk.jmh.annotations.Param;
29+
import org.openjdk.jmh.annotations.Scope;
30+
import org.openjdk.jmh.annotations.Setup;
31+
import org.openjdk.jmh.annotations.State;
32+
import org.openjdk.jmh.annotations.Warmup;
33+
import org.openjdk.jmh.infra.Blackhole;
34+
35+
import java.io.IOException;
36+
import java.nio.file.Files;
37+
import java.util.Random;
38+
import java.util.concurrent.TimeUnit;
39+
40+
@BenchmarkMode(Mode.Throughput)
41+
@OutputTimeUnit(TimeUnit.MILLISECONDS)
42+
@State(Scope.Benchmark)
43+
// first iteration is complete garbage, so make sure we really warmup
44+
@Warmup(iterations = 4, time = 1)
45+
// real iterations. not useful to spend tons of time here, better to fork more
46+
@Measurement(iterations = 5, time = 1)
47+
// engage some noise reduction
48+
@Fork(value = 1)
49+
public class OSQScorerBenchmark {
50+
51+
static {
52+
LogConfigurator.configureESLogging(); // native access requires logging to be initialized
53+
}
54+
55+
@Param({ "1024" })
56+
int dims;
57+
58+
int length;
59+
60+
int numVectors = ES91OSQVectorsScorer.BULK_SIZE * 10;
61+
int numQueries = 10;
62+
63+
byte[][] binaryVectors;
64+
byte[][] binaryQueries;
65+
OptimizedScalarQuantizer.QuantizationResult result;
66+
float centroidDp;
67+
68+
byte[] scratch;
69+
ES91OSQVectorsScorer scorer;
70+
71+
IndexInput in;
72+
73+
float[] scratchScores;
74+
float[] corrections;
75+
76+
@Setup
77+
public void setup() throws IOException {
78+
Random random = new Random(123);
79+
80+
this.length = OptimizedScalarQuantizer.discretize(dims, 64) / 8;
81+
82+
binaryVectors = new byte[numVectors][length];
83+
for (byte[] binaryVector : binaryVectors) {
84+
random.nextBytes(binaryVector);
85+
}
86+
87+
Directory dir = new MMapDirectory(Files.createTempDirectory("vectorData"));
88+
IndexOutput out = dir.createOutput("vectors", IOContext.DEFAULT);
89+
byte[] correctionBytes = new byte[14 * ES91OSQVectorsScorer.BULK_SIZE];
90+
for (int i = 0; i < numVectors; i += ES91OSQVectorsScorer.BULK_SIZE) {
91+
for (int j = 0; j < ES91OSQVectorsScorer.BULK_SIZE; j++) {
92+
out.writeBytes(binaryVectors[i + j], 0, binaryVectors[i + j].length);
93+
}
94+
random.nextBytes(correctionBytes);
95+
out.writeBytes(correctionBytes, 0, correctionBytes.length);
96+
}
97+
out.close();
98+
in = dir.openInput("vectors", IOContext.DEFAULT);
99+
100+
binaryQueries = new byte[numVectors][4 * length];
101+
for (byte[] binaryVector : binaryVectors) {
102+
random.nextBytes(binaryVector);
103+
}
104+
result = new OptimizedScalarQuantizer.QuantizationResult(
105+
random.nextFloat(),
106+
random.nextFloat(),
107+
random.nextFloat(),
108+
Short.toUnsignedInt((short) random.nextInt())
109+
);
110+
centroidDp = random.nextFloat();
111+
112+
scratch = new byte[length];
113+
scorer = ESVectorizationProvider.getInstance().newES91OSQVectorsScorer(in, dims);
114+
scratchScores = new float[16];
115+
corrections = new float[3];
116+
}
117+
118+
@Benchmark
119+
@Fork(jvmArgsPrepend = { "--add-modules=jdk.incubator.vector" })
120+
public void scoreFromArray(Blackhole bh) throws IOException {
121+
for (int j = 0; j < numQueries; j++) {
122+
in.seek(0);
123+
for (int i = 0; i < numVectors; i++) {
124+
in.readBytes(scratch, 0, length);
125+
float qDist = VectorUtil.int4BitDotProduct(binaryQueries[j], scratch);
126+
in.readFloats(corrections, 0, corrections.length);
127+
int addition = Short.toUnsignedInt(in.readShort());
128+
float score = scorer.score(
129+
result,
130+
VectorSimilarityFunction.EUCLIDEAN,
131+
centroidDp,
132+
corrections[0],
133+
corrections[1],
134+
addition,
135+
corrections[2],
136+
qDist
137+
);
138+
bh.consume(score);
139+
}
140+
}
141+
}
142+
143+
@Benchmark
144+
@Fork(jvmArgsPrepend = { "--add-modules=jdk.incubator.vector" })
145+
public void scoreFromMemorySegmentOnlyVector(Blackhole bh) throws IOException {
146+
for (int j = 0; j < numQueries; j++) {
147+
in.seek(0);
148+
for (int i = 0; i < numVectors; i++) {
149+
float qDist = scorer.quantizeScore(binaryQueries[j]);
150+
in.readFloats(corrections, 0, corrections.length);
151+
int addition = Short.toUnsignedInt(in.readShort());
152+
float score = scorer.score(
153+
result,
154+
VectorSimilarityFunction.EUCLIDEAN,
155+
centroidDp,
156+
corrections[0],
157+
corrections[1],
158+
addition,
159+
corrections[2],
160+
qDist
161+
);
162+
bh.consume(score);
163+
}
164+
}
165+
}
166+
167+
@Benchmark
168+
@Fork(jvmArgsPrepend = { "--add-modules=jdk.incubator.vector" })
169+
public void scoreFromMemorySegmentOnlyVectorBulk(Blackhole bh) throws IOException {
170+
for (int j = 0; j < numQueries; j++) {
171+
in.seek(0);
172+
for (int i = 0; i < numVectors; i += 16) {
173+
scorer.quantizeScoreBulk(binaryQueries[j], ES91OSQVectorsScorer.BULK_SIZE, scratchScores);
174+
for (int k = 0; k < ES91OSQVectorsScorer.BULK_SIZE; k++) {
175+
in.readFloats(corrections, 0, corrections.length);
176+
int addition = Short.toUnsignedInt(in.readShort());
177+
float score = scorer.score(
178+
result,
179+
VectorSimilarityFunction.EUCLIDEAN,
180+
centroidDp,
181+
corrections[0],
182+
corrections[1],
183+
addition,
184+
corrections[2],
185+
scratchScores[k]
186+
);
187+
bh.consume(score);
188+
}
189+
}
190+
}
191+
}
192+
193+
@Benchmark
194+
@Fork(jvmArgsPrepend = { "--add-modules=jdk.incubator.vector" })
195+
public void scoreFromMemorySegmentAllBulk(Blackhole bh) throws IOException {
196+
for (int j = 0; j < numQueries; j++) {
197+
in.seek(0);
198+
for (int i = 0; i < numVectors; i += 16) {
199+
scorer.scoreBulk(binaryQueries[j], result, VectorSimilarityFunction.EUCLIDEAN, centroidDp, scratchScores);
200+
bh.consume(scratchScores);
201+
}
202+
}
203+
}
204+
}

docs/changelog/124708.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 124708
2+
summary: Throw exception for unknown token in RestIndexPutAliasAction
3+
area: Indices APIs
4+
type: enhancement
5+
issues: []

docs/changelog/124737.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 124737
2+
summary: Throw exception for unsupported values type in Alias
3+
area: Indices APIs
4+
type: enhancement
5+
issues: []

docs/changelog/125922.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 125922
2+
summary: Fix text structure NPE when fields in list have null value
3+
area: Machine Learning
4+
type: bug
5+
issues: []

docs/changelog/126286.yaml

Lines changed: 0 additions & 6 deletions
This file was deleted.

docs/changelog/126629.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 126629
2+
summary: Default new `semantic_text` fields to use BBQ when models are compatible
3+
area: Relevance
4+
type: enhancement
5+
issues: []

docs/changelog/127134.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 127134
2+
summary: Define a default oversample value for dense vectors with bbq_hnsw/bbq_flat
3+
area: Vector Search
4+
type: enhancement
5+
issues: []

docs/changelog/127139.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 127139
2+
summary: Add `suggested_cast`
3+
area: ES|QL
4+
type: enhancement
5+
issues: []

0 commit comments

Comments
 (0)