Skip to content

Commit 01bd097

Browse files
committed
Merge branch 'main' into fix-multiple-reduce-errors-assertion
2 parents 476eeb8 + 789101b commit 01bd097

File tree

181 files changed

+4711
-3063
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

181 files changed

+4711
-3063
lines changed

benchmarks/src/main/java/org/elasticsearch/benchmark/vector/Int4ScorerBenchmark.java

Lines changed: 72 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,14 @@
88
*/
99
package org.elasticsearch.benchmark.vector;
1010

11+
import org.apache.lucene.index.VectorSimilarityFunction;
1112
import org.apache.lucene.store.Directory;
1213
import org.apache.lucene.store.IOContext;
1314
import org.apache.lucene.store.IndexInput;
1415
import org.apache.lucene.store.IndexOutput;
1516
import org.apache.lucene.store.MMapDirectory;
1617
import org.apache.lucene.util.VectorUtil;
18+
import org.apache.lucene.util.quantization.OptimizedScalarQuantizer;
1719
import org.elasticsearch.common.logging.LogConfigurator;
1820
import org.elasticsearch.core.IOUtils;
1921
import org.elasticsearch.simdvec.ES91Int4VectorsScorer;
@@ -52,20 +54,26 @@ public class Int4ScorerBenchmark {
5254
LogConfigurator.configureESLogging(); // native access requires logging to be initialized
5355
}
5456

55-
@Param({ "384", "702", "1024" })
57+
@Param({ "384", "782", "1024" })
5658
int dims;
5759

58-
int numVectors = 200;
59-
int numQueries = 10;
60+
int numVectors = 20 * ES91Int4VectorsScorer.BULK_SIZE;
61+
int numQueries = 5;
6062

6163
byte[] scratch;
6264
byte[][] binaryVectors;
6365
byte[][] binaryQueries;
66+
float[] scores = new float[ES91Int4VectorsScorer.BULK_SIZE];
67+
68+
float[] scratchFloats = new float[3];
6469

6570
ES91Int4VectorsScorer scorer;
6671
Directory dir;
6772
IndexInput in;
6873

74+
OptimizedScalarQuantizer.QuantizationResult queryCorrections;
75+
float centroidDp;
76+
6977
@Setup
7078
public void setup() throws IOException {
7179
binaryVectors = new byte[numVectors][dims];
@@ -77,9 +85,19 @@ public void setup() throws IOException {
7785
binaryVector[i] = (byte) ThreadLocalRandom.current().nextInt(16);
7886
}
7987
out.writeBytes(binaryVector, 0, binaryVector.length);
88+
ThreadLocalRandom.current().nextBytes(binaryVector);
89+
out.writeBytes(binaryVector, 0, 14); // corrections
8090
}
8191
}
8292

93+
queryCorrections = new OptimizedScalarQuantizer.QuantizationResult(
94+
ThreadLocalRandom.current().nextFloat(),
95+
ThreadLocalRandom.current().nextFloat(),
96+
ThreadLocalRandom.current().nextFloat(),
97+
Short.toUnsignedInt((short) ThreadLocalRandom.current().nextInt())
98+
);
99+
centroidDp = ThreadLocalRandom.current().nextFloat();
100+
83101
in = dir.openInput("vectors", IOContext.DEFAULT);
84102
binaryQueries = new byte[numVectors][dims];
85103
for (byte[] binaryVector : binaryVectors) {
@@ -105,18 +123,66 @@ public void scoreFromArray(Blackhole bh) throws IOException {
105123
in.seek(0);
106124
for (int i = 0; i < numVectors; i++) {
107125
in.readBytes(scratch, 0, dims);
108-
bh.consume(VectorUtil.int4DotProduct(binaryQueries[j], scratch));
126+
int dp = VectorUtil.int4DotProduct(binaryQueries[j], scratch);
127+
in.readFloats(scratchFloats, 0, 3);
128+
float score = scorer.applyCorrections(
129+
queryCorrections.lowerInterval(),
130+
queryCorrections.upperInterval(),
131+
queryCorrections.quantizedComponentSum(),
132+
queryCorrections.additionalCorrection(),
133+
VectorSimilarityFunction.EUCLIDEAN,
134+
centroidDp, // assuming no centroid dot product for this benchmark
135+
scratchFloats[0],
136+
scratchFloats[1],
137+
Short.toUnsignedInt(in.readShort()),
138+
scratchFloats[2],
139+
dp
140+
);
141+
bh.consume(score);
109142
}
110143
}
111144
}
112145

113146
@Benchmark
114147
@Fork(jvmArgsPrepend = { "--add-modules=jdk.incubator.vector" })
115-
public void scoreFromMemorySegmentOnlyVector(Blackhole bh) throws IOException {
148+
public void scoreFromMemorySegment(Blackhole bh) throws IOException {
116149
for (int j = 0; j < numQueries; j++) {
117150
in.seek(0);
118151
for (int i = 0; i < numVectors; i++) {
119-
bh.consume(scorer.int4DotProduct(binaryQueries[j]));
152+
bh.consume(
153+
scorer.score(
154+
binaryQueries[j],
155+
queryCorrections.lowerInterval(),
156+
queryCorrections.upperInterval(),
157+
queryCorrections.quantizedComponentSum(),
158+
queryCorrections.additionalCorrection(),
159+
VectorSimilarityFunction.EUCLIDEAN,
160+
centroidDp
161+
)
162+
);
163+
}
164+
}
165+
}
166+
167+
@Benchmark
168+
@Fork(jvmArgsPrepend = { "--add-modules=jdk.incubator.vector" })
169+
public void scoreFromMemorySegmentBulk(Blackhole bh) throws IOException {
170+
for (int j = 0; j < numQueries; j++) {
171+
in.seek(0);
172+
for (int i = 0; i < numVectors; i += ES91Int4VectorsScorer.BULK_SIZE) {
173+
scorer.scoreBulk(
174+
binaryQueries[j],
175+
queryCorrections.lowerInterval(),
176+
queryCorrections.upperInterval(),
177+
queryCorrections.quantizedComponentSum(),
178+
queryCorrections.additionalCorrection(),
179+
VectorSimilarityFunction.EUCLIDEAN,
180+
centroidDp,
181+
scores
182+
);
183+
for (float score : scores) {
184+
bh.consume(score);
185+
}
120186
}
121187
}
122188
}

distribution/tools/server-cli/src/main/java/org/elasticsearch/server/cli/MachineDependentHeap.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ public class MachineDependentHeap {
4040

4141
private static final FeatureFlag NEW_ML_MEMORY_COMPUTATION_FEATURE_FLAG = new FeatureFlag("new_ml_memory_computation");
4242

43+
private boolean useNewMlMemoryComputation = false;
44+
4345
public MachineDependentHeap() {}
4446

4547
/**
@@ -55,6 +57,11 @@ public final List<String> determineHeapSettings(
5557
SystemMemoryInfo systemMemoryInfo,
5658
List<String> userDefinedJvmOptions
5759
) throws IOException, InterruptedException {
60+
if (userDefinedJvmOptions.contains("-Des.new_ml_memory_computation_feature_flag_enabled=true")
61+
|| NEW_ML_MEMORY_COMPUTATION_FEATURE_FLAG.isEnabled()) {
62+
useNewMlMemoryComputation = true;
63+
}
64+
5865
// TODO: this could be more efficient, to only parse final options once
5966
final Map<String, JvmOption> finalJvmOptions = JvmOption.findFinalOptions(userDefinedJvmOptions);
6067
if (isMaxHeapSpecified(finalJvmOptions) || isMinHeapSpecified(finalJvmOptions) || isInitialHeapSpecified(finalJvmOptions)) {
@@ -107,7 +114,7 @@ protected int getHeapSizeMb(Settings nodeSettings, MachineNodeRole role, long av
107114
case ML_ONLY -> {
108115
double heapFractionBelow16GB = 0.4;
109116
double heapFractionAbove16GB = 0.1;
110-
if (NEW_ML_MEMORY_COMPUTATION_FEATURE_FLAG.isEnabled()) {
117+
if (useNewMlMemoryComputation) {
111118
heapFractionBelow16GB = 0.4 / (1.0 + JvmErgonomics.DIRECT_MEMORY_TO_HEAP_FACTOR);
112119
heapFractionAbove16GB = 0.1 / (1.0 + JvmErgonomics.DIRECT_MEMORY_TO_HEAP_FACTOR);
113120
}

docs/changelog/130947.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 130947
2+
summary: "[main]Prepare Index Like fix for backport to 9.1 and 8.19"
3+
area: ES|QL
4+
type: bug
5+
issues: []

docs/changelog/131015.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 131015
2+
summary: Move streams status actions to cluster:monitor group
3+
area: Data streams
4+
type: bug
5+
issues: []

docs/changelog/131032.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 131032
2+
summary: "Fix: `GET _synonyms` returns synonyms with empty rules"
3+
area: Relevance
4+
type: bug
5+
issues: []

docs/changelog/131056.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 131056
2+
summary: Add existing shards allocator settings to failure store allowed list
3+
area: Data streams
4+
type: bug
5+
issues: []

docs/changelog/131081.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 131081
2+
summary: Fix knn search error when dimensions are not set
3+
area: Vector Search
4+
type: bug
5+
issues:
6+
- 129550

docs/changelog/131111.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 131111
2+
summary: Don't allow field caps to use semantic queries as index filters
3+
area: Search
4+
type: bug
5+
issues: []

docs/changelog/131113.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 131113
2+
summary: Including `max_tokens` through the Service API for Anthropic
3+
area: Machine Learning
4+
type: bug
5+
issues: []

docs/reference/query-languages/esql/limitations.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,3 +250,6 @@ Work around this limitation by converting the field to single value with one of
250250
* CSV export from Discover shows no more than 10,000 rows. This limit only applies to the number of rows that are retrieved by the query and displayed in Discover. Queries and aggregations run on the full data set.
251251
* Querying many indices at once without any filters can cause an error in kibana which looks like `[esql] > Unexpected error from Elasticsearch: The content length (536885793) is bigger than the maximum allowed string (536870888)`. The response from {{esql}} is too long. Use [`DROP`](/reference/query-languages/esql/commands/processing-commands.md#esql-drop) or [`KEEP`](/reference/query-languages/esql/commands/processing-commands.md#esql-keep) to limit the number of fields returned.
252252

253+
## Known issues [esql-known-issues]
254+
255+
Refer to [Known issues](/release-notes/known-issues.md) for a list of known issues for {{esql}}.

0 commit comments

Comments
 (0)