Skip to content

Commit 4f810be

Browse files
authored
Merge branch 'main' into sagemaker-elastic-update-fix
2 parents aae6051 + 1832248 commit 4f810be

File tree

7 files changed

+329
-86
lines changed

7 files changed

+329
-86
lines changed

docs/reference/query-languages/esql/_snippets/functions/layout/match_phrase.md

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

server/src/main/java/org/elasticsearch/index/codec/vectors/CentroidAssignments.java

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,11 @@
99

1010
package org.elasticsearch.index.codec.vectors;
1111

12-
record CentroidAssignments(int numCentroids, float[][] centroids, int[][] assignmentsByCluster) {
12+
record CentroidAssignments(int numCentroids, float[][] centroids, int[] assignments, int[] overspillAssignments) {
1313

14-
CentroidAssignments(float[][] centroids, int[][] assignmentsByCluster) {
15-
this(centroids.length, centroids, assignmentsByCluster);
16-
assert centroids.length == assignmentsByCluster.length;
14+
CentroidAssignments(float[][] centroids, int[] assignments, int[] overspillAssignments) {
15+
this(centroids.length, centroids, assignments, overspillAssignments);
16+
assert assignments.length == overspillAssignments.length || overspillAssignments.length == 0
17+
: "assignments and overspillAssignments must have the same length";
1718
}
1819
}

server/src/main/java/org/elasticsearch/index/codec/vectors/DefaultIVFVectorsWriter.java

Lines changed: 283 additions & 35 deletions
Large diffs are not rendered by default.

server/src/main/java/org/elasticsearch/index/codec/vectors/DiskBBQBulkWriter.java

Lines changed: 15 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -9,34 +9,25 @@
99

1010
package org.elasticsearch.index.codec.vectors;
1111

12-
import org.apache.lucene.index.FloatVectorValues;
1312
import org.apache.lucene.store.IndexOutput;
14-
import org.apache.lucene.util.hnsw.IntToIntFunction;
1513

1614
import java.io.IOException;
1715

18-
import static org.elasticsearch.index.codec.vectors.BQVectorUtils.discretize;
19-
import static org.elasticsearch.index.codec.vectors.BQVectorUtils.packAsBinary;
20-
2116
/**
2217
* Base class for bulk writers that write vectors to disk using the BBQ encoding.
2318
* This class provides the structure for writing vectors in bulk, with specific
2419
* implementations for different bit sizes strategies.
2520
*/
26-
public abstract class DiskBBQBulkWriter {
21+
abstract class DiskBBQBulkWriter {
2722
protected final int bulkSize;
28-
protected final OptimizedScalarQuantizer quantizer;
2923
protected final IndexOutput out;
30-
protected final FloatVectorValues fvv;
3124

32-
protected DiskBBQBulkWriter(int bulkSize, OptimizedScalarQuantizer quantizer, FloatVectorValues fvv, IndexOutput out) {
25+
protected DiskBBQBulkWriter(int bulkSize, IndexOutput out) {
3326
this.bulkSize = bulkSize;
34-
this.quantizer = quantizer;
3527
this.out = out;
36-
this.fvv = fvv;
3728
}
3829

39-
public abstract void writeOrds(IntToIntFunction ords, int count, float[] centroid) throws IOException;
30+
abstract void writeVectors(DefaultIVFVectorsWriter.QuantizedVectorValues qvv) throws IOException;
4031

4132
private static void writeCorrections(OptimizedScalarQuantizer.QuantizationResult[] corrections, IndexOutput out) throws IOException {
4233
for (OptimizedScalarQuantizer.QuantizationResult correction : corrections) {
@@ -64,39 +55,31 @@ private static void writeCorrection(OptimizedScalarQuantizer.QuantizationResult
6455
out.writeShort((short) targetComponentSum);
6556
}
6657

67-
public static class OneBitDiskBBQBulkWriter extends DiskBBQBulkWriter {
68-
private final byte[] binarized;
69-
private final int[] initQuantized;
58+
static class OneBitDiskBBQBulkWriter extends DiskBBQBulkWriter {
7059
private final OptimizedScalarQuantizer.QuantizationResult[] corrections;
7160

72-
public OneBitDiskBBQBulkWriter(int bulkSize, OptimizedScalarQuantizer quantizer, FloatVectorValues fvv, IndexOutput out) {
73-
super(bulkSize, quantizer, fvv, out);
74-
this.binarized = new byte[discretize(fvv.dimension(), 64) / 8];
75-
this.initQuantized = new int[fvv.dimension()];
61+
OneBitDiskBBQBulkWriter(int bulkSize, IndexOutput out) {
62+
super(bulkSize, out);
7663
this.corrections = new OptimizedScalarQuantizer.QuantizationResult[bulkSize];
7764
}
7865

7966
@Override
80-
public void writeOrds(IntToIntFunction ords, int count, float[] centroid) throws IOException {
81-
int limit = count - bulkSize + 1;
67+
void writeVectors(DefaultIVFVectorsWriter.QuantizedVectorValues qvv) throws IOException {
68+
int limit = qvv.count() - bulkSize + 1;
8269
int i = 0;
8370
for (; i < limit; i += bulkSize) {
8471
for (int j = 0; j < bulkSize; j++) {
85-
int ord = ords.apply(i + j);
86-
float[] fv = fvv.vectorValue(ord);
87-
corrections[j] = quantizer.scalarQuantize(fv, initQuantized, (byte) 1, centroid);
88-
packAsBinary(initQuantized, binarized);
89-
out.writeBytes(binarized, binarized.length);
72+
byte[] qv = qvv.next();
73+
corrections[j] = qvv.getCorrections();
74+
out.writeBytes(qv, qv.length);
9075
}
9176
writeCorrections(corrections, out);
9277
}
9378
// write tail
94-
for (; i < count; ++i) {
95-
int ord = ords.apply(i);
96-
float[] fv = fvv.vectorValue(ord);
97-
OptimizedScalarQuantizer.QuantizationResult correction = quantizer.scalarQuantize(fv, initQuantized, (byte) 1, centroid);
98-
packAsBinary(initQuantized, binarized);
99-
out.writeBytes(binarized, binarized.length);
79+
for (; i < qvv.count(); ++i) {
80+
byte[] qv = qvv.next();
81+
OptimizedScalarQuantizer.QuantizationResult correction = qvv.getCorrections();
82+
out.writeBytes(qv, qv.length);
10083
writeCorrection(correction, out);
10184
}
10285
}

server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsWriter.java

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,18 @@ abstract long[] buildAndWritePostingsLists(
139139
CentroidSupplier centroidSupplier,
140140
FloatVectorValues floatVectorValues,
141141
IndexOutput postingsOutput,
142-
int[][] assignmentsByCluster
142+
int[] assignments,
143+
int[] overspillAssignments
144+
) throws IOException;
145+
146+
abstract long[] buildAndWritePostingsLists(
147+
FieldInfo fieldInfo,
148+
CentroidSupplier centroidSupplier,
149+
FloatVectorValues floatVectorValues,
150+
IndexOutput postingsOutput,
151+
MergeState mergeState,
152+
int[] assignments,
153+
int[] overspillAssignments
143154
) throws IOException;
144155

145156
abstract CentroidSupplier createCentroidSupplier(
@@ -174,7 +185,8 @@ public final void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException {
174185
centroidSupplier,
175186
floatVectorValues,
176187
ivfClusters,
177-
centroidAssignments.assignmentsByCluster()
188+
centroidAssignments.assignments(),
189+
centroidAssignments.overspillAssignments()
178190
);
179191
// write posting lists
180192
writeMeta(fieldWriter.fieldInfo, centroidOffset, centroidLength, offsets, globalCentroid);
@@ -284,7 +296,8 @@ private void mergeOneFieldIVF(FieldInfo fieldInfo, MergeState mergeState) throws
284296
final long centroidOffset;
285297
final long centroidLength;
286298
final int numCentroids;
287-
final int[][] assignmentsByCluster;
299+
final int[] assignments;
300+
final int[] overspillAssignments;
288301
final float[] calculatedGlobalCentroid = new float[fieldInfo.getVectorDimension()];
289302
String centroidTempName = null;
290303
IndexOutput centroidTemp = null;
@@ -300,7 +313,8 @@ private void mergeOneFieldIVF(FieldInfo fieldInfo, MergeState mergeState) throws
300313
calculatedGlobalCentroid
301314
);
302315
numCentroids = centroidAssignments.numCentroids();
303-
assignmentsByCluster = centroidAssignments.assignmentsByCluster();
316+
assignments = centroidAssignments.assignments();
317+
overspillAssignments = centroidAssignments.overspillAssignments();
304318
success = true;
305319
} finally {
306320
if (success == false && centroidTempName != null) {
@@ -337,7 +351,9 @@ private void mergeOneFieldIVF(FieldInfo fieldInfo, MergeState mergeState) throws
337351
centroidSupplier,
338352
floatVectorValues,
339353
ivfClusters,
340-
assignmentsByCluster
354+
mergeState,
355+
assignments,
356+
overspillAssignments
341357
);
342358
assert offsets.length == centroidSupplier.size();
343359
writeMeta(fieldInfo, centroidOffset, centroidLength, offsets, calculatedGlobalCentroid);

x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/plugin/DataNodeRequestSenderIT.java

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import org.elasticsearch.action.index.IndexRequest;
1111
import org.elasticsearch.action.support.WriteRequest;
1212
import org.elasticsearch.cluster.metadata.IndexMetadata;
13+
import org.elasticsearch.cluster.metadata.ProjectId;
1314
import org.elasticsearch.common.settings.Settings;
1415
import org.elasticsearch.common.util.CollectionUtils;
1516
import org.elasticsearch.compute.operator.exchange.ExchangeService;
@@ -131,12 +132,8 @@ public void testRetryOnShardMovement() {
131132
(handler, request, channel, task) -> {
132133
// move index shard
133134
if (shouldMove.compareAndSet(true, false)) {
134-
var currentShardNodeId = clusterService().state()
135-
.routingTable()
136-
.index("index-1")
137-
.shard(0)
138-
.primaryShard()
139-
.currentNodeId();
135+
var shardRouting = clusterService().state().routingTable(ProjectId.DEFAULT).shardRoutingTable("index-1", 0);
136+
var currentShardNodeId = shardRouting.primaryShard().currentNodeId();
140137
assertAcked(
141138
client().admin()
142139
.indices()

x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/fulltext/MatchPhrase.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,9 +90,7 @@ public class MatchPhrase extends FullTextFunction implements OptionalArgument, P
9090

9191
@FunctionInfo(
9292
returnType = "boolean",
93-
appliesTo = {
94-
@FunctionAppliesTo(lifeCycle = FunctionAppliesToLifecycle.UNAVAILABLE, version = "9.0"),
95-
@FunctionAppliesTo(lifeCycle = FunctionAppliesToLifecycle.GA, version = "9.1.0") },
93+
appliesTo = { @FunctionAppliesTo(lifeCycle = FunctionAppliesToLifecycle.GA, version = "9.1.0") },
9694
description = """
9795
Use `MATCH_PHRASE` to perform a [`match_phrase`](/reference/query-languages/query-dsl/query-dsl-match-query-phrase.md) on the
9896
specified field.

0 commit comments

Comments
 (0)