Skip to content

Commit 3776a01

Browse files
committed
Use a separate option
1 parent 799daaa commit 3776a01

File tree

5 files changed

+60
-46
lines changed

5 files changed

+60
-46
lines changed

server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormat.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsFormat;
2727
import org.apache.lucene.index.SegmentReadState;
2828
import org.apache.lucene.index.SegmentWriteState;
29-
import org.elasticsearch.core.SuppressForbidden;
3029
import org.elasticsearch.index.codec.vectors.OptimizedScalarQuantizer;
3130

3231
import java.io.IOException;

server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java

Lines changed: 44 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -388,7 +388,8 @@ private DenseVectorIndexOptions defaultIndexOptions(boolean defaultInt8Hnsw, boo
388388
return new BBQHnswIndexOptions(
389389
Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN,
390390
Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH,
391-
null
391+
new RescoreVector(DEFAULT_OVERSAMPLE),
392+
false
392393
);
393394
} else if (defaultInt8Hnsw) {
394395
return new Int8HnswIndexOptions(
@@ -1622,6 +1623,8 @@ public boolean supportsDimension(int dims) {
16221623
public DenseVectorIndexOptions parseIndexOptions(String fieldName, Map<String, ?> indexOptionsMap, IndexVersion indexVersion) {
16231624
Object mNode = indexOptionsMap.remove("m");
16241625
Object efConstructionNode = indexOptionsMap.remove("ef_construction");
1626+
Object useDirectIONode = indexOptionsMap.remove("use_direct_io");
1627+
16251628
if (mNode == null) {
16261629
mNode = Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN;
16271630
}
@@ -1630,12 +1633,19 @@ public DenseVectorIndexOptions parseIndexOptions(String fieldName, Map<String, ?
16301633
}
16311634
int m = XContentMapValues.nodeIntegerValue(mNode);
16321635
int efConstruction = XContentMapValues.nodeIntegerValue(efConstructionNode);
1636+
16331637
RescoreVector rescoreVector = null;
16341638
if (hasRescoreIndexVersion(indexVersion)) {
16351639
rescoreVector = RescoreVector.fromIndexOptions(indexOptionsMap, indexVersion);
1640+
if (rescoreVector == null && defaultOversampleForBBQ(indexVersion)) {
1641+
rescoreVector = new RescoreVector(DEFAULT_OVERSAMPLE);
1642+
}
16361643
}
1644+
1645+
// Parse the value that was removed from the map under "use_direct_io"; a missing option (null node) falls back to false.
boolean useDirectIO = XContentMapValues.nodeBooleanValue(useDirectIONode, false);
1646+
16371647
MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap);
1638-
return new BBQHnswIndexOptions(m, efConstruction, rescoreVector);
1648+
return new BBQHnswIndexOptions(m, efConstruction, rescoreVector, useDirectIO);
16391649
}
16401650

16411651
@Override
@@ -1654,6 +1664,9 @@ public DenseVectorIndexOptions parseIndexOptions(String fieldName, Map<String, ?
16541664
RescoreVector rescoreVector = null;
16551665
if (hasRescoreIndexVersion(indexVersion)) {
16561666
rescoreVector = RescoreVector.fromIndexOptions(indexOptionsMap, indexVersion);
1667+
if (rescoreVector == null && defaultOversampleForBBQ(indexVersion)) {
1668+
rescoreVector = new RescoreVector(DEFAULT_OVERSAMPLE);
1669+
}
16571670
}
16581671
MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap);
16591672
return new BBQFlatIndexOptions(rescoreVector);
@@ -1688,6 +1701,9 @@ public DenseVectorIndexOptions parseIndexOptions(String fieldName, Map<String, ?
16881701
}
16891702
}
16901703
RescoreVector rescoreVector = RescoreVector.fromIndexOptions(indexOptionsMap, indexVersion);
1704+
if (rescoreVector == null) {
1705+
rescoreVector = new RescoreVector(DEFAULT_OVERSAMPLE);
1706+
}
16911707
Object nProbeNode = indexOptionsMap.remove("default_n_probe");
16921708
int nProbe = -1;
16931709
if (nProbeNode != null) {
@@ -2165,18 +2181,19 @@ public String toString() {
21652181
public static class BBQHnswIndexOptions extends QuantizedIndexOptions {
21662182
private final int m;
21672183
private final int efConstruction;
2184+
private final boolean useDirectIO;
21682185

2169-
public BBQHnswIndexOptions(int m, int efConstruction, RescoreVector rescoreVector) {
2186+
public BBQHnswIndexOptions(int m, int efConstruction, RescoreVector rescoreVector, boolean useDirectIO) {
21702187
super(VectorIndexType.BBQ_HNSW, rescoreVector);
21712188
this.m = m;
21722189
this.efConstruction = efConstruction;
2190+
this.useDirectIO = useDirectIO;
21732191
}
21742192

21752193
@Override
21762194
KnnVectorsFormat getVectorsFormat(ElementType elementType) {
21772195
assert elementType == ElementType.FLOAT;
2178-
boolean directIO = rescoreVector != null && rescoreVector.useDirectIO != null && rescoreVector.useDirectIO;
2179-
return new ES818HnswBinaryQuantizedVectorsFormat(m, efConstruction, directIO);
2196+
return new ES818HnswBinaryQuantizedVectorsFormat(m, efConstruction, useDirectIO);
21802197
}
21812198

21822199
@Override
@@ -2187,12 +2204,15 @@ public boolean updatableTo(DenseVectorIndexOptions update) {
21872204
@Override
21882205
boolean doEquals(DenseVectorIndexOptions other) {
21892206
BBQHnswIndexOptions that = (BBQHnswIndexOptions) other;
2190-
return m == that.m && efConstruction == that.efConstruction && Objects.equals(rescoreVector, that.rescoreVector);
2207+
return m == that.m
2208+
&& efConstruction == that.efConstruction
2209+
&& Objects.equals(rescoreVector, that.rescoreVector)
2210+
&& useDirectIO == that.useDirectIO;
21912211
}
21922212

21932213
@Override
21942214
int doHashCode() {
2195-
return Objects.hash(m, efConstruction, rescoreVector);
2215+
return Objects.hash(m, efConstruction, rescoreVector, useDirectIO);
21962216
}
21972217

21982218
@Override
@@ -2206,6 +2226,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
22062226
builder.field("type", type);
22072227
builder.field("m", m);
22082228
builder.field("ef_construction", efConstruction);
2229+
if (useDirectIO) {
2230+
builder.field("use_direct_io", true);
2231+
}
22092232
if (rescoreVector != null) {
22102233
rescoreVector.toXContent(builder, params);
22112234
}
@@ -2335,46 +2358,36 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
23352358
}
23362359
}
23372360

2338-
public record RescoreVector(Float oversample, Boolean useDirectIO) implements ToXContentObject {
2361+
public record RescoreVector(float oversample) implements ToXContentObject {
23392362
static final String NAME = "rescore_vector";
23402363
static final String OVERSAMPLE = "oversample";
2341-
static final String DIRECT_IO = "direct_io";
23422364

23432365
static RescoreVector fromIndexOptions(Map<String, ?> indexOptionsMap, IndexVersion indexVersion) {
23442366
Object rescoreVectorNode = indexOptionsMap.remove(NAME);
23452367
if (rescoreVectorNode == null) {
23462368
return null;
23472369
}
23482370
Map<String, Object> mappedNode = XContentMapValues.nodeMapValue(rescoreVectorNode, NAME);
2349-
2350-
Float oversampleValue = null;
23512371
Object oversampleNode = mappedNode.get(OVERSAMPLE);
2352-
if (oversampleNode != null) {
2353-
oversampleValue = (float) XContentMapValues.nodeDoubleValue(oversampleNode);
2354-
if (oversampleValue == 0 && allowsZeroRescore(indexVersion) == false) {
2355-
throw new IllegalArgumentException("oversample must be greater than 1");
2356-
}
2357-
if (oversampleValue < 1 && oversampleValue != 0) {
2358-
throw new IllegalArgumentException("oversample must be greater than 1 or exactly 0");
2359-
} else if (oversampleValue > 10) {
2360-
throw new IllegalArgumentException("oversample must be less than or equal to 10");
2361-
}
2372+
if (oversampleNode == null) {
2373+
throw new IllegalArgumentException("Invalid rescore_vector value. Missing required field " + OVERSAMPLE);
23622374
}
2363-
2364-
Boolean directIO = (Boolean) mappedNode.get(DIRECT_IO);
2365-
2366-
return new RescoreVector(oversampleValue, directIO);
2375+
float oversampleValue = (float) XContentMapValues.nodeDoubleValue(oversampleNode);
2376+
if (oversampleValue == 0 && allowsZeroRescore(indexVersion) == false) {
2377+
throw new IllegalArgumentException("oversample must be greater than 1");
2378+
}
2379+
if (oversampleValue < 1 && oversampleValue != 0) {
2380+
throw new IllegalArgumentException("oversample must be greater than 1 or exactly 0");
2381+
} else if (oversampleValue > 10) {
2382+
throw new IllegalArgumentException("oversample must be less than or equal to 10");
2383+
}
2384+
return new RescoreVector(oversampleValue);
23672385
}
23682386

23692387
@Override
23702388
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
23712389
builder.startObject(NAME);
2372-
if (oversample != null) {
2373-
builder.field(OVERSAMPLE, oversample);
2374-
}
2375-
if (useDirectIO != null) {
2376-
builder.field(DIRECT_IO, useDirectIO);
2377-
}
2390+
builder.field(OVERSAMPLE, oversample);
23782391
builder.endObject();
23792392
return builder;
23802393
}
@@ -2716,10 +2729,6 @@ && isNotUnitVector(squaredMagnitude)) {
27162729
&& quantizedIndexOptions.rescoreVector != null) {
27172730
oversample = quantizedIndexOptions.rescoreVector.oversample;
27182731
}
2719-
if (oversample == null) {
2720-
oversample = DEFAULT_OVERSAMPLE;
2721-
}
2722-
27232732
boolean rescore = needsRescore(oversample);
27242733
if (rescore) {
27252734
// Will get k * oversample for rescoring, and get the top k

server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldTypeTests.java

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -55,10 +55,7 @@ public DenseVectorFieldTypeTests() {
5555
}
5656

5757
private static DenseVectorFieldMapper.RescoreVector randomRescoreVector() {
58-
return new DenseVectorFieldMapper.RescoreVector(
59-
randomBoolean() ? 0 : randomFloatBetween(1.0F, 10.0F, false),
60-
randomOptionalBoolean()
61-
);
58+
return new DenseVectorFieldMapper.RescoreVector(randomBoolean() ? 0 : randomFloatBetween(1.0F, 10.0F, false));
6259
}
6360

6461
private DenseVectorFieldMapper.DenseVectorIndexOptions randomIndexOptionsNonQuantized() {
@@ -95,7 +92,8 @@ public static DenseVectorFieldMapper.DenseVectorIndexOptions randomIndexOptionsA
9592
new DenseVectorFieldMapper.BBQHnswIndexOptions(
9693
randomIntBetween(1, 100),
9794
randomIntBetween(1, 10_000),
98-
randomFrom((DenseVectorFieldMapper.RescoreVector) null, randomRescoreVector())
95+
randomFrom((DenseVectorFieldMapper.RescoreVector) null, randomRescoreVector()),
96+
randomBoolean()
9997
),
10098
new DenseVectorFieldMapper.BBQFlatIndexOptions(randomFrom((DenseVectorFieldMapper.RescoreVector) null, randomRescoreVector()))
10199
);
@@ -121,7 +119,12 @@ private DenseVectorFieldMapper.DenseVectorIndexOptions randomIndexOptionsHnswQua
121119
randomFrom((Float) null, 0f, (float) randomDoubleBetween(0.9, 1.0, true)),
122120
rescoreVector
123121
),
124-
new DenseVectorFieldMapper.BBQHnswIndexOptions(randomIntBetween(1, 100), randomIntBetween(1, 10_000), rescoreVector)
122+
new DenseVectorFieldMapper.BBQHnswIndexOptions(
123+
randomIntBetween(1, 100),
124+
randomIntBetween(1, 10_000),
125+
rescoreVector,
126+
randomBoolean()
127+
)
125128
);
126129
}
127130

@@ -666,7 +669,7 @@ public void testRescoreOversampleQueryOverrides() {
666669
3,
667670
true,
668671
VectorSimilarity.COSINE,
669-
randomIndexOptionsHnswQuantized(new DenseVectorFieldMapper.RescoreVector(randomFloatBetween(1.1f, 9.9f, false), null)),
672+
randomIndexOptionsHnswQuantized(new DenseVectorFieldMapper.RescoreVector(randomFloatBetween(1.1f, 9.9f, false))),
670673
Collections.emptyMap(),
671674
false
672675
);
@@ -695,7 +698,7 @@ public void testRescoreOversampleQueryOverrides() {
695698
3,
696699
true,
697700
VectorSimilarity.COSINE,
698-
randomIndexOptionsHnswQuantized(new DenseVectorFieldMapper.RescoreVector(0f, null)),
701+
randomIndexOptionsHnswQuantized(new DenseVectorFieldMapper.RescoreVector(0)),
699702
Collections.emptyMap(),
700703
false
701704
);

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1254,7 +1254,8 @@ static boolean indexVersionDefaultsToBbqHnsw(IndexVersion indexVersion) {
12541254
public static DenseVectorFieldMapper.DenseVectorIndexOptions defaultBbqHnswDenseVectorIndexOptions() {
12551255
int m = Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN;
12561256
int efConstruction = Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH;
1257-
return new DenseVectorFieldMapper.BBQHnswIndexOptions(m, efConstruction, null);
1257+
DenseVectorFieldMapper.RescoreVector rescoreVector = new DenseVectorFieldMapper.RescoreVector(DEFAULT_RESCORE_OVERSAMPLE);
1258+
return new DenseVectorFieldMapper.BBQHnswIndexOptions(m, efConstruction, rescoreVector, false);
12581259
}
12591260

12601261
static SemanticTextIndexOptions defaultIndexOptions(IndexVersion indexVersionCreated, MinimalServiceSettings modelSettings) {

x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapperTests.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@
105105
import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.getChunksFieldName;
106106
import static org.elasticsearch.xpack.inference.mapper.SemanticTextField.getEmbeddingsFieldName;
107107
import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.DEFAULT_ELSER_2_INFERENCE_ID;
108+
import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.DEFAULT_RESCORE_OVERSAMPLE;
108109
import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldMapper.INDEX_OPTIONS_FIELD;
109110
import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldTests.generateRandomChunkingSettings;
110111
import static org.elasticsearch.xpack.inference.mapper.SemanticTextFieldTests.generateRandomChunkingSettingsOtherThan;
@@ -1184,7 +1185,8 @@ private static SemanticTextIndexOptions defaultDenseVectorSemanticIndexOptions()
11841185
private static DenseVectorFieldMapper.DenseVectorIndexOptions defaultBbqHnswDenseVectorIndexOptions() {
11851186
int m = Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN;
11861187
int efConstruction = Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH;
1187-
return new DenseVectorFieldMapper.BBQHnswIndexOptions(m, efConstruction, null);
1188+
DenseVectorFieldMapper.RescoreVector rescoreVector = new DenseVectorFieldMapper.RescoreVector(DEFAULT_RESCORE_OVERSAMPLE);
1189+
return new DenseVectorFieldMapper.BBQHnswIndexOptions(m, efConstruction, rescoreVector, false);
11881190
}
11891191

11901192
private static SemanticTextIndexOptions defaultBbqHnswSemanticTextIndexOptions() {

0 commit comments

Comments
 (0)