Skip to content

Commit bf48533

Browse files
committed
Enable direct IO for disk BBQ
1 parent eb8f9cd commit bf48533

File tree

7 files changed

+53
-20
lines changed

7 files changed

+53
-20
lines changed

server/src/internalClusterTest/java/org/elasticsearch/index/store/DirectIOIT.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ protected Collection<Class<? extends Plugin>> nodePlugins() {
7474

7575
private String indexVectors(boolean directIO) {
7676
String indexName = "test-vectors-" + directIO;
77-
String type = randomFrom("bbq_hnsw"/*, "bbq_disk"*/);
77+
String type = randomFrom("bbq_hnsw", "bbq_disk");
7878
assertAcked(
7979
prepareCreate(indexName).setSettings(Settings.builder().put(InternalSettingsPlugin.USE_COMPOUND_FILE.getKey(), false))
8080
.setMapping(Strings.format("""

server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsFormat.java

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,11 @@
1414
import org.apache.lucene.codecs.KnnVectorsWriter;
1515
import org.apache.lucene.codecs.hnsw.FlatVectorScorerUtil;
1616
import org.apache.lucene.codecs.hnsw.FlatVectorsFormat;
17-
import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsFormat;
1817
import org.apache.lucene.index.SegmentReadState;
1918
import org.apache.lucene.index.SegmentWriteState;
19+
import org.elasticsearch.index.codec.vectors.DirectIOCapableFlatVectorsFormat;
2020
import org.elasticsearch.index.codec.vectors.OptimizedScalarQuantizer;
21+
import org.elasticsearch.index.codec.vectors.es93.DirectIOCapableLucene99FlatVectorsFormat;
2122

2223
import java.io.IOException;
2324
import java.util.Map;
@@ -55,9 +56,10 @@ public class ES920DiskBBQVectorsFormat extends KnnVectorsFormat {
5556
static final String IVF_META_EXTENSION = "mivf";
5657

5758
public static final int VERSION_START = 0;
58-
public static final int VERSION_CURRENT = VERSION_START;
59+
public static final int VERSION_DIRECT_IO = 1;
60+
public static final int VERSION_CURRENT = VERSION_DIRECT_IO;
5961

60-
private static final Lucene99FlatVectorsFormat rawVectorFormat = new Lucene99FlatVectorsFormat(
62+
private static final FlatVectorsFormat rawVectorFormat = new DirectIOCapableLucene99FlatVectorsFormat(
6163
FlatVectorScorerUtil.getLucene99FlatVectorsScorer()
6264
);
6365
private static final Map<String, FlatVectorsFormat> supportedFormats = Map.of(rawVectorFormat.getName(), rawVectorFormat);
@@ -74,8 +76,13 @@ public class ES920DiskBBQVectorsFormat extends KnnVectorsFormat {
7476

7577
private final int vectorPerCluster;
7678
private final int centroidsPerParentCluster;
79+
private final boolean useDirectIO;
7780

7881
public ES920DiskBBQVectorsFormat(int vectorPerCluster, int centroidsPerParentCluster) {
82+
this(vectorPerCluster, centroidsPerParentCluster, false);
83+
}
84+
85+
public ES920DiskBBQVectorsFormat(int vectorPerCluster, int centroidsPerParentCluster, boolean useDirectIO) {
7986
super(NAME);
8087
if (vectorPerCluster < MIN_VECTORS_PER_CLUSTER || vectorPerCluster > MAX_VECTORS_PER_CLUSTER) {
8188
throw new IllegalArgumentException(
@@ -99,6 +106,7 @@ public ES920DiskBBQVectorsFormat(int vectorPerCluster, int centroidsPerParentClu
99106
}
100107
this.vectorPerCluster = vectorPerCluster;
101108
this.centroidsPerParentCluster = centroidsPerParentCluster;
109+
this.useDirectIO = useDirectIO;
102110
}
103111

104112
/** Constructs a format using the given graph construction parameters and scalar quantization. */
@@ -110,6 +118,7 @@ public ES920DiskBBQVectorsFormat() {
110118
public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
111119
return new ES920DiskBBQVectorsWriter(
112120
rawVectorFormat.getName(),
121+
useDirectIO,
113122
state,
114123
rawVectorFormat.fieldsWriter(state),
115124
vectorPerCluster,
@@ -119,10 +128,16 @@ public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException
119128

120129
@Override
121130
public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
122-
return new ES920DiskBBQVectorsReader(state, f -> {
131+
return new ES920DiskBBQVectorsReader(state, (f, dio) -> {
123132
var format = supportedFormats.get(f);
124133
if (format == null) return null;
125-
return format.fieldsReader(state);
134+
135+
if (format instanceof DirectIOCapableFlatVectorsFormat diof) {
136+
return diof.fieldsReader(state, dio);
137+
} else {
138+
assert dio == false : format + " is not DirectIO capable";
139+
return format.fieldsReader(state);
140+
}
126141
});
127142
}
128143

server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsReader.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,12 @@
99

1010
package org.elasticsearch.index.codec.vectors.diskbbq;
1111

12-
import org.apache.lucene.codecs.hnsw.FlatVectorsReader;
1312
import org.apache.lucene.index.FieldInfo;
1413
import org.apache.lucene.index.SegmentReadState;
1514
import org.apache.lucene.index.VectorSimilarityFunction;
1615
import org.apache.lucene.search.KnnCollector;
1716
import org.apache.lucene.store.IndexInput;
1817
import org.apache.lucene.util.Bits;
19-
import org.apache.lucene.util.IOFunction;
2018
import org.apache.lucene.util.VectorUtil;
2119
import org.elasticsearch.index.codec.vectors.OptimizedScalarQuantizer;
2220
import org.elasticsearch.index.codec.vectors.cluster.NeighborQueue;
@@ -40,7 +38,7 @@
4038
*/
4139
public class ES920DiskBBQVectorsReader extends IVFVectorsReader {
4240

43-
public ES920DiskBBQVectorsReader(SegmentReadState state, IOFunction<String, FlatVectorsReader> getFormatReader) throws IOException {
41+
ES920DiskBBQVectorsReader(SegmentReadState state, GetFormatReader getFormatReader) throws IOException {
4442
super(state, getFormatReader);
4543
}
4644

server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsWriter.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,12 +53,13 @@ public class ES920DiskBBQVectorsWriter extends IVFVectorsWriter {
5353

5454
public ES920DiskBBQVectorsWriter(
5555
String rawVectorFormatName,
56+
boolean useDirectIOReads,
5657
SegmentWriteState state,
5758
FlatVectorsWriter rawVectorDelegate,
5859
int vectorPerCluster,
5960
int centroidsPerParentCluster
6061
) throws IOException {
61-
super(state, rawVectorFormatName, rawVectorDelegate);
62+
super(state, rawVectorFormatName, useDirectIOReads, rawVectorDelegate);
6263
this.vectorPerCluster = vectorPerCluster;
6364
this.centroidsPerParentCluster = centroidsPerParentCluster;
6465
}

server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/IVFVectorsReader.java

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929
import org.apache.lucene.store.IOContext;
3030
import org.apache.lucene.store.IndexInput;
3131
import org.apache.lucene.util.Bits;
32-
import org.apache.lucene.util.IOFunction;
3332
import org.elasticsearch.core.IOUtils;
3433
import org.elasticsearch.search.vectors.IVFKnnSearchStrategy;
3534

@@ -43,6 +42,7 @@
4342

4443
import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.SIMILARITY_FUNCTIONS;
4544
import static org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat.DYNAMIC_VISIT_RATIO;
45+
import static org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat.VERSION_DIRECT_IO;
4646

4747
/**
4848
* Reader for IVF vectors. This reader is used to read the IVF vectors from the index.
@@ -55,8 +55,13 @@ public abstract class IVFVectorsReader extends KnnVectorsReader {
5555
protected final IntObjectHashMap<FieldEntry> fields;
5656
private final Map<String, FlatVectorsReader> rawVectorReaders;
5757

58+
@FunctionalInterface
59+
protected interface GetFormatReader {
60+
FlatVectorsReader getReader(String formatName, boolean useDirectIO) throws IOException;
61+
}
62+
5863
@SuppressWarnings("this-escape")
59-
protected IVFVectorsReader(SegmentReadState state, IOFunction<String, FlatVectorsReader> getFormatReader) throws IOException {
64+
protected IVFVectorsReader(SegmentReadState state, GetFormatReader getFormatReader) throws IOException {
6065
this.state = state;
6166
this.fieldInfos = state.fieldInfos;
6267
this.fields = new IntObjectHashMap<>();
@@ -80,7 +85,7 @@ protected IVFVectorsReader(SegmentReadState state, IOFunction<String, FlatVector
8085
state.segmentInfo.getId(),
8186
state.segmentSuffix
8287
);
83-
readers = readFields(ivfMeta, getFormatReader);
88+
readers = readFields(ivfMeta, getFormatReader, versionMeta);
8489
} catch (Throwable exception) {
8590
priorE = exception;
8691
} finally {
@@ -153,7 +158,7 @@ private static IndexInput openDataInput(
153158
}
154159
}
155160

156-
private Map<String, FlatVectorsReader> readFields(ChecksumIndexInput meta, IOFunction<String, FlatVectorsReader> loadReader)
161+
private Map<String, FlatVectorsReader> readFields(ChecksumIndexInput meta, GetFormatReader loadReader, int versionMeta)
157162
throws IOException {
158163
Map<String, FlatVectorsReader> readers = new HashMap<>();
159164
for (int fieldNumber = meta.readInt(); fieldNumber != -1; fieldNumber = meta.readInt()) {
@@ -162,11 +167,11 @@ private Map<String, FlatVectorsReader> readFields(ChecksumIndexInput meta, IOFun
162167
throw new CorruptIndexException("Invalid field number: " + fieldNumber, meta);
163168
}
164169

165-
FieldEntry fieldEntry = readField(meta, info);
170+
FieldEntry fieldEntry = readField(meta, info, versionMeta);
166171

167172
FlatVectorsReader reader = readers.get(fieldEntry.rawVectorFormatName);
168173
if (reader == null) {
169-
reader = loadReader.apply(fieldEntry.rawVectorFormatName);
174+
reader = loadReader.getReader(fieldEntry.rawVectorFormatName, fieldEntry.useDirectIOReads);
170175
if (reader == null) {
171176
throw new IllegalStateException("Cannot find flat vector format: " + fieldEntry.rawVectorFormatName);
172177
}
@@ -178,8 +183,9 @@ private Map<String, FlatVectorsReader> readFields(ChecksumIndexInput meta, IOFun
178183
return readers;
179184
}
180185

181-
private FieldEntry readField(IndexInput input, FieldInfo info) throws IOException {
186+
private FieldEntry readField(IndexInput input, FieldInfo info, int versionMeta) throws IOException {
182187
final String rawVectorFormat = input.readString();
188+
final boolean useDirectIOReads = versionMeta >= VERSION_DIRECT_IO && input.readByte() == 1;
183189
final VectorEncoding vectorEncoding = readVectorEncoding(input);
184190
final VectorSimilarityFunction similarityFunction = readSimilarityFunction(input);
185191
if (similarityFunction != info.getVectorSimilarityFunction()) {
@@ -207,6 +213,7 @@ private FieldEntry readField(IndexInput input, FieldInfo info) throws IOExceptio
207213
}
208214
return new FieldEntry(
209215
rawVectorFormat,
216+
useDirectIOReads,
210217
similarityFunction,
211218
vectorEncoding,
212219
numCentroids,
@@ -387,6 +394,7 @@ public void close() throws IOException {
387394

388395
protected record FieldEntry(
389396
String rawVectorFormatName,
397+
boolean useDirectIOReads,
390398
VectorSimilarityFunction similarityFunction,
391399
VectorEncoding vectorEncoding,
392400
int numCentroids,

server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/IVFVectorsWriter.java

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,12 +53,18 @@ public abstract class IVFVectorsWriter extends KnnVectorsWriter {
5353
private final IndexOutput ivfCentroids, ivfClusters;
5454
private final IndexOutput ivfMeta;
5555
private final String rawVectorFormatName;
56+
private final boolean useDirectIOReads;
5657
private final FlatVectorsWriter rawVectorDelegate;
5758

5859
@SuppressWarnings("this-escape")
59-
protected IVFVectorsWriter(SegmentWriteState state, String rawVectorFormatName, FlatVectorsWriter rawVectorDelegate)
60-
throws IOException {
60+
protected IVFVectorsWriter(
61+
SegmentWriteState state,
62+
String rawVectorFormatName,
63+
boolean useDirectIOReads,
64+
FlatVectorsWriter rawVectorDelegate
65+
) throws IOException {
6166
this.rawVectorFormatName = rawVectorFormatName;
67+
this.useDirectIOReads = useDirectIOReads;
6268
this.rawVectorDelegate = rawVectorDelegate;
6369
final String metaFileName = IndexFileNames.segmentFileName(
6470
state.segmentInfo.name,
@@ -497,6 +503,7 @@ private void writeMeta(
497503
) throws IOException {
498504
ivfMeta.writeInt(field.number);
499505
ivfMeta.writeString(rawVectorFormatName);
506+
ivfMeta.writeByte(useDirectIOReads ? (byte) 1 : 0);
500507
ivfMeta.writeInt(field.getVectorEncoding().ordinal());
501508
ivfMeta.writeInt(distFuncToOrd(field.getVectorSimilarityFunction()));
502509
ivfMeta.writeInt(numCentroids);

server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2174,7 +2174,11 @@ static class BBQIVFIndexOptions extends QuantizedIndexOptions {
21742174
@Override
21752175
KnnVectorsFormat getVectorsFormat(ElementType elementType) {
21762176
assert elementType == ElementType.FLOAT;
2177-
return new ES920DiskBBQVectorsFormat(clusterSize, ES920DiskBBQVectorsFormat.DEFAULT_CENTROIDS_PER_PARENT_CLUSTER);
2177+
return new ES920DiskBBQVectorsFormat(
2178+
clusterSize,
2179+
ES920DiskBBQVectorsFormat.DEFAULT_CENTROIDS_PER_PARENT_CLUSTER,
2180+
onDiskRescore
2181+
);
21782182
}
21792183

21802184
@Override

0 commit comments

Comments
 (0)