Skip to content

Commit 40811eb

Browse files
committed
Use a boolean fields for directIO or not
1 parent 46b1486 commit 40811eb

File tree

10 files changed

+105
-108
lines changed

10 files changed

+105
-108
lines changed

server/src/internalClusterTest/java/org/elasticsearch/index/store/DirectIOIT.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ protected Collection<Class<? extends Plugin>> nodePlugins() {
7474

7575
private String indexVectors(boolean directIO) {
7676
String indexName = "test-vectors-" + directIO;
77-
String type = randomFrom("bbq_hnsw", "bbq_disk");
77+
String type = randomFrom("bbq_hnsw"/*, "bbq_disk"*/);
7878
assertAcked(
7979
prepareCreate(indexName).setSettings(Settings.builder().put(InternalSettingsPlugin.USE_COMPOUND_FILE.getKey(), false))
8080
.setMapping(Strings.format("""
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.index.codec.vectors;
11+
12+
import org.apache.lucene.codecs.hnsw.FlatVectorsReader;
13+
import org.apache.lucene.index.SegmentReadState;
14+
15+
import java.io.IOException;
16+
17+
public interface DirectIOCapableFlatVectorsFormat {
18+
FlatVectorsReader fieldsReader(SegmentReadState state, boolean useDirectIO) throws IOException;
19+
}

server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsFormat.java

Lines changed: 4 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,9 @@
1818
import org.apache.lucene.index.SegmentReadState;
1919
import org.apache.lucene.index.SegmentWriteState;
2020
import org.elasticsearch.index.codec.vectors.OptimizedScalarQuantizer;
21-
import org.elasticsearch.index.codec.vectors.es93.DirectIOLucene99FlatVectorsFormat;
2221

2322
import java.io.IOException;
2423
import java.util.Map;
25-
import java.util.concurrent.atomic.AtomicReference;
2624

2725
/**
2826
* Codec format for Inverted File Vector indexes. This index expects to break the dimensional space
@@ -59,18 +57,10 @@ public class ES920DiskBBQVectorsFormat extends KnnVectorsFormat {
5957
public static final int VERSION_START = 0;
6058
public static final int VERSION_CURRENT = VERSION_START;
6159

62-
private static final FlatVectorsFormat defaultRawVectorFormat = new Lucene99FlatVectorsFormat(
60+
private static final Lucene99FlatVectorsFormat rawVectorFormat = new Lucene99FlatVectorsFormat(
6361
FlatVectorScorerUtil.getLucene99FlatVectorsScorer()
6462
);
65-
private static final FlatVectorsFormat directIORawVectorFormat = new DirectIOLucene99FlatVectorsFormat(
66-
FlatVectorScorerUtil.getLucene99FlatVectorsScorer()
67-
);
68-
private static final Map<String, FlatVectorsFormat> supportedFormats = Map.of(
69-
defaultRawVectorFormat.getName(),
70-
defaultRawVectorFormat,
71-
directIORawVectorFormat.getName(),
72-
directIORawVectorFormat
73-
);
63+
private static final Map<String, FlatVectorsFormat> supportedFormats = Map.of(rawVectorFormat.getName(), rawVectorFormat);
7464

7565
// This dynamically sets the cluster probe based on the `k` requested and the number of clusters.
7666
// useful when searching with 'efSearch' type parameters instead of requiring a specific ratio.
@@ -116,26 +106,12 @@ public ES920DiskBBQVectorsFormat() {
116106
this(DEFAULT_VECTORS_PER_CLUSTER, DEFAULT_CENTROIDS_PER_PARENT_CLUSTER);
117107
}
118108

119-
private final AtomicReference<FlatVectorsFormat> flatVectorsFormat = new AtomicReference<>();
120-
121-
public void useDirectIO() {
122-
if (flatVectorsFormat.compareAndSet(null, directIORawVectorFormat) == false) {
123-
throw new IllegalStateException("Flat vector format has already been set");
124-
}
125-
}
126-
127109
@Override
128110
public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
129-
var rawFormat = flatVectorsFormat.get();
130-
if (rawFormat == null) {
131-
rawFormat = defaultRawVectorFormat;
132-
flatVectorsFormat.set(rawFormat);
133-
}
134-
135111
return new ES920DiskBBQVectorsWriter(
136-
rawFormat.getName(),
112+
rawVectorFormat.getName(),
137113
state,
138-
rawFormat.fieldsWriter(state),
114+
rawVectorFormat.fieldsWriter(state),
139115
vectorPerCluster,
140116
centroidsPerParentCluster
141117
);
Lines changed: 17 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,10 @@
11
/*
2-
* @notice
3-
* Licensed to the Apache Software Foundation (ASF) under one or more
4-
* contributor license agreements. See the NOTICE file distributed with
5-
* this work for additional information regarding copyright ownership.
6-
* The ASF licenses this file to You under the Apache License, Version 2.0
7-
* (the "License"); you may not use this file except in compliance with
8-
* the License. You may obtain a copy of the License at
9-
*
10-
* http://www.apache.org/licenses/LICENSE-2.0
11-
*
12-
* Unless required by applicable law or agreed to in writing, software
13-
* distributed under the License is distributed on an "AS IS" BASIS,
14-
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15-
* See the License for the specific language governing permissions and
16-
* limitations under the License.
17-
*
18-
* Modifications copyright (C) 2024 Elasticsearch B.V.
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
198
*/
209
package org.elasticsearch.index.codec.vectors.es93;
2110

@@ -31,29 +20,22 @@
3120
import org.apache.lucene.store.MergeInfo;
3221
import org.elasticsearch.common.util.set.Sets;
3322
import org.elasticsearch.index.codec.vectors.AbstractFlatVectorsFormat;
23+
import org.elasticsearch.index.codec.vectors.DirectIOCapableFlatVectorsFormat;
3424
import org.elasticsearch.index.codec.vectors.MergeReaderWrapper;
3525
import org.elasticsearch.index.codec.vectors.es818.DirectIOHint;
3626
import org.elasticsearch.index.store.FsDirectoryFactory;
3727

3828
import java.io.IOException;
3929
import java.util.Set;
4030

41-
/**
42-
* Copied from Lucene99FlatVectorsFormat.
43-
*
44-
* This is copied to change the implementation of {@link #fieldsReader} only.
45-
*/
46-
public class DirectIOLucene99FlatVectorsFormat extends AbstractFlatVectorsFormat {
47-
48-
static final String NAME = "DirectIOLucene99FlatVectorsFormat";
31+
public class DirectIOCapableLucene99FlatVectorsFormat extends AbstractFlatVectorsFormat implements DirectIOCapableFlatVectorsFormat {
4932

50-
public static final int VERSION_START = 0;
51-
public static final int VERSION_CURRENT = VERSION_START;
33+
static final String NAME = "Lucene99FlatVectorsFormat";
5234

5335
private final FlatVectorsScorer vectorsScorer;
5436

5537
/** Constructs a format */
56-
public DirectIOLucene99FlatVectorsFormat(FlatVectorsScorer vectorsScorer) {
38+
public DirectIOCapableLucene99FlatVectorsFormat(FlatVectorsScorer vectorsScorer) {
5739
super(NAME);
5840
this.vectorsScorer = vectorsScorer;
5941
}
@@ -68,13 +50,18 @@ public FlatVectorsWriter fieldsWriter(SegmentWriteState state) throws IOExceptio
6850
return new Lucene99FlatVectorsWriter(state, vectorsScorer);
6951
}
7052

71-
static boolean shouldUseDirectIO(SegmentReadState state) {
53+
static boolean canUseDirectIO(SegmentReadState state) {
7254
return FsDirectoryFactory.isHybridFs(state.directory);
7355
}
7456

7557
@Override
7658
public FlatVectorsReader fieldsReader(SegmentReadState state) throws IOException {
77-
if (shouldUseDirectIO(state) && state.context.context() == IOContext.Context.DEFAULT) {
59+
return fieldsReader(state, false);
60+
}
61+
62+
@Override
63+
public FlatVectorsReader fieldsReader(SegmentReadState state, boolean useDirectIO) throws IOException {
64+
if (state.context.context() == IOContext.Context.DEFAULT && useDirectIO && canUseDirectIO(state)) {
7865
// only override the context for the random-access use case
7966
SegmentReadState directIOState = new SegmentReadState(
8067
state.directory,
@@ -84,7 +71,6 @@ public FlatVectorsReader fieldsReader(SegmentReadState state) throws IOException
8471
state.segmentSuffix
8572
);
8673
// Use mmap for merges and direct I/O for searches.
87-
// TODO: Open the mmap file with sequential access instead of random (current behavior).
8874
return new MergeReaderWrapper(
8975
new Lucene99FlatVectorsReader(directIOState, vectorsScorer),
9076
new Lucene99FlatVectorsReader(state, vectorsScorer)

server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93BinaryQuantizedVectorsFormat.java

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,13 @@
2020
package org.elasticsearch.index.codec.vectors.es93;
2121

2222
import org.apache.lucene.codecs.hnsw.FlatVectorScorerUtil;
23-
import org.apache.lucene.codecs.hnsw.FlatVectorsFormat;
2423
import org.apache.lucene.codecs.hnsw.FlatVectorsReader;
2524
import org.apache.lucene.codecs.hnsw.FlatVectorsScorer;
2625
import org.apache.lucene.codecs.hnsw.FlatVectorsWriter;
27-
import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsFormat;
2826
import org.apache.lucene.index.SegmentReadState;
2927
import org.apache.lucene.index.SegmentWriteState;
3028
import org.elasticsearch.index.codec.vectors.AbstractFlatVectorsFormat;
29+
import org.elasticsearch.index.codec.vectors.DirectIOCapableFlatVectorsFormat;
3130
import org.elasticsearch.index.codec.vectors.OptimizedScalarQuantizer;
3231
import org.elasticsearch.index.codec.vectors.es818.ES818BinaryFlatVectorsScorer;
3332
import org.elasticsearch.index.codec.vectors.es818.ES818BinaryQuantizedVectorsReader;
@@ -88,21 +87,19 @@
8887
* <li>The sparse vector information, if required, mapping vector ordinal to doc ID
8988
* </ul>
9089
*/
91-
public class ES93BinaryQuantizedVectorsFormat extends AbstractFlatVectorsFormat {
90+
public class ES93BinaryQuantizedVectorsFormat extends AbstractFlatVectorsFormat implements DirectIOCapableFlatVectorsFormat {
9291

9392
public static final String NAME = "ES93BinaryQuantizedVectorsFormat";
9493

95-
private final FlatVectorsFormat rawVectorFormat;
94+
private final DirectIOCapableLucene99FlatVectorsFormat rawVectorFormat;
9695

9796
private static final ES818BinaryFlatVectorsScorer scorer = new ES818BinaryFlatVectorsScorer(
9897
FlatVectorScorerUtil.getLucene99FlatVectorsScorer()
9998
);
10099

101-
public ES93BinaryQuantizedVectorsFormat(boolean useDirectIO) {
102-
super(useDirectIO ? "DirectIO" + NAME : NAME);
103-
rawVectorFormat = useDirectIO
104-
? new DirectIOLucene99FlatVectorsFormat(FlatVectorScorerUtil.getLucene99FlatVectorsScorer())
105-
: new Lucene99FlatVectorsFormat(FlatVectorScorerUtil.getLucene99FlatVectorsScorer());
100+
public ES93BinaryQuantizedVectorsFormat() {
101+
super(NAME);
102+
rawVectorFormat = new DirectIOCapableLucene99FlatVectorsFormat(FlatVectorScorerUtil.getLucene99FlatVectorsScorer());
106103
}
107104

108105
@Override
@@ -119,4 +116,9 @@ public FlatVectorsWriter fieldsWriter(SegmentWriteState state) throws IOExceptio
119116
public FlatVectorsReader fieldsReader(SegmentReadState state) throws IOException {
120117
return new ES818BinaryQuantizedVectorsReader(state, rawVectorFormat.fieldsReader(state), scorer);
121118
}
119+
120+
@Override
121+
public FlatVectorsReader fieldsReader(SegmentReadState state, boolean useDirectIO) throws IOException {
122+
return new ES818BinaryQuantizedVectorsReader(state, rawVectorFormat.fieldsReader(state, useDirectIO), scorer);
123+
}
122124
}

server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93GenericFlatVectorsReader.java

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
import org.apache.lucene.index.SegmentReadState;
2020
import org.apache.lucene.search.AcceptDocs;
2121
import org.apache.lucene.search.KnnCollector;
22-
import org.apache.lucene.util.IOFunction;
2322
import org.apache.lucene.util.hnsw.RandomVectorScorer;
2423
import org.elasticsearch.core.IOUtils;
2524

@@ -35,7 +34,12 @@ class ES93GenericFlatVectorsReader extends FlatVectorsReader {
3534

3635
private final FlatVectorsReader vectorsReader;
3736

38-
ES93GenericFlatVectorsReader(SegmentReadState state, IOFunction<String, FlatVectorsReader> getFormatReader) throws IOException {
37+
@FunctionalInterface
38+
interface GetFormatReader {
39+
FlatVectorsReader getReader(String formatName, boolean useDirectIO) throws IOException;
40+
}
41+
42+
ES93GenericFlatVectorsReader(SegmentReadState state, GetFormatReader getFormatReader) throws IOException {
3943
super(null); // Hacks ahoy!
4044
// read in the meta information
4145
final String metaFileName = IndexFileNames.segmentFileName(
@@ -57,9 +61,12 @@ class ES93GenericFlatVectorsReader extends FlatVectorsReader {
5761
state.segmentSuffix
5862
);
5963
String innerFormatName = metaIn.readString();
60-
reader = getFormatReader.apply(innerFormatName);
64+
byte useDirectIO = metaIn.readByte();
65+
reader = getFormatReader.getReader(innerFormatName, useDirectIO == 1);
6166
if (reader == null) {
62-
throw new IllegalStateException("Cannot find knn vector format: " + innerFormatName);
67+
throw new IllegalStateException(
68+
"Cannot find knn vector format [" + innerFormatName + "]" + (useDirectIO == 1 ? " with directIO" : "")
69+
);
6370
}
6471
} catch (Throwable exception) {
6572
priorE = exception;

server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93GenericFlatVectorsWriter.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,8 @@ class ES93GenericFlatVectorsWriter extends FlatVectorsWriter {
3333
private final FlatVectorsWriter rawVectorWriter;
3434

3535
@SuppressWarnings("this-escape")
36-
ES93GenericFlatVectorsWriter(String knnFormatName, SegmentWriteState state, FlatVectorsWriter rawWriter) throws IOException {
36+
ES93GenericFlatVectorsWriter(String knnFormatName, boolean useDirectIOReads, SegmentWriteState state, FlatVectorsWriter rawWriter)
37+
throws IOException {
3738
super(rawWriter.getFlatVectorScorer());
3839
this.rawVectorWriter = rawWriter;
3940
final String metaFileName = IndexFileNames.segmentFileName(
@@ -46,6 +47,7 @@ class ES93GenericFlatVectorsWriter extends FlatVectorsWriter {
4647
CodecUtil.writeIndexHeader(metaOut, META_CODEC_NAME, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
4748
// write the format name used for this segment
4849
metaOut.writeString(knnFormatName);
50+
metaOut.writeByte(useDirectIOReads ? (byte) 1 : 0);
4951
} catch (Throwable t) {
5052
IOUtils.closeWhileHandlingException(this);
5153
throw t;

server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93GenericHnswVectorsFormat.java

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import org.apache.lucene.index.SegmentReadState;
2929
import org.apache.lucene.index.SegmentWriteState;
3030
import org.elasticsearch.index.codec.vectors.AbstractHnswVectorsFormat;
31+
import org.elasticsearch.index.codec.vectors.DirectIOCapableFlatVectorsFormat;
3132

3233
import java.io.IOException;
3334
import java.util.Map;
@@ -61,16 +62,19 @@ protected final FlatVectorsFormat flatVectorsFormat() {
6162

6263
protected abstract FlatVectorsFormat writeFlatVectorsFormat();
6364

65+
protected abstract boolean useDirectIOReads();
66+
6467
protected abstract Map<String, FlatVectorsFormat> supportedReadFlatVectorsFormats();
6568

6669
@Override
6770
public final KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
6871
var flatFormat = writeFlatVectorsFormat();
72+
boolean directIO = useDirectIOReads();
6973
return new Lucene99HnswVectorsWriter(
7074
state,
7175
maxConn,
7276
beamWidth,
73-
new ES93GenericFlatVectorsWriter(flatFormat.getName(), state, flatFormat.fieldsWriter(state)),
77+
new ES93GenericFlatVectorsWriter(flatFormat.getName(), directIO, state, flatFormat.fieldsWriter(state)),
7478
numMergeWorkers,
7579
mergeExec
7680
);
@@ -79,10 +83,16 @@ public final KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOExc
7983
@Override
8084
public final KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
8185
var readFormats = supportedReadFlatVectorsFormats();
82-
return new Lucene99HnswVectorsReader(state, new ES93GenericFlatVectorsReader(state, f -> {
86+
return new Lucene99HnswVectorsReader(state, new ES93GenericFlatVectorsReader(state, (f, dio) -> {
8387
var format = readFormats.get(f);
8488
if (format == null) return null;
85-
return format.fieldsReader(state);
89+
90+
if (format instanceof DirectIOCapableFlatVectorsFormat diof) {
91+
return diof.fieldsReader(state, dio);
92+
} else {
93+
assert dio == false : format + " is not DirectIO capable";
94+
return format.fieldsReader(state);
95+
}
8696
}));
8797
}
8898

0 commit comments

Comments
 (0)