Skip to content

Commit 38c05ea

Browse files
authored
Add base classes for generic HNSW formats (elastic#135343)
These base classes will be used for future HNSW formats that support direct IO and more flat formats in a generic way.
1 parent 6b55b5d commit 38c05ea

25 files changed

+879
-340
lines changed

server/src/internalClusterTest/java/org/elasticsearch/index/store/DirectIOIT.java

Lines changed: 0 additions & 146 deletions
This file was deleted.

server/src/main/java/module-info.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -463,7 +463,8 @@
463463
org.elasticsearch.index.codec.vectors.es818.ES818BinaryQuantizedVectorsFormat,
464464
org.elasticsearch.index.codec.vectors.es818.ES818HnswBinaryQuantizedVectorsFormat,
465465
org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat,
466-
org.elasticsearch.index.codec.vectors.diskbbq.next.ESNextDiskBBQVectorsFormat;
466+
org.elasticsearch.index.codec.vectors.diskbbq.next.ESNextDiskBBQVectorsFormat,
467+
org.elasticsearch.index.codec.vectors.es93.ES93HnswBinaryQuantizedVectorsFormat;
467468

468469
provides org.apache.lucene.codecs.Codec
469470
with
@@ -492,5 +493,6 @@
492493
exports org.elasticsearch.inference.telemetry;
493494
exports org.elasticsearch.index.codec.vectors.diskbbq to org.elasticsearch.test.knn;
494495
exports org.elasticsearch.index.codec.vectors.cluster to org.elasticsearch.test.knn;
496+
exports org.elasticsearch.index.codec.vectors.es93 to org.elasticsearch.test.knn;
495497
exports org.elasticsearch.search.crossproject;
496498
}

server/src/main/java/org/elasticsearch/index/codec/vectors/AbstractFlatVectorsFormat.java

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,21 +11,11 @@
1111

1212
import org.apache.lucene.codecs.hnsw.FlatVectorsFormat;
1313
import org.apache.lucene.codecs.hnsw.FlatVectorsScorer;
14-
import org.elasticsearch.core.SuppressForbidden;
1514

1615
import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT;
1716

1817
public abstract class AbstractFlatVectorsFormat extends FlatVectorsFormat {
1918

20-
public static final boolean USE_DIRECT_IO = getUseDirectIO();
21-
22-
@SuppressForbidden(
23-
reason = "TODO Deprecate any lenient usage of Boolean#parseBoolean https://github.com/elastic/elasticsearch/issues/128993"
24-
)
25-
private static boolean getUseDirectIO() {
26-
return Boolean.parseBoolean(System.getProperty("vector.rescoring.directio", "false"));
27-
}
28-
2919
protected AbstractFlatVectorsFormat(String name) {
3020
super(name);
3121
}

server/src/main/java/org/elasticsearch/index/codec/vectors/DirectIOCapableFlatVectorsFormat.java

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.index.codec.vectors;
11+
12+
import org.apache.lucene.codecs.hnsw.FlatVectorsReader;
13+
import org.apache.lucene.index.SegmentReadState;
14+
15+
import java.io.IOException;
16+
17+
public abstract class DirectIOCapableFlatVectorsFormat extends AbstractFlatVectorsFormat {
18+
protected DirectIOCapableFlatVectorsFormat(String name) {
19+
super(name);
20+
}
21+
22+
public abstract FlatVectorsReader fieldsReader(SegmentReadState state, boolean useDirectIO) throws IOException;
23+
}

server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsFormat.java

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,12 @@
1414
import org.apache.lucene.codecs.KnnVectorsWriter;
1515
import org.apache.lucene.codecs.hnsw.FlatVectorScorerUtil;
1616
import org.apache.lucene.codecs.hnsw.FlatVectorsFormat;
17-
import org.apache.lucene.codecs.hnsw.FlatVectorsReader;
1817
import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsFormat;
1918
import org.apache.lucene.index.SegmentReadState;
2019
import org.apache.lucene.index.SegmentWriteState;
21-
import org.elasticsearch.common.util.Maps;
2220
import org.elasticsearch.index.codec.vectors.OptimizedScalarQuantizer;
2321

2422
import java.io.IOException;
25-
import java.util.Collections;
2623
import java.util.Map;
2724

2825
/**
@@ -60,7 +57,7 @@ public class ES920DiskBBQVectorsFormat extends KnnVectorsFormat {
6057
public static final int VERSION_START = 0;
6158
public static final int VERSION_CURRENT = VERSION_START;
6259

63-
private static final FlatVectorsFormat rawVectorFormat = new Lucene99FlatVectorsFormat(
60+
private static final Lucene99FlatVectorsFormat rawVectorFormat = new Lucene99FlatVectorsFormat(
6461
FlatVectorScorerUtil.getLucene99FlatVectorsScorer()
6562
);
6663
private static final Map<String, FlatVectorsFormat> supportedFormats = Map.of(rawVectorFormat.getName(), rawVectorFormat);
@@ -122,12 +119,11 @@ public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException
122119

123120
@Override
124121
public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
125-
Map<String, FlatVectorsReader> readers = Maps.newHashMapWithExpectedSize(supportedFormats.size());
126-
for (var fe : supportedFormats.entrySet()) {
127-
readers.put(fe.getKey(), fe.getValue().fieldsReader(state));
128-
}
129-
130-
return new ES920DiskBBQVectorsReader(state, Collections.unmodifiableMap(readers));
122+
return new ES920DiskBBQVectorsReader(state, f -> {
123+
var format = supportedFormats.get(f);
124+
if (format == null) return null;
125+
return format.fieldsReader(state);
126+
});
131127
}
132128

133129
@Override

server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsReader.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import org.apache.lucene.search.KnnCollector;
1717
import org.apache.lucene.store.IndexInput;
1818
import org.apache.lucene.util.Bits;
19+
import org.apache.lucene.util.IOFunction;
1920
import org.apache.lucene.util.VectorUtil;
2021
import org.elasticsearch.index.codec.vectors.OptimizedScalarQuantizer;
2122
import org.elasticsearch.index.codec.vectors.cluster.NeighborQueue;
@@ -39,8 +40,8 @@
3940
*/
4041
public class ES920DiskBBQVectorsReader extends IVFVectorsReader {
4142

42-
public ES920DiskBBQVectorsReader(SegmentReadState state, Map<String, FlatVectorsReader> rawVectorsReader) throws IOException {
43-
super(state, rawVectorsReader);
43+
public ES920DiskBBQVectorsReader(SegmentReadState state, IOFunction<String, FlatVectorsReader> getFormatReader) throws IOException {
44+
super(state, getFormatReader);
4445
}
4546

4647
public CentroidIterator getPostingListPrefetchIterator(CentroidIterator centroidIterator, IndexInput postingListSlice)

server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/IVFVectorsReader.java

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,15 @@
2929
import org.apache.lucene.store.IOContext;
3030
import org.apache.lucene.store.IndexInput;
3131
import org.apache.lucene.util.Bits;
32+
import org.apache.lucene.util.IOFunction;
3233
import org.elasticsearch.core.IOUtils;
3334
import org.elasticsearch.search.vectors.IVFKnnSearchStrategy;
3435

3536
import java.io.Closeable;
3637
import java.io.IOException;
3738
import java.util.ArrayList;
3839
import java.util.Collections;
40+
import java.util.HashMap;
3941
import java.util.List;
4042
import java.util.Map;
4143

@@ -54,11 +56,10 @@ public abstract class IVFVectorsReader extends KnnVectorsReader {
5456
private final Map<String, FlatVectorsReader> rawVectorReaders;
5557

5658
@SuppressWarnings("this-escape")
57-
protected IVFVectorsReader(SegmentReadState state, Map<String, FlatVectorsReader> rawVectorReaders) throws IOException {
59+
protected IVFVectorsReader(SegmentReadState state, IOFunction<String, FlatVectorsReader> getFormatReader) throws IOException {
5860
this.state = state;
5961
this.fieldInfos = state.fieldInfos;
6062
this.fields = new IntObjectHashMap<>();
61-
this.rawVectorReaders = rawVectorReaders;
6263
String meta = IndexFileNames.segmentFileName(
6364
state.segmentInfo.name,
6465
state.segmentSuffix,
@@ -69,6 +70,7 @@ protected IVFVectorsReader(SegmentReadState state, Map<String, FlatVectorsReader
6970
boolean success = false;
7071
try (ChecksumIndexInput ivfMeta = state.directory.openChecksumInput(meta)) {
7172
Throwable priorE = null;
73+
Map<String, FlatVectorsReader> readers = null;
7274
try {
7375
versionMeta = CodecUtil.checkIndexHeader(
7476
ivfMeta,
@@ -78,12 +80,13 @@ protected IVFVectorsReader(SegmentReadState state, Map<String, FlatVectorsReader
7880
state.segmentInfo.getId(),
7981
state.segmentSuffix
8082
);
81-
readFields(ivfMeta);
83+
readers = readFields(ivfMeta, getFormatReader);
8284
} catch (Throwable exception) {
8385
priorE = exception;
8486
} finally {
8587
CodecUtil.checkFooter(ivfMeta, priorE);
8688
}
89+
this.rawVectorReaders = readers;
8790
ivfCentroids = openDataInput(
8891
state,
8992
versionMeta,
@@ -150,14 +153,29 @@ private static IndexInput openDataInput(
150153
}
151154
}
152155

153-
private void readFields(ChecksumIndexInput meta) throws IOException {
156+
private Map<String, FlatVectorsReader> readFields(ChecksumIndexInput meta, IOFunction<String, FlatVectorsReader> loadReader)
157+
throws IOException {
158+
Map<String, FlatVectorsReader> readers = new HashMap<>();
154159
for (int fieldNumber = meta.readInt(); fieldNumber != -1; fieldNumber = meta.readInt()) {
155160
final FieldInfo info = fieldInfos.fieldInfo(fieldNumber);
156161
if (info == null) {
157162
throw new CorruptIndexException("Invalid field number: " + fieldNumber, meta);
158163
}
159-
fields.put(info.number, readField(meta, info));
164+
165+
FieldEntry fieldEntry = readField(meta, info);
166+
167+
FlatVectorsReader reader = readers.get(fieldEntry.rawVectorFormatName);
168+
if (reader == null) {
169+
reader = loadReader.apply(fieldEntry.rawVectorFormatName);
170+
if (reader == null) {
171+
throw new IllegalStateException("Cannot find flat vector format: " + fieldEntry.rawVectorFormatName);
172+
}
173+
readers.put(fieldEntry.rawVectorFormatName, reader);
174+
}
175+
176+
fields.put(info.number, fieldEntry);
160177
}
178+
return readers;
161179
}
162180

163181
private FieldEntry readField(IndexInput input, FieldInfo info) throws IOException {

server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/next/ESNextDiskBBQVectorsFormat.java

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,12 @@
1414
import org.apache.lucene.codecs.KnnVectorsWriter;
1515
import org.apache.lucene.codecs.hnsw.FlatVectorScorerUtil;
1616
import org.apache.lucene.codecs.hnsw.FlatVectorsFormat;
17-
import org.apache.lucene.codecs.hnsw.FlatVectorsReader;
1817
import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsFormat;
1918
import org.apache.lucene.index.SegmentReadState;
2019
import org.apache.lucene.index.SegmentWriteState;
21-
import org.elasticsearch.common.util.Maps;
2220
import org.elasticsearch.index.codec.vectors.OptimizedScalarQuantizer;
2321

2422
import java.io.IOException;
25-
import java.util.Collections;
2623
import java.util.Map;
2724

2825
/**
@@ -122,12 +119,11 @@ public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException
122119

123120
@Override
124121
public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
125-
Map<String, FlatVectorsReader> readers = Maps.newHashMapWithExpectedSize(supportedFormats.size());
126-
for (var fe : supportedFormats.entrySet()) {
127-
readers.put(fe.getKey(), fe.getValue().fieldsReader(state));
128-
}
129-
130-
return new ESNextDiskBBQVectorsReader(state, Collections.unmodifiableMap(readers));
122+
return new ESNextDiskBBQVectorsReader(state, f -> {
123+
var format = supportedFormats.get(f);
124+
if (format == null) return null;
125+
return format.fieldsReader(state);
126+
});
131127
}
132128

133129
@Override

0 commit comments

Comments
 (0)