Skip to content

Commit 207475c

Browse files
committed
Create a format for bbq_hnsw with direct IO support
1 parent 38c05ea commit 207475c

13 files changed

+435
-173
lines changed

server/src/main/java/module-info.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -464,6 +464,7 @@
464464
org.elasticsearch.index.codec.vectors.es818.ES818HnswBinaryQuantizedVectorsFormat,
465465
org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat,
466466
org.elasticsearch.index.codec.vectors.diskbbq.next.ESNextDiskBBQVectorsFormat,
467+
org.elasticsearch.index.codec.vectors.es93.ES93BinaryQuantizedVectorsFormat,
467468
org.elasticsearch.index.codec.vectors.es93.ES93HnswBinaryQuantizedVectorsFormat;
468469

469470
provides org.apache.lucene.codecs.Codec

server/src/main/java/org/elasticsearch/index/codec/vectors/DirectIOCapableFlatVectorsFormat.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,5 +19,10 @@ protected DirectIOCapableFlatVectorsFormat(String name) {
1919
super(name);
2020
}
2121

22+
@Override
23+
public FlatVectorsReader fieldsReader(SegmentReadState state) throws IOException {
24+
return fieldsReader(state, false);
25+
}
26+
2227
public abstract FlatVectorsReader fieldsReader(SegmentReadState state, boolean useDirectIO) throws IOException;
2328
}

server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93BinaryQuantizedVectorsFormat.java

Lines changed: 34 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
import org.elasticsearch.index.codec.vectors.es818.ES818BinaryQuantizedVectorsWriter;
3333

3434
import java.io.IOException;
35+
import java.util.Map;
3536

3637
/**
3738
* Copied from Lucene, replace with Lucene's implementation sometime after Lucene 10
@@ -86,19 +87,33 @@
8687
* <li>The sparse vector information, if required, mapping vector ordinal to doc ID
8788
* </ul>
8889
*/
89-
public class ES93BinaryQuantizedVectorsFormat extends DirectIOCapableFlatVectorsFormat {
90+
public class ES93BinaryQuantizedVectorsFormat extends ES93GenericFlatVectorsFormat {
9091

9192
public static final String NAME = "ES93BinaryQuantizedVectorsFormat";
9293

93-
private final DirectIOCapableLucene99FlatVectorsFormat rawVectorFormat;
94+
private static final DirectIOCapableFlatVectorsFormat rawVectorFormat = new DirectIOCapableLucene99FlatVectorsFormat(
95+
FlatVectorScorerUtil.getLucene99FlatVectorsScorer()
96+
);
97+
98+
private static final Map<String, DirectIOCapableFlatVectorsFormat> supportedFormats = Map.of(
99+
rawVectorFormat.getName(),
100+
rawVectorFormat
101+
);
94102

95103
private static final ES818BinaryFlatVectorsScorer scorer = new ES818BinaryFlatVectorsScorer(
96104
FlatVectorScorerUtil.getLucene99FlatVectorsScorer()
97105
);
98106

107+
private final boolean useDirectIO;
108+
99109
public ES93BinaryQuantizedVectorsFormat() {
100110
super(NAME);
101-
rawVectorFormat = new DirectIOCapableLucene99FlatVectorsFormat(FlatVectorScorerUtil.getLucene99FlatVectorsScorer());
111+
this.useDirectIO = false;
112+
}
113+
114+
public ES93BinaryQuantizedVectorsFormat(boolean useDirectIO) {
115+
super(NAME);
116+
this.useDirectIO = useDirectIO;
102117
}
103118

104119
@Override
@@ -107,17 +122,27 @@ protected FlatVectorsScorer flatVectorsScorer() {
107122
}
108123

109124
@Override
110-
public FlatVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
111-
return new ES818BinaryQuantizedVectorsWriter(scorer, rawVectorFormat.fieldsWriter(state), state);
125+
protected boolean useDirectIOReads() {
126+
return useDirectIO;
112127
}
113128

114129
@Override
115-
public FlatVectorsReader fieldsReader(SegmentReadState state) throws IOException {
116-
return new ES818BinaryQuantizedVectorsReader(state, rawVectorFormat.fieldsReader(state), scorer);
130+
protected DirectIOCapableFlatVectorsFormat writeFlatVectorsFormat() {
131+
return rawVectorFormat;
117132
}
118133

119134
@Override
120-
public FlatVectorsReader fieldsReader(SegmentReadState state, boolean useDirectIO) throws IOException {
121-
return new ES818BinaryQuantizedVectorsReader(state, rawVectorFormat.fieldsReader(state, useDirectIO), scorer);
135+
protected Map<String, DirectIOCapableFlatVectorsFormat> supportedReadFlatVectorsFormats() {
136+
return supportedFormats;
137+
}
138+
139+
@Override
140+
public FlatVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
141+
return new ES818BinaryQuantizedVectorsWriter(scorer, super.fieldsWriter(state), state);
142+
}
143+
144+
@Override
145+
public FlatVectorsReader fieldsReader(SegmentReadState state) throws IOException {
146+
return new ES818BinaryQuantizedVectorsReader(state, super.fieldsReader(state), scorer);
122147
}
123148
}
server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93GenericFlatVectorsFormat.java

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.index.codec.vectors.es93;
11+
12+
import org.apache.lucene.codecs.hnsw.FlatVectorsReader;
13+
import org.apache.lucene.codecs.hnsw.FlatVectorsWriter;
14+
import org.apache.lucene.index.SegmentReadState;
15+
import org.apache.lucene.index.SegmentWriteState;
16+
import org.elasticsearch.index.codec.vectors.AbstractFlatVectorsFormat;
17+
import org.elasticsearch.index.codec.vectors.DirectIOCapableFlatVectorsFormat;
18+
19+
import java.io.IOException;
20+
import java.util.Map;
21+
22+
public abstract class ES93GenericFlatVectorsFormat extends AbstractFlatVectorsFormat {
23+
24+
static final String VECTOR_FORMAT_INFO_EXTENSION = "vfi";
25+
static final String META_CODEC_NAME = "ES93GenericFlatVectorsFormatMeta";
26+
27+
public static final int VERSION_START = 0;
28+
public static final int VERSION_CURRENT = VERSION_START;
29+
30+
private static final GenericFormatMetaInformation META = new GenericFormatMetaInformation(
31+
VECTOR_FORMAT_INFO_EXTENSION,
32+
META_CODEC_NAME,
33+
VERSION_START,
34+
VERSION_CURRENT
35+
);
36+
37+
public ES93GenericFlatVectorsFormat(String name) {
38+
super(name);
39+
}
40+
41+
protected abstract DirectIOCapableFlatVectorsFormat writeFlatVectorsFormat();
42+
43+
protected abstract boolean useDirectIOReads();
44+
45+
protected abstract Map<String, DirectIOCapableFlatVectorsFormat> supportedReadFlatVectorsFormats();
46+
47+
@Override
48+
public FlatVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
49+
var flatFormat = writeFlatVectorsFormat();
50+
boolean directIO = useDirectIOReads();
51+
return new ES93GenericFlatVectorsWriter(META, flatFormat.getName(), directIO, state, flatFormat.fieldsWriter(state));
52+
}
53+
54+
@Override
55+
public FlatVectorsReader fieldsReader(SegmentReadState state) throws IOException {
56+
var readFormats = supportedReadFlatVectorsFormats();
57+
return new ES93GenericFlatVectorsReader(META, state, (f, dio) -> {
58+
var format = readFormats.get(f);
59+
if (format == null) return null;
60+
return format.fieldsReader(state, dio);
61+
});
62+
}
63+
64+
@Override
65+
public String toString() {
66+
return getName()
67+
+ "(name="
68+
+ getName()
69+
+ ", writeFlatVectorFormat="
70+
+ writeFlatVectorsFormat()
71+
+ ", readFlatVectorsFormats="
72+
+ supportedReadFlatVectorsFormats().values()
73+
+ ")";
74+
}
75+
}

server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93GenericFlatVectorsReader.java

Lines changed: 7 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,6 @@
2525
import java.io.IOException;
2626
import java.util.Map;
2727

28-
import static org.elasticsearch.index.codec.vectors.es93.ES93GenericHnswVectorsFormat.META_CODEC_NAME;
29-
import static org.elasticsearch.index.codec.vectors.es93.ES93GenericHnswVectorsFormat.VECTOR_FORMAT_INFO_EXTENSION;
30-
import static org.elasticsearch.index.codec.vectors.es93.ES93GenericHnswVectorsFormat.VERSION_CURRENT;
31-
import static org.elasticsearch.index.codec.vectors.es93.ES93GenericHnswVectorsFormat.VERSION_START;
32-
3328
class ES93GenericFlatVectorsReader extends FlatVectorsReader {
3429

3530
private final FlatVectorsReader vectorsReader;
@@ -39,24 +34,21 @@ interface GetFormatReader {
3934
FlatVectorsReader getReader(String formatName, boolean useDirectIO) throws IOException;
4035
}
4136

42-
ES93GenericFlatVectorsReader(SegmentReadState state, GetFormatReader getFormatReader) throws IOException {
43-
super(null); // Hacks ahoy!
37+
ES93GenericFlatVectorsReader(GenericFormatMetaInformation metaInfo, SegmentReadState state, GetFormatReader getFormatReader)
38+
throws IOException {
39+
super(null); // we can set this properly with flexible constructor bodies
4440
// read in the meta information
45-
final String metaFileName = IndexFileNames.segmentFileName(
46-
state.segmentInfo.name,
47-
state.segmentSuffix,
48-
VECTOR_FORMAT_INFO_EXTENSION
49-
);
41+
final String metaFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaInfo.extension());
5042
int versionMeta = -1;
5143
FlatVectorsReader reader = null;
5244
try (var metaIn = state.directory.openChecksumInput(metaFileName)) {
5345
Throwable priorE = null;
5446
try {
5547
versionMeta = CodecUtil.checkIndexHeader(
5648
metaIn,
57-
META_CODEC_NAME,
58-
VERSION_START,
59-
VERSION_CURRENT,
49+
metaInfo.codecName(),
50+
metaInfo.versionStart(),
51+
metaInfo.versionCurrent(),
6052
state.segmentInfo.getId(),
6153
state.segmentSuffix
6254
);

server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93GenericFlatVectorsWriter.java

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -23,28 +23,31 @@
2323

2424
import java.io.IOException;
2525

26-
import static org.elasticsearch.index.codec.vectors.es93.ES93GenericHnswVectorsFormat.META_CODEC_NAME;
27-
import static org.elasticsearch.index.codec.vectors.es93.ES93GenericHnswVectorsFormat.VECTOR_FORMAT_INFO_EXTENSION;
28-
import static org.elasticsearch.index.codec.vectors.es93.ES93GenericHnswVectorsFormat.VERSION_CURRENT;
29-
3026
class ES93GenericFlatVectorsWriter extends FlatVectorsWriter {
3127

3228
private final IndexOutput metaOut;
3329
private final FlatVectorsWriter rawVectorWriter;
3430

3531
@SuppressWarnings("this-escape")
36-
ES93GenericFlatVectorsWriter(String knnFormatName, boolean useDirectIOReads, SegmentWriteState state, FlatVectorsWriter rawWriter)
37-
throws IOException {
32+
ES93GenericFlatVectorsWriter(
33+
GenericFormatMetaInformation metaInfo,
34+
String knnFormatName,
35+
boolean useDirectIOReads,
36+
SegmentWriteState state,
37+
FlatVectorsWriter rawWriter
38+
) throws IOException {
3839
super(rawWriter.getFlatVectorScorer());
3940
this.rawVectorWriter = rawWriter;
40-
final String metaFileName = IndexFileNames.segmentFileName(
41-
state.segmentInfo.name,
42-
state.segmentSuffix,
43-
VECTOR_FORMAT_INFO_EXTENSION
44-
);
41+
final String metaFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaInfo.extension());
4542
try {
4643
this.metaOut = state.directory.createOutput(metaFileName, state.context);
47-
CodecUtil.writeIndexHeader(metaOut, META_CODEC_NAME, VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix);
44+
CodecUtil.writeIndexHeader(
45+
metaOut,
46+
metaInfo.codecName(),
47+
metaInfo.versionCurrent(),
48+
state.segmentInfo.getId(),
49+
state.segmentSuffix
50+
);
4851
// write the format name used for this segment
4952
metaOut.writeString(knnFormatName);
5053
metaOut.writeByte(useDirectIOReads ? (byte) 1 : 0);

server/src/main/java/org/elasticsearch/index/codec/vectors/es93/ES93GenericHnswVectorsFormat.java

Lines changed: 0 additions & 114 deletions
This file was deleted.

0 commit comments

Comments
 (0)