Skip to content

Commit b7f8ad3

Browse files
committed
Store the raw format name in field metadata
1 parent bd10e90 commit b7f8ad3

File tree

5 files changed

+51
-8
lines changed

5 files changed

+51
-8
lines changed

server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsFormat.java

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@ public class ES920DiskBBQVectorsFormat extends KnnVectorsFormat {
5353
public static final String CLUSTER_EXTENSION = "clivf";
5454
static final String IVF_META_EXTENSION = "mivf";
5555

56+
static final String RAW_VECTOR_FORMAT = "raw_vector_format";
57+
5658
public static final int VERSION_START = 0;
5759
public static final int VERSION_CURRENT = VERSION_START;
5860

@@ -106,12 +108,18 @@ public ES920DiskBBQVectorsFormat() {
106108

107109
@Override
108110
public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
109-
return new ES920DiskBBQVectorsWriter(state, rawVectorFormat.fieldsWriter(state), vectorPerCluster, centroidsPerParentCluster);
111+
return new ES920DiskBBQVectorsWriter(
112+
rawVectorFormat.getName(),
113+
state,
114+
rawVectorFormat.fieldsWriter(state),
115+
vectorPerCluster,
116+
centroidsPerParentCluster
117+
);
110118
}
111119

112120
@Override
113121
public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
114-
return new ES920DiskBBQVectorsReader(state, rawVectorFormat.fieldsReader(state));
122+
return new ES920DiskBBQVectorsReader(state);
115123
}
116124

117125
@Override

server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsReader.java

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
package org.elasticsearch.index.codec.vectors.diskbbq;
1111

12+
import org.apache.lucene.codecs.KnnVectorsFormat;
1213
import org.apache.lucene.codecs.hnsw.FlatVectorsReader;
1314
import org.apache.lucene.index.FieldInfo;
1415
import org.apache.lucene.index.SegmentReadState;
@@ -25,13 +26,15 @@
2526
import org.elasticsearch.simdvec.ESVectorUtil;
2627

2728
import java.io.IOException;
29+
import java.util.HashMap;
2830
import java.util.Map;
2931

3032
import static org.apache.lucene.codecs.lucene102.Lucene102BinaryQuantizedVectorsFormat.QUERY_BITS;
3133
import static org.apache.lucene.index.VectorSimilarityFunction.COSINE;
3234
import static org.elasticsearch.index.codec.vectors.BQSpaceUtils.transposeHalfByte;
3335
import static org.elasticsearch.index.codec.vectors.BQVectorUtils.discretize;
3436
import static org.elasticsearch.index.codec.vectors.OptimizedScalarQuantizer.DEFAULT_LAMBDA;
37+
import static org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat.RAW_VECTOR_FORMAT;
3538
import static org.elasticsearch.simdvec.ES91OSQVectorsScorer.BULK_SIZE;
3639

3740
/**
@@ -40,8 +43,27 @@
4043
*/
4144
public class ES920DiskBBQVectorsReader extends IVFVectorsReader implements OffHeapStats {
4245

43-
public ES920DiskBBQVectorsReader(SegmentReadState state, FlatVectorsReader rawVectorsReader) throws IOException {
44-
super(state, rawVectorsReader);
46+
/**
 * Creates a reader for the given segment. The raw vector readers handed to the
 * superclass are no longer supplied by the caller; they are built by
 * {@code loadReaders(state)}, which selects a format per vector field from the
 * field's {@code RAW_VECTOR_FORMAT} attribute.
 *
 * @param state the segment read state, passed both to {@code loadReaders} and to the superclass
 * @throws IOException declared because {@code loadReaders} and the superclass constructor may throw it
 */
public ES920DiskBBQVectorsReader(SegmentReadState state) throws IOException {
47+
super(state, loadReaders(state));
48+
}
49+
50+
private static Map<String, FlatVectorsReader> loadReaders(SegmentReadState state) throws IOException {
51+
Map<String, FlatVectorsReader> readers = new HashMap<>();
52+
for (FieldInfo fi : state.fieldInfos) {
53+
if (fi.hasVectorValues()) {
54+
String formatName = fi.getAttribute(RAW_VECTOR_FORMAT);
55+
if (formatName == null) {
56+
throw new IllegalArgumentException("Field does not have " + RAW_VECTOR_FORMAT);
57+
}
58+
readers.put(
59+
fi.name,
60+
(FlatVectorsReader) KnnVectorsFormat.forName(formatName)
61+
.fieldsReader(state)
62+
);
63+
}
64+
}
65+
66+
return Map.copyOf(readers);
4567
}
4668

4769
CentroidIterator getPostingListPrefetchIterator(CentroidIterator centroidIterator, IndexInput postingListSlice) throws IOException {

server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsWriter.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
package org.elasticsearch.index.codec.vectors.diskbbq;
1111

12+
import org.apache.lucene.codecs.KnnFieldVectorsWriter;
1213
import org.apache.lucene.codecs.hnsw.FlatVectorsWriter;
1314
import org.apache.lucene.index.FieldInfo;
1415
import org.apache.lucene.index.FloatVectorValues;
@@ -39,6 +40,8 @@
3940
import java.util.AbstractList;
4041
import java.util.Arrays;
4142

43+
import static org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat.RAW_VECTOR_FORMAT;
44+
4245
/**
4346
* Default implementation of {@link IVFVectorsWriter}. It uses {@link HierarchicalKMeans} algorithm to
4447
* partition the vector space, and then stores the centroids and posting list in a sequential
@@ -47,20 +50,29 @@
4750
public class ES920DiskBBQVectorsWriter extends IVFVectorsWriter {
4851
private static final Logger logger = LogManager.getLogger(ES920DiskBBQVectorsWriter.class);
4952

53+
private final String rawVectorFormatName;
5054
private final int vectorPerCluster;
5155
private final int centroidsPerParentCluster;
5256

5357
/**
 * Creates the writer.
 *
 * @param rawVectorFormatName       name of the raw vector format; kept so that {@code addField} can
 *                                  store it on each field under the {@code RAW_VECTOR_FORMAT} attribute
 * @param state                     segment write state, forwarded to the superclass
 * @param rawVectorDelegate         writer for the raw vectors, forwarded to the superclass
 * @param vectorPerCluster          stored as given (presumably the target number of vectors per cluster)
 * @param centroidsPerParentCluster stored as given (presumably the number of centroids per parent cluster)
 * @throws IOException declared because the superclass constructor may throw it
 */
public ES920DiskBBQVectorsWriter(
    String rawVectorFormatName,
    SegmentWriteState state,
    FlatVectorsWriter rawVectorDelegate,
    int vectorPerCluster,
    int centroidsPerParentCluster
) throws IOException {
    super(state, rawVectorDelegate);
    // Clustering parameters first, then the format name recorded per field.
    this.centroidsPerParentCluster = centroidsPerParentCluster;
    this.vectorPerCluster = vectorPerCluster;
    this.rawVectorFormatName = rawVectorFormatName;
}
6369

70+
@Override
71+
public KnnFieldVectorsWriter<?> addField(FieldInfo fieldInfo) throws IOException {
72+
fieldInfo.putAttribute(RAW_VECTOR_FORMAT, rawVectorFormatName);
73+
return super.addField(fieldInfo);
74+
}
75+
6476
@Override
6577
CentroidOffsetAndLength buildAndWritePostingsLists(
6678
FieldInfo fieldInfo,

server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/IVFVectorsReader.java

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
import org.elasticsearch.search.vectors.IVFKnnSearchStrategy;
3434

3535
import java.io.IOException;
36+
import java.util.Map;
3637

3738
import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.SIMILARITY_FUNCTIONS;
3839
import static org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat.DYNAMIC_VISIT_RATIO;
@@ -46,14 +47,14 @@ public abstract class IVFVectorsReader extends KnnVectorsReader {
4647
private final SegmentReadState state;
4748
private final FieldInfos fieldInfos;
4849
protected final IntObjectHashMap<FieldEntry> fields;
49-
private final FlatVectorsReader rawVectorsReader;
50+
private final Map<String, FlatVectorsReader> rawVectorReaders;
5051

5152
@SuppressWarnings("this-escape")
52-
protected IVFVectorsReader(SegmentReadState state, FlatVectorsReader rawVectorsReader) throws IOException {
53+
protected IVFVectorsReader(SegmentReadState state, Map<String, FlatVectorsReader> rawVectorReaders) throws IOException {
5354
this.state = state;
5455
this.fieldInfos = state.fieldInfos;
55-
this.rawVectorsReader = rawVectorsReader;
5656
this.fields = new IntObjectHashMap<>();
57+
this.rawVectorReaders = rawVectorReaders;
5758
String meta = IndexFileNames.segmentFileName(
5859
state.segmentInfo.name,
5960
state.segmentSuffix,

server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/IVFVectorsWriter.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ protected IVFVectorsWriter(SegmentWriteState state, FlatVectorsWriter rawVectorD
107107
}
108108

109109
@Override
110-
public final KnnFieldVectorsWriter<?> addField(FieldInfo fieldInfo) throws IOException {
110+
public KnnFieldVectorsWriter<?> addField(FieldInfo fieldInfo) throws IOException {
111111
if (fieldInfo.getVectorSimilarityFunction() == VectorSimilarityFunction.COSINE) {
112112
throw new IllegalArgumentException("IVF does not support cosine similarity");
113113
}

0 commit comments

Comments
 (0)