Skip to content

Commit a92fed8

Browse files
committed
Add bwc format test
1 parent 19b1d40 commit a92fed8

File tree

5 files changed

+87
-9
lines changed

5 files changed

+87
-9
lines changed

server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsFormat.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,19 @@ public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException
128128
);
129129
}
130130

131+
// for testing
132+
KnnVectorsWriter version0FieldsWriter(SegmentWriteState state) throws IOException {
133+
return new ES920DiskBBQVectorsWriter(
134+
state,
135+
rawVectorFormat.getName(),
136+
null,
137+
rawVectorFormat.fieldsWriter(state),
138+
vectorPerCluster,
139+
centroidsPerParentCluster,
140+
VERSION_START
141+
);
142+
}
143+
131144
@Override
132145
public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException {
133146
return new ES920DiskBBQVectorsReader(state, (f, dio) -> {

server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsWriter.java

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,27 @@ public ES920DiskBBQVectorsWriter(
5959
int vectorPerCluster,
6060
int centroidsPerParentCluster
6161
) throws IOException {
62-
super(state, rawVectorFormatName, useDirectIOReads, rawVectorDelegate);
62+
this(
63+
state,
64+
rawVectorFormatName,
65+
useDirectIOReads,
66+
rawVectorDelegate,
67+
vectorPerCluster,
68+
centroidsPerParentCluster,
69+
ES920DiskBBQVectorsFormat.VERSION_CURRENT
70+
);
71+
}
72+
73+
/**
 * Package-private constructor that lets the caller choose the format version to write.
 *
 * @param state                     segment write state, forwarded to the parent writer
 * @param rawVectorFormatName       name of the raw vector format, forwarded to the parent writer
 * @param useDirectIOReads          direct-IO flag, forwarded to the parent writer; may be {@code null}
 * @param rawVectorDelegate         writer for the raw vectors, forwarded to the parent writer
 * @param vectorPerCluster          stored on this writer as {@code vectorPerCluster}
 * @param centroidsPerParentCluster stored on this writer as {@code centroidsPerParentCluster}
 * @param writeVersion              format version, forwarded to the parent writer
 * @throws IOException if the parent constructor fails
 */
ES920DiskBBQVectorsWriter(
    SegmentWriteState state,
    String rawVectorFormatName,
    Boolean useDirectIOReads,
    FlatVectorsWriter rawVectorDelegate,
    int vectorPerCluster,
    int centroidsPerParentCluster,
    int writeVersion
) throws IOException {
    super(state, rawVectorFormatName, useDirectIOReads, rawVectorDelegate, writeVersion);
    // Clustering parameters are kept locally; everything else is owned by the parent.
    this.centroidsPerParentCluster = centroidsPerParentCluster;
    this.vectorPerCluster = vectorPerCluster;
}

server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/IVFVectorsWriter.java

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -53,16 +53,22 @@ public abstract class IVFVectorsWriter extends KnnVectorsWriter {
5353
private final IndexOutput ivfCentroids, ivfClusters;
5454
private final IndexOutput ivfMeta;
5555
private final String rawVectorFormatName;
56-
private final boolean useDirectIOReads;
56+
private final Boolean useDirectIOReads;
5757
private final FlatVectorsWriter rawVectorDelegate;
5858

5959
@SuppressWarnings("this-escape")
6060
protected IVFVectorsWriter(
6161
SegmentWriteState state,
6262
String rawVectorFormatName,
63-
boolean useDirectIOReads,
64-
FlatVectorsWriter rawVectorDelegate
63+
Boolean useDirectIOReads,
64+
FlatVectorsWriter rawVectorDelegate,
65+
int writeVersion
6566
) throws IOException {
67+
// if version >= VERSION_DIRECT_IO, useDirectIOReads should have a value
68+
if ((writeVersion >= ES920DiskBBQVectorsFormat.VERSION_DIRECT_IO) == (useDirectIOReads == null)) throw new IllegalArgumentException(
69+
"Write version " + writeVersion + " does not match direct IO value " + useDirectIOReads
70+
);
71+
6672
this.rawVectorFormatName = rawVectorFormatName;
6773
this.useDirectIOReads = useDirectIOReads;
6874
this.rawVectorDelegate = rawVectorDelegate;
@@ -88,23 +94,23 @@ protected IVFVectorsWriter(
8894
CodecUtil.writeIndexHeader(
8995
ivfMeta,
9096
ES920DiskBBQVectorsFormat.NAME,
91-
ES920DiskBBQVectorsFormat.VERSION_CURRENT,
97+
writeVersion,
9298
state.segmentInfo.getId(),
9399
state.segmentSuffix
94100
);
95101
ivfCentroids = state.directory.createOutput(ivfCentroidsFileName, state.context);
96102
CodecUtil.writeIndexHeader(
97103
ivfCentroids,
98104
ES920DiskBBQVectorsFormat.NAME,
99-
ES920DiskBBQVectorsFormat.VERSION_CURRENT,
105+
writeVersion,
100106
state.segmentInfo.getId(),
101107
state.segmentSuffix
102108
);
103109
ivfClusters = state.directory.createOutput(ivfClustersFileName, state.context);
104110
CodecUtil.writeIndexHeader(
105111
ivfClusters,
106112
ES920DiskBBQVectorsFormat.NAME,
107-
ES920DiskBBQVectorsFormat.VERSION_CURRENT,
113+
writeVersion,
108114
state.segmentInfo.getId(),
109115
state.segmentSuffix
110116
);
@@ -503,7 +509,9 @@ private void writeMeta(
503509
) throws IOException {
504510
ivfMeta.writeInt(field.number);
505511
ivfMeta.writeString(rawVectorFormatName);
506-
ivfMeta.writeByte(useDirectIOReads ? (byte) 1 : 0);
512+
if (useDirectIOReads != null) {
513+
ivfMeta.writeByte(useDirectIOReads ? (byte) 1 : 0);
514+
}
507515
ivfMeta.writeInt(field.getVectorEncoding().ordinal());
508516
ivfMeta.writeInt(distFuncToOrd(field.getVectorSimilarityFunction()));
509517
ivfMeta.writeInt(numCentroids);

server/src/main/java/org/elasticsearch/index/codec/vectors/diskbbq/next/ESNextDiskBBQVectorsWriter.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ public ESNextDiskBBQVectorsWriter(
6767
int vectorPerCluster,
6868
int centroidsPerParentCluster
6969
) throws IOException {
70-
super(state, rawVectorFormatName, useDirectIOReads, rawVectorDelegate);
70+
super(state, rawVectorFormatName, useDirectIOReads, rawVectorDelegate, ESNextDiskBBQVectorsFormat.VERSION_CURRENT);
7171
this.vectorPerCluster = vectorPerCluster;
7272
this.centroidsPerParentCluster = centroidsPerParentCluster;
7373
}

server/src/test/java/org/elasticsearch/index/codec/vectors/diskbbq/ES920DiskBBQVectorsFormatTests.java

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,20 @@
1414
import org.apache.lucene.codecs.FilterCodec;
1515
import org.apache.lucene.codecs.KnnVectorsFormat;
1616
import org.apache.lucene.codecs.KnnVectorsReader;
17+
import org.apache.lucene.codecs.KnnVectorsWriter;
1718
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
1819
import org.apache.lucene.document.Document;
1920
import org.apache.lucene.document.KnnFloatVectorField;
2021
import org.apache.lucene.index.CodecReader;
2122
import org.apache.lucene.index.DirectoryReader;
23+
import org.apache.lucene.index.FloatVectorValues;
2224
import org.apache.lucene.index.IndexReader;
2325
import org.apache.lucene.index.IndexWriter;
26+
import org.apache.lucene.index.IndexWriterConfig;
27+
import org.apache.lucene.index.KnnVectorValues;
2428
import org.apache.lucene.index.LeafReader;
2529
import org.apache.lucene.index.LeafReaderContext;
30+
import org.apache.lucene.index.SegmentWriteState;
2631
import org.apache.lucene.index.VectorEncoding;
2732
import org.apache.lucene.index.VectorSimilarityFunction;
2833
import org.apache.lucene.search.AcceptDocs;
@@ -39,6 +44,7 @@
3944
import java.util.concurrent.atomic.AtomicBoolean;
4045

4146
import static java.lang.String.format;
47+
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
4248
import static org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat.MAX_CENTROIDS_PER_PARENT_CLUSTER;
4349
import static org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat.MAX_VECTORS_PER_CLUSTER;
4450
import static org.elasticsearch.index.codec.vectors.diskbbq.ES920DiskBBQVectorsFormat.MIN_CENTROIDS_PER_PARENT_CLUSTER;
@@ -168,6 +174,37 @@ public void testSimpleOffHeapSize() throws IOException {
168174
}
169175
}
170176

177+
public void testDirectIOBackwardsCompatibleRead() throws IOException {
178+
try (Directory dir = newDirectory()) {
179+
IndexWriterConfig bwcConfig = newIndexWriterConfig();
180+
bwcConfig.setCodec(TestUtil.alwaysKnnVectorsFormat(new ES920DiskBBQVectorsFormat() {
181+
@Override
182+
public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
183+
return version0FieldsWriter(state);
184+
}
185+
}));
186+
187+
try (IndexWriter w = new IndexWriter(dir, bwcConfig)) {
188+
// just testing the metadata here, don't need to do anything fancy
189+
float[] vector = randomVector(1024);
190+
Document doc = new Document();
191+
doc.add(new KnnFloatVectorField("f", vector, VectorSimilarityFunction.EUCLIDEAN));
192+
w.addDocument(doc);
193+
w.commit();
194+
195+
try (IndexReader reader = DirectoryReader.open(w)) {
196+
LeafReader r = getOnlyLeafReader(reader);
197+
FloatVectorValues vectorValues = r.getFloatVectorValues("f");
198+
KnnVectorValues.DocIndexIterator iterator = vectorValues.iterator();
199+
assertEquals(0, iterator.nextDoc());
200+
assertArrayEquals(vector, vectorValues.vectorValue(0), 0);
201+
assertEquals(NO_MORE_DOCS, iterator.nextDoc());
202+
}
203+
}
204+
205+
}
206+
}
207+
171208
public void testFewVectorManyTimes() throws IOException {
172209
int numDifferentVectors = random().nextInt(1, 20);
173210
float[][] vectors = new float[numDifferentVectors][];

0 commit comments

Comments
 (0)