Skip to content

Commit b008aea

Browse files
committed
Add raw_vector_size option (bfloat16 vector formats) to KnnIndexTester
1 parent f105dc8 commit b008aea

File tree

8 files changed

+97
-29
lines changed

8 files changed

+97
-29
lines changed

qa/vector/src/main/java/org/elasticsearch/test/knn/CmdLineArgs.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ record CmdLineArgs(
4949
float filterSelectivity,
5050
long seed,
5151
VectorSimilarityFunction vectorSpace,
52+
int rawVectorSize,
5253
int quantizeBits,
5354
VectorEncoding vectorEncoding,
5455
int dimensions,
@@ -75,6 +76,7 @@ record CmdLineArgs(
7576
static final ParseField FORCE_MERGE_FIELD = new ParseField("force_merge");
7677
static final ParseField VECTOR_SPACE_FIELD = new ParseField("vector_space");
7778
static final ParseField QUANTIZE_BITS_FIELD = new ParseField("quantize_bits");
79+
static final ParseField RAW_VECTOR_SIZE_FIELD = new ParseField("raw_vector_size");
7880
static final ParseField VECTOR_ENCODING_FIELD = new ParseField("vector_encoding");
7981
static final ParseField DIMENSIONS_FIELD = new ParseField("dimensions");
8082
static final ParseField EARLY_TERMINATION_FIELD = new ParseField("early_termination");
@@ -108,6 +110,7 @@ static CmdLineArgs fromXContent(XContentParser parser) throws IOException {
108110
PARSER.declareBoolean(Builder::setReindex, REINDEX_FIELD);
109111
PARSER.declareBoolean(Builder::setForceMerge, FORCE_MERGE_FIELD);
110112
PARSER.declareString(Builder::setVectorSpace, VECTOR_SPACE_FIELD);
113+
PARSER.declareInt(Builder::setRawVectorSize, RAW_VECTOR_SIZE_FIELD);
111114
PARSER.declareInt(Builder::setQuantizeBits, QUANTIZE_BITS_FIELD);
112115
PARSER.declareString(Builder::setVectorEncoding, VECTOR_ENCODING_FIELD);
113116
PARSER.declareInt(Builder::setDimensions, DIMENSIONS_FIELD);
@@ -143,6 +146,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
143146
builder.field(REINDEX_FIELD.getPreferredName(), reindex);
144147
builder.field(FORCE_MERGE_FIELD.getPreferredName(), forceMerge);
145148
builder.field(VECTOR_SPACE_FIELD.getPreferredName(), vectorSpace.name().toLowerCase(Locale.ROOT));
149+
builder.field(RAW_VECTOR_SIZE_FIELD.getPreferredName(), rawVectorSize);
146150
builder.field(QUANTIZE_BITS_FIELD.getPreferredName(), quantizeBits);
147151
builder.field(VECTOR_ENCODING_FIELD.getPreferredName(), vectorEncoding.name().toLowerCase(Locale.ROOT));
148152
builder.field(DIMENSIONS_FIELD.getPreferredName(), dimensions);
@@ -176,6 +180,7 @@ static class Builder {
176180
private boolean reindex = false;
177181
private boolean forceMerge = false;
178182
private VectorSimilarityFunction vectorSpace = VectorSimilarityFunction.EUCLIDEAN;
183+
private int rawVectorSize = 32;
179184
private int quantizeBits = 8;
180185
private VectorEncoding vectorEncoding = VectorEncoding.FLOAT32;
181186
private int dimensions;
@@ -278,6 +283,11 @@ public Builder setVectorSpace(String vectorSpace) {
278283
return this;
279284
}
280285

286+
public Builder setRawVectorSize(int rawVectorSize) {
287+
this.rawVectorSize = rawVectorSize;
288+
return this;
289+
}
290+
281291
public Builder setQuantizeBits(int quantizeBits) {
282292
this.quantizeBits = quantizeBits;
283293
return this;
@@ -343,6 +353,7 @@ public CmdLineArgs build() {
343353
filterSelectivity,
344354
seed,
345355
vectorSpace,
356+
rawVectorSize,
346357
quantizeBits,
347358
vectorEncoding,
348359
dimensions,

qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@
3030
import org.elasticsearch.index.codec.vectors.IVFVectorsFormat;
3131
import org.elasticsearch.index.codec.vectors.es818.ES818BinaryQuantizedVectorsFormat;
3232
import org.elasticsearch.index.codec.vectors.es818.ES818HnswBinaryQuantizedVectorsFormat;
33+
import org.elasticsearch.index.codec.vectors.es92.ES92BinaryQuantizedBFloat16VectorsFormat;
34+
import org.elasticsearch.index.codec.vectors.es92.ES92HnswBinaryQuantizedBFloat16VectorsFormat;
3335
import org.elasticsearch.logging.Level;
3436
import org.elasticsearch.logging.LogManager;
3537
import org.elasticsearch.logging.Logger;
@@ -105,9 +107,17 @@ static Codec createCodec(CmdLineArgs args) {
105107
} else {
106108
if (args.quantizeBits() == 1) {
107109
if (args.indexType() == IndexType.FLAT) {
108-
format = new ES818BinaryQuantizedVectorsFormat();
110+
if (args.rawVectorSize() == 16) {
111+
format = new ES92BinaryQuantizedBFloat16VectorsFormat();
112+
} else {
113+
format = new ES818BinaryQuantizedVectorsFormat();
114+
}
109115
} else {
110-
format = new ES818HnswBinaryQuantizedVectorsFormat(args.hnswM(), args.hnswEfConstruction(), 1, null);
116+
if (args.rawVectorSize() == 16) {
117+
format = new ES92HnswBinaryQuantizedBFloat16VectorsFormat(args.hnswM(), args.hnswEfConstruction(), 1, null);
118+
} else {
119+
format = new ES818HnswBinaryQuantizedVectorsFormat(args.hnswM(), args.hnswEfConstruction(), 1, null);
120+
}
111121
}
112122
} else if (args.quantizeBits() < 32) {
113123
if (args.indexType() == IndexType.FLAT) {

server/src/main/java/module-info.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -487,5 +487,6 @@
487487
exports org.elasticsearch.index.codec.perfield;
488488
exports org.elasticsearch.index.codec.vectors to org.elasticsearch.test.knn;
489489
exports org.elasticsearch.index.codec.vectors.es818 to org.elasticsearch.test.knn;
490+
exports org.elasticsearch.index.codec.vectors.es92 to org.elasticsearch.test.knn;
490491
exports org.elasticsearch.inference.telemetry;
491492
}

server/src/main/java/org/elasticsearch/index/codec/vectors/es92/ES92BFloat16FlatVectorsReader.java

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,26 @@
2424
import org.apache.lucene.codecs.hnsw.FlatVectorsScorer;
2525
import org.apache.lucene.codecs.lucene95.OffHeapByteVectorValues;
2626
import org.apache.lucene.codecs.lucene95.OrdToDocDISIReaderConfiguration;
27-
import org.apache.lucene.index.*;
27+
import org.apache.lucene.index.ByteVectorValues;
28+
import org.apache.lucene.index.CorruptIndexException;
29+
import org.apache.lucene.index.FieldInfo;
30+
import org.apache.lucene.index.FieldInfos;
31+
import org.apache.lucene.index.FloatVectorValues;
32+
import org.apache.lucene.index.IndexFileNames;
33+
import org.apache.lucene.index.SegmentReadState;
34+
import org.apache.lucene.index.VectorEncoding;
35+
import org.apache.lucene.index.VectorSimilarityFunction;
2836
import org.apache.lucene.internal.hppc.IntObjectHashMap;
29-
import org.apache.lucene.store.*;
30-
import org.apache.lucene.util.IOUtils;
37+
import org.apache.lucene.store.ChecksumIndexInput;
38+
import org.apache.lucene.store.DataAccessHint;
39+
import org.apache.lucene.store.FileDataHint;
40+
import org.apache.lucene.store.FileTypeHint;
41+
import org.apache.lucene.store.IOContext;
42+
import org.apache.lucene.store.IndexInput;
43+
import org.apache.lucene.store.ReadAdvice;
3144
import org.apache.lucene.util.RamUsageEstimator;
3245
import org.apache.lucene.util.hnsw.RandomVectorScorer;
46+
import org.elasticsearch.core.IOUtils;
3347

3448
import java.io.IOException;
3549
import java.io.UncheckedIOException;

server/src/main/java/org/elasticsearch/index/codec/vectors/es92/ES92BFloat16FlatVectorsWriter.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,11 @@
4343
import org.apache.lucene.store.IndexInput;
4444
import org.apache.lucene.store.IndexOutput;
4545
import org.apache.lucene.util.ArrayUtil;
46-
import org.apache.lucene.util.IOUtils;
4746
import org.apache.lucene.util.RamUsageEstimator;
4847
import org.apache.lucene.util.hnsw.CloseableRandomVectorScorerSupplier;
4948
import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier;
5049
import org.apache.lucene.util.hnsw.UpdateableRandomVectorScorer;
50+
import org.elasticsearch.core.IOUtils;
5151

5252
import java.io.Closeable;
5353
import java.io.IOException;
@@ -292,7 +292,11 @@ public CloseableRandomVectorScorerSupplier mergeOneFieldToIndex(FieldInfo fieldI
292292
} finally {
293293
if (success == false) {
294294
IOUtils.closeWhileHandlingException(vectorDataInput, tempVectorData);
295-
IOUtils.deleteFilesIgnoringExceptions(segmentWriteState.directory, tempVectorData.getName());
295+
try {
296+
segmentWriteState.directory.deleteFile(tempVectorData.getName());
297+
} catch (Exception e) {
298+
// best-effort delete of the temp vector file; a failure to delete is intentionally ignored
299+
}
296300
}
297301
}
298302
}

server/src/main/java/org/elasticsearch/index/codec/vectors/es92/OffHeapBFloat16VectorValues.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ public float[] vectorValue(int targetOrd) throws IOException {
9393
return value;
9494
}
9595

96-
public static OffHeapBFloat16VectorValues load(
96+
static OffHeapBFloat16VectorValues load(
9797
VectorSimilarityFunction vectorSimilarityFunction,
9898
FlatVectorsScorer flatVectorsScorer,
9999
OrdToDocDISIReaderConfiguration configuration,
@@ -129,9 +129,9 @@ public static OffHeapBFloat16VectorValues load(
129129
* Dense vector values that are stored off-heap. This is the most common case when every doc has a
130130
* vector.
131131
*/
132-
public static class DenseOffHeapVectorValues extends OffHeapBFloat16VectorValues {
132+
static class DenseOffHeapVectorValues extends OffHeapBFloat16VectorValues {
133133

134-
public DenseOffHeapVectorValues(
134+
DenseOffHeapVectorValues(
135135
int dimension,
136136
int size,
137137
IndexInput slice,
@@ -188,7 +188,7 @@ private static class SparseOffHeapVectorValues extends OffHeapBFloat16VectorValu
188188
private final IndexInput dataIn;
189189
private final OrdToDocDISIReaderConfiguration configuration;
190190

191-
public SparseOffHeapVectorValues(
191+
SparseOffHeapVectorValues(
192192
OrdToDocDISIReaderConfiguration configuration,
193193
IndexInput dataIn,
194194
IndexInput slice,
@@ -269,7 +269,7 @@ public DocIdSetIterator iterator() {
269269

270270
private static class EmptyOffHeapVectorValues extends OffHeapBFloat16VectorValues {
271271

272-
public EmptyOffHeapVectorValues(int dimension, FlatVectorsScorer flatVectorsScorer, VectorSimilarityFunction similarityFunction) {
272+
EmptyOffHeapVectorValues(int dimension, FlatVectorsScorer flatVectorsScorer, VectorSimilarityFunction similarityFunction) {
273273
super(dimension, 0, null, 0, flatVectorsScorer, similarityFunction);
274274
}
275275

server/src/test/java/org/elasticsearch/index/codec/vectors/es92/ES92BinaryQuantizedBFloat16VectorsFormatTests.java

Lines changed: 30 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -27,14 +27,35 @@
2727
import org.apache.lucene.document.Document;
2828
import org.apache.lucene.document.Field;
2929
import org.apache.lucene.document.KnnFloatVectorField;
30-
import org.apache.lucene.index.*;
30+
import org.apache.lucene.index.CodecReader;
31+
import org.apache.lucene.index.DirectoryReader;
32+
import org.apache.lucene.index.FloatVectorValues;
33+
import org.apache.lucene.index.IndexReader;
34+
import org.apache.lucene.index.IndexWriter;
35+
import org.apache.lucene.index.IndexWriterConfig;
36+
import org.apache.lucene.index.KnnVectorValues;
37+
import org.apache.lucene.index.LeafReader;
38+
import org.apache.lucene.index.SoftDeletesRetentionMergePolicy;
39+
import org.apache.lucene.index.Term;
40+
import org.apache.lucene.index.VectorSimilarityFunction;
3141
import org.apache.lucene.misc.store.DirectIODirectory;
32-
import org.apache.lucene.search.*;
42+
import org.apache.lucene.search.FieldExistsQuery;
43+
import org.apache.lucene.search.IndexSearcher;
44+
import org.apache.lucene.search.KnnFloatVectorQuery;
45+
import org.apache.lucene.search.MatchAllDocsQuery;
46+
import org.apache.lucene.search.Query;
47+
import org.apache.lucene.search.TermQuery;
48+
import org.apache.lucene.search.TopDocs;
49+
import org.apache.lucene.search.TotalHits;
3350
import org.apache.lucene.search.join.BitSetProducer;
3451
import org.apache.lucene.search.join.CheckJoinIndex;
3552
import org.apache.lucene.search.join.DiversifyingChildrenFloatKnnVectorQuery;
3653
import org.apache.lucene.search.join.QueryBitSetProducer;
37-
import org.apache.lucene.store.*;
54+
import org.apache.lucene.store.Directory;
55+
import org.apache.lucene.store.FSDirectory;
56+
import org.apache.lucene.store.IOContext;
57+
import org.apache.lucene.store.IndexOutput;
58+
import org.apache.lucene.store.MMapDirectory;
3859
import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase;
3960
import org.apache.lucene.tests.store.MockDirectoryWrapper;
4061
import org.apache.lucene.tests.util.TestUtil;
@@ -52,12 +73,15 @@
5273
import org.elasticsearch.index.shard.ShardPath;
5374
import org.elasticsearch.index.store.FsDirectoryFactory;
5475
import org.elasticsearch.test.IndexSettingsModule;
55-
import org.junit.Ignore;
5676

5777
import java.io.IOException;
5878
import java.nio.file.Files;
5979
import java.nio.file.Path;
60-
import java.util.*;
80+
import java.util.ArrayList;
81+
import java.util.Arrays;
82+
import java.util.List;
83+
import java.util.Locale;
84+
import java.util.OptionalLong;
6185

6286
import static java.lang.String.format;
6387
import static org.apache.lucene.index.VectorSimilarityFunction.DOT_PRODUCT;
@@ -187,20 +211,17 @@ public void testSearchWithVisitedLimit() {
187211
// visited limit is not respected, as it is brute force search
188212
}
189213

214+
// The empty overrides below intentionally disable the inherited tests: bfloat16 makes their results slightly out of bounds
190215
@Override
191-
@Ignore // bfloat16 makes the results slightly out of bounds
192216
public void testWriterRamEstimate() throws Exception {}
193217

194218
@Override
195-
@Ignore // bfloat16 makes the results slightly out of bounds
196219
public void testRandom() throws Exception {}
197220

198221
@Override
199-
@Ignore // bfloat16 makes the results slightly out of bounds
200222
public void testVectorValuesReportCorrectDocs() throws Exception {}
201223

202224
@Override
203-
@Ignore // bfloat16 makes the results slightly out of bounds
204225
public void testSparseVectors() throws Exception {}
205226

206227
public void testQuantizedVectorsWriteAndRead() throws IOException {

server/src/test/java/org/elasticsearch/index/codec/vectors/es92/ES92HnswBinaryQuantizedBFloat16VectorsFormatTests.java

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,22 @@
2727
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
2828
import org.apache.lucene.document.Document;
2929
import org.apache.lucene.document.KnnFloatVectorField;
30-
import org.apache.lucene.index.*;
30+
import org.apache.lucene.index.CodecReader;
31+
import org.apache.lucene.index.DirectoryReader;
32+
import org.apache.lucene.index.FloatVectorValues;
33+
import org.apache.lucene.index.IndexReader;
34+
import org.apache.lucene.index.IndexWriter;
35+
import org.apache.lucene.index.IndexWriterConfig;
36+
import org.apache.lucene.index.KnnVectorValues;
37+
import org.apache.lucene.index.LeafReader;
38+
import org.apache.lucene.index.VectorSimilarityFunction;
3139
import org.apache.lucene.misc.store.DirectIODirectory;
3240
import org.apache.lucene.search.TopDocs;
33-
import org.apache.lucene.store.*;
41+
import org.apache.lucene.store.Directory;
42+
import org.apache.lucene.store.FSDirectory;
43+
import org.apache.lucene.store.IOContext;
44+
import org.apache.lucene.store.IndexOutput;
45+
import org.apache.lucene.store.MMapDirectory;
3446
import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase;
3547
import org.apache.lucene.tests.store.MockDirectoryWrapper;
3648
import org.apache.lucene.tests.util.TestUtil;
@@ -45,7 +57,6 @@
4557
import org.elasticsearch.index.shard.ShardPath;
4658
import org.elasticsearch.index.store.FsDirectoryFactory;
4759
import org.elasticsearch.test.IndexSettingsModule;
48-
import org.junit.Ignore;
4960

5061
import java.io.IOException;
5162
import java.nio.file.Files;
@@ -140,24 +151,20 @@ public void testVectorSimilarityFuncs() {
140151
assertEquals(Lucene99HnswVectorsReader.SIMILARITY_FUNCTIONS, expectedValues);
141152
}
142153

154+
// The empty overrides below intentionally disable the inherited tests: bfloat16 makes their results slightly out of bounds
143155
@Override
144-
@Ignore // bfloat16 makes the results slightly out of bounds
145156
public void testWriterRamEstimate() throws Exception {}
146157

147158
@Override
148-
@Ignore // bfloat16 makes the results slightly out of bounds
149159
public void testRandom() throws Exception {}
150160

151161
@Override
152-
@Ignore // bfloat16 makes the results slightly out of bounds
153162
public void testRandomWithUpdatesAndGraph() throws Exception {}
154163

155164
@Override
156-
@Ignore // bfloat16 makes the results slightly out of bounds
157165
public void testVectorValuesReportCorrectDocs() throws Exception {}
158166

159167
@Override
160-
@Ignore // bfloat16 makes the results slightly out of bounds
161168
public void testSparseVectors() throws Exception {}
162169

163170
public void testSimpleOffHeapSize() throws IOException {

0 commit comments

Comments
 (0)