Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ private void validateFieldEntry(FieldInfo info, FieldEntry fieldEntry) {
switch (info.getVectorEncoding()) {
case BYTE -> Byte.BYTES;
case FLOAT32 -> Float.BYTES;
case FLOAT16 -> Short.BYTES;
};
long vectorBytes = Math.multiplyExact((long) dimension, byteSize);
long numBytes = Math.multiplyExact(vectorBytes, fieldEntry.size);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ static OffHeapFloatVectorValues load(
switch (fieldEntry.vectorEncoding()) {
case BYTE -> fieldEntry.dimension();
case FLOAT32 -> fieldEntry.dimension() * Float.BYTES;
case FLOAT16 -> fieldEntry.dimension() * Short.BYTES;
};
if (fieldEntry.docsWithFieldOffset() == -1) {
return new DenseOffHeapVectorValues(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ private void validateFieldEntry(FieldInfo info, FieldEntry fieldEntry) {
switch (info.getVectorEncoding()) {
case BYTE -> Byte.BYTES;
case FLOAT32 -> Float.BYTES;
case FLOAT16 -> Short.BYTES;
};
long vectorBytes = Math.multiplyExact((long) dimension, byteSize);
long numBytes = Math.multiplyExact(vectorBytes, fieldEntry.size);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.KnnVectorValues;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.VectorEncoding;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.KnnFloatVectorQuery;
Expand Down Expand Up @@ -186,4 +187,9 @@ public void testQuantizedVectorsWriteAndRead() throws IOException {
}
}
}

/**
 * Chooses a vector encoding with a single coin flip, restricted to two candidates.
 *
 * <p>NOTE(review): presumably FLOAT16 is left out because the format under test does not support
 * it -- confirm against the format's writer.
 *
 * @return {@link VectorEncoding#BYTE} or {@link VectorEncoding#FLOAT32}; no other value is
 *     returned
 */
@Override
protected VectorEncoding randomVectorEncoding() {
  if (random().nextBoolean()) {
    return VectorEncoding.BYTE;
  }
  return VectorEncoding.FLOAT32;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.KnnVectorValues;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.VectorEncoding;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.search.AcceptDocs;
import org.apache.lucene.search.TopDocs;
Expand Down Expand Up @@ -177,4 +178,9 @@ public void testSimpleOffHeapSize() throws IOException {
}
}
}

/**
 * Chooses a vector encoding with a single coin flip, restricted to two candidates.
 *
 * <p>NOTE(review): presumably FLOAT16 is left out because the format under test does not support
 * it -- confirm against the format's writer.
 *
 * @return {@link VectorEncoding#BYTE} or {@link VectorEncoding#FLOAT32}; no other value is
 *     returned
 */
@Override
protected VectorEncoding randomVectorEncoding() {
  if (random().nextBoolean()) {
    return VectorEncoding.BYTE;
  }
  return VectorEncoding.FLOAT32;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ private void writeField(FieldWriter<?> fieldData, int maxDoc) throws IOException
switch (fieldData.fieldInfo.getVectorEncoding()) {
case BYTE -> writeByteVectors(fieldData);
case FLOAT32 -> writeFloat32Vectors(fieldData);
case FLOAT16 -> throw new UnsupportedOperationException("FLOAT16 is not supported");
}
long vectorDataLength = vectorData.getFilePointer() - vectorDataOffset;

Expand Down Expand Up @@ -240,6 +241,7 @@ private void writeSortingField(FieldWriter<?> fieldData, int maxDoc, Sorter.DocM
switch (fieldData.fieldInfo.getVectorEncoding()) {
case BYTE -> writeSortedByteVectors(fieldData, ordMap);
case FLOAT32 -> writeSortedFloat32Vectors(fieldData, ordMap);
case FLOAT16 -> throw new UnsupportedOperationException("FLOAT16 is not supported");
};
long vectorDataLength = vectorData.getFilePointer() - vectorDataOffset;

Expand Down Expand Up @@ -404,6 +406,7 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE
writeVectorData(
tempVectorData,
MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState));
case FLOAT16 -> throw new UnsupportedOperationException("FLOAT16 is not supported");
};
CodecUtil.writeFooter(tempVectorData);
IOUtils.close(tempVectorData);
Expand Down Expand Up @@ -460,6 +463,7 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE
scorerSupplier, M, beamWidth, HnswGraphBuilder.randSeed);
yield hnswGraphBuilder.build(vectorValues.size());
}
case FLOAT16 -> throw new UnsupportedOperationException("FLOAT16 is not supported");
};
writeGraph(graph);
}
Expand Down Expand Up @@ -660,6 +664,7 @@ public float[] copyValue(float[] value) {
return ArrayUtil.copyOfSubArray(value, 0, dim);
}
};
case FLOAT16 -> throw new UnsupportedOperationException("FLOAT16 is not supported");
};
}

Expand All @@ -681,6 +686,7 @@ public float[] copyValue(float[] value) {
defaultFlatVectorScorer.getRandomVectorScorerSupplier(
fieldInfo.getVectorSimilarityFunction(),
FloatVectorValues.fromFloats((List<float[]>) vectors, dim));
case FLOAT16 -> throw new UnsupportedOperationException("FLOAT16 is not supported");
};
hnswGraphBuilder =
HnswGraphBuilder.create(scorerSupplier, M, beamWidth, HnswGraphBuilder.randSeed);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.index.VectorEncoding;
import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase;

public class TestLucene94HnswVectorsFormat extends BaseKnnVectorsFormatTestCase {
Expand All @@ -38,4 +39,9 @@ public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
"Lucene94RWHnswVectorsFormat(name=Lucene94RWHnswVectorsFormat, maxConn=10, beamWidth=20)";
assertEquals(expectedString, customCodec.getKnnVectorsFormatForField("bogus_field").toString());
}

/**
 * Chooses a vector encoding with a single coin flip, restricted to two candidates.
 *
 * <p>NOTE(review): presumably FLOAT16 is left out because the Lucene94 format under test does
 * not support it -- confirm against the format's writer.
 *
 * @return {@link VectorEncoding#BYTE} or {@link VectorEncoding#FLOAT32}; no other value is
 *     returned
 */
@Override
protected VectorEncoding randomVectorEncoding() {
  if (random().nextBoolean()) {
    return VectorEncoding.BYTE;
  }
  return VectorEncoding.FLOAT32;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ private void writeField(FieldWriter<?> fieldData, int maxDoc) throws IOException
switch (fieldData.fieldInfo.getVectorEncoding()) {
case BYTE -> writeByteVectors(fieldData);
case FLOAT32 -> writeFloat32Vectors(fieldData);
case FLOAT16 -> throw new UnsupportedOperationException("FLOAT16 is not supported");
}
long vectorDataLength = vectorData.getFilePointer() - vectorDataOffset;

Expand Down Expand Up @@ -245,6 +246,7 @@ private void writeSortingField(FieldWriter<?> fieldData, int maxDoc, Sorter.DocM
switch (fieldData.fieldInfo.getVectorEncoding()) {
case BYTE -> writeSortedByteVectors(fieldData, ordMap);
case FLOAT32 -> writeSortedFloat32Vectors(fieldData, ordMap);
case FLOAT16 -> throw new UnsupportedOperationException("FLOAT16 is not supported");
};
long vectorDataLength = vectorData.getFilePointer() - vectorDataOffset;

Expand Down Expand Up @@ -431,6 +433,7 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE
writeVectorData(
tempVectorData,
MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState));
case FLOAT16 -> throw new UnsupportedOperationException("FLOAT16 is not supported");
};
CodecUtil.writeFooter(tempVectorData);
IOUtils.close(tempVectorData);
Expand Down Expand Up @@ -475,8 +478,11 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE
vectorDataInput,
byteSize,
defaultFlatVectorScorer,
fieldInfo.getVectorSimilarityFunction()));
fieldInfo.getVectorSimilarityFunction(),
VectorEncoding.FLOAT32));
break;
case FLOAT16:
throw new UnsupportedOperationException("FLOAT16 is not supported");
default:
throw new IllegalArgumentException(
"Unsupported vector encoding: " + fieldInfo.getVectorEncoding());
Expand All @@ -498,6 +504,7 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE
case FLOAT32 ->
mergedVectorValues =
KnnVectorsWriter.MergedVectorValues.mergeFloatVectorValues(fieldInfo, mergeState);
case FLOAT16 -> throw new UnsupportedOperationException("FLOAT16 is not supported");
}
graph =
merger.merge(
Expand Down Expand Up @@ -709,6 +716,7 @@ public float[] copyValue(float[] value) {
return ArrayUtil.copyOfSubArray(value, 0, dim);
}
};
case FLOAT16 -> throw new UnsupportedOperationException("FLOAT16 is not supported");
};
}

Expand All @@ -729,6 +737,7 @@ public float[] copyValue(float[] value) {
defaultFlatVectorScorer.getRandomVectorScorerSupplier(
fieldInfo.getVectorSimilarityFunction(),
FloatVectorValues.fromFloats((List<float[]>) vectors, dim));
case FLOAT16 -> throw new UnsupportedOperationException("FLOAT16 is not supported");
};
hnswGraphBuilder =
HnswGraphBuilder.create(scorerSupplier, M, beamWidth, HnswGraphBuilder.randSeed);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.index.VectorEncoding;
import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase;

public class TestLucene95HnswVectorsFormat extends BaseKnnVectorsFormatTestCase {
Expand All @@ -38,4 +39,9 @@ public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
"Lucene95RWHnswVectorsFormat(name=Lucene95RWHnswVectorsFormat, maxConn=10, beamWidth=20)";
assertEquals(expectedString, customCodec.getKnnVectorsFormatForField("bogus_field").toString());
}

/**
 * Chooses a vector encoding with a single coin flip, restricted to two candidates.
 *
 * <p>NOTE(review): presumably FLOAT16 is left out because the Lucene95 format under test does
 * not support it -- confirm against the format's writer.
 *
 * @return {@link VectorEncoding#BYTE} or {@link VectorEncoding#FLOAT32}; no other value is
 *     returned
 */
@Override
protected VectorEncoding randomVectorEncoding() {
  if (random().nextBoolean()) {
    return VectorEncoding.BYTE;
  }
  return VectorEncoding.FLOAT32;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.VectorEncoding;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.search.AcceptDocs;
import org.apache.lucene.search.IndexSearcher;
Expand Down Expand Up @@ -369,4 +370,9 @@ public void testVectorSimilarityFuncs() {
var expectedValues = Arrays.stream(VectorSimilarityFunction.values()).toList();
assertEquals(Lucene99HnswVectorsReader.SIMILARITY_FUNCTIONS, expectedValues);
}

/**
 * Chooses a vector encoding with a single coin flip, restricted to two candidates.
 *
 * <p>NOTE(review): presumably FLOAT16 is left out because the format under test does not support
 * it -- confirm against the format's writer.
 *
 * @return {@link VectorEncoding#BYTE} or {@link VectorEncoding#FLOAT32}; no other value is
 *     returned
 */
@Override
protected VectorEncoding randomVectorEncoding() {
  if (random().nextBoolean()) {
    return VectorEncoding.BYTE;
  }
  return VectorEncoding.FLOAT32;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.VectorEncoding;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase;
import org.apache.lucene.tests.util.TestUtil;
Expand Down Expand Up @@ -64,4 +65,9 @@ public void testSimpleOffHeapSize() throws IOException {
}
}
}

/**
 * Chooses a vector encoding with a single coin flip, restricted to two candidates.
 *
 * <p>NOTE(review): presumably FLOAT16 is left out because the format under test does not support
 * it -- confirm against the format's writer.
 *
 * @return {@link VectorEncoding#BYTE} or {@link VectorEncoding#FLOAT32}; no other value is
 *     returned
 */
@Override
protected VectorEncoding randomVectorEncoding() {
  if (random().nextBoolean()) {
    return VectorEncoding.BYTE;
  }
  return VectorEncoding.FLOAT32;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
import org.apache.lucene.index.KnnVectorValues;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.VectorEncoding;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
Expand Down Expand Up @@ -407,4 +408,9 @@ public void testRandomWithUpdatesAndGraph() {
public void testSearchWithVisitedLimit() {
// search not supported
}

/**
 * Chooses a vector encoding with a single coin flip, restricted to two candidates.
 *
 * <p>NOTE(review): presumably FLOAT16 is left out because the format under test does not support
 * it -- confirm against the format's writer.
 *
 * @return {@link VectorEncoding#BYTE} or {@link VectorEncoding#FLOAT32}; no other value is
 *     returned
 */
@Override
protected VectorEncoding randomVectorEncoding() {
  if (random().nextBoolean()) {
    return VectorEncoding.BYTE;
  }
  return VectorEncoding.FLOAT32;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
import org.apache.lucene.codecs.hnsw.FlatVectorsScorer;
import org.apache.lucene.codecs.lucene95.OffHeapFloatVectorValues;
import org.apache.lucene.index.KnnVectorValues;
import org.apache.lucene.index.VectorEncoding;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
Expand Down Expand Up @@ -326,7 +327,8 @@ static KnnVectorValues vectorValues(
in.slice("test", 0, in.length()),
byteSize,
new ThrowingFlatVectorScorer(),
sim);
sim,
VectorEncoding.FLOAT32);
}

static final class ThrowingFlatVectorScorer implements FlatVectorsScorer {
Expand Down
3 changes: 2 additions & 1 deletion lucene/core/src/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@
@SuppressWarnings("module") // the test framework is compiled after the core...
module org.apache.lucene.core {
requires java.logging;
requires static jdk.management; // this is optional but explicit declaration is recommended
requires static jdk.management; // this is optional but explicit declaration is recommended
requires java.desktop;

exports org.apache.lucene.analysis.standard;
exports org.apache.lucene.analysis.tokenattributes;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ public byte[] copyValue(byte[] vectorValue) {
}
};
break;
case FLOAT16:
throw new UnsupportedOperationException("FLOAT16 is not supported");
default:
throw new UnsupportedOperationException();
}
Expand Down Expand Up @@ -105,6 +107,8 @@ public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException {
: bufferedByteVectorValues;
writeField(fieldData.fieldInfo, byteVectorValues, maxDoc);
break;
case FLOAT16:
throw new UnsupportedOperationException("FLOAT16 is not supported");
}
}
}
Expand Down Expand Up @@ -207,6 +211,8 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE
MergedVectorValues.mergeByteVectorValues(fieldInfo, mergeState);
writeField(fieldInfo, byteVectorValues, mergeState.segmentInfo.maxDoc());
break;
case FLOAT16:
throw new UnsupportedOperationException("FLOAT16 is not supported");
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE
byteWriter.addValue(doc, mergedBytes.vectorValue(iter.index()));
}
}
case FLOAT32 -> {
case FLOAT32, FLOAT16 -> {
KnnFieldVectorsWriter<float[]> floatWriter =
(KnnFieldVectorsWriter<float[]>) addField(fieldInfo);
FloatVectorValues mergedFloats =
Expand Down Expand Up @@ -215,13 +215,19 @@ public static void mapOldOrdToNewOrd(
public static final class MergedVectorValues {
private MergedVectorValues() {}

private static void validateFieldEncoding(FieldInfo fieldInfo, VectorEncoding expected) {
private static void validateFieldEncoding(FieldInfo fieldInfo, VectorEncoding... expected) {
assert fieldInfo != null && fieldInfo.hasVectorValues();
VectorEncoding fieldEncoding = fieldInfo.getVectorEncoding();
if (fieldEncoding != expected) {
throw new UnsupportedOperationException(
"Cannot merge vectors encoded as [" + fieldEncoding + "] as " + expected);
for (VectorEncoding exp : expected) {
if (fieldEncoding == exp) {
return;
}
}
throw new UnsupportedOperationException(
"Cannot merge vectors encoded as ["
+ fieldEncoding
+ "] as "
+ Arrays.toString(expected));
}

/**
Expand Down Expand Up @@ -267,8 +273,8 @@ private static <V, S> List<S> mergeVectorValues(
/** Returns a merged view over all the segment's {@link FloatVectorValues}. */
public static FloatVectorValues mergeFloatVectorValues(
FieldInfo fieldInfo, MergeState mergeState) throws IOException {
validateFieldEncoding(fieldInfo, VectorEncoding.FLOAT32);
return new MergedFloat32VectorValues(
validateFieldEncoding(fieldInfo, VectorEncoding.FLOAT32, VectorEncoding.FLOAT16);
return new MergedFloatVectorValues(
mergeVectorValues(
mergeState.knnVectorsReaders,
mergeState.docMaps,
Expand All @@ -294,15 +300,15 @@ public static ByteVectorValues mergeByteVectorValues(FieldInfo fieldInfo, MergeS
mergeState);
}

static class MergedFloat32VectorValues extends FloatVectorValues {
static class MergedFloatVectorValues extends FloatVectorValues {
private final List<FloatVectorValuesSub> subs;
private final DocIDMerger<FloatVectorValuesSub> docIdMerger;
private final int size;
private int docId = -1;
private int lastOrd = -1;
FloatVectorValuesSub current;

private MergedFloat32VectorValues(List<FloatVectorValuesSub> subs, MergeState mergeState)
private MergedFloatVectorValues(List<FloatVectorValuesSub> subs, MergeState mergeState)
throws IOException {
this.subs = subs;
docIdMerger = DocIDMerger.of(subs, mergeState.needsIndexSort);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ public RandomVectorScorerSupplier getRandomVectorScorerSupplier(
VectorSimilarityFunction similarityFunction, KnnVectorValues vectorValues)
throws IOException {
switch (vectorValues.getEncoding()) {
case FLOAT32 -> {
case FLOAT32, FLOAT16 -> {
return new FloatScoringSupplier((FloatVectorValues) vectorValues, similarityFunction);
}
case BYTE -> {
Expand Down
Loading