From 129bc37ff5615b7488cdd3c5e28e401ec1bc205f Mon Sep 17 00:00:00 2001 From: Simon Cooper Date: Mon, 11 Aug 2025 16:45:42 +0100 Subject: [PATCH 1/4] Pull out common subclasses for our custom flat & hnsw vector formats --- .../vectors/AbstractFlatVectorsFormat.java | 44 +++++++ .../vectors/AbstractHnswVectorsFormat.java | 111 ++++++++++++++++++ ...ES814HnswScalarQuantizedVectorsFormat.java | 46 ++------ .../vectors/ES815HnswBitVectorsFormat.java | 47 ++------ .../{es818 => }/MergeReaderWrapper.java | 8 +- .../es816/ES816BinaryFlatVectorsScorer.java | 4 +- .../ES816BinaryQuantizedVectorsFormat.java | 19 ++- ...ES816HnswBinaryQuantizedVectorsFormat.java | 58 ++------- .../DirectIOLucene99FlatVectorsFormat.java | 17 +-- .../ES818BinaryQuantizedVectorsFormat.java | 28 ++--- ...ES818HnswBinaryQuantizedVectorsFormat.java | 67 ++--------- ...S818BinaryQuantizedVectorsFormatTests.java | 1 + 12 files changed, 224 insertions(+), 226 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/index/codec/vectors/AbstractFlatVectorsFormat.java create mode 100644 server/src/main/java/org/elasticsearch/index/codec/vectors/AbstractHnswVectorsFormat.java rename server/src/main/java/org/elasticsearch/index/codec/vectors/{es818 => }/MergeReaderWrapper.java (92%) diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/AbstractFlatVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/AbstractFlatVectorsFormat.java new file mode 100644 index 0000000000000..d438361e22e1c --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/AbstractFlatVectorsFormat.java @@ -0,0 +1,44 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.codec.vectors; + +import org.apache.lucene.codecs.hnsw.FlatVectorsFormat; +import org.apache.lucene.codecs.hnsw.FlatVectorsScorer; +import org.elasticsearch.core.SuppressForbidden; + +import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT; + +public abstract class AbstractFlatVectorsFormat extends FlatVectorsFormat { + + protected static final boolean USE_DIRECT_IO = getUseDirectIO(); + + @SuppressForbidden( + reason = "TODO Deprecate any lenient usage of Boolean#parseBoolean https://github.com/elastic/elasticsearch/issues/128993" + ) + private static boolean getUseDirectIO() { + return Boolean.parseBoolean(System.getProperty("vector.rescoring.directio", "false")); + } + + protected AbstractFlatVectorsFormat(String name) { + super(name); + } + + protected abstract FlatVectorsScorer flatVectorsScorer(); + + @Override + public int getMaxDimensions(String fieldName) { + return MAX_DIMS_COUNT; + } + + @Override + public String toString() { + return getName() + "(name=" + getName() + ", flatVectorScorer=" + flatVectorsScorer() + ")"; + } +} diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/AbstractHnswVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/AbstractHnswVectorsFormat.java new file mode 100644 index 0000000000000..768f2495d2c41 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/AbstractHnswVectorsFormat.java @@ -0,0 +1,111 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.codec.vectors; + +import org.apache.lucene.codecs.KnnVectorsFormat; +import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat; +import org.apache.lucene.search.TaskExecutor; +import org.apache.lucene.util.hnsw.HnswGraph; + +import java.util.concurrent.ExecutorService; + +import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH; +import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN; +import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_NUM_MERGE_WORKER; +import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.MAXIMUM_BEAM_WIDTH; +import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.MAXIMUM_MAX_CONN; +import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT; + +public abstract class AbstractHnswVectorsFormat extends KnnVectorsFormat { + + /** + * Controls how many of the nearest neighbor candidates are connected to the new node. Defaults to + * {@link Lucene99HnswVectorsFormat#DEFAULT_MAX_CONN}. See {@link HnswGraph} for more details. + */ + protected final int maxConn; + + /** + * The number of candidate neighbors to track while searching the graph for each newly inserted + * node. Defaults to {@link Lucene99HnswVectorsFormat#DEFAULT_BEAM_WIDTH}. See {@link HnswGraph} + * for details. + */ + protected final int beamWidth; + + protected final int numMergeWorkers; + protected final TaskExecutor mergeExec; + + /** Constructs a format using default graph construction parameters */ + protected AbstractHnswVectorsFormat(String name) { + this(name, DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH, DEFAULT_NUM_MERGE_WORKER, null); + } + + /** + * Constructs a format using the given graph construction parameters. + * + * @param maxConn the maximum number of connections to a node in the HNSW graph + * @param beamWidth the size of the queue maintained during graph construction. + */ + protected AbstractHnswVectorsFormat(String name, int maxConn, int beamWidth) { + this(name, maxConn, beamWidth, DEFAULT_NUM_MERGE_WORKER, null); + } + + /** + * Constructs a format using the given graph construction parameters and scalar quantization. + * + * @param maxConn the maximum number of connections to a node in the HNSW graph + * @param beamWidth the size of the queue maintained during graph construction. + * @param numMergeWorkers number of workers (threads) that will be used when doing merge. If + * larger than 1, a non-null {@link ExecutorService} must be passed as mergeExec + * @param mergeExec the {@link ExecutorService} that will be used by ALL vector writers that are + * generated by this format to do the merge + */ + protected AbstractHnswVectorsFormat(String name, int maxConn, int beamWidth, int numMergeWorkers, ExecutorService mergeExec) { + super(name); + if (maxConn <= 0 || maxConn > MAXIMUM_MAX_CONN) { + throw new IllegalArgumentException( + "maxConn must be positive and less than or equal to " + MAXIMUM_MAX_CONN + "; maxConn=" + maxConn + ); + } + if (beamWidth <= 0 || beamWidth > MAXIMUM_BEAM_WIDTH) { + throw new IllegalArgumentException( + "beamWidth must be positive and less than or equal to " + MAXIMUM_BEAM_WIDTH + "; beamWidth=" + beamWidth + ); + } + this.maxConn = maxConn; + this.beamWidth = beamWidth; + if (numMergeWorkers == 1 && mergeExec != null) { + throw new IllegalArgumentException("No executor service is needed as we'll use single thread to merge"); + } + this.numMergeWorkers = numMergeWorkers; + if (mergeExec != null) { + this.mergeExec = new TaskExecutor(mergeExec); + } else { + this.mergeExec = null; + } + } + + protected abstract KnnVectorsFormat flatVectorsFormat(); + + @Override + public int getMaxDimensions(String fieldName) { + return MAX_DIMS_COUNT; + } + + @Override + public String toString() { + return getName() + "(name=" + getName() + ", maxConn=" + + maxConn + + ", beamWidth=" + + beamWidth + + ", flatVectorFormat=" + + flatVectorsFormat() + + ")"; + } +} diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814HnswScalarQuantizedVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814HnswScalarQuantizedVectorsFormat.java index 6bb32d8e1ef52..1bbc34f52665b 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814HnswScalarQuantizedVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814HnswScalarQuantizedVectorsFormat.java @@ -24,17 +24,10 @@ import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN; import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT; -public final class ES814HnswScalarQuantizedVectorsFormat extends KnnVectorsFormat { +public final class ES814HnswScalarQuantizedVectorsFormat extends AbstractHnswVectorsFormat { static final String NAME = "ES814HnswScalarQuantizedVectorsFormat"; - static final int MAXIMUM_MAX_CONN = 512; - static final int MAXIMUM_BEAM_WIDTH = 3200; - - private final int maxConn; - - private final int beamWidth; - /** The format for storing, reading, merging vectors on disk */ private final FlatVectorsFormat flatVectorsFormat; @@ -43,45 +36,22 @@ public ES814HnswScalarQuantizedVectorsFormat() { } public ES814HnswScalarQuantizedVectorsFormat(int maxConn, int beamWidth, Float confidenceInterval, int bits, boolean compress) { - super(NAME); - if (maxConn <= 0 || maxConn > MAXIMUM_MAX_CONN) { - throw new IllegalArgumentException( - "maxConn must be positive and less than or equal to " + MAXIMUM_MAX_CONN + "; maxConn=" + maxConn - ); - } - if (beamWidth <= 0 || beamWidth > MAXIMUM_BEAM_WIDTH) { - throw new IllegalArgumentException( - "beamWidth must be positive and less than or equal to " + MAXIMUM_BEAM_WIDTH + "; beamWidth=" + beamWidth - ); - } - this.maxConn = maxConn; - this.beamWidth = beamWidth; + super(NAME, maxConn, beamWidth); this.flatVectorsFormat = new ES814ScalarQuantizedVectorsFormat(confidenceInterval, bits, compress); } @Override - public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException { - return new Lucene99HnswVectorsWriter(state, maxConn, beamWidth, flatVectorsFormat.fieldsWriter(state), 1, null); - } - - @Override - public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException { - return new Lucene99HnswVectorsReader(state, flatVectorsFormat.fieldsReader(state)); + protected KnnVectorsFormat flatVectorsFormat() { + return flatVectorsFormat; } @Override - public int getMaxDimensions(String fieldName) { - return MAX_DIMS_COUNT; + public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException { + return new Lucene99HnswVectorsWriter(state, maxConn, beamWidth, flatVectorsFormat.fieldsWriter(state), numMergeWorkers, mergeExec); } @Override - public String toString() { - return "ES814HnswScalarQuantizedVectorsFormat(name=ES814HnswScalarQuantizedVectorsFormat, maxConn=" - + maxConn - + ", beamWidth=" - + beamWidth - + ", flatVectorFormat=" - + flatVectorsFormat - + ")"; + public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException { + return new Lucene99HnswVectorsReader(state, flatVectorsFormat.fieldsReader(state)); } } diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES815HnswBitVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES815HnswBitVectorsFormat.java index 186dfcbeb5d52..7f313496db123 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES815HnswBitVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES815HnswBitVectorsFormat.java @@ -22,61 +22,32 @@ import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT; -public class ES815HnswBitVectorsFormat extends KnnVectorsFormat { +public class ES815HnswBitVectorsFormat extends AbstractHnswVectorsFormat { static final String NAME = "ES815HnswBitVectorsFormat"; - static final int MAXIMUM_MAX_CONN = 512; - static final int MAXIMUM_BEAM_WIDTH = 3200; - - private final int maxConn; - private final int beamWidth; - private static final FlatVectorsFormat flatVectorsFormat = new ES815BitFlatVectorsFormat(); public ES815HnswBitVectorsFormat() { - this(16, 100); - } - - public ES815HnswBitVectorsFormat(int maxConn, int beamWidth) { super(NAME); - if (maxConn <= 0 || maxConn > MAXIMUM_MAX_CONN) { - throw new IllegalArgumentException( - "maxConn must be positive and less than or equal to " + MAXIMUM_MAX_CONN + "; maxConn=" + maxConn - ); - } - if (beamWidth <= 0 || beamWidth > MAXIMUM_BEAM_WIDTH) { - throw new IllegalArgumentException( - "beamWidth must be positive and less than or equal to " + MAXIMUM_BEAM_WIDTH + "; beamWidth=" + beamWidth - ); - } - this.maxConn = maxConn; - this.beamWidth = beamWidth; } - @Override - public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException { - return new Lucene99HnswVectorsWriter(state, maxConn, beamWidth, flatVectorsFormat.fieldsWriter(state), 1, null); + public ES815HnswBitVectorsFormat(int maxConn, int beamWidth) { + super(NAME, maxConn, beamWidth); } @Override - public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException { - return new Lucene99HnswVectorsReader(state, flatVectorsFormat.fieldsReader(state)); + protected KnnVectorsFormat flatVectorsFormat() { + return flatVectorsFormat; } @Override - public String toString() { - return "ES815HnswBitVectorsFormat(name=ES815HnswBitVectorsFormat, maxConn=" - + maxConn - + ", beamWidth=" - + beamWidth - + ", flatVectorFormat=" - + flatVectorsFormat - + ")"; + public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException { + return new Lucene99HnswVectorsWriter(state, maxConn, beamWidth, flatVectorsFormat.fieldsWriter(state), numMergeWorkers, mergeExec); } @Override - public int getMaxDimensions(String fieldName) { - return MAX_DIMS_COUNT; + public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException { + return new Lucene99HnswVectorsReader(state, flatVectorsFormat.fieldsReader(state)); } } diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/MergeReaderWrapper.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/MergeReaderWrapper.java similarity index 92% rename from server/src/main/java/org/elasticsearch/index/codec/vectors/es818/MergeReaderWrapper.java rename to server/src/main/java/org/elasticsearch/index/codec/vectors/MergeReaderWrapper.java index e74b0aad12723..b8376f02e7f47 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/MergeReaderWrapper.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/MergeReaderWrapper.java @@ -7,7 +7,7 @@ * License v3.0 only", or the "Server Side Public License, v 1". */ -package org.elasticsearch.index.codec.vectors.es818; +package org.elasticsearch.index.codec.vectors; import org.apache.lucene.codecs.hnsw.FlatVectorsReader; import org.apache.lucene.index.ByteVectorValues; @@ -25,19 +25,19 @@ import java.util.Collection; import java.util.Map; -class MergeReaderWrapper extends FlatVectorsReader implements OffHeapStats { +public class MergeReaderWrapper extends FlatVectorsReader implements OffHeapStats { private final FlatVectorsReader mainReader; private final FlatVectorsReader mergeReader; - protected MergeReaderWrapper(FlatVectorsReader mainReader, FlatVectorsReader mergeReader) { + public MergeReaderWrapper(FlatVectorsReader mainReader, FlatVectorsReader mergeReader) { super(mainReader.getFlatVectorScorer()); this.mainReader = mainReader; this.mergeReader = mergeReader; } // For testing - FlatVectorsReader getMainReader() { + public FlatVectorsReader getMainReader() { return mainReader; } diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryFlatVectorsScorer.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryFlatVectorsScorer.java index daea6358ff3d5..41d86daa08db5 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryFlatVectorsScorer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryFlatVectorsScorer.java @@ -37,10 +37,10 @@ import static org.apache.lucene.index.VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT; /** Vector scorer over binarized vector values */ -class ES816BinaryFlatVectorsScorer implements FlatVectorsScorer { +public class ES816BinaryFlatVectorsScorer implements FlatVectorsScorer { private final FlatVectorsScorer nonQuantizedDelegate; - ES816BinaryFlatVectorsScorer(FlatVectorsScorer nonQuantizedDelegate) { + public ES816BinaryFlatVectorsScorer(FlatVectorsScorer nonQuantizedDelegate) { this.nonQuantizedDelegate = nonQuantizedDelegate; } diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsFormat.java index 61b6edc474d1f..64116658b0db6 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsFormat.java @@ -22,10 +22,12 @@ import org.apache.lucene.codecs.hnsw.FlatVectorScorerUtil; import org.apache.lucene.codecs.hnsw.FlatVectorsFormat; import org.apache.lucene.codecs.hnsw.FlatVectorsReader; +import org.apache.lucene.codecs.hnsw.FlatVectorsScorer; import org.apache.lucene.codecs.hnsw.FlatVectorsWriter; import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsFormat; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; +import org.elasticsearch.index.codec.vectors.AbstractFlatVectorsFormat; import java.io.IOException; @@ -34,7 +36,7 @@ /** * Copied from Lucene, replace with Lucene's implementation sometime after Lucene 10 */ -public class ES816BinaryQuantizedVectorsFormat extends FlatVectorsFormat { +public class ES816BinaryQuantizedVectorsFormat extends AbstractFlatVectorsFormat { public static final String BINARIZED_VECTOR_COMPONENT = "BVEC"; public static final String NAME = "ES816BinaryQuantizedVectorsFormat"; @@ -60,6 +62,11 @@ public ES816BinaryQuantizedVectorsFormat() { super(NAME); } + @Override + protected FlatVectorsScorer flatVectorsScorer() { + return scorer; + } + @Override public FlatVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException { throw new UnsupportedOperationException(); @@ -69,14 +76,4 @@ public FlatVectorsWriter fieldsWriter(SegmentWriteState state) throws IOExceptio public FlatVectorsReader fieldsReader(SegmentReadState state) throws IOException { return new ES816BinaryQuantizedVectorsReader(state, rawVectorFormat.fieldsReader(state), scorer); } - - @Override - public int getMaxDimensions(String fieldName) { - return MAX_DIMS_COUNT; - } - - @Override - public String toString() { - return "ES816BinaryQuantizedVectorsFormat(name=" + NAME + ", flatVectorScorer=" + scorer + ")"; - } } diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816HnswBinaryQuantizedVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816HnswBinaryQuantizedVectorsFormat.java index 1dbb4e432b188..0efabff21534c 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816HnswBinaryQuantizedVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816HnswBinaryQuantizedVectorsFormat.java @@ -28,6 +28,7 @@ import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.util.hnsw.HnswGraph; +import org.elasticsearch.index.codec.vectors.AbstractHnswVectorsFormat; import java.io.IOException; import java.util.concurrent.ExecutorService; @@ -42,29 +43,16 @@ /** * Copied from Lucene, replace with Lucene's implementation sometime after Lucene 10 */ -public class ES816HnswBinaryQuantizedVectorsFormat extends KnnVectorsFormat { +public class ES816HnswBinaryQuantizedVectorsFormat extends AbstractHnswVectorsFormat { public static final String NAME = "ES816HnswBinaryQuantizedVectorsFormat"; - /** - * Controls how many of the nearest neighbor candidates are connected to the new node. Defaults to - * {@link Lucene99HnswVectorsFormat#DEFAULT_MAX_CONN}. See {@link HnswGraph} for more details. - */ - protected final int maxConn; - - /** - * The number of candidate neighbors to track while searching the graph for each newly inserted - * node. Defaults to {@link Lucene99HnswVectorsFormat#DEFAULT_BEAM_WIDTH}. See {@link HnswGraph} - * for details. - */ - protected final int beamWidth; - /** The format for storing, reading, merging vectors on disk */ private static final FlatVectorsFormat flatVectorsFormat = new ES816BinaryQuantizedVectorsFormat(); /** Constructs a format using default graph construction parameters */ public ES816HnswBinaryQuantizedVectorsFormat() { - this(DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH, DEFAULT_NUM_MERGE_WORKER, null); + super(NAME); } /** @@ -74,7 +62,7 @@ public ES816HnswBinaryQuantizedVectorsFormat() { * @param beamWidth the size of the queue maintained during graph construction. */ public ES816HnswBinaryQuantizedVectorsFormat(int maxConn, int beamWidth) { - this(maxConn, beamWidth, DEFAULT_NUM_MERGE_WORKER, null); + super(NAME, maxConn, beamWidth); } /** @@ -88,22 +76,12 @@ public ES816HnswBinaryQuantizedVectorsFormat(int maxConn, int beamWidth) { * generated by this format to do the merge */ public ES816HnswBinaryQuantizedVectorsFormat(int maxConn, int beamWidth, int numMergeWorkers, ExecutorService mergeExec) { - super(NAME); - if (maxConn <= 0 || maxConn > MAXIMUM_MAX_CONN) { - throw new IllegalArgumentException( - "maxConn must be positive and less than or equal to " + MAXIMUM_MAX_CONN + "; maxConn=" + maxConn - ); - } - if (beamWidth <= 0 || beamWidth > MAXIMUM_BEAM_WIDTH) { - throw new IllegalArgumentException( - "beamWidth must be positive and less than or equal to " + MAXIMUM_BEAM_WIDTH + "; beamWidth=" + beamWidth - ); - } - this.maxConn = maxConn; - this.beamWidth = beamWidth; - if (numMergeWorkers == 1 && mergeExec != null) { - throw new IllegalArgumentException("No executor service is needed as we'll use single thread to merge"); - } + super(NAME, maxConn, beamWidth, numMergeWorkers, mergeExec); + } + + @Override + protected KnnVectorsFormat flatVectorsFormat() { + return flatVectorsFormat; } @Override @@ -115,20 +93,4 @@ public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException { return new Lucene99HnswVectorsReader(state, flatVectorsFormat.fieldsReader(state)); } - - @Override - public int getMaxDimensions(String fieldName) { - return MAX_DIMS_COUNT; - } - - @Override - public String toString() { - return "ES816HnswBinaryQuantizedVectorsFormat(name=ES816HnswBinaryQuantizedVectorsFormat, maxConn=" - + maxConn - + ", beamWidth=" - + beamWidth - + ", flatVectorFormat=" - + flatVectorsFormat - + ")"; - } } diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/DirectIOLucene99FlatVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/DirectIOLucene99FlatVectorsFormat.java index a03cd3fc9fe8e..08ffbf02041af 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/DirectIOLucene99FlatVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/DirectIOLucene99FlatVectorsFormat.java @@ -29,6 +29,8 @@ import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.store.FilterDirectory; import org.apache.lucene.store.IOContext; +import org.elasticsearch.index.codec.vectors.AbstractFlatVectorsFormat; +import org.elasticsearch.index.codec.vectors.MergeReaderWrapper; import org.elasticsearch.index.store.FsDirectoryFactory; import java.io.IOException; @@ -39,7 +41,7 @@ * This is copied to change the implementation of {@link #fieldsReader} only. * The codec format itself is not changed, so we keep the original {@link #NAME} */ -public class DirectIOLucene99FlatVectorsFormat extends FlatVectorsFormat { +public class DirectIOLucene99FlatVectorsFormat extends AbstractFlatVectorsFormat { static final String NAME = "Lucene99FlatVectorsFormat"; static final String META_CODEC_NAME = "Lucene99FlatVectorsFormatMeta"; @@ -50,7 +52,6 @@ public class DirectIOLucene99FlatVectorsFormat extends FlatVectorsFormat { public static final int VERSION_START = 0; public static final int VERSION_CURRENT = VERSION_START; - static final int DIRECT_MONOTONIC_BLOCK_SHIFT = 16; private final FlatVectorsScorer vectorsScorer; /** Constructs a format */ @@ -59,13 +60,18 @@ public DirectIOLucene99FlatVectorsFormat(FlatVectorsScorer vectorsScorer) { this.vectorsScorer = vectorsScorer; } + @Override + protected FlatVectorsScorer flatVectorsScorer() { + return vectorsScorer; + } + @Override public FlatVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException { return new Lucene99FlatVectorsWriter(state, vectorsScorer); } static boolean shouldUseDirectIO(SegmentReadState state) { - assert ES818BinaryQuantizedVectorsFormat.USE_DIRECT_IO; + assert USE_DIRECT_IO; return FsDirectoryFactory.isHybridFs(state.directory) && FilterDirectory.unwrap(state.directory) instanceof DirectIOIndexInputSupplier; } @@ -83,9 +89,4 @@ public FlatVectorsReader fieldsReader(SegmentReadState state) throws IOException return new Lucene99FlatVectorsReader(state, vectorsScorer); } } - - @Override - public String toString() { - return "ES818FlatVectorsFormat(" + "vectorsScorer=" + vectorsScorer + ')'; - } } diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormat.java index 9a31ff42a7c5c..5ceb2c309087f 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormat.java @@ -22,11 +22,13 @@ import org.apache.lucene.codecs.hnsw.FlatVectorScorerUtil; import org.apache.lucene.codecs.hnsw.FlatVectorsFormat; import org.apache.lucene.codecs.hnsw.FlatVectorsReader; +import org.apache.lucene.codecs.hnsw.FlatVectorsScorer; import org.apache.lucene.codecs.hnsw.FlatVectorsWriter; import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsFormat; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; import org.elasticsearch.core.SuppressForbidden; +import org.elasticsearch.index.codec.vectors.AbstractFlatVectorsFormat; import org.elasticsearch.index.codec.vectors.OptimizedScalarQuantizer; import java.io.IOException; @@ -86,9 +88,7 @@ *
  • The sparse vector information, if required, mapping vector ordinal to doc ID * */ -public class ES818BinaryQuantizedVectorsFormat extends FlatVectorsFormat { - - public static final boolean USE_DIRECT_IO = getUseDirectIO(); +public class ES818BinaryQuantizedVectorsFormat extends AbstractFlatVectorsFormat { public static final String BINARIZED_VECTOR_COMPONENT = "BVEC"; public static final String NAME = "ES818BinaryQuantizedVectorsFormat"; @@ -101,13 +101,6 @@ public class ES818BinaryQuantizedVectorsFormat extends FlatVectorsFormat { static final String VECTOR_DATA_EXTENSION = "veb"; static final int DIRECT_MONOTONIC_BLOCK_SHIFT = 16; - @SuppressForbidden( - reason = "TODO Deprecate any lenient usage of Boolean#parseBoolean https://github.com/elastic/elasticsearch/issues/128993" - ) - private static boolean getUseDirectIO() { - return Boolean.parseBoolean(System.getProperty("vector.rescoring.directio", "false")); - } - private static final FlatVectorsFormat rawVectorFormat = USE_DIRECT_IO ? new DirectIOLucene99FlatVectorsFormat(FlatVectorScorerUtil.getLucene99FlatVectorsScorer()) : new Lucene99FlatVectorsFormat(FlatVectorScorerUtil.getLucene99FlatVectorsScorer()); @@ -121,6 +114,11 @@ public ES818BinaryQuantizedVectorsFormat() { super(NAME); } + @Override + protected FlatVectorsScorer flatVectorsScorer() { + return scorer; + } + @Override public FlatVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException { return new ES818BinaryQuantizedVectorsWriter(scorer, rawVectorFormat.fieldsWriter(state), state); @@ -130,14 +128,4 @@ public FlatVectorsWriter fieldsWriter(SegmentWriteState state) throws IOExceptio public FlatVectorsReader fieldsReader(SegmentReadState state) throws IOException { return new ES818BinaryQuantizedVectorsReader(state, rawVectorFormat.fieldsReader(state), scorer); } - - @Override - public int getMaxDimensions(String fieldName) { - return MAX_DIMS_COUNT; - } - - @Override - public String toString() { - return "ES818BinaryQuantizedVectorsFormat(name=" + NAME + ", flatVectorScorer=" + scorer + ")"; - } } diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818HnswBinaryQuantizedVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818HnswBinaryQuantizedVectorsFormat.java index 56942017c3cef..b1ea31a6197d6 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818HnswBinaryQuantizedVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818HnswBinaryQuantizedVectorsFormat.java @@ -30,6 +30,7 @@ import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.search.TaskExecutor; import org.apache.lucene.util.hnsw.HnswGraph; +import org.elasticsearch.index.codec.vectors.AbstractHnswVectorsFormat; import java.io.IOException; import java.util.concurrent.ExecutorService; @@ -44,32 +45,16 @@ /** * Copied from Lucene, replace with Lucene's implementation sometime after Lucene 10 */ -public class ES818HnswBinaryQuantizedVectorsFormat extends KnnVectorsFormat { +public class ES818HnswBinaryQuantizedVectorsFormat extends AbstractHnswVectorsFormat { public static final String NAME = "ES818HnswBinaryQuantizedVectorsFormat"; - /** - * Controls how many of the nearest neighbor candidates are connected to the new node. Defaults to - * {@link Lucene99HnswVectorsFormat#DEFAULT_MAX_CONN}. See {@link HnswGraph} for more details. - */ - private final int maxConn; - - /** - * The number of candidate neighbors to track while searching the graph for each newly inserted - * node. Defaults to {@link Lucene99HnswVectorsFormat#DEFAULT_BEAM_WIDTH}. See {@link HnswGraph} - * for details. - */ - private final int beamWidth; - /** The format for storing, reading, merging vectors on disk */ private static final FlatVectorsFormat flatVectorsFormat = new ES818BinaryQuantizedVectorsFormat(); - private final int numMergeWorkers; - private final TaskExecutor mergeExec; - /** Constructs a format using default graph construction parameters */ public ES818HnswBinaryQuantizedVectorsFormat() { - this(DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH, DEFAULT_NUM_MERGE_WORKER, null); + super(NAME); } /** @@ -79,7 +64,7 @@ public ES818HnswBinaryQuantizedVectorsFormat() { * @param beamWidth the size of the queue maintained during graph construction. */ public ES818HnswBinaryQuantizedVectorsFormat(int maxConn, int beamWidth) { - this(maxConn, beamWidth, DEFAULT_NUM_MERGE_WORKER, null); + super(NAME, maxConn, beamWidth); } /** @@ -93,28 +78,12 @@ public ES818HnswBinaryQuantizedVectorsFormat(int maxConn, int beamWidth) { * generated by this format to do the merge */ public ES818HnswBinaryQuantizedVectorsFormat(int maxConn, int beamWidth, int numMergeWorkers, ExecutorService mergeExec) { - super(NAME); - if (maxConn <= 0 || maxConn > MAXIMUM_MAX_CONN) { - throw new IllegalArgumentException( - "maxConn must be positive and less than or equal to " + MAXIMUM_MAX_CONN + "; maxConn=" + maxConn - ); - } - if (beamWidth <= 0 || beamWidth > MAXIMUM_BEAM_WIDTH) { - throw new IllegalArgumentException( - "beamWidth must be positive and less than or equal to " + MAXIMUM_BEAM_WIDTH + "; beamWidth=" + beamWidth - ); - } - this.maxConn = maxConn; - this.beamWidth = beamWidth; - if (numMergeWorkers == 1 && mergeExec != null) { - throw new IllegalArgumentException("No executor service is needed as we'll use single thread to merge"); - } - this.numMergeWorkers = numMergeWorkers; - if (mergeExec != null) { - this.mergeExec = new TaskExecutor(mergeExec); - } else { - this.mergeExec = null; - } + super(NAME, maxConn, beamWidth, numMergeWorkers, mergeExec); + } + + @Override + protected KnnVectorsFormat flatVectorsFormat() { + return flatVectorsFormat; } @Override @@ -126,20 +95,4 @@ public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException { return new Lucene99HnswVectorsReader(state, flatVectorsFormat.fieldsReader(state)); } - - @Override - public int getMaxDimensions(String fieldName) { - return MAX_DIMS_COUNT; - } - - @Override - public String toString() { - return "ES818HnswBinaryQuantizedVectorsFormat(name=ES818HnswBinaryQuantizedVectorsFormat, maxConn=" - + maxConn - + ", beamWidth=" - + beamWidth - + ", flatVectorFormat=" - + flatVectorsFormat - + ")"; - } } diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormatTests.java index 9fa10305562c2..1fd172101b293 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormatTests.java @@ -66,6 +66,7 @@ import org.elasticsearch.index.IndexModule; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.codec.vectors.BQVectorUtils; +import org.elasticsearch.index.codec.vectors.MergeReaderWrapper; import org.elasticsearch.index.codec.vectors.OptimizedScalarQuantizer; import org.elasticsearch.index.codec.vectors.reflect.OffHeapByteSizeUtils; import org.elasticsearch.index.shard.ShardId; From 95924deadafa5d22175edaf17dd3fee46485bf28 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Mon, 11 Aug 2025 16:09:44 +0000 Subject: [PATCH 2/4] [CI] Auto commit changes from spotless --- .../index/codec/vectors/AbstractHnswVectorsFormat.java | 5 ++++- .../vectors/ES814HnswScalarQuantizedVectorsFormat.java | 1 - .../index/codec/vectors/ES815HnswBitVectorsFormat.java | 2 -- .../es816/ES816BinaryQuantizedVectorsFormat.java | 2 -- .../es816/ES816HnswBinaryQuantizedVectorsFormat.java | 9 --------- .../es818/DirectIOLucene99FlatVectorsFormat.java | 1 - .../es818/ES818BinaryQuantizedVectorsFormat.java | 3 --- .../es818/ES818HnswBinaryQuantizedVectorsFormat.java | 10 ---------- 8 files changed, 4 insertions(+), 29 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/AbstractHnswVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/AbstractHnswVectorsFormat.java index 768f2495d2c41..2ca55a2c616a3 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/AbstractHnswVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/AbstractHnswVectorsFormat.java @@ -100,7 +100,10 @@ public int getMaxDimensions(String fieldName) { @Override public String toString() { - return getName() + "(name=" + getName() + ", maxConn=" + return getName() + + "(name=" + + getName() + + ", maxConn=" + maxConn + ", beamWidth=" + beamWidth diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814HnswScalarQuantizedVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814HnswScalarQuantizedVectorsFormat.java index 1bbc34f52665b..bc3e609f402f9 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814HnswScalarQuantizedVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814HnswScalarQuantizedVectorsFormat.java @@ -22,7 +22,6 @@ import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH; import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN; -import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT; public final class ES814HnswScalarQuantizedVectorsFormat extends AbstractHnswVectorsFormat { diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES815HnswBitVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES815HnswBitVectorsFormat.java index 7f313496db123..61d959de291b4 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES815HnswBitVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES815HnswBitVectorsFormat.java @@ -20,8 +20,6 @@ import java.io.IOException; -import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT; - public class ES815HnswBitVectorsFormat extends AbstractHnswVectorsFormat { static final String NAME = "ES815HnswBitVectorsFormat"; diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsFormat.java index 64116658b0db6..6888d00608128 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsFormat.java @@ -31,8 +31,6 @@ import java.io.IOException; -import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT; - /** * Copied from Lucene, replace with Lucene's implementation sometime after Lucene 10 */ diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816HnswBinaryQuantizedVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816HnswBinaryQuantizedVectorsFormat.java index 0efabff21534c..add484e348b7f 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816HnswBinaryQuantizedVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816HnswBinaryQuantizedVectorsFormat.java @@ -23,23 +23,14 @@ import org.apache.lucene.codecs.KnnVectorsReader; import org.apache.lucene.codecs.KnnVectorsWriter; import org.apache.lucene.codecs.hnsw.FlatVectorsFormat; -import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat; import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; -import org.apache.lucene.util.hnsw.HnswGraph; import org.elasticsearch.index.codec.vectors.AbstractHnswVectorsFormat; import java.io.IOException; import java.util.concurrent.ExecutorService; -import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH; -import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN; -import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_NUM_MERGE_WORKER; -import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.MAXIMUM_BEAM_WIDTH; -import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.MAXIMUM_MAX_CONN; -import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT; - /** * Copied from Lucene, replace with Lucene's implementation sometime after Lucene 10 */ diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/DirectIOLucene99FlatVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/DirectIOLucene99FlatVectorsFormat.java index 08ffbf02041af..c68987902e995 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/DirectIOLucene99FlatVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/DirectIOLucene99FlatVectorsFormat.java @@ -19,7 +19,6 @@ */ package org.elasticsearch.index.codec.vectors.es818; -import org.apache.lucene.codecs.hnsw.FlatVectorsFormat; import org.apache.lucene.codecs.hnsw.FlatVectorsReader; import org.apache.lucene.codecs.hnsw.FlatVectorsScorer; import org.apache.lucene.codecs.hnsw.FlatVectorsWriter; diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormat.java index 5ceb2c309087f..a9601ffc408e8 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormat.java @@ -27,14 +27,11 @@ import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsFormat; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; -import org.elasticsearch.core.SuppressForbidden; import org.elasticsearch.index.codec.vectors.AbstractFlatVectorsFormat; import org.elasticsearch.index.codec.vectors.OptimizedScalarQuantizer; import java.io.IOException; -import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT; - /** * Copied from Lucene, replace with Lucene's implementation sometime after Lucene 10 * Codec for encoding/decoding binary quantized vectors The binary quantization format used here diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818HnswBinaryQuantizedVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818HnswBinaryQuantizedVectorsFormat.java index b1ea31a6197d6..358ff6606db4d 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818HnswBinaryQuantizedVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818HnswBinaryQuantizedVectorsFormat.java @@ -23,25 +23,15 @@ import org.apache.lucene.codecs.KnnVectorsReader; import org.apache.lucene.codecs.KnnVectorsWriter; import org.apache.lucene.codecs.hnsw.FlatVectorsFormat; -import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat; import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader; import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsWriter; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; -import org.apache.lucene.search.TaskExecutor; -import org.apache.lucene.util.hnsw.HnswGraph; import org.elasticsearch.index.codec.vectors.AbstractHnswVectorsFormat; import java.io.IOException; import java.util.concurrent.ExecutorService; -import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH; -import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN; -import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_NUM_MERGE_WORKER; -import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.MAXIMUM_BEAM_WIDTH; -import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.MAXIMUM_MAX_CONN; -import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT; - /** * Copied from Lucene, replace with Lucene's implementation sometime after Lucene 10 */ From 1d8d757f5a4d7e37edd450b99a3826c502c21668 Mon Sep 17 00:00:00 2001 From: Simon Cooper Date: Tue, 12 Aug 2025 09:28:31 +0100 Subject: [PATCH 3/4] Fix compile --- .../index/codec/vectors/AbstractFlatVectorsFormat.java | 2 +- .../codec/vectors/es816/ES816BinaryFlatVectorsScorer.java | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/AbstractFlatVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/AbstractFlatVectorsFormat.java index d438361e22e1c..4bfdbe4c9273a 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/AbstractFlatVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/AbstractFlatVectorsFormat.java @@ -17,7 +17,7 @@ public abstract class AbstractFlatVectorsFormat extends FlatVectorsFormat { - protected static final boolean USE_DIRECT_IO = getUseDirectIO(); + public static final boolean USE_DIRECT_IO = getUseDirectIO(); @SuppressForbidden( reason = "TODO Deprecate any lenient usage of Boolean#parseBoolean https://github.com/elastic/elasticsearch/issues/128993" diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryFlatVectorsScorer.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryFlatVectorsScorer.java index 41d86daa08db5..daea6358ff3d5 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryFlatVectorsScorer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryFlatVectorsScorer.java @@ -37,10 +37,10 @@ import static org.apache.lucene.index.VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT; /** Vector scorer over binarized vector values */ -public class ES816BinaryFlatVectorsScorer implements FlatVectorsScorer { +class ES816BinaryFlatVectorsScorer implements FlatVectorsScorer { private final FlatVectorsScorer nonQuantizedDelegate; - public ES816BinaryFlatVectorsScorer(FlatVectorsScorer nonQuantizedDelegate) { + ES816BinaryFlatVectorsScorer(FlatVectorsScorer nonQuantizedDelegate) { this.nonQuantizedDelegate = nonQuantizedDelegate; } From 98a38aba7297696a4032df0d58ee64a1c7b78990 Mon Sep 17 00:00:00 2001 From: Simon Cooper Date: Tue, 12 Aug 2025 13:16:58 +0100 Subject: [PATCH 4/4] Change to FlatVectorsFormat --- .../index/codec/vectors/AbstractHnswVectorsFormat.java | 3 ++- .../codec/vectors/ES814HnswScalarQuantizedVectorsFormat.java | 3 +-- .../index/codec/vectors/ES815HnswBitVectorsFormat.java | 3 +-- .../vectors/es816/ES816HnswBinaryQuantizedVectorsFormat.java | 3 +-- .../vectors/es818/ES818HnswBinaryQuantizedVectorsFormat.java | 3 +-- 5 files changed, 6 insertions(+), 9 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/AbstractHnswVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/AbstractHnswVectorsFormat.java index 2ca55a2c616a3..618ece60ae251 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/AbstractHnswVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/AbstractHnswVectorsFormat.java @@ -10,6 +10,7 @@ package org.elasticsearch.index.codec.vectors; import org.apache.lucene.codecs.KnnVectorsFormat; +import org.apache.lucene.codecs.hnsw.FlatVectorsFormat; import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat; import org.apache.lucene.search.TaskExecutor; import org.apache.lucene.util.hnsw.HnswGraph; @@ -91,7 +92,7 @@ protected AbstractHnswVectorsFormat(String name, int maxConn, int beamWidth, int } } - protected abstract KnnVectorsFormat flatVectorsFormat(); + protected abstract FlatVectorsFormat flatVectorsFormat(); @Override public int getMaxDimensions(String fieldName) { diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814HnswScalarQuantizedVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814HnswScalarQuantizedVectorsFormat.java index bc3e609f402f9..e072df076413d 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814HnswScalarQuantizedVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814HnswScalarQuantizedVectorsFormat.java @@ -9,7 +9,6 @@ package org.elasticsearch.index.codec.vectors; -import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.KnnVectorsReader; import org.apache.lucene.codecs.KnnVectorsWriter; import org.apache.lucene.codecs.hnsw.FlatVectorsFormat; @@ -40,7 +39,7 @@ public ES814HnswScalarQuantizedVectorsFormat(int maxConn, int beamWidth, Float c } @Override - protected KnnVectorsFormat flatVectorsFormat() { + protected FlatVectorsFormat flatVectorsFormat() { return flatVectorsFormat; } diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES815HnswBitVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES815HnswBitVectorsFormat.java index 61d959de291b4..d174db62adaf4 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES815HnswBitVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES815HnswBitVectorsFormat.java @@ -9,7 +9,6 @@ package org.elasticsearch.index.codec.vectors; -import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.KnnVectorsReader; import org.apache.lucene.codecs.KnnVectorsWriter; import org.apache.lucene.codecs.hnsw.FlatVectorsFormat; @@ -35,7 +34,7 @@ public ES815HnswBitVectorsFormat(int maxConn, int beamWidth) { } @Override - protected KnnVectorsFormat flatVectorsFormat() { + protected FlatVectorsFormat flatVectorsFormat() { return flatVectorsFormat; } diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816HnswBinaryQuantizedVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816HnswBinaryQuantizedVectorsFormat.java index add484e348b7f..2f5b425d60119 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816HnswBinaryQuantizedVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816HnswBinaryQuantizedVectorsFormat.java @@ -19,7 +19,6 @@ */ package org.elasticsearch.index.codec.vectors.es816; -import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.KnnVectorsReader; import org.apache.lucene.codecs.KnnVectorsWriter; import org.apache.lucene.codecs.hnsw.FlatVectorsFormat; @@ -71,7 +70,7 @@ public ES816HnswBinaryQuantizedVectorsFormat(int maxConn, int beamWidth, int num } @Override - protected KnnVectorsFormat flatVectorsFormat() { + protected FlatVectorsFormat flatVectorsFormat() { return flatVectorsFormat; } diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818HnswBinaryQuantizedVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818HnswBinaryQuantizedVectorsFormat.java index 358ff6606db4d..0148604467a90 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818HnswBinaryQuantizedVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818HnswBinaryQuantizedVectorsFormat.java @@ -19,7 +19,6 @@ */ package org.elasticsearch.index.codec.vectors.es818; -import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.KnnVectorsReader; import org.apache.lucene.codecs.KnnVectorsWriter; import org.apache.lucene.codecs.hnsw.FlatVectorsFormat; @@ -72,7 +71,7 @@ public ES818HnswBinaryQuantizedVectorsFormat(int maxConn, int beamWidth, int num } @Override - protected KnnVectorsFormat flatVectorsFormat() { + protected FlatVectorsFormat flatVectorsFormat() { return flatVectorsFormat; }