Skip to content
Closed
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
4823d30
Copy over Lucene classes
carlosdelest Jun 5, 2025
6f6097d
Create vector format class that references Lucene changes
carlosdelest Jun 5, 2025
01a5be0
Checkstyle
carlosdelest Jun 5, 2025
9f6852e
Change version to 910
carlosdelest Jun 5, 2025
402f480
Some renaming and refactoring
carlosdelest Jun 6, 2025
acbbb13
Change tests to reference the latest codec version
carlosdelest Jun 6, 2025
c0e1b91
Update docs/changelog/129046.yaml
carlosdelest Jun 6, 2025
2d18be6
[CI] Auto commit changes from spotless
Jun 6, 2025
c1bd0ff
Fix changelog
carlosdelest Jun 6, 2025
433ac23
Add test, fix vectors format name and add to module-info
carlosdelest Jun 6, 2025
11088f9
[CI] Auto commit changes from spotless
Jun 6, 2025
934a066
Change KnnVectorsWriter to use in HNSW vector formats, replacing Luce…
carlosdelest Jun 6, 2025
19bc676
Fix test name
carlosdelest Jun 6, 2025
33583da
Merge remote-tracking branch 'carlosdelest/feature/dense-vector-hnsw-…
carlosdelest Jun 6, 2025
c786d14
Spotless
carlosdelest Jun 6, 2025
98234a0
Renaming
carlosdelest Jun 6, 2025
86ff8f1
[CI] Auto commit changes from spotless
Jun 6, 2025
49b666f
Fix javadoc
carlosdelest Jun 6, 2025
97d993c
Merge remote-tracking branch 'carlosdelest/feature/dense-vector-hnsw-…
carlosdelest Jun 6, 2025
a6db01a
Remove ConcurrentHnswMerger and related classes / params
carlosdelest Jun 9, 2025
378111f
Remove FilteredHnswGraphSearcher
carlosdelest Jun 9, 2025
0489e7f
Remove HnswGraph and related classes
carlosdelest Jun 9, 2025
8a0a968
Remove HnswGraphMerger, clean classes
carlosdelest Jun 9, 2025
6373949
Remove NeighborQueue
carlosdelest Jun 9, 2025
a954936
Remove unneeded code from MaxSizedFloatArrayList and MaxSizedIntArray…
carlosdelest Jun 9, 2025
6a0826e
Remove HnswBuilder interface
carlosdelest Jun 9, 2025
457e9a4
Remove HnswUtil
carlosdelest Jun 9, 2025
2d8f30c
Use same codec name, change just writer implementation
carlosdelest Jun 10, 2025
2d8329d
Merge remote-tracking branch 'origin/main' into feature/dense-vector-…
carlosdelest Jun 10, 2025
a595100
Revert server/module-info
carlosdelest Jun 10, 2025
d4c8037
Revert server/module-info
carlosdelest Jun 10, 2025
5009508
Fix vector format name
carlosdelest Jun 10, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.cluster.metadata.DataStream;
import org.elasticsearch.common.logging.LogConfigurator;
import org.elasticsearch.index.codec.Elasticsearch900Lucene101Codec;
import org.elasticsearch.index.codec.Elasticsearch910Lucene102Codec;
import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
Expand Down Expand Up @@ -259,7 +259,7 @@ private static IndexWriterConfig createIndexWriterConfig(boolean optimizedMergeE
config.setLeafSorter(DataStream.TIMESERIES_LEAF_READERS_SORTER);
config.setMergePolicy(new LogByteSizeMergePolicy());
var docValuesFormat = new ES819TSDBDocValuesFormat(4096, optimizedMergeEnabled);
config.setCodec(new Elasticsearch900Lucene101Codec() {
config.setCodec(new Elasticsearch910Lucene102Codec() {

@Override
public DocValuesFormat getDocValuesFormatForField(String field) {
Expand Down
5 changes: 5 additions & 0 deletions docs/changelog/129046.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 129046
summary: Add Lucene improvements for HNSW merging heap usage
area: Search
type: enhancement
issues: []
4 changes: 3 additions & 1 deletion server/src/main/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
* License v3.0 only", or the "Server Side Public License, v 1".
*/

import org.elasticsearch.index.codec.vectors.es910.ES910HnswVectorsFormat;
import org.elasticsearch.plugins.internal.RestExtension;
import org.elasticsearch.reservedstate.ReservedStateHandlerProvider;

Expand Down Expand Up @@ -456,14 +457,15 @@
org.elasticsearch.index.codec.vectors.es816.ES816HnswBinaryQuantizedVectorsFormat,
org.elasticsearch.index.codec.vectors.es818.ES818BinaryQuantizedVectorsFormat,
org.elasticsearch.index.codec.vectors.es818.ES818HnswBinaryQuantizedVectorsFormat,
ES910HnswVectorsFormat,
org.elasticsearch.index.codec.vectors.IVFVectorsFormat;

provides org.apache.lucene.codecs.Codec
with
org.elasticsearch.index.codec.Elasticsearch814Codec,
org.elasticsearch.index.codec.Elasticsearch816Codec,
org.elasticsearch.index.codec.Elasticsearch900Codec,
org.elasticsearch.index.codec.Elasticsearch900Lucene101Codec;
org.elasticsearch.index.codec.Elasticsearch910Lucene102Codec;

provides org.apache.logging.log4j.core.util.ContextDataProvider with org.elasticsearch.common.logging.DynamicContextDataProvider;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat;
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
import org.elasticsearch.index.codec.vectors.es910.ES910HnswVectorsFormat;
import org.elasticsearch.index.codec.zstd.Zstd814StoredFieldsFormat;

/**
Expand Down Expand Up @@ -68,7 +68,7 @@ public Elasticsearch900Codec(Zstd814StoredFieldsFormat.Mode mode) {
this.storedFieldsFormat = mode.getFormat();
this.defaultPostingsFormat = new Lucene912PostingsFormat();
this.defaultDVFormat = new Lucene90DocValuesFormat();
this.defaultKnnVectorsFormat = new Lucene99HnswVectorsFormat();
this.defaultKnnVectorsFormat = new ES910HnswVectorsFormat();
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,17 @@
import org.apache.lucene.codecs.lucene101.Lucene101Codec;
import org.apache.lucene.codecs.lucene101.Lucene101PostingsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
import org.elasticsearch.index.codec.perfield.XPerFieldDocValuesFormat;
import org.elasticsearch.index.codec.vectors.es910.ES910HnswVectorsFormat;
import org.elasticsearch.index.codec.zstd.Zstd814StoredFieldsFormat;

/**
* Elasticsearch codec as of 9.0 relying on Lucene 10.1. This extends the Lucene 10.1 codec to compress
* stored fields with ZSTD instead of LZ4/DEFLATE. See {@link Zstd814StoredFieldsFormat}.
*/
public class Elasticsearch900Lucene101Codec extends CodecService.DeduplicateFieldInfosCodec {
public class Elasticsearch910Lucene102Codec extends CodecService.DeduplicateFieldInfosCodec {

static final PostingsFormat DEFAULT_POSTINGS_FORMAT = new Lucene101PostingsFormat();

Expand All @@ -36,41 +36,41 @@ public class Elasticsearch900Lucene101Codec extends CodecService.DeduplicateFiel
private final PostingsFormat postingsFormat = new PerFieldPostingsFormat() {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
return Elasticsearch900Lucene101Codec.this.getPostingsFormatForField(field);
return Elasticsearch910Lucene102Codec.this.getPostingsFormatForField(field);
}
};

private final DocValuesFormat defaultDVFormat;
private final DocValuesFormat docValuesFormat = new XPerFieldDocValuesFormat() {
@Override
public DocValuesFormat getDocValuesFormatForField(String field) {
return Elasticsearch900Lucene101Codec.this.getDocValuesFormatForField(field);
return Elasticsearch910Lucene102Codec.this.getDocValuesFormatForField(field);
}
};

private final KnnVectorsFormat defaultKnnVectorsFormat;
private final KnnVectorsFormat knnVectorsFormat = new PerFieldKnnVectorsFormat() {
@Override
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
return Elasticsearch900Lucene101Codec.this.getKnnVectorsFormatForField(field);
return Elasticsearch910Lucene102Codec.this.getKnnVectorsFormatForField(field);
}
};

/** Public no-arg constructor, needed for SPI loading at read-time. */
public Elasticsearch900Lucene101Codec() {
public Elasticsearch910Lucene102Codec() {
this(Zstd814StoredFieldsFormat.Mode.BEST_SPEED);
}

/**
* Constructor. Takes a {@link Zstd814StoredFieldsFormat.Mode} that describes whether to optimize for retrieval speed at the expense of
* worse space-efficiency or vice-versa.
*/
public Elasticsearch900Lucene101Codec(Zstd814StoredFieldsFormat.Mode mode) {
public Elasticsearch910Lucene102Codec(Zstd814StoredFieldsFormat.Mode mode) {
super("Elasticsearch900Lucene101", new Lucene101Codec());
this.storedFieldsFormat = mode.getFormat();
this.defaultPostingsFormat = DEFAULT_POSTINGS_FORMAT;
this.defaultDVFormat = new Lucene90DocValuesFormat();
this.defaultKnnVectorsFormat = new Lucene99HnswVectorsFormat();
this.defaultKnnVectorsFormat = new ES910HnswVectorsFormat();
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,14 @@
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.index.IndexMode;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.IndexVersions;
import org.elasticsearch.index.codec.bloomfilter.ES87BloomFilterPostingsFormat;
import org.elasticsearch.index.codec.postings.ES812PostingsFormat;
import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat;
import org.elasticsearch.index.codec.vectors.es910.ES910HnswVectorsFormat;
import org.elasticsearch.index.mapper.CompletionFieldMapper;
import org.elasticsearch.index.mapper.IdFieldMapper;
import org.elasticsearch.index.mapper.Mapper;
Expand All @@ -34,7 +34,7 @@
public class PerFieldFormatSupplier {

private static final DocValuesFormat docValuesFormat = new Lucene90DocValuesFormat();
private static final KnnVectorsFormat knnVectorsFormat = new Lucene99HnswVectorsFormat();
private static final KnnVectorsFormat knnVectorsFormat = new ES910HnswVectorsFormat();
private static final ES819TSDBDocValuesFormat tsdbDocValuesFormat = new ES819TSDBDocValuesFormat();
private static final ES812PostingsFormat es812PostingsFormat = new ES812PostingsFormat();
private static final PostingsFormat completionPostingsFormat = PostingsFormat.forName("Completion101");
Expand All @@ -51,7 +51,7 @@ public PerFieldFormatSupplier(MapperService mapperService, BigArrays bigArrays)
if (mapperService != null
&& mapperService.getIndexSettings().getIndexVersionCreated().onOrAfter(IndexVersions.USE_LUCENE101_POSTINGS_FORMAT)
&& mapperService.getIndexSettings().getMode().useDefaultPostingsFormat()) {
defaultPostingsFormat = Elasticsearch900Lucene101Codec.DEFAULT_POSTINGS_FORMAT;
defaultPostingsFormat = Elasticsearch910Lucene102Codec.DEFAULT_POSTINGS_FORMAT;
} else {
// our own posting format using PFOR
defaultPostingsFormat = es812PostingsFormat;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
* per index in real time via the mapping API. If no specific postings format or vector format is
* configured for a specific field the default postings or vector format is used.
*/
public final class PerFieldMapperCodec extends Elasticsearch900Lucene101Codec {
public final class PerFieldMapperCodec extends Elasticsearch910Lucene102Codec {

private final PerFieldFormatSupplier formatSupplier;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@
import org.apache.lucene.codecs.KnnVectorsWriter;
import org.apache.lucene.codecs.hnsw.FlatVectorsFormat;
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader;
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsWriter;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.elasticsearch.index.codec.vectors.es910.ES910HnswVectorsWriter;

import java.io.IOException;

Expand Down Expand Up @@ -61,7 +61,7 @@ public ES814HnswScalarQuantizedVectorsFormat(int maxConn, int beamWidth, Float c

@Override
public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
return new Lucene99HnswVectorsWriter(state, maxConn, beamWidth, flatVectorsFormat.fieldsWriter(state), 1, null);
return new ES910HnswVectorsWriter(state, maxConn, beamWidth, flatVectorsFormat.fieldsWriter(state), 1, null);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@
import org.apache.lucene.codecs.KnnVectorsWriter;
import org.apache.lucene.codecs.hnsw.FlatVectorsFormat;
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader;
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsWriter;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.elasticsearch.index.codec.vectors.es910.ES910HnswVectorsWriter;

import java.io.IOException;

Expand Down Expand Up @@ -56,7 +56,7 @@ public ES815HnswBitVectorsFormat(int maxConn, int beamWidth) {

@Override
public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
return new Lucene99HnswVectorsWriter(state, maxConn, beamWidth, flatVectorsFormat.fieldsWriter(state), 1, null);
return new ES910HnswVectorsWriter(state, maxConn, beamWidth, flatVectorsFormat.fieldsWriter(state), 1, null);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@
import org.apache.lucene.codecs.hnsw.FlatVectorsFormat;
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader;
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsWriter;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.search.TaskExecutor;
import org.apache.lucene.util.hnsw.HnswGraph;
import org.elasticsearch.index.codec.vectors.es910.ES910HnswVectorsWriter;

import java.io.IOException;
import java.util.concurrent.ExecutorService;
Expand Down Expand Up @@ -119,7 +119,7 @@ public ES818HnswBinaryQuantizedVectorsFormat(int maxConn, int beamWidth, int num

@Override
public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException {
return new Lucene99HnswVectorsWriter(state, maxConn, beamWidth, flatVectorsFormat.fieldsWriter(state), numMergeWorkers, mergeExec);
return new ES910HnswVectorsWriter(state, maxConn, beamWidth, flatVectorsFormat.fieldsWriter(state), numMergeWorkers, mergeExec);
}

@Override
Expand Down
Loading
Loading