From b527f593cd845477bfd9f1f4ec8040d7c101c02a Mon Sep 17 00:00:00 2001 From: ChrisHegarty Date: Sun, 20 Apr 2025 10:52:26 +0100 Subject: [PATCH 001/184] port minimal 10.3 infra to lucene_snapshot --- .buildkite/pipelines/lucene-snapshot/build-snapshot.yml | 4 +++- docs/Versions.asciidoc | 4 ++-- .../src/main/java/org/elasticsearch/index/IndexVersions.java | 2 ++ 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/.buildkite/pipelines/lucene-snapshot/build-snapshot.yml b/.buildkite/pipelines/lucene-snapshot/build-snapshot.yml index 8cf2a8aacbece..20a85c0593f31 100644 --- a/.buildkite/pipelines/lucene-snapshot/build-snapshot.yml +++ b/.buildkite/pipelines/lucene-snapshot/build-snapshot.yml @@ -2,7 +2,9 @@ steps: - trigger: apache-lucene-build-snapshot label: Trigger pipeline to build lucene snapshot key: lucene-build - if: build.env("LUCENE_BUILD_ID") == null || build.env("LUCENE_BUILD_ID") == "" + if: (build.env("LUCENE_BUILD_ID") == null || build.env("LUCENE_BUILD_ID") == "") + build: + branch: branch_10x - wait - label: Upload and update lucene snapshot command: .buildkite/scripts/lucene-snapshot/upload-snapshot.sh diff --git a/docs/Versions.asciidoc b/docs/Versions.asciidoc index 58195d7313a5a..aa02432b42b2a 100644 --- a/docs/Versions.asciidoc +++ b/docs/Versions.asciidoc @@ -1,8 +1,8 @@ include::{docs-root}/shared/versions/stack/{source_branch}.asciidoc[] -:lucene_version: 10.2.0 -:lucene_version_path: 10_2_0 +:lucene_version: 10.3.0 +:lucene_version_path: 10_3_0 :jdk: 11.0.2 :jdk_major: 11 :build_type: tar diff --git a/server/src/main/java/org/elasticsearch/index/IndexVersions.java b/server/src/main/java/org/elasticsearch/index/IndexVersions.java index 4c07be7d9200b..ebe0ca072c55e 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexVersions.java +++ b/server/src/main/java/org/elasticsearch/index/IndexVersions.java @@ -160,6 +160,8 @@ private static Version parseUnchecked(String version) { public static final IndexVersion 
SYNTHETIC_SOURCE_STORE_ARRAYS_NATIVELY_SCALED_FLOAT = def(9_020_0_00, Version.LUCENE_10_1_0); public static final IndexVersion USE_LUCENE101_POSTINGS_FORMAT = def(9_021_0_00, Version.LUCENE_10_1_0); public static final IndexVersion UPGRADE_TO_LUCENE_10_2_0 = def(9_022_00_0, Version.LUCENE_10_2_0); + + public static final IndexVersion UPGRADE_TO_LUCENE_10_3_0 = def(9_050_00_0, Version.LUCENE_10_3_0); /* * STOP! READ THIS FIRST! No, really, * ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _ From 55c2ec7173b65693046cf67be25c0ddaf23adc8d Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Sun, 20 Apr 2025 10:09:07 +0000 Subject: [PATCH 002/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-d18268d3d43 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 90b681c9636fe..84e52de94794b 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.2.0 +lucene = 10.3.0-snapshot-d18268d3d43 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index cbc22b35d7a35..dae3460520471 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2971,129 +2971,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 262a98b4af5408c86f3718667e66779d522f78d6 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Mon, 21 Apr 2025 06:11:50 +0000 
Subject: [PATCH 003/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-d18268d3d43 --- gradle/verification-metadata.xml | 50 ++++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index dae3460520471..cd512c0a4cfe4 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2973,127 +2973,127 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + From 8ea85ee633b195ebf7f36b3df2300ec22594f7bd Mon Sep 17 00:00:00 2001 From: Chris Hegarty <62058229+ChrisHegarty@users.noreply.github.com> Date: Mon, 21 Apr 2025 14:36:33 +0100 Subject: [PATCH 004/184] Add getOffHeapByteSize to ES vector readers (#127104) --- .../index/codec/vectors/ES813FlatVectorFormat.java | 6 ++++++ .../codec/vectors/ES813Int8FlatVectorFormat.java | 6 ++++++ .../vectors/ES814ScalarQuantizedVectorsFormat.java | 6 ++++++ .../es816/ES816BinaryQuantizedVectorsReader.java | 14 ++++++++++++++ .../es818/ES818BinaryQuantizedVectorsReader.java | 14 ++++++++++++++ 5 files changed, 46 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES813FlatVectorFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES813FlatVectorFormat.java index ab882c8b04648..325188624a2f4 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES813FlatVectorFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES813FlatVectorFormat.java @@ -31,6 +31,7 @@ import org.apache.lucene.util.hnsw.RandomVectorScorer; import java.io.IOException; +import java.util.Map; import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT; @@ -148,6 +149,11 @@ public void search(String field, byte[] target, KnnCollector knnCollector, Bits collectAllMatchingDocs(knnCollector, acceptDocs, reader.getRandomVectorScorer(field, target)); } + @Override 
+ public Map getOffHeapByteSize(FieldInfo fieldInfo) { + return reader.getOffHeapByteSize(fieldInfo); + } + @Override public void close() throws IOException { reader.close(); diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES813Int8FlatVectorFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES813Int8FlatVectorFormat.java index 662e4040511e2..4636a37d14f53 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES813Int8FlatVectorFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES813Int8FlatVectorFormat.java @@ -29,6 +29,7 @@ import org.apache.lucene.util.hnsw.RandomVectorScorer; import java.io.IOException; +import java.util.Map; import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT; @@ -156,6 +157,11 @@ public void search(String field, byte[] target, KnnCollector knnCollector, Bits collectAllMatchingDocs(knnCollector, acceptDocs, reader.getRandomVectorScorer(field, target)); } + @Override + public Map getOffHeapByteSize(FieldInfo fieldInfo) { + return reader.getOffHeapByteSize(fieldInfo); + } + @Override public void close() throws IOException { reader.close(); diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java index 4c4fd00806954..204d9c6e9e630 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java @@ -38,6 +38,7 @@ import org.elasticsearch.simdvec.VectorSimilarityType; import java.io.IOException; +import java.util.Map; import static org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsFormat.DYNAMIC_CONFIDENCE_INTERVAL; import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.MAX_DIMS_COUNT; @@ 
-227,6 +228,11 @@ public void close() throws IOException { public long ramBytesUsed() { return delegate.ramBytesUsed(); } + + @Override + public Map getOffHeapByteSize(FieldInfo fieldInfo) { + return delegate.getOffHeapByteSize(fieldInfo); + } } static final class ESFlatVectorsScorer implements FlatVectorsScorer { diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsReader.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsReader.java index fc20809ea7eed..57728e7f33254 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsReader.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsReader.java @@ -20,6 +20,7 @@ package org.elasticsearch.index.codec.vectors.es816; import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.KnnVectorsReader; import org.apache.lucene.codecs.hnsw.FlatVectorsReader; import org.apache.lucene.codecs.lucene95.OrdToDocDISIReaderConfiguration; import org.apache.lucene.index.ByteVectorValues; @@ -51,6 +52,7 @@ import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.readSimilarityFunction; import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.readVectorEncoding; +import static org.elasticsearch.index.codec.vectors.es816.ES816BinaryQuantizedVectorsFormat.VECTOR_DATA_EXTENSION; /** * Copied from Lucene, replace with Lucene's implementation sometime after Lucene 10 @@ -253,6 +255,18 @@ public long ramBytesUsed() { return size; } + @Override + public Map getOffHeapByteSize(FieldInfo fieldInfo) { + var raw = rawVectorsReader.getOffHeapByteSize(fieldInfo); + FieldEntry fe = fields.get(fieldInfo.name); + if (fe == null) { + assert fieldInfo.getVectorEncoding() == VectorEncoding.BYTE; + return raw; + } + var quant = Map.of(VECTOR_DATA_EXTENSION, fe.vectorDataLength()); + return 
KnnVectorsReader.mergeOffHeapByteSizeMaps(raw, quant); + } + public float[] getCentroid(String field) { FieldEntry fieldEntry = fields.get(field); if (fieldEntry != null) { diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsReader.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsReader.java index 8036b8314cdc1..338c7668ef33d 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsReader.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsReader.java @@ -20,6 +20,7 @@ package org.elasticsearch.index.codec.vectors.es818; import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.KnnVectorsReader; import org.apache.lucene.codecs.hnsw.FlatVectorsReader; import org.apache.lucene.codecs.lucene95.OrdToDocDISIReaderConfiguration; import org.apache.lucene.index.ByteVectorValues; @@ -51,6 +52,7 @@ import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.readSimilarityFunction; import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.readVectorEncoding; +import static org.elasticsearch.index.codec.vectors.es818.ES818BinaryQuantizedVectorsFormat.VECTOR_DATA_EXTENSION; /** * Copied from Lucene, replace with Lucene's implementation sometime after Lucene 10 @@ -252,6 +254,18 @@ public long ramBytesUsed() { return size; } + @Override + public Map getOffHeapByteSize(FieldInfo fieldInfo) { + var raw = rawVectorsReader.getOffHeapByteSize(fieldInfo); + FieldEntry fe = fields.get(fieldInfo.name); + if (fe == null) { + assert fieldInfo.getVectorEncoding() == VectorEncoding.BYTE; + return raw; + } + var quant = Map.of(VECTOR_DATA_EXTENSION, fe.vectorDataLength()); + return KnnVectorsReader.mergeOffHeapByteSizeMaps(raw, quant); + } + public float[] getCentroid(String field) { FieldEntry fieldEntry = fields.get(field); if (fieldEntry != 
null) { From 4e88e93ccf3b204f12de528bdd3849f0533eac84 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Tue, 22 Apr 2025 06:12:52 +0000 Subject: [PATCH 005/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-b1978943b74 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 84e52de94794b..bd6fda14921fa 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-d18268d3d43 +lucene = 10.3.0-snapshot-b1978943b74 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index cd512c0a4cfe4..05f3f90f478dd 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2971,129 +2971,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 2a48a01b845982aae7cf4c3868d80c8a78f3e28d Mon Sep 17 00:00:00 2001 From: ChrisHegarty Date: Tue, 22 Apr 2025 16:00:41 +0100 Subject: [PATCH 006/184] Fix index version parsing until 10.2.1 is released --- server/src/main/java/org/elasticsearch/index/IndexVersions.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/index/IndexVersions.java b/server/src/main/java/org/elasticsearch/index/IndexVersions.java index d27a9f874401f..14c3a93758575 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexVersions.java +++ 
b/server/src/main/java/org/elasticsearch/index/IndexVersions.java @@ -160,7 +160,7 @@ private static Version parseUnchecked(String version) { public static final IndexVersion SYNTHETIC_SOURCE_STORE_ARRAYS_NATIVELY_SCALED_FLOAT = def(9_020_0_00, Version.LUCENE_10_1_0); public static final IndexVersion USE_LUCENE101_POSTINGS_FORMAT = def(9_021_0_00, Version.LUCENE_10_1_0); public static final IndexVersion UPGRADE_TO_LUCENE_10_2_0 = def(9_022_00_0, Version.LUCENE_10_2_0); - public static final IndexVersion UPGRADE_TO_LUCENE_10_2_1 = def(9_023_00_0, Version.LUCENE_10_2_1); + public static final IndexVersion UPGRADE_TO_LUCENE_10_2_1 = def(9_023_00_0, parseUnchecked("10.2.1")); public static final IndexVersion UPGRADE_TO_LUCENE_10_3_0 = def(9_050_00_0, Version.LUCENE_10_3_0); /* From a266653397028993bbf90b7ea9856440c4ae3a70 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 23 Apr 2025 06:14:56 +0000 Subject: [PATCH 007/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-b1978943b74 --- gradle/verification-metadata.xml | 50 ++++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 05f3f90f478dd..a9c2d352755b4 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2973,127 +2973,127 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + From 35c92c5d7a3e8b4925dc6e6ae395adc5f7e794cd Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 24 Apr 2025 06:13:13 +0000 Subject: [PATCH 008/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-9920b1e9ddf --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index bd6fda14921fa..41cf300475d33 100644 --- 
a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-b1978943b74 +lucene = 10.3.0-snapshot-9920b1e9ddf bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index a9c2d352755b4..7a924dc5c30e3 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2971,129 +2971,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From c6a5edf127c667f1a5e717a13e5f397f77be14f5 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Fri, 25 Apr 2025 06:13:27 +0000 Subject: [PATCH 009/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-bb3167e57c6 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 41cf300475d33..b37775b5b15c8 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-9920b1e9ddf +lucene = 10.3.0-snapshot-bb3167e57c6 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 7a924dc5c30e3..f48265dba4172 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2971,129 +2971,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + 
- - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 5cf19609ef8c0f770762ed1e1e01d9d7f4de141f Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Sat, 26 Apr 2025 06:13:20 +0000 Subject: [PATCH 010/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-7b0ca79db30 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index b37775b5b15c8..850e688f9b76d 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-bb3167e57c6 +lucene = 10.3.0-snapshot-7b0ca79db30 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index f48265dba4172..f5657f795487b 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2971,129 +2971,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 78b707c9a2c5f3b76e3a320db114a9c38e82f57c Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Sun, 27 Apr 2025 06:15:15 +0000 Subject: [PATCH 011/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-7b0ca79db30 --- gradle/verification-metadata.xml | 50 ++++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index f5657f795487b..43d2f3a4e9e4b 100644 --- 
a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2973,127 +2973,127 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + From 88db358d120807201eee83af9f728d76cad45d96 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Mon, 28 Apr 2025 06:13:21 +0000 Subject: [PATCH 012/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-7b0ca79db30 --- gradle/verification-metadata.xml | 50 ++++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 43d2f3a4e9e4b..098cbb36ab1ff 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2973,127 +2973,127 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + From 7aa7dd074771186e0f5d53ae54b99e652cfe58b7 Mon Sep 17 00:00:00 2001 From: ChrisHegarty Date: Thu, 24 Apr 2025 17:16:33 +0100 Subject: [PATCH 013/184] Update off-heap dense vector stats to use Lucene's implementation. Much of the removals here are an effect revert of the reflective code put in place to get the stats internally prior to Lucene 10.3. 
--- .../server/cli/SystemJvmOptions.java | 7 - .../bootstrap/Elasticsearch.java | 7 +- .../codec/vectors/ES813FlatVectorFormat.java | 9 +- .../vectors/ES813Int8FlatVectorFormat.java | 9 +- .../ES814ScalarQuantizedVectorsFormat.java | 4 +- .../ES816BinaryQuantizedVectorsReader.java | 5 +- .../DirectIOLucene99FlatVectorsReader.java | 18 +- .../ES818BinaryQuantizedVectorsReader.java | 5 +- .../AssertingKnnVectorsReaderReflect.java | 83 ----- .../vectors/reflect/OffHeapByteSizeUtils.java | 92 ------ .../reflect/OffHeapReflectionUtils.java | 302 ------------------ .../codec/vectors/reflect/OffHeapStats.java | 23 -- .../elasticsearch/index/engine/Engine.java | 3 +- .../index/shard/DenseVectorStats.java | 4 +- .../elasticsearch/bootstrap/security.policy | 4 - .../vectors/ES813FlatVectorFormatTests.java | 3 +- .../ES813Int8FlatVectorFormatTests.java | 3 +- ...HnswScalarQuantizedVectorsFormatTests.java | 3 +- .../ES815BitFlatVectorFormatTests.java | 3 +- .../ES815HnswBitVectorsFormatTests.java | 3 +- ...S816BinaryQuantizedVectorsFormatTests.java | 3 +- ...HnswBinaryQuantizedVectorsFormatTests.java | 3 +- ...S818BinaryQuantizedVectorsFormatTests.java | 3 +- ...HnswBinaryQuantizedVectorsFormatTests.java | 3 +- 24 files changed, 34 insertions(+), 568 deletions(-) delete mode 100644 server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/AssertingKnnVectorsReaderReflect.java delete mode 100644 server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/OffHeapByteSizeUtils.java delete mode 100644 server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/OffHeapReflectionUtils.java delete mode 100644 server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/OffHeapStats.java diff --git a/distribution/tools/server-cli/src/main/java/org/elasticsearch/server/cli/SystemJvmOptions.java b/distribution/tools/server-cli/src/main/java/org/elasticsearch/server/cli/SystemJvmOptions.java index 5191b60f1f8c9..93ee3850be04f 100644 --- 
a/distribution/tools/server-cli/src/main/java/org/elasticsearch/server/cli/SystemJvmOptions.java +++ b/distribution/tools/server-cli/src/main/java/org/elasticsearch/server/cli/SystemJvmOptions.java @@ -61,13 +61,6 @@ static List systemJvmOptions(Settings nodeSettings, final Map... classes) { for (final var clazz : classes) { try { - MethodHandles.lookup().ensureInitialized(clazz); + MethodHandles.publicLookup().ensureInitialized(clazz); } catch (IllegalAccessException unexpected) { throw new AssertionError(unexpected); } diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES813FlatVectorFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES813FlatVectorFormat.java index 275443b989cfc..325188624a2f4 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES813FlatVectorFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES813FlatVectorFormat.java @@ -29,8 +29,6 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.hnsw.OrdinalTranslatedKnnCollector; import org.apache.lucene.util.hnsw.RandomVectorScorer; -import org.elasticsearch.index.codec.vectors.reflect.OffHeapByteSizeUtils; -import org.elasticsearch.index.codec.vectors.reflect.OffHeapStats; import java.io.IOException; import java.util.Map; @@ -105,7 +103,7 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE } } - static class ES813FlatVectorReader extends KnnVectorsReader implements OffHeapStats { + static class ES813FlatVectorReader extends KnnVectorsReader { private final FlatVectorsReader reader; @@ -160,10 +158,5 @@ public Map getOffHeapByteSize(FieldInfo fieldInfo) { public void close() throws IOException { reader.close(); } - - @Override - public Map getOffHeapByteSize(FieldInfo fieldInfo) { - return OffHeapByteSizeUtils.getOffHeapByteSize(reader, fieldInfo); - } } } diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES813Int8FlatVectorFormat.java 
b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES813Int8FlatVectorFormat.java index 88df56b2a89d9..4636a37d14f53 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES813Int8FlatVectorFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES813Int8FlatVectorFormat.java @@ -27,8 +27,6 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.hnsw.OrdinalTranslatedKnnCollector; import org.apache.lucene.util.hnsw.RandomVectorScorer; -import org.elasticsearch.index.codec.vectors.reflect.OffHeapByteSizeUtils; -import org.elasticsearch.index.codec.vectors.reflect.OffHeapStats; import java.io.IOException; import java.util.Map; @@ -113,7 +111,7 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE } } - public static class ES813FlatVectorReader extends KnnVectorsReader implements OffHeapStats { + public static class ES813FlatVectorReader extends KnnVectorsReader { private final FlatVectorsReader reader; @@ -168,10 +166,5 @@ public Map getOffHeapByteSize(FieldInfo fieldInfo) { public void close() throws IOException { reader.close(); } - - @Override - public Map getOffHeapByteSize(FieldInfo fieldInfo) { - return OffHeapByteSizeUtils.getOffHeapByteSize(reader, fieldInfo); - } } } diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java index 3f2baad122f27..204d9c6e9e630 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java @@ -34,8 +34,6 @@ import org.apache.lucene.util.quantization.QuantizedByteVectorValues; import org.apache.lucene.util.quantization.QuantizedVectorsReader; import org.apache.lucene.util.quantization.ScalarQuantizer; -import 
org.elasticsearch.index.codec.vectors.reflect.OffHeapByteSizeUtils; -import org.elasticsearch.index.codec.vectors.reflect.OffHeapStats; import org.elasticsearch.simdvec.VectorScorerFactory; import org.elasticsearch.simdvec.VectorSimilarityType; @@ -177,7 +175,7 @@ public long ramBytesUsed() { } } - static final class ES814ScalarQuantizedVectorsReader extends FlatVectorsReader implements QuantizedVectorsReader, OffHeapStats { + static final class ES814ScalarQuantizedVectorsReader extends FlatVectorsReader implements QuantizedVectorsReader { final Lucene99ScalarQuantizedVectorsReader delegate; diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsReader.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsReader.java index 7e492275928a8..5f290a72ba833 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsReader.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsReader.java @@ -45,13 +45,10 @@ import org.apache.lucene.util.hnsw.OrdinalTranslatedKnnCollector; import org.apache.lucene.util.hnsw.RandomVectorScorer; import org.elasticsearch.index.codec.vectors.BQVectorUtils; -import org.elasticsearch.index.codec.vectors.reflect.OffHeapByteSizeUtils; -import org.elasticsearch.index.codec.vectors.reflect.OffHeapStats; import java.io.IOException; import java.util.HashMap; import java.util.Map; -import java.util.Objects; import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.readSimilarityFunction; import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.readVectorEncoding; @@ -61,7 +58,7 @@ * Copied from Lucene, replace with Lucene's implementation sometime after Lucene 10 */ @SuppressForbidden(reason = "Lucene classes") -public class ES816BinaryQuantizedVectorsReader extends FlatVectorsReader implements OffHeapStats { +public class 
ES816BinaryQuantizedVectorsReader extends FlatVectorsReader { private static final long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(ES816BinaryQuantizedVectorsReader.class); diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/DirectIOLucene99FlatVectorsReader.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/DirectIOLucene99FlatVectorsReader.java index 5f7dbb31a1ad4..d170d62ceb826 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/DirectIOLucene99FlatVectorsReader.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/DirectIOLucene99FlatVectorsReader.java @@ -46,6 +46,7 @@ import java.io.IOException; import java.io.UncheckedIOException; +import java.util.Map; import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.readSimilarityFunction; import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.readVectorEncoding; @@ -169,6 +170,12 @@ public long ramBytesUsed() { return SHALLOW_SIZE + fields.ramBytesUsed(); } + @Override + public Map getOffHeapByteSize(FieldInfo fieldInfo) { + final FieldEntry entry = getFieldEntryOrThrow(fieldInfo.name); + return Map.of(DirectIOLucene99FlatVectorsFormat.VECTOR_DATA_EXTENSION, entry.vectorDataLength()); + } + @Override public void checkIntegrity() throws IOException { CodecUtil.checksumEntireFile(vectorData); @@ -185,12 +192,17 @@ public FlatVectorsReader getMergeInstance() { } } - private FieldEntry getFieldEntry(String field, VectorEncoding expectedEncoding) { + private FieldEntry getFieldEntryOrThrow(String field) { final FieldInfo info = fieldInfos.fieldInfo(field); - final FieldEntry fieldEntry; - if (info == null || (fieldEntry = fields.get(info.number)) == null) { + final FieldEntry entry; + if (info == null || (entry = fields.get(info.number)) == null) { throw new IllegalArgumentException("field=\"" + field + "\" not found"); } + return entry; + } + + private FieldEntry 
getFieldEntry(String field, VectorEncoding expectedEncoding) { + final FieldEntry fieldEntry = getFieldEntryOrThrow(field); if (fieldEntry.vectorEncoding != expectedEncoding) { throw new IllegalArgumentException( "field=\"" + field + "\" is encoded as: " + fieldEntry.vectorEncoding + " expected: " + expectedEncoding diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsReader.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsReader.java index 2ff7664e85e22..8e547f9b3fa20 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsReader.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsReader.java @@ -46,13 +46,10 @@ import org.apache.lucene.util.hnsw.RandomVectorScorer; import org.elasticsearch.index.codec.vectors.BQVectorUtils; import org.elasticsearch.index.codec.vectors.OptimizedScalarQuantizer; -import org.elasticsearch.index.codec.vectors.reflect.OffHeapByteSizeUtils; -import org.elasticsearch.index.codec.vectors.reflect.OffHeapStats; import java.io.IOException; import java.util.HashMap; import java.util.Map; -import java.util.Objects; import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.readSimilarityFunction; import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.readVectorEncoding; @@ -62,7 +59,7 @@ * Copied from Lucene, replace with Lucene's implementation sometime after Lucene 10 */ @SuppressForbidden(reason = "Lucene classes") -public class ES818BinaryQuantizedVectorsReader extends FlatVectorsReader implements OffHeapStats { +public class ES818BinaryQuantizedVectorsReader extends FlatVectorsReader { private static final long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(ES818BinaryQuantizedVectorsReader.class); diff --git 
a/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/AssertingKnnVectorsReaderReflect.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/AssertingKnnVectorsReaderReflect.java deleted file mode 100644 index bf47564c11b3a..0000000000000 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/AssertingKnnVectorsReaderReflect.java +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". - */ - -package org.elasticsearch.index.codec.vectors.reflect; - -import org.apache.lucene.codecs.KnnVectorsReader; -import org.elasticsearch.core.SuppressForbidden; - -import java.lang.invoke.MethodHandle; -import java.lang.invoke.MethodHandles; -import java.security.AccessController; -import java.security.PrivilegedAction; - -/** - * Reflective access to unwrap non-accessible delegate in AssertingKnnVectorsReader. - * Remove once KnnVectorsReaders::getOffHeapByteSize is available. 
- */ -public class AssertingKnnVectorsReaderReflect { - - @SuppressForbidden(reason = "static type is not accessible") - public static KnnVectorsReader unwrapAssertingReader(KnnVectorsReader reader) { - try { - if (ASSERTING_ASSERT_KNN_READER_CLS != null && ASSERTING_ASSERT_KNN_READER_CLS.isAssignableFrom(reader.getClass())) { - return (KnnVectorsReader) GET_VECTOR_INDEX_LENGTH_HANDLE.invoke(reader); - } - } catch (Throwable t) { - handleThrowable(t); - } - return reader; - } - - private static final Class ASSERTING_ASSERT_KNN_READER_CLS = getAssertingReaderOrNull(); - private static final MethodHandle GET_VECTOR_INDEX_LENGTH_HANDLE = getDelegateFieldHandle(); - - private static Class getAssertingReaderOrNull() { - try { - return Class.forName("org.apache.lucene.tests.codecs.asserting.AssertingKnnVectorsFormat$AssertingKnnVectorsReader"); - } catch (ClassNotFoundException e) { - return null; - } - } - - private static MethodHandle getDelegateFieldHandle() { - try { - var cls = getAssertingReaderOrNull(); - if (cls == null) { - return MethodHandles.throwException(KnnVectorsReader.class, AssertionError.class); - } - var lookup = privilegedPrivateLookupIn(cls, MethodHandles.lookup()); - return lookup.findGetter(cls, "delegate", KnnVectorsReader.class); - } catch (ReflectiveOperationException e) { - throw new AssertionError(e); - } - } - - @SuppressWarnings("removal") - static MethodHandles.Lookup privilegedPrivateLookupIn(Class cls, MethodHandles.Lookup lookup) throws IllegalAccessException { - PrivilegedAction pa = () -> { - try { - return MethodHandles.privateLookupIn(cls, lookup); - } catch (IllegalAccessException e) { - throw new AssertionError("should not happen, check opens", e); - } - }; - return AccessController.doPrivileged(pa); - } - - static void handleThrowable(Throwable t) { - if (t instanceof Error error) { - throw error; - } else if (t instanceof RuntimeException runtimeException) { - throw runtimeException; - } else { - throw new AssertionError(t); - } 
- } -} diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/OffHeapByteSizeUtils.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/OffHeapByteSizeUtils.java deleted file mode 100644 index dcd9fc3b12737..0000000000000 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/OffHeapByteSizeUtils.java +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". - */ - -package org.elasticsearch.index.codec.vectors.reflect; - -import org.apache.lucene.backward_codecs.lucene90.Lucene90HnswVectorsReader; -import org.apache.lucene.backward_codecs.lucene91.Lucene91HnswVectorsReader; -import org.apache.lucene.backward_codecs.lucene92.Lucene92HnswVectorsReader; -import org.apache.lucene.backward_codecs.lucene94.Lucene94HnswVectorsReader; -import org.apache.lucene.backward_codecs.lucene95.Lucene95HnswVectorsReader; -import org.apache.lucene.codecs.KnnVectorsReader; -import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsReader; -import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader; -import org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsReader; -import org.apache.lucene.index.FieldInfo; -import org.elasticsearch.index.codec.vectors.es818.DirectIOLucene99FlatVectorsReader; - -import java.util.Map; -import java.util.stream.Collectors; -import java.util.stream.Stream; - -/** - * Static utility methods to help retrieve desired off-heap vector index size. - * Remove once KnnVectorsReaders::getOffHeapByteSize is available. 
- */ -public class OffHeapByteSizeUtils { - - private OffHeapByteSizeUtils() {} // no instances - - public static Map getOffHeapByteSize(KnnVectorsReader reader, FieldInfo fieldInfo) { - reader = AssertingKnnVectorsReaderReflect.unwrapAssertingReader(reader); - switch (reader) { - case OffHeapStats offHeapStats -> { - return offHeapStats.getOffHeapByteSize(fieldInfo); - } - case Lucene99HnswVectorsReader hnswVectorsReader -> { - var graph = OffHeapReflectionUtils.getOffHeapByteSizeL99HNSW(hnswVectorsReader, fieldInfo); - var flat = getOffHeapByteSize(OffHeapReflectionUtils.getFlatVectorsReaderL99HNSW(hnswVectorsReader), fieldInfo); - return mergeOffHeapByteSizeMaps(graph, flat); - } - case Lucene99ScalarQuantizedVectorsReader scalarQuantizedVectorsReader -> { - var quant = OffHeapReflectionUtils.getOffHeapByteSizeSQ(scalarQuantizedVectorsReader, fieldInfo); - var raw = getOffHeapByteSize(OffHeapReflectionUtils.getFlatVectorsReaderSQ(scalarQuantizedVectorsReader), fieldInfo); - return mergeOffHeapByteSizeMaps(quant, raw); - } - case Lucene99FlatVectorsReader flatVectorsReader -> { - return OffHeapReflectionUtils.getOffHeapByteSizeF99FLT(flatVectorsReader, fieldInfo); - } - case DirectIOLucene99FlatVectorsReader flatVectorsReader -> { - return OffHeapReflectionUtils.getOffHeapByteSizeF99FLT(flatVectorsReader, fieldInfo); - } - case Lucene95HnswVectorsReader lucene95HnswVectorsReader -> { - return OffHeapReflectionUtils.getOffHeapByteSizeL95HNSW(lucene95HnswVectorsReader, fieldInfo); - } - case Lucene94HnswVectorsReader lucene94HnswVectorsReader -> { - return OffHeapReflectionUtils.getOffHeapByteSizeL94HNSW(lucene94HnswVectorsReader, fieldInfo); - } - case Lucene92HnswVectorsReader lucene92HnswVectorsReader -> { - return OffHeapReflectionUtils.getOffHeapByteSizeL92HNSW(lucene92HnswVectorsReader, fieldInfo); - } - case Lucene91HnswVectorsReader lucene91HnswVectorsReader -> { - return OffHeapReflectionUtils.getOffHeapByteSizeL91HNSW(lucene91HnswVectorsReader, 
fieldInfo); - } - case Lucene90HnswVectorsReader lucene90HnswVectorsReader -> { - return OffHeapReflectionUtils.getOffHeapByteSizeL90HNSW(lucene90HnswVectorsReader, fieldInfo); - } - case null, default -> { - assert false : "unexpected reader:" + reader; - } - } - return Map.of(); - } - - /** - * Merges the Maps returned by getOffHeapByteSize(FieldInfo). - * - *

This method is a convenience for aggregating the desired off-heap memory requirements for - * several fields. The keys in the returned map are a union of the keys in the given maps. Entries - * with the same key are summed. - */ - public static Map mergeOffHeapByteSizeMaps(Map map1, Map map2) { - return Stream.of(map1, map2) - .flatMap(map -> map.entrySet().stream()) - .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, Long::sum)); - } -} diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/OffHeapReflectionUtils.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/OffHeapReflectionUtils.java deleted file mode 100644 index 599a205508385..0000000000000 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/OffHeapReflectionUtils.java +++ /dev/null @@ -1,302 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". 
- */ - -package org.elasticsearch.index.codec.vectors.reflect; - -import org.apache.lucene.backward_codecs.lucene90.Lucene90HnswVectorsReader; -import org.apache.lucene.backward_codecs.lucene91.Lucene91HnswVectorsReader; -import org.apache.lucene.backward_codecs.lucene92.Lucene92HnswVectorsReader; -import org.apache.lucene.backward_codecs.lucene94.Lucene94HnswVectorsReader; -import org.apache.lucene.backward_codecs.lucene95.Lucene95HnswVectorsReader; -import org.apache.lucene.codecs.hnsw.FlatVectorsReader; -import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsReader; -import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader; -import org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsReader; -import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.VectorEncoding; -import org.elasticsearch.core.SuppressForbidden; -import org.elasticsearch.index.codec.vectors.es818.DirectIOLucene99FlatVectorsReader; - -import java.lang.invoke.MethodHandle; -import java.lang.invoke.MethodHandles; -import java.lang.invoke.VarHandle; -import java.security.AccessController; -import java.security.PrivilegedAction; -import java.util.Map; - -import static java.lang.invoke.MethodType.methodType; - -/** - * Reflective access to non-accessible members of Lucene's KnnVectorsReader implementations. - * Remove once KnnVectorsReaders::getOffHeapByteSize is available. 
- */ -public class OffHeapReflectionUtils { - - private OffHeapReflectionUtils() {} - - static final String FLAT_VECTOR_DATA_EXTENSION = "vec"; - static final String SQ_VECTOR_INDEX_EXTENSION = "veq"; - static final String HNSW_VECTOR_INDEX_EXTENSION = "vex"; - - private static final MethodHandle GET_FIELD_ENTRY_HNDL_SQ; - private static final MethodHandle GET_VECTOR_DATA_LENGTH_HANDLE_SQ; - private static final VarHandle RAW_VECTORS_READER_HNDL_SQ; - private static final MethodHandle GET_FIELD_ENTRY_HANDLE_L99FLT; - private static final MethodHandle VECTOR_DATA_LENGTH_HANDLE_L99FLT; - private static final MethodHandle GET_FIELD_ENTRY_HANDLE_DIOL99FLT; - private static final MethodHandle VECTOR_DATA_LENGTH_HANDLE_DIOL99FLT; - private static final MethodHandle GET_FIELD_ENTRY_HANDLE_L99HNSW; - private static final MethodHandle GET_VECTOR_INDEX_LENGTH_HANDLE_L99HNSW; - private static final VarHandle FLAT_VECTORS_READER_HNDL_L99HNSW; - - static final Class L99_SQ_VR_CLS = Lucene99ScalarQuantizedVectorsReader.class; - static final Class L99_FLT_VR_CLS = Lucene99FlatVectorsReader.class; - static final Class DIOL99_FLT_VR_CLS = DirectIOLucene99FlatVectorsReader.class; - static final Class L99_HNSW_VR_CLS = Lucene99HnswVectorsReader.class; - - // old codecs - private static final MethodHandle GET_FIELD_ENTRY_HANDLE_L90HNSW; - private static final MethodHandle GET_VECTOR_INDEX_LENGTH_HANDLE_L90HNSW; - private static final MethodHandle GET_VECTOR_DATA_LENGTH_HANDLE_L90HNSW; - - private static final MethodHandle GET_FIELD_ENTRY_HANDLE_L91HNSW; - private static final MethodHandle GET_VECTOR_INDEX_LENGTH_HANDLE_L91HNSW; - private static final MethodHandle GET_VECTOR_DATA_LENGTH_HANDLE_L91HNSW; - - private static final MethodHandle GET_FIELD_ENTRY_HANDLE_L92HNSW; - private static final MethodHandle GET_VECTOR_INDEX_LENGTH_HANDLE_L92HNSW; - private static final MethodHandle GET_VECTOR_DATA_LENGTH_HANDLE_L92HNSW; - - private static final MethodHandle 
GET_FIELD_ENTRY_HANDLE_L94HNSW; - private static final MethodHandle GET_VECTOR_INDEX_LENGTH_HANDLE_L94HNSW; - private static final MethodHandle GET_VECTOR_DATA_LENGTH_HANDLE_L94HNSW; - - private static final MethodHandle GET_FIELD_ENTRY_HANDLE_L95HNSW; - private static final MethodHandle GET_VECTOR_INDEX_LENGTH_HANDLE_L95HNSW; - private static final MethodHandle GET_VECTOR_DATA_LENGTH_HANDLE_L95HNSW; - - static final Class L90_HNSW_VR_CLS = Lucene90HnswVectorsReader.class; - static final Class L91_HNSW_VR_CLS = Lucene91HnswVectorsReader.class; - static final Class L92_HNSW_VR_CLS = Lucene92HnswVectorsReader.class; - static final Class L94_HNSW_VR_CLS = Lucene94HnswVectorsReader.class; - static final Class L95_HNSW_VR_CLS = Lucene95HnswVectorsReader.class; - - static { - try { - // Lucene99ScalarQuantizedVectorsReader - var cls = Class.forName("org.apache.lucene.codecs.lucene99.Lucene99ScalarQuantizedVectorsReader$FieldEntry"); - var lookup = privilegedPrivateLookupIn(L99_SQ_VR_CLS, MethodHandles.lookup()); - var mt = methodType(cls, String.class); - GET_FIELD_ENTRY_HNDL_SQ = lookup.findVirtual(L99_SQ_VR_CLS, "getFieldEntry", mt); - GET_VECTOR_DATA_LENGTH_HANDLE_SQ = lookup.findVirtual(cls, "vectorDataLength", methodType(long.class)); - RAW_VECTORS_READER_HNDL_SQ = lookup.findVarHandle(L99_SQ_VR_CLS, "rawVectorsReader", FlatVectorsReader.class); - // Lucene99FlatVectorsReader - cls = Class.forName("org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsReader$FieldEntry"); - lookup = privilegedPrivateLookupIn(L99_FLT_VR_CLS, MethodHandles.lookup()); - mt = methodType(cls, String.class, VectorEncoding.class); - GET_FIELD_ENTRY_HANDLE_L99FLT = lookup.findVirtual(L99_FLT_VR_CLS, "getFieldEntry", mt); - VECTOR_DATA_LENGTH_HANDLE_L99FLT = lookup.findVirtual(cls, "vectorDataLength", methodType(long.class)); - // DirectIOLucene99FlatVectorsReader - cls = Class.forName("org.elasticsearch.index.codec.vectors.es818.DirectIOLucene99FlatVectorsReader$FieldEntry"); - lookup = 
privilegedPrivateLookupIn(DIOL99_FLT_VR_CLS, MethodHandles.lookup()); - mt = methodType(cls, String.class, VectorEncoding.class); - GET_FIELD_ENTRY_HANDLE_DIOL99FLT = lookup.findVirtual(DIOL99_FLT_VR_CLS, "getFieldEntry", mt); - VECTOR_DATA_LENGTH_HANDLE_DIOL99FLT = lookup.findVirtual(cls, "vectorDataLength", methodType(long.class)); - // Lucene99HnswVectorsReader - cls = Class.forName("org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader$FieldEntry"); - lookup = privilegedPrivateLookupIn(L99_HNSW_VR_CLS, MethodHandles.lookup()); - mt = methodType(cls, String.class, VectorEncoding.class); - GET_FIELD_ENTRY_HANDLE_L99HNSW = lookup.findVirtual(L99_HNSW_VR_CLS, "getFieldEntry", mt); - GET_VECTOR_INDEX_LENGTH_HANDLE_L99HNSW = lookup.findVirtual(cls, "vectorIndexLength", methodType(long.class)); - lookup = privilegedPrivateLookupIn(L99_HNSW_VR_CLS, MethodHandles.lookup()); - FLAT_VECTORS_READER_HNDL_L99HNSW = lookup.findVarHandle(L99_HNSW_VR_CLS, "flatVectorsReader", FlatVectorsReader.class); - // Lucene90HnswVectorsReader - cls = Class.forName("org.apache.lucene.backward_codecs.lucene90.Lucene90HnswVectorsReader$FieldEntry"); - lookup = privilegedPrivateLookupIn(L90_HNSW_VR_CLS, MethodHandles.lookup()); - mt = methodType(cls, String.class); - GET_FIELD_ENTRY_HANDLE_L90HNSW = lookup.findVirtual(L90_HNSW_VR_CLS, "getFieldEntry", mt); - GET_VECTOR_INDEX_LENGTH_HANDLE_L90HNSW = lookup.findVirtual(cls, "indexDataLength", methodType(long.class)); - GET_VECTOR_DATA_LENGTH_HANDLE_L90HNSW = lookup.findVirtual(cls, "vectorDataLength", methodType(long.class)); - // Lucene91HnswVectorsReader - cls = Class.forName("org.apache.lucene.backward_codecs.lucene91.Lucene91HnswVectorsReader$FieldEntry"); - lookup = privilegedPrivateLookupIn(L91_HNSW_VR_CLS, MethodHandles.lookup()); - mt = methodType(cls, String.class); - GET_FIELD_ENTRY_HANDLE_L91HNSW = lookup.findVirtual(L91_HNSW_VR_CLS, "getFieldEntry", mt); - GET_VECTOR_INDEX_LENGTH_HANDLE_L91HNSW = lookup.findVirtual(cls, 
"vectorIndexLength", methodType(long.class)); - GET_VECTOR_DATA_LENGTH_HANDLE_L91HNSW = lookup.findVirtual(cls, "vectorDataLength", methodType(long.class)); - // Lucene92HnswVectorsReader - cls = Class.forName("org.apache.lucene.backward_codecs.lucene92.Lucene92HnswVectorsReader$FieldEntry"); - lookup = privilegedPrivateLookupIn(L92_HNSW_VR_CLS, MethodHandles.lookup()); - mt = methodType(cls, String.class); - GET_FIELD_ENTRY_HANDLE_L92HNSW = lookup.findVirtual(L92_HNSW_VR_CLS, "getFieldEntry", mt); - GET_VECTOR_INDEX_LENGTH_HANDLE_L92HNSW = lookup.findVirtual(cls, "vectorIndexLength", methodType(long.class)); - GET_VECTOR_DATA_LENGTH_HANDLE_L92HNSW = lookup.findVirtual(cls, "vectorDataLength", methodType(long.class)); - // Lucene94HnswVectorsReader - cls = Class.forName("org.apache.lucene.backward_codecs.lucene94.Lucene94HnswVectorsReader$FieldEntry"); - lookup = privilegedPrivateLookupIn(L94_HNSW_VR_CLS, MethodHandles.lookup()); - mt = methodType(cls, String.class, VectorEncoding.class); - GET_FIELD_ENTRY_HANDLE_L94HNSW = lookup.findVirtual(L94_HNSW_VR_CLS, "getFieldEntry", mt); - GET_VECTOR_INDEX_LENGTH_HANDLE_L94HNSW = lookup.findVirtual(cls, "vectorIndexLength", methodType(long.class)); - GET_VECTOR_DATA_LENGTH_HANDLE_L94HNSW = lookup.findVirtual(cls, "vectorDataLength", methodType(long.class)); - // Lucene95HnswVectorsReader - cls = Class.forName("org.apache.lucene.backward_codecs.lucene95.Lucene95HnswVectorsReader$FieldEntry"); - lookup = privilegedPrivateLookupIn(L95_HNSW_VR_CLS, MethodHandles.lookup()); - mt = methodType(cls, String.class, VectorEncoding.class); - GET_FIELD_ENTRY_HANDLE_L95HNSW = lookup.findVirtual(L95_HNSW_VR_CLS, "getFieldEntry", mt); - GET_VECTOR_INDEX_LENGTH_HANDLE_L95HNSW = lookup.findVirtual(cls, "vectorIndexLength", methodType(long.class)); - GET_VECTOR_DATA_LENGTH_HANDLE_L95HNSW = lookup.findVirtual(cls, "vectorDataLength", methodType(long.class)); - } catch (ReflectiveOperationException e) { - throw new AssertionError(e); - } - } - 
- @SuppressForbidden(reason = "static type is not accessible") - static Map getOffHeapByteSizeSQ(Lucene99ScalarQuantizedVectorsReader reader, FieldInfo fieldInfo) { - try { - var entry = GET_FIELD_ENTRY_HNDL_SQ.invoke(reader, fieldInfo.name); - long len = (long) GET_VECTOR_DATA_LENGTH_HANDLE_SQ.invoke(entry); - return Map.of(SQ_VECTOR_INDEX_EXTENSION, len); - } catch (Throwable t) { - handleThrowable(t); - } - throw new AssertionError("should not reach here"); - } - - static FlatVectorsReader getFlatVectorsReaderSQ(Lucene99ScalarQuantizedVectorsReader reader) { - return (FlatVectorsReader) RAW_VECTORS_READER_HNDL_SQ.get(reader); - } - - @SuppressForbidden(reason = "static type is not accessible") - static Map getOffHeapByteSizeF99FLT(Lucene99FlatVectorsReader reader, FieldInfo fieldInfo) { - try { - var entry = GET_FIELD_ENTRY_HANDLE_L99FLT.invoke(reader, fieldInfo.name, fieldInfo.getVectorEncoding()); - long len = (long) VECTOR_DATA_LENGTH_HANDLE_L99FLT.invoke(entry); - return Map.of(FLAT_VECTOR_DATA_EXTENSION, len); - } catch (Throwable t) { - handleThrowable(t); - } - throw new AssertionError("should not reach here"); - } - - @SuppressForbidden(reason = "static type is not accessible") - static Map getOffHeapByteSizeF99FLT(DirectIOLucene99FlatVectorsReader reader, FieldInfo fieldInfo) { - try { - var entry = GET_FIELD_ENTRY_HANDLE_DIOL99FLT.invoke(reader, fieldInfo.name, fieldInfo.getVectorEncoding()); - long len = (long) VECTOR_DATA_LENGTH_HANDLE_DIOL99FLT.invoke(entry); - return Map.of(FLAT_VECTOR_DATA_EXTENSION, len); - } catch (Throwable t) { - handleThrowable(t); - } - throw new AssertionError("should not reach here"); - } - - @SuppressForbidden(reason = "static type is not accessible") - static Map getOffHeapByteSizeL99HNSW(Lucene99HnswVectorsReader reader, FieldInfo fieldInfo) { - try { - var entry = GET_FIELD_ENTRY_HANDLE_L99HNSW.invoke(reader, fieldInfo.name, fieldInfo.getVectorEncoding()); - long len = (long) 
GET_VECTOR_INDEX_LENGTH_HANDLE_L99HNSW.invoke(entry); - return Map.of(HNSW_VECTOR_INDEX_EXTENSION, len); - } catch (Throwable t) { - handleThrowable(t); - } - throw new AssertionError("should not reach here"); - } - - static FlatVectorsReader getFlatVectorsReaderL99HNSW(Lucene99HnswVectorsReader reader) { - return (FlatVectorsReader) FLAT_VECTORS_READER_HNDL_L99HNSW.get(reader); - } - - // old codecs - @SuppressForbidden(reason = "static type is not accessible") - static Map getOffHeapByteSizeL90HNSW(Lucene90HnswVectorsReader reader, FieldInfo fieldInfo) { - try { - var entry = GET_FIELD_ENTRY_HANDLE_L90HNSW.invoke(reader, fieldInfo.name); - long graph = (long) GET_VECTOR_INDEX_LENGTH_HANDLE_L90HNSW.invoke(entry); - long raw = (long) GET_VECTOR_DATA_LENGTH_HANDLE_L90HNSW.invoke(entry); - return Map.of(HNSW_VECTOR_INDEX_EXTENSION, graph, FLAT_VECTOR_DATA_EXTENSION, raw); - } catch (Throwable t) { - handleThrowable(t); - } - throw new AssertionError("should not reach here"); - } - - @SuppressForbidden(reason = "static type is not accessible") - static Map getOffHeapByteSizeL91HNSW(Lucene91HnswVectorsReader reader, FieldInfo fieldInfo) { - try { - var entry = GET_FIELD_ENTRY_HANDLE_L91HNSW.invoke(reader, fieldInfo.name); - long graph = (long) GET_VECTOR_INDEX_LENGTH_HANDLE_L91HNSW.invoke(entry); - long raw = (long) GET_VECTOR_DATA_LENGTH_HANDLE_L91HNSW.invoke(entry); - return Map.of(HNSW_VECTOR_INDEX_EXTENSION, graph, FLAT_VECTOR_DATA_EXTENSION, raw); - } catch (Throwable t) { - handleThrowable(t); - } - throw new AssertionError("should not reach here"); - } - - @SuppressForbidden(reason = "static type is not accessible") - static Map getOffHeapByteSizeL92HNSW(Lucene92HnswVectorsReader reader, FieldInfo fieldInfo) { - try { - var entry = GET_FIELD_ENTRY_HANDLE_L92HNSW.invoke(reader, fieldInfo.name); - long graph = (long) GET_VECTOR_INDEX_LENGTH_HANDLE_L92HNSW.invoke(entry); - long raw = (long) GET_VECTOR_DATA_LENGTH_HANDLE_L92HNSW.invoke(entry); - return 
Map.of(HNSW_VECTOR_INDEX_EXTENSION, graph, FLAT_VECTOR_DATA_EXTENSION, raw); - } catch (Throwable t) { - handleThrowable(t); - } - throw new AssertionError("should not reach here"); - } - - @SuppressForbidden(reason = "static type is not accessible") - static Map getOffHeapByteSizeL94HNSW(Lucene94HnswVectorsReader reader, FieldInfo fieldInfo) { - try { - var entry = GET_FIELD_ENTRY_HANDLE_L94HNSW.invoke(reader, fieldInfo.name, fieldInfo.getVectorEncoding()); - long graph = (long) GET_VECTOR_INDEX_LENGTH_HANDLE_L94HNSW.invoke(entry); - long raw = (long) GET_VECTOR_DATA_LENGTH_HANDLE_L94HNSW.invoke(entry); - return Map.of(HNSW_VECTOR_INDEX_EXTENSION, graph, FLAT_VECTOR_DATA_EXTENSION, raw); - } catch (Throwable t) { - handleThrowable(t); - } - throw new AssertionError("should not reach here"); - } - - @SuppressForbidden(reason = "static type is not accessible") - static Map getOffHeapByteSizeL95HNSW(Lucene95HnswVectorsReader reader, FieldInfo fieldInfo) { - try { - var entry = GET_FIELD_ENTRY_HANDLE_L95HNSW.invoke(reader, fieldInfo.name, fieldInfo.getVectorEncoding()); - long graph = (long) GET_VECTOR_INDEX_LENGTH_HANDLE_L95HNSW.invoke(entry); - long raw = (long) GET_VECTOR_DATA_LENGTH_HANDLE_L95HNSW.invoke(entry); - return Map.of(HNSW_VECTOR_INDEX_EXTENSION, graph, FLAT_VECTOR_DATA_EXTENSION, raw); - } catch (Throwable t) { - handleThrowable(t); - } - throw new AssertionError("should not reach here"); - } - - @SuppressWarnings("removal") - private static MethodHandles.Lookup privilegedPrivateLookupIn(Class cls, MethodHandles.Lookup lookup) { - PrivilegedAction pa = () -> { - try { - return MethodHandles.privateLookupIn(cls, lookup); - } catch (IllegalAccessException e) { - throw new AssertionError("should not happen, check opens", e); - } - }; - return AccessController.doPrivileged(pa); - } - - private static void handleThrowable(Throwable t) { - if (t instanceof Error error) { - throw error; - } else if (t instanceof RuntimeException runtimeException) { - throw 
runtimeException; - } else { - throw new AssertionError(t); - } - } -} diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/OffHeapStats.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/OffHeapStats.java deleted file mode 100644 index 79eb118f389cc..0000000000000 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/reflect/OffHeapStats.java +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". - */ - -package org.elasticsearch.index.codec.vectors.reflect; - -import org.apache.lucene.index.FieldInfo; - -import java.util.Map; - -/** - * Common interface to unify offHeapByteSize in ES' KnnVectorsReader implementations. - * Remove once KnnVectorsReaders::getOffHeapByteSize is available. 
- */ -public interface OffHeapStats { - - Map getOffHeapByteSize(FieldInfo fieldInfo); -} diff --git a/server/src/main/java/org/elasticsearch/index/engine/Engine.java b/server/src/main/java/org/elasticsearch/index/engine/Engine.java index 6b3a4060cefad..91a39a4cc7bf8 100644 --- a/server/src/main/java/org/elasticsearch/index/engine/Engine.java +++ b/server/src/main/java/org/elasticsearch/index/engine/Engine.java @@ -62,7 +62,6 @@ import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.VersionType; import org.elasticsearch.index.codec.FieldInfosWithUsages; -import org.elasticsearch.index.codec.vectors.reflect.OffHeapByteSizeUtils; import org.elasticsearch.index.mapper.DocumentParser; import org.elasticsearch.index.mapper.FieldNamesFieldMapper; import org.elasticsearch.index.mapper.LuceneDocument; @@ -334,7 +333,7 @@ private DenseVectorStats getDenseVectorStats(final LeafReader atomicReader, List if (vectorsReader instanceof PerFieldKnnVectorsFormat.FieldsReader fieldsReader) { vectorsReader = fieldsReader.getFieldReader(info.name); } - Map offHeap = OffHeapByteSizeUtils.getOffHeapByteSize(vectorsReader, info); + Map offHeap = vectorsReader.getOffHeapByteSize(info); offHeapStats.put(info.name, offHeap); } } diff --git a/server/src/main/java/org/elasticsearch/index/shard/DenseVectorStats.java b/server/src/main/java/org/elasticsearch/index/shard/DenseVectorStats.java index 6e74647899080..e60f325e2f5b1 100644 --- a/server/src/main/java/org/elasticsearch/index/shard/DenseVectorStats.java +++ b/server/src/main/java/org/elasticsearch/index/shard/DenseVectorStats.java @@ -9,11 +9,11 @@ package org.elasticsearch.index.shard; +import org.apache.lucene.codecs.KnnVectorsReader; import org.elasticsearch.TransportVersions; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; -import org.elasticsearch.index.codec.vectors.reflect.OffHeapByteSizeUtils; 
import org.elasticsearch.xcontent.ToXContentFragment; import org.elasticsearch.xcontent.XContentBuilder; @@ -92,7 +92,7 @@ public void add(DenseVectorStats other) { } else { this.offHeapStats = Stream.of(this.offHeapStats, other.offHeapStats) .flatMap(map -> map.entrySet().stream()) - .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, OffHeapByteSizeUtils::mergeOffHeapByteSizeMaps)); + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, KnnVectorsReader::mergeOffHeapByteSizeMaps)); } } } diff --git a/server/src/main/resources/org/elasticsearch/bootstrap/security.policy b/server/src/main/resources/org/elasticsearch/bootstrap/security.policy index 4f3bc1f92060b..55abdc84fc8fb 100644 --- a/server/src/main/resources/org/elasticsearch/bootstrap/security.policy +++ b/server/src/main/resources/org/elasticsearch/bootstrap/security.policy @@ -32,10 +32,6 @@ grant codeBase "${codebase.elasticsearch}" { // for plugin api dynamic settings instances permission java.lang.RuntimePermission "accessClassInPackage.jdk.internal.reflect"; - - // For vector off-heap statistics, remove in Lucene 10.3 - permission java.lang.RuntimePermission "accessDeclaredMembers"; - permission java.lang.reflect.ReflectPermission "suppressAccessChecks"; }; //// Very special jar permissions: diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES813FlatVectorFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/ES813FlatVectorFormatTests.java index 80b2d5232ca65..9ea13750d8cd7 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES813FlatVectorFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/ES813FlatVectorFormatTests.java @@ -24,7 +24,6 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase; import org.elasticsearch.common.logging.LogConfigurator; -import org.elasticsearch.index.codec.vectors.reflect.OffHeapByteSizeUtils; 
import java.io.IOException; @@ -66,7 +65,7 @@ public void testSimpleOffHeapSize() throws IOException { knnVectorsReader = fieldsReader.getFieldReader("f"); } var fieldInfo = r.getFieldInfos().fieldInfo("f"); - var offHeap = OffHeapByteSizeUtils.getOffHeapByteSize(knnVectorsReader, fieldInfo); + var offHeap = knnVectorsReader.getOffHeapByteSize(fieldInfo); assertEquals(vector.length * Float.BYTES, (long) offHeap.get("vec")); assertEquals(1, offHeap.size()); } diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES813Int8FlatVectorFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/ES813Int8FlatVectorFormatTests.java index 99c60c7bcc7f3..6a29bf08ae9a5 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES813Int8FlatVectorFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/ES813Int8FlatVectorFormatTests.java @@ -24,7 +24,6 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase; import org.elasticsearch.common.logging.LogConfigurator; -import org.elasticsearch.index.codec.vectors.reflect.OffHeapByteSizeUtils; import java.io.IOException; @@ -66,7 +65,7 @@ public void testSimpleOffHeapSize() throws IOException { knnVectorsReader = fieldsReader.getFieldReader("f"); } var fieldInfo = r.getFieldInfos().fieldInfo("f"); - var offHeap = OffHeapByteSizeUtils.getOffHeapByteSize(knnVectorsReader, fieldInfo); + var offHeap = knnVectorsReader.getOffHeapByteSize(fieldInfo); assertEquals(2, offHeap.size()); assertEquals(vector.length * Float.BYTES, (long) offHeap.get("vec")); assertTrue(offHeap.get("veq") > 0L); diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES814HnswScalarQuantizedVectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/ES814HnswScalarQuantizedVectorsFormatTests.java index be715c8ab9175..b064321a70ae8 100644 --- 
a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES814HnswScalarQuantizedVectorsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/ES814HnswScalarQuantizedVectorsFormatTests.java @@ -30,7 +30,6 @@ import org.apache.lucene.store.MMapDirectory; import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase; import org.elasticsearch.common.logging.LogConfigurator; -import org.elasticsearch.index.codec.vectors.reflect.OffHeapByteSizeUtils; import java.io.IOException; import java.nio.file.Path; @@ -199,7 +198,7 @@ public void testSimpleOffHeapSize() throws IOException { knnVectorsReader = fieldsReader.getFieldReader("f"); } var fieldInfo = r.getFieldInfos().fieldInfo("f"); - var offHeap = OffHeapByteSizeUtils.getOffHeapByteSize(knnVectorsReader, fieldInfo); + var offHeap = knnVectorsReader.getOffHeapByteSize(fieldInfo); assertEquals(3, offHeap.size()); assertEquals(vector.length * Float.BYTES, (long) offHeap.get("vec")); assertEquals(1L, (long) offHeap.get("vex")); diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES815BitFlatVectorFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/ES815BitFlatVectorFormatTests.java index b04e74adc53ae..c770a62479d38 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES815BitFlatVectorFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/ES815BitFlatVectorFormatTests.java @@ -23,7 +23,6 @@ import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.lucene.store.Directory; -import org.elasticsearch.index.codec.vectors.reflect.OffHeapByteSizeUtils; import org.junit.Before; import java.io.IOException; @@ -60,7 +59,7 @@ public void testSimpleOffHeapSize() throws IOException { knnVectorsReader = fieldsReader.getFieldReader("f"); } var fieldInfo = r.getFieldInfos().fieldInfo("f"); - var offHeap = 
OffHeapByteSizeUtils.getOffHeapByteSize(knnVectorsReader, fieldInfo); + var offHeap = knnVectorsReader.getOffHeapByteSize(fieldInfo); assertEquals(1, offHeap.size()); assertTrue(offHeap.get("vec") > 0L); } diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES815HnswBitVectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/ES815HnswBitVectorsFormatTests.java index 2e30fa0d16fa2..544dccc73c070 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES815HnswBitVectorsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/ES815HnswBitVectorsFormatTests.java @@ -23,7 +23,6 @@ import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.lucene.store.Directory; -import org.elasticsearch.index.codec.vectors.reflect.OffHeapByteSizeUtils; import org.junit.Before; import java.io.IOException; @@ -60,7 +59,7 @@ public void testSimpleOffHeapSize() throws IOException { knnVectorsReader = fieldsReader.getFieldReader("f"); } var fieldInfo = r.getFieldInfos().fieldInfo("f"); - var offHeap = OffHeapByteSizeUtils.getOffHeapByteSize(knnVectorsReader, fieldInfo); + var offHeap = knnVectorsReader.getOffHeapByteSize(fieldInfo); assertEquals(2, offHeap.size()); assertTrue(offHeap.get("vec") > 0L); assertEquals(1L, (long) offHeap.get("vex")); diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsFormatTests.java index a18d86128e90f..6aec7b595ce8b 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsFormatTests.java @@ -45,7 +45,6 @@ import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase; import 
org.elasticsearch.common.logging.LogConfigurator; import org.elasticsearch.index.codec.vectors.BQVectorUtils; -import org.elasticsearch.index.codec.vectors.reflect.OffHeapByteSizeUtils; import java.io.IOException; import java.util.Locale; @@ -200,7 +199,7 @@ public void testSimpleOffHeapSize() throws IOException { knnVectorsReader = fieldsReader.getFieldReader("f"); } var fieldInfo = r.getFieldInfos().fieldInfo("f"); - var offHeap = OffHeapByteSizeUtils.getOffHeapByteSize(knnVectorsReader, fieldInfo); + var offHeap = knnVectorsReader.getOffHeapByteSize(fieldInfo); assertEquals(2, offHeap.size()); assertEquals(vector.length * Float.BYTES, (long) offHeap.get("vec")); assertTrue(offHeap.get("veb") > 0L); diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816HnswBinaryQuantizedVectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816HnswBinaryQuantizedVectorsFormatTests.java index 5658935469302..a80240e228efc 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816HnswBinaryQuantizedVectorsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816HnswBinaryQuantizedVectorsFormatTests.java @@ -41,7 +41,6 @@ import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase; import org.apache.lucene.util.SameThreadExecutorService; import org.elasticsearch.common.logging.LogConfigurator; -import org.elasticsearch.index.codec.vectors.reflect.OffHeapByteSizeUtils; import java.io.IOException; import java.util.Arrays; @@ -147,7 +146,7 @@ public void testSimpleOffHeapSize() throws IOException { knnVectorsReader = fieldsReader.getFieldReader("f"); } var fieldInfo = r.getFieldInfos().fieldInfo("f"); - var offHeap = OffHeapByteSizeUtils.getOffHeapByteSize(knnVectorsReader, fieldInfo); + var offHeap = knnVectorsReader.getOffHeapByteSize(fieldInfo); assertEquals(3, offHeap.size()); assertEquals(vector.length * Float.BYTES, (long) offHeap.get("vec")); 
assertEquals(1L, (long) offHeap.get("vex")); diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormatTests.java index 4e431cbe71eab..26bd905270f83 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormatTests.java @@ -46,7 +46,6 @@ import org.elasticsearch.common.logging.LogConfigurator; import org.elasticsearch.index.codec.vectors.BQVectorUtils; import org.elasticsearch.index.codec.vectors.OptimizedScalarQuantizer; -import org.elasticsearch.index.codec.vectors.reflect.OffHeapByteSizeUtils; import java.io.IOException; import java.util.Locale; @@ -200,7 +199,7 @@ public void testSimpleOffHeapSize() throws IOException { knnVectorsReader = fieldsReader.getFieldReader("f"); } var fieldInfo = r.getFieldInfos().fieldInfo("f"); - var offHeap = OffHeapByteSizeUtils.getOffHeapByteSize(knnVectorsReader, fieldInfo); + var offHeap = knnVectorsReader.getOffHeapByteSize(fieldInfo); assertEquals(2, offHeap.size()); assertEquals(vector.length * Float.BYTES, (long) offHeap.get("vec")); assertTrue(offHeap.get("veb") > 0L); diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818HnswBinaryQuantizedVectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818HnswBinaryQuantizedVectorsFormatTests.java index 63af621fafd31..cf7b710ef3093 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818HnswBinaryQuantizedVectorsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818HnswBinaryQuantizedVectorsFormatTests.java @@ -41,7 +41,6 @@ import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase; import 
org.apache.lucene.util.SameThreadExecutorService; import org.elasticsearch.common.logging.LogConfigurator; -import org.elasticsearch.index.codec.vectors.reflect.OffHeapByteSizeUtils; import java.io.IOException; import java.util.Arrays; @@ -151,7 +150,7 @@ public void testSimpleOffHeapSize() throws IOException { knnVectorsReader = fieldsReader.getFieldReader("f"); } var fieldInfo = r.getFieldInfos().fieldInfo("f"); - var offHeap = OffHeapByteSizeUtils.getOffHeapByteSize(knnVectorsReader, fieldInfo); + var offHeap = knnVectorsReader.getOffHeapByteSize(fieldInfo); assertEquals(3, offHeap.size()); assertEquals(vector.length * Float.BYTES, (long) offHeap.get("vec")); assertEquals(1L, (long) offHeap.get("vex")); From b9d9f59e55dd1b973621c281fa50b99216789592 Mon Sep 17 00:00:00 2001 From: Chris Hegarty <62058229+ChrisHegarty@users.noreply.github.com> Date: Mon, 28 Apr 2025 18:47:46 +0100 Subject: [PATCH 014/184] Fix QueryPhaseTest that use trackTotalHits (#127470) This commit fixes a couple of test scenarios in QueryPhaseTest that incorrectly assert values around trackTotalHits. 
Test updated as per apache/lucene#14561 This test fix is needed when running with Lucene >= 10.2.1 --- .../elasticsearch/search/query/QueryPhaseTests.java | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/search/query/QueryPhaseTests.java b/server/src/test/java/org/elasticsearch/search/query/QueryPhaseTests.java index 1f74668158e0e..0d7f16211aa51 100644 --- a/server/src/test/java/org/elasticsearch/search/query/QueryPhaseTests.java +++ b/server/src/test/java/org/elasticsearch/search/query/QueryPhaseTests.java @@ -55,6 +55,7 @@ import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TotalHits; +import org.apache.lucene.search.TotalHits.Relation; import org.apache.lucene.search.Weight; import org.apache.lucene.search.join.BitSetProducer; import org.apache.lucene.search.join.ScoreMode; @@ -567,12 +568,13 @@ public void testTerminateAfterWithHitsNoHitCountShortcut() throws Exception { // size is lower than terminate_after context.setSize(5); // track_total_hits is lower than terminate_after - context.trackTotalHitsUpTo(randomIntBetween(1, 6)); + int trackTotalHits = randomIntBetween(1, 6); + context.trackTotalHitsUpTo(trackTotalHits); QueryPhase.executeQuery(context); // depending on docs distribution we may or may not be able to honor terminate_after: low scoring hits are skipped via // setMinCompetitiveScore, which bypasses terminate_after until the next leaf collector is pulled, when that happens. 
assertThat(context.queryResult().terminatedEarly(), either(is(true)).or(is(false))); - assertThat(context.queryResult().topDocs().topDocs.totalHits.value(), equalTo(7L)); + assertThat(context.queryResult().topDocs().topDocs.totalHits.value(), greaterThanOrEqualTo((long) trackTotalHits)); assertThat(context.queryResult().topDocs().topDocs.totalHits.relation(), equalTo(TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO)); assertThat(context.queryResult().topDocs().topDocs.scoreDocs.length, equalTo(5)); } @@ -990,7 +992,10 @@ public void testMinScore() throws Exception { context.trackTotalHitsUpTo(5); QueryPhase.addCollectorsAndSearch(context); - assertEquals(10, context.queryResult().topDocs().topDocs.totalHits.value()); + TotalHits totalHits = context.queryResult().topDocs().topDocs.totalHits; + assertThat(totalHits.value(), greaterThanOrEqualTo(5L)); + var expectedRelation = totalHits.value() == 10 ? Relation.EQUAL_TO : Relation.GREATER_THAN_OR_EQUAL_TO; + assertThat(totalHits.relation(), is(expectedRelation)); } } From e211354827517d73caa42112eac0447733331ec0 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Tue, 29 Apr 2025 06:13:33 +0000 Subject: [PATCH 015/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-4099fae40f5 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index ba8f83f81315c..1c2bf72fe81e4 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-7b0ca79db30 +lucene = 10.3.0-snapshot-4099fae40f5 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 5e3f725d2ad1e..fa39f08cb8d1d 100644 --- 
a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2951,129 +2951,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 124bf4b1293d8be2cabe713e0bb5c1536d9dde50 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 30 Apr 2025 06:12:34 +0000 Subject: [PATCH 016/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-85960129132 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 1c2bf72fe81e4..c5a3ed95e688b 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-4099fae40f5 +lucene = 10.3.0-snapshot-85960129132 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index fa39f08cb8d1d..ecd6b02b74411 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2951,129 +2951,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 4a098061751e89e67c2487df07d77a9ef97bf1e8 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 1 May 2025 06:14:02 +0000 Subject: [PATCH 017/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-96553b7aff7 --- 
build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index c5a3ed95e688b..2e36aadbf4c5e 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-85960129132 +lucene = 10.3.0-snapshot-96553b7aff7 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index ecd6b02b74411..32a555d36ac34 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2951,129 +2951,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 7429c2c40828bab67e5d67505532b91a8a68fa03 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Fri, 2 May 2025 06:12:50 +0000 Subject: [PATCH 018/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-9cf497e1667 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 2e36aadbf4c5e..c6c3fdbb68ebd 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-96553b7aff7 +lucene = 10.3.0-snapshot-9cf497e1667 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml 
b/gradle/verification-metadata.xml index 32a555d36ac34..26de34e979fa9 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2951,129 +2951,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 7bbecb6cba2046e021a5f620fbef7e4b59eb7ce7 Mon Sep 17 00:00:00 2001 From: Simon Cooper Date: Fri, 2 May 2025 08:44:04 +0100 Subject: [PATCH 019/184] Use a hint to specify DirectIO to HybridDirectory (#127601) Remove the forked DirectIO lucene format --- .../elasticsearch/index/IndexVersions.java | 2 +- ...exInputSupplier.java => DirectIOHint.java} | 11 +- .../DirectIOLucene99FlatVectorsFormat.java | 68 +++- .../DirectIOLucene99FlatVectorsReader.java | 358 ------------------ .../ES818BinaryQuantizedVectorsFormat.java | 9 +- .../vectors/es818/MergeReaderWrapper.java | 21 +- .../index/store/FsDirectoryFactory.java | 22 +- .../input/CachedBlobContainerIndexInput.java | 41 +- 8 files changed, 127 insertions(+), 405 deletions(-) rename server/src/main/java/org/elasticsearch/index/codec/vectors/es818/{DirectIOIndexInputSupplier.java => DirectIOHint.java} (59%) delete mode 100644 server/src/main/java/org/elasticsearch/index/codec/vectors/es818/DirectIOLucene99FlatVectorsReader.java diff --git a/server/src/main/java/org/elasticsearch/index/IndexVersions.java b/server/src/main/java/org/elasticsearch/index/IndexVersions.java index 5843c562dd5ff..b496e6d861ca6 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexVersions.java +++ b/server/src/main/java/org/elasticsearch/index/IndexVersions.java @@ -163,7 +163,7 @@ private static Version parseUnchecked(String version) { public static final IndexVersion UPGRADE_TO_LUCENE_10_2_1 = def(9_023_00_0, Version.LUCENE_10_2_1); public static final 
IndexVersion DEFAULT_OVERSAMPLE_VALUE_FOR_BBQ = def(9_024_0_00, Version.LUCENE_10_2_1); public static final IndexVersion SEMANTIC_TEXT_DEFAULTS_TO_BBQ = def(9_025_0_00, Version.LUCENE_10_2_1); - + public static final IndexVersion UPGRADE_TO_LUCENE_10_3_0 = def(9_050_00_0, Version.LUCENE_10_3_0); /* * STOP! READ THIS FIRST! No, really, diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/DirectIOIndexInputSupplier.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/DirectIOHint.java similarity index 59% rename from server/src/main/java/org/elasticsearch/index/codec/vectors/es818/DirectIOIndexInputSupplier.java rename to server/src/main/java/org/elasticsearch/index/codec/vectors/es818/DirectIOHint.java index 0640a5dacce65..73b7182911114 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/DirectIOIndexInputSupplier.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/DirectIOHint.java @@ -10,14 +10,7 @@ package org.elasticsearch.index.codec.vectors.es818; import org.apache.lucene.store.IOContext; -import org.apache.lucene.store.IndexInput; -import java.io.IOException; - -/** - * A hook for {@link DirectIOLucene99FlatVectorsReader} to specify the input should be opened using DirectIO. - * Remove when IOContext allows more extensible payloads to be specified. 
- */ -public interface DirectIOIndexInputSupplier { - IndexInput openInputDirect(String name, IOContext context) throws IOException; +public enum DirectIOHint implements IOContext.FileOpenHint { + INSTANCE } diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/DirectIOLucene99FlatVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/DirectIOLucene99FlatVectorsFormat.java index 02de4d5450d35..a0cd6dbf65688 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/DirectIOLucene99FlatVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/DirectIOLucene99FlatVectorsFormat.java @@ -27,8 +27,14 @@ import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsWriter; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.store.FlushInfo; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.MergeInfo; +import org.apache.lucene.store.ReadAdvice; import java.io.IOException; +import java.util.Optional; +import java.util.Set; /** * Copied from Lucene99FlatVectorsFormat in Lucene 10.1 @@ -61,22 +67,62 @@ public FlatVectorsWriter fieldsWriter(SegmentWriteState state) throws IOExceptio return new Lucene99FlatVectorsWriter(state, vectorsScorer); } + private static final IOContext DIRECT_IO_CONTEXT = new IOContext() { + @Override + public Context context() { + return Context.DEFAULT; + } + + @Override + public MergeInfo mergeInfo() { + return null; + } + + @Override + public FlushInfo flushInfo() { + return null; + } + + @Override + public Set hints() { + return Set.of(DirectIOHint.INSTANCE); + } + + @Override + public IOContext withHints(FileOpenHint... 
hints) { + return this; + } + + @Override + public Optional readAdvice() { + return Optional.empty(); + } + + @Override + public IOContext withReadAdvice(ReadAdvice advice) { + return this; + } + }; + @Override public FlatVectorsReader fieldsReader(SegmentReadState state) throws IOException { - if (DirectIOLucene99FlatVectorsReader.shouldUseDirectIO(state)) { - // Use mmap for merges and direct I/O for searches. - // TODO: Open the mmap file with sequential access instead of random (current behavior). - return new MergeReaderWrapper( - new DirectIOLucene99FlatVectorsReader(state, vectorsScorer), - new Lucene99FlatVectorsReader(state, vectorsScorer) - ); - } else { - return new Lucene99FlatVectorsReader(state, vectorsScorer); - } + SegmentReadState directIOState = new SegmentReadState( + state.directory, + state.segmentInfo, + state.fieldInfos, + DIRECT_IO_CONTEXT, + state.segmentSuffix + ); + // Use mmap for merges and direct I/O for searches. + // TODO: Open the mmap file with sequential access instead of random (current behavior). + return new MergeReaderWrapper( + new Lucene99FlatVectorsReader(directIOState, vectorsScorer), + new Lucene99FlatVectorsReader(state, vectorsScorer) + ); } @Override public String toString() { - return "ES818FlatVectorsFormat(" + "vectorsScorer=" + vectorsScorer + ')'; + return "Lucene99FlatVectorsFormat(" + "vectorsScorer=" + vectorsScorer + ')'; } } diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/DirectIOLucene99FlatVectorsReader.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/DirectIOLucene99FlatVectorsReader.java deleted file mode 100644 index 0d087cb5d73c9..0000000000000 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/DirectIOLucene99FlatVectorsReader.java +++ /dev/null @@ -1,358 +0,0 @@ -/* - * @notice - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * Modifications copyright (C) 2025 Elasticsearch B.V. - */ -package org.elasticsearch.index.codec.vectors.es818; - -import org.apache.lucene.codecs.CodecUtil; -import org.apache.lucene.codecs.hnsw.FlatVectorsReader; -import org.apache.lucene.codecs.hnsw.FlatVectorsScorer; -import org.apache.lucene.codecs.lucene95.OffHeapByteVectorValues; -import org.apache.lucene.codecs.lucene95.OffHeapFloatVectorValues; -import org.apache.lucene.codecs.lucene95.OrdToDocDISIReaderConfiguration; -import org.apache.lucene.index.ByteVectorValues; -import org.apache.lucene.index.CorruptIndexException; -import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.FieldInfos; -import org.apache.lucene.index.FloatVectorValues; -import org.apache.lucene.index.IndexFileNames; -import org.apache.lucene.index.SegmentReadState; -import org.apache.lucene.index.VectorEncoding; -import org.apache.lucene.index.VectorSimilarityFunction; -import org.apache.lucene.internal.hppc.IntObjectHashMap; -import org.apache.lucene.store.ChecksumIndexInput; -import org.apache.lucene.store.IOContext; -import org.apache.lucene.store.IndexInput; -import org.apache.lucene.store.ReadAdvice; -import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.RamUsageEstimator; -import 
org.apache.lucene.util.SuppressForbidden; -import org.apache.lucene.util.hnsw.RandomVectorScorer; - -import java.io.IOException; -import java.io.UncheckedIOException; -import java.util.Map; - -import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.readSimilarityFunction; -import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.readVectorEncoding; - -/** Copied from Lucene99FlatVectorsReader in Lucene 10.2, then modified to support DirectIOIndexInputSupplier */ -@SuppressForbidden(reason = "Copied from lucene") -public class DirectIOLucene99FlatVectorsReader extends FlatVectorsReader { - - private static final boolean USE_DIRECT_IO = Boolean.parseBoolean(System.getProperty("vector.rescoring.directio", "true")); - - private static final long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(DirectIOLucene99FlatVectorsReader.class); - - private final IntObjectHashMap fields = new IntObjectHashMap<>(); - private final IndexInput vectorData; - private final FieldInfos fieldInfos; - - public DirectIOLucene99FlatVectorsReader(SegmentReadState state, FlatVectorsScorer scorer) throws IOException { - super(scorer); - int versionMeta = readMetadata(state); - this.fieldInfos = state.fieldInfos; - boolean success = false; - try { - vectorData = openDataInput( - state, - versionMeta, - DirectIOLucene99FlatVectorsFormat.VECTOR_DATA_EXTENSION, - DirectIOLucene99FlatVectorsFormat.VECTOR_DATA_CODEC_NAME, - // Flat formats are used to randomly access vectors from their node ID that is stored - // in the HNSW graph. 
- state.context.withReadAdvice(ReadAdvice.RANDOM) - ); - success = true; - } finally { - if (success == false) { - IOUtils.closeWhileHandlingException(this); - } - } - } - - public static boolean shouldUseDirectIO(SegmentReadState state) { - return USE_DIRECT_IO && state.directory instanceof DirectIOIndexInputSupplier; - } - - private int readMetadata(SegmentReadState state) throws IOException { - String metaFileName = IndexFileNames.segmentFileName( - state.segmentInfo.name, - state.segmentSuffix, - DirectIOLucene99FlatVectorsFormat.META_EXTENSION - ); - int versionMeta = -1; - try (ChecksumIndexInput meta = state.directory.openChecksumInput(metaFileName)) { - Throwable priorE = null; - try { - versionMeta = CodecUtil.checkIndexHeader( - meta, - DirectIOLucene99FlatVectorsFormat.META_CODEC_NAME, - DirectIOLucene99FlatVectorsFormat.VERSION_START, - DirectIOLucene99FlatVectorsFormat.VERSION_CURRENT, - state.segmentInfo.getId(), - state.segmentSuffix - ); - readFields(meta, state.fieldInfos); - } catch (Throwable exception) { - priorE = exception; - } finally { - CodecUtil.checkFooter(meta, priorE); - } - } - return versionMeta; - } - - private static IndexInput openDataInput( - SegmentReadState state, - int versionMeta, - String fileExtension, - String codecName, - IOContext context - ) throws IOException { - String fileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, fileExtension); - // use direct IO for accessing raw vector data for searches - IndexInput in = USE_DIRECT_IO && state.directory instanceof DirectIOIndexInputSupplier did - ? 
did.openInputDirect(fileName, context) - : state.directory.openInput(fileName, context); - boolean success = false; - try { - int versionVectorData = CodecUtil.checkIndexHeader( - in, - codecName, - DirectIOLucene99FlatVectorsFormat.VERSION_START, - DirectIOLucene99FlatVectorsFormat.VERSION_CURRENT, - state.segmentInfo.getId(), - state.segmentSuffix - ); - if (versionMeta != versionVectorData) { - throw new CorruptIndexException( - "Format versions mismatch: meta=" + versionMeta + ", " + codecName + "=" + versionVectorData, - in - ); - } - CodecUtil.retrieveChecksum(in); - success = true; - return in; - } finally { - if (success == false) { - IOUtils.closeWhileHandlingException(in); - } - } - } - - private void readFields(ChecksumIndexInput meta, FieldInfos infos) throws IOException { - for (int fieldNumber = meta.readInt(); fieldNumber != -1; fieldNumber = meta.readInt()) { - FieldInfo info = infos.fieldInfo(fieldNumber); - if (info == null) { - throw new CorruptIndexException("Invalid field number: " + fieldNumber, meta); - } - FieldEntry fieldEntry = FieldEntry.create(meta, info); - fields.put(info.number, fieldEntry); - } - } - - @Override - public long ramBytesUsed() { - return SHALLOW_SIZE + fields.ramBytesUsed(); - } - - @Override - public Map getOffHeapByteSize(FieldInfo fieldInfo) { - final FieldEntry entry = getFieldEntryOrThrow(fieldInfo.name); - return Map.of(DirectIOLucene99FlatVectorsFormat.VECTOR_DATA_EXTENSION, entry.vectorDataLength()); - } - - @Override - public void checkIntegrity() throws IOException { - CodecUtil.checksumEntireFile(vectorData); - } - - @Override - public FlatVectorsReader getMergeInstance() { - try { - // Update the read advice since vectors are guaranteed to be accessed sequentially for merge - this.vectorData.updateReadAdvice(ReadAdvice.SEQUENTIAL); - return this; - } catch (IOException exception) { - throw new UncheckedIOException(exception); - } - } - - private FieldEntry getFieldEntryOrThrow(String field) { - final 
FieldInfo info = fieldInfos.fieldInfo(field); - final FieldEntry entry; - if (info == null || (entry = fields.get(info.number)) == null) { - throw new IllegalArgumentException("field=\"" + field + "\" not found"); - } - return entry; - } - - private FieldEntry getFieldEntry(String field, VectorEncoding expectedEncoding) { - final FieldEntry fieldEntry = getFieldEntryOrThrow(field); - if (fieldEntry.vectorEncoding != expectedEncoding) { - throw new IllegalArgumentException( - "field=\"" + field + "\" is encoded as: " + fieldEntry.vectorEncoding + " expected: " + expectedEncoding - ); - } - return fieldEntry; - } - - @Override - public FloatVectorValues getFloatVectorValues(String field) throws IOException { - final FieldEntry fieldEntry = getFieldEntry(field, VectorEncoding.FLOAT32); - return OffHeapFloatVectorValues.load( - fieldEntry.similarityFunction, - vectorScorer, - fieldEntry.ordToDoc, - fieldEntry.vectorEncoding, - fieldEntry.dimension, - fieldEntry.vectorDataOffset, - fieldEntry.vectorDataLength, - vectorData - ); - } - - @Override - public ByteVectorValues getByteVectorValues(String field) throws IOException { - final FieldEntry fieldEntry = getFieldEntry(field, VectorEncoding.BYTE); - return OffHeapByteVectorValues.load( - fieldEntry.similarityFunction, - vectorScorer, - fieldEntry.ordToDoc, - fieldEntry.vectorEncoding, - fieldEntry.dimension, - fieldEntry.vectorDataOffset, - fieldEntry.vectorDataLength, - vectorData - ); - } - - @Override - public RandomVectorScorer getRandomVectorScorer(String field, float[] target) throws IOException { - final FieldEntry fieldEntry = getFieldEntry(field, VectorEncoding.FLOAT32); - return vectorScorer.getRandomVectorScorer( - fieldEntry.similarityFunction, - OffHeapFloatVectorValues.load( - fieldEntry.similarityFunction, - vectorScorer, - fieldEntry.ordToDoc, - fieldEntry.vectorEncoding, - fieldEntry.dimension, - fieldEntry.vectorDataOffset, - fieldEntry.vectorDataLength, - vectorData - ), - target - ); - } - - 
@Override - public RandomVectorScorer getRandomVectorScorer(String field, byte[] target) throws IOException { - final FieldEntry fieldEntry = getFieldEntry(field, VectorEncoding.BYTE); - return vectorScorer.getRandomVectorScorer( - fieldEntry.similarityFunction, - OffHeapByteVectorValues.load( - fieldEntry.similarityFunction, - vectorScorer, - fieldEntry.ordToDoc, - fieldEntry.vectorEncoding, - fieldEntry.dimension, - fieldEntry.vectorDataOffset, - fieldEntry.vectorDataLength, - vectorData - ), - target - ); - } - - @Override - public void finishMerge() throws IOException { - // This makes sure that the access pattern hint is reverted back since HNSW implementation - // needs it - this.vectorData.updateReadAdvice(ReadAdvice.RANDOM); - } - - @Override - public void close() throws IOException { - IOUtils.close(vectorData); - } - - private record FieldEntry( - VectorSimilarityFunction similarityFunction, - VectorEncoding vectorEncoding, - long vectorDataOffset, - long vectorDataLength, - int dimension, - int size, - OrdToDocDISIReaderConfiguration ordToDoc, - FieldInfo info - ) { - - FieldEntry { - if (similarityFunction != info.getVectorSimilarityFunction()) { - throw new IllegalStateException( - "Inconsistent vector similarity function for field=\"" - + info.name - + "\"; " - + similarityFunction - + " != " - + info.getVectorSimilarityFunction() - ); - } - int infoVectorDimension = info.getVectorDimension(); - if (infoVectorDimension != dimension) { - throw new IllegalStateException( - "Inconsistent vector dimension for field=\"" + info.name + "\"; " + infoVectorDimension + " != " + dimension - ); - } - - int byteSize = switch (info.getVectorEncoding()) { - case BYTE -> Byte.BYTES; - case FLOAT32 -> Float.BYTES; - }; - long vectorBytes = Math.multiplyExact((long) infoVectorDimension, byteSize); - long numBytes = Math.multiplyExact(vectorBytes, size); - if (numBytes != vectorDataLength) { - throw new IllegalStateException( - "Vector data length " - + vectorDataLength 
- + " not matching size=" - + size - + " * dim=" - + dimension - + " * byteSize=" - + byteSize - + " = " - + numBytes - ); - } - } - - static FieldEntry create(IndexInput input, FieldInfo info) throws IOException { - final VectorEncoding vectorEncoding = readVectorEncoding(input); - final VectorSimilarityFunction similarityFunction = readSimilarityFunction(input); - final var vectorDataOffset = input.readVLong(); - final var vectorDataLength = input.readVLong(); - final var dimension = input.readVInt(); - final var size = input.readInt(); - final var ordToDoc = OrdToDocDISIReaderConfiguration.fromStoredMeta(input, size); - return new FieldEntry(similarityFunction, vectorEncoding, vectorDataOffset, vectorDataLength, dimension, size, ordToDoc, info); - } - } -} diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormat.java index c3e12a765eb5f..3ab49ab590346 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormat.java @@ -23,6 +23,7 @@ import org.apache.lucene.codecs.hnsw.FlatVectorsFormat; import org.apache.lucene.codecs.hnsw.FlatVectorsReader; import org.apache.lucene.codecs.hnsw.FlatVectorsWriter; +import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsFormat; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; import org.elasticsearch.index.codec.vectors.OptimizedScalarQuantizer; @@ -86,6 +87,8 @@ */ public class ES818BinaryQuantizedVectorsFormat extends FlatVectorsFormat { + private static final boolean USE_DIRECT_IO = Boolean.parseBoolean(System.getProperty("vector.rescoring.directio", "true")); + public static final String BINARIZED_VECTOR_COMPONENT = "BVEC"; public static final String NAME 
= "ES818BinaryQuantizedVectorsFormat"; @@ -97,9 +100,9 @@ public class ES818BinaryQuantizedVectorsFormat extends FlatVectorsFormat { static final String VECTOR_DATA_EXTENSION = "veb"; static final int DIRECT_MONOTONIC_BLOCK_SHIFT = 16; - private static final DirectIOLucene99FlatVectorsFormat rawVectorFormat = new DirectIOLucene99FlatVectorsFormat( - FlatVectorScorerUtil.getLucene99FlatVectorsScorer() - ); + private static final FlatVectorsFormat rawVectorFormat = USE_DIRECT_IO + ? new DirectIOLucene99FlatVectorsFormat(FlatVectorScorerUtil.getLucene99FlatVectorsScorer()) + : new Lucene99FlatVectorsFormat(FlatVectorScorerUtil.getLucene99FlatVectorsScorer()); private static final ES818BinaryFlatVectorsScorer scorer = new ES818BinaryFlatVectorsScorer( FlatVectorScorerUtil.getLucene99FlatVectorsScorer() diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/MergeReaderWrapper.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/MergeReaderWrapper.java index eaafbe7bfc947..eef9f5cc28b07 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/MergeReaderWrapper.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/MergeReaderWrapper.java @@ -11,6 +11,7 @@ import org.apache.lucene.codecs.hnsw.FlatVectorsReader; import org.apache.lucene.index.ByteVectorValues; +import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FloatVectorValues; import org.apache.lucene.search.KnnCollector; import org.apache.lucene.util.Accountable; @@ -20,6 +21,7 @@ import java.io.IOException; import java.util.Collection; +import java.util.Map; class MergeReaderWrapper extends FlatVectorsReader { @@ -57,6 +59,16 @@ public ByteVectorValues getByteVectorValues(String field) throws IOException { return mainReader.getByteVectorValues(field); } + @Override + public void search(String field, float[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException { + mainReader.search(field, target, 
knnCollector, acceptDocs); + } + + @Override + public void search(String field, byte[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException { + mainReader.search(field, target, knnCollector, acceptDocs); + } + @Override public FlatVectorsReader getMergeInstance() { return mergeReader; @@ -73,13 +85,8 @@ public Collection getChildResources() { } @Override - public void search(String field, float[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException { - mainReader.search(field, target, knnCollector, acceptDocs); - } - - @Override - public void search(String field, byte[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException { - mainReader.search(field, target, knnCollector, acceptDocs); + public Map getOffHeapByteSize(FieldInfo fieldInfo) { + return mainReader.getOffHeapByteSize(fieldInfo); } @Override diff --git a/server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java b/server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java index 76dfee2f7f2de..bc1d469ab3486 100644 --- a/server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java +++ b/server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java @@ -26,7 +26,7 @@ import org.elasticsearch.core.IOUtils; import org.elasticsearch.index.IndexModule; import org.elasticsearch.index.IndexSettings; -import org.elasticsearch.index.codec.vectors.es818.DirectIOIndexInputSupplier; +import org.elasticsearch.index.codec.vectors.es818.DirectIOHint; import org.elasticsearch.index.shard.ShardPath; import org.elasticsearch.logging.LogManager; import org.elasticsearch.logging.Logger; @@ -116,7 +116,7 @@ public static boolean isHybridFs(Directory directory) { return unwrap instanceof HybridDirectory; } - static final class HybridDirectory extends NIOFSDirectory implements DirectIOIndexInputSupplier { + static final class HybridDirectory extends NIOFSDirectory { private final MMapDirectory delegate; private final 
DirectIODirectory directIODelegate; @@ -143,7 +143,11 @@ protected boolean useDirectIO(String name, IOContext context, OptionalLong fileL @Override public IndexInput openInput(String name, IOContext context) throws IOException { - if (useDelegate(name, context)) { + if (directIODelegate != null && context.hints().contains(DirectIOHint.INSTANCE)) { + ensureOpen(); + ensureCanRead(name); + return directIODelegate.openInput(name, context); + } else if (useDelegate(name, context)) { // we need to do these checks on the outer directory since the inner doesn't know about pending deletes ensureOpen(); ensureCanRead(name); @@ -159,18 +163,6 @@ public IndexInput openInput(String name, IOContext context) throws IOException { } } - @Override - public IndexInput openInputDirect(String name, IOContext context) throws IOException { - if (directIODelegate == null) { - return openInput(name, context); - } - // we need to do these checks on the outer directory since the inner doesn't know about pending deletes - ensureOpen(); - ensureCanRead(name); - - return directIODelegate.openInput(name, context); - } - @Override public void close() throws IOException { IOUtils.close(super::close, delegate); diff --git a/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/store/input/CachedBlobContainerIndexInput.java b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/store/input/CachedBlobContainerIndexInput.java index 4711043fff281..e54fe892ecaab 100644 --- a/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/store/input/CachedBlobContainerIndexInput.java +++ b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/store/input/CachedBlobContainerIndexInput.java @@ -9,7 +9,9 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.apache.lucene.store.FlushInfo; import 
org.apache.lucene.store.IOContext; +import org.apache.lucene.store.MergeInfo; import org.apache.lucene.store.ReadAdvice; import org.elasticsearch.blobcache.BlobCacheUtils; import org.elasticsearch.blobcache.common.ByteRange; @@ -21,6 +23,8 @@ import java.io.IOException; import java.io.InputStream; import java.nio.ByteBuffer; +import java.util.Optional; +import java.util.Set; import java.util.concurrent.atomic.AtomicLong; import java.util.function.Predicate; import java.util.function.Supplier; @@ -36,7 +40,42 @@ public class CachedBlobContainerIndexInput extends MetadataCachingIndexInput { * a complete part of the {@link #fileInfo} at once in the cache and should not be * used for anything else than what the {@link #prefetchPart(int, Supplier)} method does. */ - public static final IOContext CACHE_WARMING_CONTEXT = new IOContext(IOContext.Context.DEFAULT, null, null, ReadAdvice.NORMAL); + public static final IOContext CACHE_WARMING_CONTEXT = new IOContext() { + @Override + public Context context() { + return Context.DEFAULT; + } + + @Override + public MergeInfo mergeInfo() { + return null; + } + + @Override + public FlushInfo flushInfo() { + return null; + } + + @Override + public Set hints() { + return Set.of(); + } + + @Override + public IOContext withHints(FileOpenHint... 
hints) { + return this; + } + + @Override + public Optional readAdvice() { + return Optional.of(ReadAdvice.NORMAL); + } + + @Override + public IOContext withReadAdvice(ReadAdvice advice) { + return this; + } + }; private static final Logger logger = LogManager.getLogger(CachedBlobContainerIndexInput.class); From bc65772b33e075f31e0eb78b37c6eac5bfd8573f Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Sat, 3 May 2025 06:13:32 +0000 Subject: [PATCH 020/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-40c3848eb29 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index c6c3fdbb68ebd..abfb0310f6a44 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-9cf497e1667 +lucene = 10.3.0-snapshot-40c3848eb29 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index a12bd99035937..ebb3b8de769c0 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2951,129 +2951,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From bd4edfa67232fc4ad33cff6a7828cd810db8af36 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Sun, 4 May 2025 06:13:07 +0000 Subject: [PATCH 021/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-40c3848eb29 --- gradle/verification-metadata.xml | 50 ++++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 
deletions(-) diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index ebb3b8de769c0..655224960e023 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2953,127 +2953,127 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + From 7423500ff7a8644ab1f475bdb51974a134d09ef0 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Mon, 5 May 2025 06:14:17 +0000 Subject: [PATCH 022/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-c50f5ee3d75 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index abfb0310f6a44..c22d3d93533bf 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-40c3848eb29 +lucene = 10.3.0-snapshot-c50f5ee3d75 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 655224960e023..ac4e2799488da 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2951,129 +2951,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From acab9da754d09a9faa3bbb27e23889161a322a47 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Tue, 6 May 2025 06:14:41 +0000 Subject: [PATCH 023/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-b2fbcd25898 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 
++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index c22d3d93533bf..849df0ca1c919 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-c50f5ee3d75 +lucene = 10.3.0-snapshot-b2fbcd25898 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index ac4e2799488da..a5285bd475551 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2951,129 +2951,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 5f5407297eef26a79e72439cc9555556be10c2e4 Mon Sep 17 00:00:00 2001 From: Simon Cooper Date: Tue, 6 May 2025 14:07:28 +0100 Subject: [PATCH 024/184] Specify a hint that no special IO behavior should be used (#127606) --- .../store/smb/SmbMmapFsDirectoryFactory.java | 2 +- .../index/StandardIOBehaviorHint.java | 19 ++++++++ .../index/store/FsDirectoryFactory.java | 17 +++++-- .../index/store/FsDirectoryFactoryTests.java | 2 +- .../input/CachedBlobContainerIndexInput.java | 47 ++----------------- 5 files changed, 39 insertions(+), 48 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/index/StandardIOBehaviorHint.java diff --git a/plugins/store-smb/src/main/java/org/elasticsearch/index/store/smb/SmbMmapFsDirectoryFactory.java b/plugins/store-smb/src/main/java/org/elasticsearch/index/store/smb/SmbMmapFsDirectoryFactory.java index b9f4943b1dab6..711cb8178eee0 100644 --- 
a/plugins/store-smb/src/main/java/org/elasticsearch/index/store/smb/SmbMmapFsDirectoryFactory.java +++ b/plugins/store-smb/src/main/java/org/elasticsearch/index/store/smb/SmbMmapFsDirectoryFactory.java @@ -25,7 +25,7 @@ public final class SmbMmapFsDirectoryFactory extends FsDirectoryFactory { @Override protected Directory newFSDirectory(Path location, LockFactory lockFactory, IndexSettings indexSettings) throws IOException { return new SmbDirectoryWrapper( - setPreload( + setMMapFunctions( new MMapDirectory(location, lockFactory), new HashSet<>(indexSettings.getValue(IndexModule.INDEX_STORE_PRE_LOAD_SETTING)) ) diff --git a/server/src/main/java/org/elasticsearch/index/StandardIOBehaviorHint.java b/server/src/main/java/org/elasticsearch/index/StandardIOBehaviorHint.java new file mode 100644 index 0000000000000..ed32d6def7a32 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/StandardIOBehaviorHint.java @@ -0,0 +1,19 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.index; + +import org.apache.lucene.store.IOContext; + +/** + * A hint that no special behavior should be set on open files + */ +public enum StandardIOBehaviorHint implements IOContext.FileOpenHint { + INSTANCE +} diff --git a/server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java b/server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java index bc1d469ab3486..f02c7b6c16158 100644 --- a/server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java +++ b/server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java @@ -20,12 +20,14 @@ import org.apache.lucene.store.MMapDirectory; import org.apache.lucene.store.NIOFSDirectory; import org.apache.lucene.store.NativeFSLockFactory; +import org.apache.lucene.store.ReadAdvice; import org.apache.lucene.store.SimpleFSLockFactory; import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Setting.Property; import org.elasticsearch.core.IOUtils; import org.elasticsearch.index.IndexModule; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.StandardIOBehaviorHint; import org.elasticsearch.index.codec.vectors.es818.DirectIOHint; import org.elasticsearch.index.shard.ShardPath; import org.elasticsearch.logging.LogManager; @@ -36,6 +38,7 @@ import java.nio.file.Files; import java.nio.file.Path; import java.util.HashSet; +import java.util.Optional; import java.util.OptionalLong; import java.util.Set; import java.util.function.BiPredicate; @@ -75,12 +78,12 @@ protected Directory newFSDirectory(Path location, LockFactory lockFactory, Index // Use Lucene defaults final FSDirectory primaryDirectory = FSDirectory.open(location, lockFactory); if (primaryDirectory instanceof MMapDirectory mMapDirectory) { - return new HybridDirectory(lockFactory, setPreload(mMapDirectory, preLoadExtensions)); + return new HybridDirectory(lockFactory, setMMapFunctions(mMapDirectory, preLoadExtensions)); } 
else { return primaryDirectory; } case MMAPFS: - return setPreload(new MMapDirectory(location, lockFactory), preLoadExtensions); + return setMMapFunctions(new MMapDirectory(location, lockFactory), preLoadExtensions); case SIMPLEFS: case NIOFS: return new NIOFSDirectory(location, lockFactory); @@ -89,10 +92,18 @@ protected Directory newFSDirectory(Path location, LockFactory lockFactory, Index } } + private static Optional overrideReadAdvice(String name, IOContext context) { + if (context.hints().contains(StandardIOBehaviorHint.INSTANCE)) { + return Optional.of(ReadAdvice.NORMAL); + } + return Optional.empty(); + } + /** Sets the preload, if any, on the given directory based on the extensions. Returns the same directory instance. */ // visibility and extensibility for testing - public MMapDirectory setPreload(MMapDirectory mMapDirectory, Set preLoadExtensions) { + public MMapDirectory setMMapFunctions(MMapDirectory mMapDirectory, Set preLoadExtensions) { mMapDirectory.setPreload(getPreloadFunc(preLoadExtensions)); + mMapDirectory.setReadAdviceOverride(FsDirectoryFactory::overrideReadAdvice); return mMapDirectory; } diff --git a/server/src/test/java/org/elasticsearch/index/store/FsDirectoryFactoryTests.java b/server/src/test/java/org/elasticsearch/index/store/FsDirectoryFactoryTests.java index b0a14515f2fbc..83109c4f83a63 100644 --- a/server/src/test/java/org/elasticsearch/index/store/FsDirectoryFactoryTests.java +++ b/server/src/test/java/org/elasticsearch/index/store/FsDirectoryFactoryTests.java @@ -88,7 +88,7 @@ static class PreLoadExposingFsDirectoryFactory extends FsDirectoryFactory { final Map> preLoadFuncMap = new HashMap<>(); @Override - public MMapDirectory setPreload(MMapDirectory mMapDirectory, Set preLoadExtensions) { + public MMapDirectory setMMapFunctions(MMapDirectory mMapDirectory, Set preLoadExtensions) { var preLoadFunc = FsDirectoryFactory.getPreloadFunc(preLoadExtensions); mMapDirectory.setPreload(preLoadFunc); preLoadFuncMap.put(mMapDirectory, 
preLoadFunc); diff --git a/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/store/input/CachedBlobContainerIndexInput.java b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/store/input/CachedBlobContainerIndexInput.java index e54fe892ecaab..f82cba61013df 100644 --- a/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/store/input/CachedBlobContainerIndexInput.java +++ b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/store/input/CachedBlobContainerIndexInput.java @@ -9,12 +9,10 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.apache.lucene.store.FlushInfo; import org.apache.lucene.store.IOContext; -import org.apache.lucene.store.MergeInfo; -import org.apache.lucene.store.ReadAdvice; import org.elasticsearch.blobcache.BlobCacheUtils; import org.elasticsearch.blobcache.common.ByteRange; +import org.elasticsearch.index.StandardIOBehaviorHint; import org.elasticsearch.index.snapshots.blobstore.BlobStoreIndexShardSnapshot.FileInfo; import org.elasticsearch.xpack.searchablesnapshots.cache.common.CacheFile; import org.elasticsearch.xpack.searchablesnapshots.store.IndexInputStats; @@ -23,8 +21,6 @@ import java.io.IOException; import java.io.InputStream; import java.nio.ByteBuffer; -import java.util.Optional; -import java.util.Set; import java.util.concurrent.atomic.AtomicLong; import java.util.function.Predicate; import java.util.function.Supplier; @@ -40,42 +36,7 @@ public class CachedBlobContainerIndexInput extends MetadataCachingIndexInput { * a complete part of the {@link #fileInfo} at once in the cache and should not be * used for anything else than what the {@link #prefetchPart(int, Supplier)} method does. 
*/ - public static final IOContext CACHE_WARMING_CONTEXT = new IOContext() { - @Override - public Context context() { - return Context.DEFAULT; - } - - @Override - public MergeInfo mergeInfo() { - return null; - } - - @Override - public FlushInfo flushInfo() { - return null; - } - - @Override - public Set hints() { - return Set.of(); - } - - @Override - public IOContext withHints(FileOpenHint... hints) { - return this; - } - - @Override - public Optional readAdvice() { - return Optional.of(ReadAdvice.NORMAL); - } - - @Override - public IOContext withReadAdvice(ReadAdvice advice) { - return this; - } - }; + public static final IOContext CACHE_WARMING_CONTEXT = IOContext.DEFAULT.withHints(StandardIOBehaviorHint.INSTANCE); private static final Logger logger = LogManager.getLogger(CachedBlobContainerIndexInput.class); @@ -141,7 +102,7 @@ private CachedBlobContainerIndexInput( @Override protected void readWithoutBlobCache(ByteBuffer b) throws Exception { - ensureContext(ctx -> ctx != CACHE_WARMING_CONTEXT); + ensureContext(ctx -> ctx.hints().contains(StandardIOBehaviorHint.INSTANCE) == false); final long position = getAbsolutePosition(); final int length = b.remaining(); @@ -178,7 +139,7 @@ public long getPersistentCacheInitialLength() throws Exception { * or {@code -1} if the prewarming was cancelled */ public long prefetchPart(final int part, Supplier isCancelled) throws IOException { - ensureContext(ctx -> ctx == CACHE_WARMING_CONTEXT); + ensureContext(ctx -> ctx.hints().contains(StandardIOBehaviorHint.INSTANCE)); if (part >= fileInfo.numberOfParts()) { throw new IllegalArgumentException("Unexpected part number [" + part + "]"); } From 853e22d2990d4e61ab2da633f9936308fab0f585 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 7 May 2025 06:12:35 +0000 Subject: [PATCH 025/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-af62580a673 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 
files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 849df0ca1c919..e7a543f5b1ddf 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-b2fbcd25898 +lucene = 10.3.0-snapshot-af62580a673 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index a5285bd475551..8ed2a404372c5 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2951,129 +2951,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 2a5bff5649101bc3472948b04f9737ac96ad5976 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 8 May 2025 06:12:10 +0000 Subject: [PATCH 026/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-4be27fe091f --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index e7a543f5b1ddf..5783663732227 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-af62580a673 +lucene = 10.3.0-snapshot-4be27fe091f bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index a83bd66840276..b35735658e236 100644 --- a/gradle/verification-metadata.xml +++ 
b/gradle/verification-metadata.xml @@ -2951,129 +2951,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From b189886d7745e9309f274e923362b64cc966efec Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Fri, 9 May 2025 06:12:45 +0000 Subject: [PATCH 027/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-65fd8648eb6 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 5783663732227..d4b34acd4de23 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-4be27fe091f +lucene = 10.3.0-snapshot-65fd8648eb6 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index b35735658e236..74f2fc8b1ab5e 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2951,129 +2951,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 6c95ea085cb0f3f5a1ac5e177883ba371d4407b0 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Sat, 10 May 2025 06:13:03 +0000 Subject: [PATCH 028/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-b6851e1b37c --- build-tools-internal/version.properties | 2 +- 
gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index d4b34acd4de23..df5af5a9e225f 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-65fd8648eb6 +lucene = 10.3.0-snapshot-b6851e1b37c bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 74f2fc8b1ab5e..44c1a6d4219e9 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2951,129 +2951,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 9f3c7aa7840d685fa295cc460ff8e5e5d1734eac Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Sun, 11 May 2025 06:13:06 +0000 Subject: [PATCH 029/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-f3db7597926 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index df5af5a9e225f..198ffca90a5be 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-b6851e1b37c +lucene = 10.3.0-snapshot-f3db7597926 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 
44c1a6d4219e9..4d0f05d21692b 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2951,129 +2951,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From db1ecc909c4dabd34b4f122a805c64cc20324ec0 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Mon, 12 May 2025 06:13:03 +0000 Subject: [PATCH 030/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-939b4060929 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 198ffca90a5be..c726d36ce0765 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-f3db7597926 +lucene = 10.3.0-snapshot-939b4060929 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 4d0f05d21692b..b17312d281af8 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2951,129 +2951,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 3a24aa63bb82730a0269ce5a3a20cb2066bad98e Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Tue, 13 May 2025 06:13:06 +0000 Subject: [PATCH 031/184] [Automated] Update Lucene snapshot to 
10.3.0-snapshot-057b0e2f9ae --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index c726d36ce0765..9109edda1bb04 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-939b4060929 +lucene = 10.3.0-snapshot-057b0e2f9ae bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index b17312d281af8..1b3e38f277284 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2951,129 +2951,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 327c7cf806c4008af98c0b1139cf3a8e15a9aba7 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 14 May 2025 06:12:49 +0000 Subject: [PATCH 032/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-f9647966a58 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 9109edda1bb04..445f797357a7e 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-057b0e2f9ae +lucene = 10.3.0-snapshot-f9647966a58 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git 
a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 1b3e38f277284..cd79f37ea6a85 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2951,129 +2951,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 63e4a1853f14775fb298765a1acb669c01c4ccf5 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 15 May 2025 06:12:44 +0000 Subject: [PATCH 033/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-5abaf5c6675 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 445f797357a7e..484036820cceb 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-f9647966a58 +lucene = 10.3.0-snapshot-5abaf5c6675 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index cd79f37ea6a85..e210e06c1122b 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2951,129 +2951,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From eea77e74122b8b812ac84ba8982bdec72938675d Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Fri, 16 May 2025 06:14:19 +0000 
Subject: [PATCH 034/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-5abaf5c6675 --- gradle/verification-metadata.xml | 50 ++++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index e210e06c1122b..09a118a6876a8 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2953,127 +2953,127 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + From 457724c0cdae48ad2709ac90f5319d3248b3815e Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Sat, 17 May 2025 06:12:57 +0000 Subject: [PATCH 035/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-1811d90da67 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 484036820cceb..774fafb33e680 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-5abaf5c6675 +lucene = 10.3.0-snapshot-1811d90da67 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 09a118a6876a8..8dd073bf5d7ad 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2951,129 +2951,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 58bfb0e4def6389fa2da76874f6ea2a432cc6709 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Sun, 18 
May 2025 06:12:16 +0000 Subject: [PATCH 036/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-1811d90da67 --- gradle/verification-metadata.xml | 50 ++++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 8dd073bf5d7ad..f1b1f18d35db9 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2953,127 +2953,127 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + From 6900152275ca177d6fa1b7bedceda1b2cc217aa4 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Mon, 19 May 2025 06:14:27 +0000 Subject: [PATCH 037/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-7ade2c7e3ff --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 774fafb33e680..c291d703669f3 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-1811d90da67 +lucene = 10.3.0-snapshot-7ade2c7e3ff bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index f1b1f18d35db9..9854ee92784d9 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2951,129 +2951,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 19e0fa21ac02d25e48f08aa400f6588f750127e6 Mon Sep 17 00:00:00 2001 From: 
elasticsearchmachine Date: Tue, 20 May 2025 06:18:55 +0000 Subject: [PATCH 038/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-7ade2c7e3ff --- gradle/verification-metadata.xml | 50 ++++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 9854ee92784d9..315f1afcd44fe 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2953,127 +2953,127 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + From 89e036a28f7f106fe2db8ec94fbdabc4c05e7e84 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 21 May 2025 06:12:44 +0000 Subject: [PATCH 039/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-846ff673301 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index c291d703669f3..7ea1f673693e3 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-7ade2c7e3ff +lucene = 10.3.0-snapshot-846ff673301 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 315f1afcd44fe..c8296dd87b7dd 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2951,129 +2951,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From fad9135e0bf0aa53c299c27230fbd97877787d20 Mon 
Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 22 May 2025 06:14:11 +0000 Subject: [PATCH 040/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-e026915ea59 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 7ea1f673693e3..ec88fbaccfe18 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-846ff673301 +lucene = 10.3.0-snapshot-e026915ea59 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index c8296dd87b7dd..9c9e5af5e8db3 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2951,129 +2951,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 57150060b5cb1401c151f11a50481c0e0320f2bb Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 22 May 2025 11:19:12 +0000 Subject: [PATCH 041/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-f11c8092d6d --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index ec88fbaccfe18..11847dea052f2 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-e026915ea59 
+lucene = 10.3.0-snapshot-f11c8092d6d bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 9c9e5af5e8db3..be8b1638f0f60 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2951,129 +2951,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 5e1c8a59d08b3f7e0523cd5b2283322bca2173c8 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Fri, 23 May 2025 06:13:44 +0000 Subject: [PATCH 042/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-b74c6681b5f --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 11847dea052f2..9f465bbaace19 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-f11c8092d6d +lucene = 10.3.0-snapshot-b74c6681b5f bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index be8b1638f0f60..e97b75fd72691 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2951,129 +2951,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 
9b1296986d9b25de2900bf2de3611b3aedd0211d Mon Sep 17 00:00:00 2001 From: Chris Hegarty <62058229+ChrisHegarty@users.noreply.github.com> Date: Fri, 23 May 2025 04:48:09 -0700 Subject: [PATCH 043/184] lucene_snapshot: Update to new Lucene 10.3 postings format (#128240) There are a few different things going on in this PR, all of which are required to get the lucene_snapshot branch building again, but the most substantial is the update to the new Lucene 10.3 postings format, Lucene103PostingsFormat. The Lucene90BlockTreeTermsWriter class is used in the implementation of the 10.1 postings codec in Lucene. With the new 10.3 postings format that class is no longer needed, so it has been moved to a test-only location, in order to support backward compatibility testing. In Elasticsearch we were using Lucene90BlockTreeTermsWriter (from Lucene) directly, through our copy of the Lucene 9.0 postings format, namely ES812PostingsFormat. So if you look at ES812PostingsFormat, you should see that the imports now show that we're using our own copy. Additionally, changes are required because of the removal of deprecated methods in IOContext, as well as the override of hints for direct IO. 
Co-authored-by: Simon Cooper --- .../tsdb/TSDBDocValuesMergeBenchmark.java | 5 +- .../search/simple/SimpleSearchIT.java | 10 +- server/src/main/java/module-info.java | 3 +- .../diskusage/IndexDiskUsageAnalyzer.java | 6 +- .../elasticsearch/common/lucene/Lucene.java | 2 +- .../index/codec/CodecService.java | 6 +- .../codec/Elasticsearch900Lucene101Codec.java | 4 +- .../codec/Elasticsearch92Lucene103Codec.java | 133 ++ .../codec/LegacyPerFieldMapperCodec.java | 6 +- .../index/codec/PerFieldFormatSupplier.java | 10 +- .../index/codec/PerFieldMapperCodec.java | 2 +- .../codec/postings/ES812PostingsFormat.java | 9 +- .../Lucene90BlockTreeTermsWriter.java | 1161 +++++++++++++++++ .../ES816BinaryQuantizedVectorsReader.java | 6 +- .../DirectIOLucene99FlatVectorsFormat.java | 74 +- .../ES818BinaryQuantizedVectorsReader.java | 6 +- .../org/elasticsearch/index/store/Store.java | 4 +- .../services/org.apache.lucene.codecs.Codec | 1 + .../IndexDiskUsageAnalyzerTests.java | 18 +- .../search/function/MinScoreScorerTests.java | 2 +- .../elasticsearch/index/codec/CodecTests.java | 2 +- .../index/codec/PerFieldMapperCodecTests.java | 10 +- .../postings/ES812PostingsFormatTests.java | 4 +- .../codec/tsdb/DocValuesCodecDuelTests.java | 4 +- .../tsdb/ES87TSDBDocValuesFormatTests.java | 12 +- .../codec/tsdb/TsdbDocValueBwcTests.java | 4 +- .../es819/ES819TSDBDocValuesFormatTests.java | 13 +- .../vectors/ES813FlatVectorFormatTests.java | 12 +- .../ES813Int8FlatVectorFormatTests.java | 12 +- ...HnswScalarQuantizedVectorsFormatTests.java | 12 +- .../ES815BitFlatVectorFormatTests.java | 12 +- .../ES815HnswBitVectorsFormatTests.java | 12 +- ...S816BinaryQuantizedVectorsFormatTests.java | 11 +- ...HnswBinaryQuantizedVectorsFormatTests.java | 11 +- ...S818BinaryQuantizedVectorsFormatTests.java | 11 +- ...HnswBinaryQuantizedVectorsFormatTests.java | 11 +- .../codec/zstd/StoredFieldCodecDuelTests.java | 6 +- ...estCompressionStoredFieldsFormatTests.java | 4 +- 
...td814BestSpeedStoredFieldsFormatTests.java | 4 +- .../engine/CompletionStatsCacheTests.java | 11 +- .../index/mapper/DateFieldTypeTests.java | 2 +- .../search/query/QueryPhaseTests.java | 8 +- .../vectors/RescoreKnnVectorQueryTests.java | 4 +- .../test/cluster/FeatureFlag.java | 3 +- 44 files changed, 1455 insertions(+), 208 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/index/codec/Elasticsearch92Lucene103Codec.java create mode 100644 server/src/main/java/org/elasticsearch/index/codec/postings/Lucene90BlockTreeTermsWriter.java diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/TSDBDocValuesMergeBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/TSDBDocValuesMergeBenchmark.java index 71164e35ad557..d0b72cd030609 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/TSDBDocValuesMergeBenchmark.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/TSDBDocValuesMergeBenchmark.java @@ -26,7 +26,7 @@ import org.apache.lucene.util.BytesRef; import org.elasticsearch.cluster.metadata.DataStream; import org.elasticsearch.common.logging.LogConfigurator; -import org.elasticsearch.index.codec.Elasticsearch900Lucene101Codec; +import org.elasticsearch.index.codec.Elasticsearch92Lucene103Codec; import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; @@ -63,7 +63,6 @@ public class TSDBDocValuesMergeBenchmark { static { - // For Elasticsearch900Lucene101Codec: LogConfigurator.loadLog4jPlugins(); LogConfigurator.configureESLogging(); LogConfigurator.setNodeName("test"); @@ -259,7 +258,7 @@ private static IndexWriterConfig createIndexWriterConfig(boolean optimizedMergeE config.setLeafSorter(DataStream.TIMESERIES_LEAF_READERS_SORTER); config.setMergePolicy(new LogByteSizeMergePolicy()); var docValuesFormat = new 
ES819TSDBDocValuesFormat(4096, optimizedMergeEnabled); - config.setCodec(new Elasticsearch900Lucene101Codec() { + config.setCodec(new Elasticsearch92Lucene103Codec() { @Override public DocValuesFormat getDocValuesFormatForField(String field) { diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/simple/SimpleSearchIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/simple/SimpleSearchIT.java index 5a9be73d92268..dc18835460d20 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/simple/SimpleSearchIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/simple/SimpleSearchIT.java @@ -261,10 +261,12 @@ public void testSimpleTerminateAfterCount() throws Exception { ensureGreen(); refresh(); - for (int i = 1; i < max; i++) { + // query all but one doc to avoid optimizations that may rewrite to a MatchAllDocs, which simplifies assertions + final int queryMax = max - 1; + for (int i = 1; i < queryMax; i++) { final int finalI = i; assertResponse( - prepareSearch("test").setQuery(QueryBuilders.rangeQuery("field").gte(1).lte(max)).setTerminateAfter(i), + prepareSearch("test").setQuery(QueryBuilders.rangeQuery("field").gte(1).lte(queryMax)).setTerminateAfter(i), response -> { assertHitCount(response, finalI); assertTrue(response.isTerminatedEarly()); @@ -272,9 +274,9 @@ public void testSimpleTerminateAfterCount() throws Exception { ); } assertResponse( - prepareSearch("test").setQuery(QueryBuilders.rangeQuery("field").gte(1).lte(max)).setTerminateAfter(2 * max), + prepareSearch("test").setQuery(QueryBuilders.rangeQuery("field").gte(1).lte(queryMax)).setTerminateAfter(2 * max), response -> { - assertHitCount(response, max); + assertHitCount(response, queryMax); assertFalse(response.isTerminatedEarly()); } ); diff --git a/server/src/main/java/module-info.java b/server/src/main/java/module-info.java index 7edefe9fae581..20df7f456d109 100644 --- a/server/src/main/java/module-info.java +++ 
b/server/src/main/java/module-info.java @@ -461,7 +461,8 @@ org.elasticsearch.index.codec.Elasticsearch814Codec, org.elasticsearch.index.codec.Elasticsearch816Codec, org.elasticsearch.index.codec.Elasticsearch900Codec, - org.elasticsearch.index.codec.Elasticsearch900Lucene101Codec; + org.elasticsearch.index.codec.Elasticsearch900Lucene101Codec, + org.elasticsearch.index.codec.Elasticsearch92Lucene103Codec; provides org.apache.logging.log4j.core.util.ContextDataProvider with org.elasticsearch.common.logging.DynamicContextDataProvider; diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/diskusage/IndexDiskUsageAnalyzer.java b/server/src/main/java/org/elasticsearch/action/admin/indices/diskusage/IndexDiskUsageAnalyzer.java index daae5af9127f1..9d2595732c585 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/diskusage/IndexDiskUsageAnalyzer.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/diskusage/IndexDiskUsageAnalyzer.java @@ -10,6 +10,7 @@ package org.elasticsearch.action.admin.indices.diskusage; import org.apache.logging.log4j.Logger; +import org.apache.lucene.backward_codecs.lucene101.Lucene101PostingsFormat; import org.apache.lucene.backward_codecs.lucene50.Lucene50PostingsFormat; import org.apache.lucene.backward_codecs.lucene84.Lucene84PostingsFormat; import org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat; @@ -22,7 +23,7 @@ import org.apache.lucene.codecs.PointsReader; import org.apache.lucene.codecs.StoredFieldsReader; import org.apache.lucene.codecs.TermVectorsReader; -import org.apache.lucene.codecs.lucene101.Lucene101PostingsFormat; +import org.apache.lucene.codecs.lucene103.Lucene103PostingsFormat; import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.ByteVectorValues; import org.apache.lucene.index.DirectoryReader; @@ -318,6 +319,9 @@ private static void readProximity(Terms terms, PostingsEnum postings) throws IOE private static BlockTermState 
getBlockTermState(TermsEnum termsEnum, BytesRef term) throws IOException { if (term != null && termsEnum.seekExact(term)) { final TermState termState = termsEnum.termState(); + if (termState instanceof final Lucene103PostingsFormat.IntBlockTermState blockTermState) { + return new BlockTermState(blockTermState.docStartFP, blockTermState.posStartFP, blockTermState.payStartFP); + } if (termState instanceof final Lucene101PostingsFormat.IntBlockTermState blockTermState) { return new BlockTermState(blockTermState.docStartFP, blockTermState.posStartFP, blockTermState.payStartFP); } diff --git a/server/src/main/java/org/elasticsearch/common/lucene/Lucene.java b/server/src/main/java/org/elasticsearch/common/lucene/Lucene.java index 99ed0917b12bf..09f61d60c0b27 100644 --- a/server/src/main/java/org/elasticsearch/common/lucene/Lucene.java +++ b/server/src/main/java/org/elasticsearch/common/lucene/Lucene.java @@ -93,7 +93,7 @@ public class Lucene { - public static final String LATEST_CODEC = "Lucene101"; + public static final String LATEST_CODEC = "Lucene103"; public static final String SOFT_DELETES_FIELD = "__soft_deletes"; diff --git a/server/src/main/java/org/elasticsearch/index/codec/CodecService.java b/server/src/main/java/org/elasticsearch/index/codec/CodecService.java index 5f887b2b594d3..17028137b78d8 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/CodecService.java +++ b/server/src/main/java/org/elasticsearch/index/codec/CodecService.java @@ -12,7 +12,7 @@ import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.FieldInfosFormat; import org.apache.lucene.codecs.FilterCodec; -import org.apache.lucene.codecs.lucene101.Lucene101Codec; +import org.apache.lucene.codecs.lucene103.Lucene103Codec; import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.common.util.FeatureFlag; import org.elasticsearch.core.Nullable; @@ -46,7 +46,7 @@ public class CodecService implements CodecProvider { public CodecService(@Nullable MapperService 
mapperService, BigArrays bigArrays) { final var codecs = new HashMap(); - Codec legacyBestSpeedCodec = new LegacyPerFieldMapperCodec(Lucene101Codec.Mode.BEST_SPEED, mapperService, bigArrays); + Codec legacyBestSpeedCodec = new LegacyPerFieldMapperCodec(Lucene103Codec.Mode.BEST_SPEED, mapperService, bigArrays); if (ZSTD_STORED_FIELDS_FEATURE_FLAG) { codecs.put(DEFAULT_CODEC, new PerFieldMapperCodec(Zstd814StoredFieldsFormat.Mode.BEST_SPEED, mapperService, bigArrays)); } else { @@ -58,7 +58,7 @@ public CodecService(@Nullable MapperService mapperService, BigArrays bigArrays) BEST_COMPRESSION_CODEC, new PerFieldMapperCodec(Zstd814StoredFieldsFormat.Mode.BEST_COMPRESSION, mapperService, bigArrays) ); - Codec legacyBestCompressionCodec = new LegacyPerFieldMapperCodec(Lucene101Codec.Mode.BEST_COMPRESSION, mapperService, bigArrays); + Codec legacyBestCompressionCodec = new LegacyPerFieldMapperCodec(Lucene103Codec.Mode.BEST_COMPRESSION, mapperService, bigArrays); codecs.put(LEGACY_BEST_COMPRESSION_CODEC, legacyBestCompressionCodec); codecs.put(LUCENE_DEFAULT_CODEC, Codec.getDefault()); diff --git a/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch900Lucene101Codec.java b/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch900Lucene101Codec.java index d96495fb0f615..c0f633bf62268 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch900Lucene101Codec.java +++ b/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch900Lucene101Codec.java @@ -9,12 +9,12 @@ package org.elasticsearch.index.codec; +import org.apache.lucene.backward_codecs.lucene101.Lucene101Codec; +import org.apache.lucene.backward_codecs.lucene101.Lucene101PostingsFormat; import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.StoredFieldsFormat; -import org.apache.lucene.codecs.lucene101.Lucene101Codec; -import 
org.apache.lucene.codecs.lucene101.Lucene101PostingsFormat; import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat; import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat; import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; diff --git a/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch92Lucene103Codec.java b/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch92Lucene103Codec.java new file mode 100644 index 0000000000000..c26d485fc8c99 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch92Lucene103Codec.java @@ -0,0 +1,133 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.codec; + +import org.apache.lucene.codecs.DocValuesFormat; +import org.apache.lucene.codecs.KnnVectorsFormat; +import org.apache.lucene.codecs.PostingsFormat; +import org.apache.lucene.codecs.StoredFieldsFormat; +import org.apache.lucene.codecs.lucene103.Lucene103Codec; +import org.apache.lucene.codecs.lucene103.Lucene103PostingsFormat; +import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat; +import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat; +import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; +import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat; +import org.elasticsearch.index.codec.perfield.XPerFieldDocValuesFormat; +import org.elasticsearch.index.codec.zstd.Zstd814StoredFieldsFormat; + +/** + * Elasticsearch codec as of 9.2 relying on Lucene 10.3. 
This extends the Lucene 10.3 codec to compressed + * stored fields with ZSTD instead of LZ4/DEFLATE. See {@link Zstd814StoredFieldsFormat}. + */ +public class Elasticsearch92Lucene103Codec extends CodecService.DeduplicateFieldInfosCodec { + + static final PostingsFormat DEFAULT_POSTINGS_FORMAT = new Lucene103PostingsFormat(); + + private final StoredFieldsFormat storedFieldsFormat; + + private final PostingsFormat defaultPostingsFormat; + private final PostingsFormat postingsFormat = new PerFieldPostingsFormat() { + @Override + public PostingsFormat getPostingsFormatForField(String field) { + return Elasticsearch92Lucene103Codec.this.getPostingsFormatForField(field); + } + }; + + private final DocValuesFormat defaultDVFormat; + private final DocValuesFormat docValuesFormat = new XPerFieldDocValuesFormat() { + @Override + public DocValuesFormat getDocValuesFormatForField(String field) { + return Elasticsearch92Lucene103Codec.this.getDocValuesFormatForField(field); + } + }; + + private final KnnVectorsFormat defaultKnnVectorsFormat; + private final KnnVectorsFormat knnVectorsFormat = new PerFieldKnnVectorsFormat() { + @Override + public KnnVectorsFormat getKnnVectorsFormatForField(String field) { + return Elasticsearch92Lucene103Codec.this.getKnnVectorsFormatForField(field); + } + }; + + /** Public no-arg constructor, needed for SPI loading at read-time. */ + public Elasticsearch92Lucene103Codec() { + this(Zstd814StoredFieldsFormat.Mode.BEST_SPEED); + } + + /** + * Constructor. Takes a {@link Zstd814StoredFieldsFormat.Mode} that describes whether to optimize for retrieval speed at the expense of + * worse space-efficiency or vice-versa. 
+ */ + public Elasticsearch92Lucene103Codec(Zstd814StoredFieldsFormat.Mode mode) { + super("Elasticsearch92Lucene103", new Lucene103Codec()); + this.storedFieldsFormat = mode.getFormat(); + this.defaultPostingsFormat = DEFAULT_POSTINGS_FORMAT; + this.defaultDVFormat = new Lucene90DocValuesFormat(); + this.defaultKnnVectorsFormat = new Lucene99HnswVectorsFormat(); + } + + @Override + public StoredFieldsFormat storedFieldsFormat() { + return storedFieldsFormat; + } + + @Override + public final PostingsFormat postingsFormat() { + return postingsFormat; + } + + @Override + public final DocValuesFormat docValuesFormat() { + return docValuesFormat; + } + + @Override + public final KnnVectorsFormat knnVectorsFormat() { + return knnVectorsFormat; + } + + /** + * Returns the postings format that should be used for writing new segments of field. + * + *

The default implementation always returns {@link #DEFAULT_POSTINGS_FORMAT}, an instance of {@link Lucene103PostingsFormat}. + * + *

WARNING: if you subclass, you are responsible for index backwards compatibility: + * future versions of Lucene are only guaranteed to be able to read the default implementation. + */ + public PostingsFormat getPostingsFormatForField(String field) { + return defaultPostingsFormat; + } + + /** + * Returns the docvalues format that should be used for writing new segments of field + * . + * + *

The default implementation always returns a {@link Lucene90DocValuesFormat}. + * + *

WARNING: if you subclass, you are responsible for index backwards compatibility: + * future versions of Lucene are only guaranteed to be able to read the default implementation. + */ + public DocValuesFormat getDocValuesFormatForField(String field) { + return defaultDVFormat; + } + + /** + * Returns the vectors format that should be used for writing new segments of field + * + *

The default implementation always returns a {@link Lucene99HnswVectorsFormat}. + * + *

WARNING: if you subclass, you are responsible for index backwards compatibility: + * future version of Lucene are only guaranteed to be able to read the default implementation. + */ + public KnnVectorsFormat getKnnVectorsFormatForField(String field) { + return defaultKnnVectorsFormat; + } + +} diff --git a/server/src/main/java/org/elasticsearch/index/codec/LegacyPerFieldMapperCodec.java b/server/src/main/java/org/elasticsearch/index/codec/LegacyPerFieldMapperCodec.java index 9e4ecb1a46c17..c3e9ab6617b87 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/LegacyPerFieldMapperCodec.java +++ b/server/src/main/java/org/elasticsearch/index/codec/LegacyPerFieldMapperCodec.java @@ -13,7 +13,7 @@ import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.lucene101.Lucene101Codec; +import org.apache.lucene.codecs.lucene103.Lucene103Codec; import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.index.mapper.MapperService; @@ -22,11 +22,11 @@ * Legacy version of {@link PerFieldMapperCodec}. This codec is preserved to give an escape hatch in case we encounter issues with new * changes in {@link PerFieldMapperCodec}. */ -public final class LegacyPerFieldMapperCodec extends Lucene101Codec { +public final class LegacyPerFieldMapperCodec extends Lucene103Codec { private final PerFieldFormatSupplier formatSupplier; - public LegacyPerFieldMapperCodec(Lucene101Codec.Mode compressionMode, MapperService mapperService, BigArrays bigArrays) { + public LegacyPerFieldMapperCodec(Lucene103Codec.Mode compressionMode, MapperService mapperService, BigArrays bigArrays) { super(compressionMode); this.formatSupplier = new PerFieldFormatSupplier(mapperService, bigArrays); // If the below assertion fails, it is a sign that Lucene released a new codec. 
You must create a copy of the current Elasticsearch diff --git a/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java b/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java index 4a79df430505d..8198963184763 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java +++ b/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java @@ -12,7 +12,6 @@ import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.lucene101.Lucene101PostingsFormat; import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat; import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat; import org.elasticsearch.common.util.BigArrays; @@ -34,13 +33,12 @@ * vectors. */ public class PerFieldFormatSupplier { - public static final FeatureFlag USE_LUCENE101_POSTINGS_FORMAT = new FeatureFlag("use_lucene101_postings_format"); + public static final FeatureFlag USE_DEFAULT_LUCENE_POSTINGS_FORMAT = new FeatureFlag("use_default_lucene_postings_format"); private static final DocValuesFormat docValuesFormat = new Lucene90DocValuesFormat(); private static final KnnVectorsFormat knnVectorsFormat = new Lucene99HnswVectorsFormat(); private static final ES819TSDBDocValuesFormat tsdbDocValuesFormat = new ES819TSDBDocValuesFormat(); private static final ES812PostingsFormat es812PostingsFormat = new ES812PostingsFormat(); - private static final Lucene101PostingsFormat lucene101PostingsFormat = new Lucene101PostingsFormat(); private static final PostingsFormat completionPostingsFormat = PostingsFormat.forName("Completion101"); private final ES87BloomFilterPostingsFormat bloomFilterPostingsFormat; @@ -53,10 +51,10 @@ public PerFieldFormatSupplier(MapperService mapperService, BigArrays bigArrays) this.bloomFilterPostingsFormat = new ES87BloomFilterPostingsFormat(bigArrays, 
this::internalGetPostingsFormatForField); if (mapperService != null - && USE_LUCENE101_POSTINGS_FORMAT.isEnabled() - && mapperService.getIndexSettings().getIndexVersionCreated().onOrAfter(IndexVersions.USE_LUCENE101_POSTINGS_FORMAT) + && USE_DEFAULT_LUCENE_POSTINGS_FORMAT.isEnabled() + && mapperService.getIndexSettings().getIndexVersionCreated().onOrAfter(IndexVersions.UPGRADE_TO_LUCENE_10_3_0) && mapperService.getIndexSettings().getMode() == IndexMode.STANDARD) { - defaultPostingsFormat = lucene101PostingsFormat; + defaultPostingsFormat = Elasticsearch92Lucene103Codec.DEFAULT_POSTINGS_FORMAT; } else { // our own posting format using PFOR defaultPostingsFormat = es812PostingsFormat; diff --git a/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java b/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java index 9a3055f96bba8..0ffb63270fd58 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java +++ b/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java @@ -26,7 +26,7 @@ * per index in real time via the mapping API. If no specific postings format or vector format is * configured for a specific field the default postings or vector format is used. 
*/ -public final class PerFieldMapperCodec extends Elasticsearch900Lucene101Codec { +public final class PerFieldMapperCodec extends Elasticsearch92Lucene103Codec { private final PerFieldFormatSupplier formatSupplier; diff --git a/server/src/main/java/org/elasticsearch/index/codec/postings/ES812PostingsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/postings/ES812PostingsFormat.java index 6ccfaba7853f2..4fb6bcd00b1fb 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/postings/ES812PostingsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/postings/ES812PostingsFormat.java @@ -19,6 +19,7 @@ */ package org.elasticsearch.index.codec.postings; +import org.apache.lucene.backward_codecs.lucene90.blocktree.Lucene90BlockTreeTermsReader; import org.apache.lucene.codecs.BlockTermState; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.FieldsConsumer; @@ -27,8 +28,6 @@ import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.PostingsReaderBase; import org.apache.lucene.codecs.PostingsWriterBase; -import org.apache.lucene.codecs.lucene90.blocktree.Lucene90BlockTreeTermsReader; -import org.apache.lucene.codecs.lucene90.blocktree.Lucene90BlockTreeTermsWriter; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; @@ -100,7 +99,7 @@ *

Term Dictionary *

The .tim file contains the list of terms in each field along with per-term statistics * (such as docfreq) and pointers to the frequencies, positions, payload and skip data in the - * .doc, .pos, and .pay files. See {@link Lucene90BlockTreeTermsWriter} for more details on + * .doc, .pos, and .pay files. See {@link Lucene90BlockTreeTermsReader} for more details on * the format. *

NOTE: The term dictionary can plug into different postings implementations: the postings * writer/reader are actually responsible for encoding and decoding the PostingsHeader and @@ -155,7 +154,7 @@ *

*
Term Index *

The .tip file contains an index into the term dictionary, so that it can be accessed - * randomly. See {@link Lucene90BlockTreeTermsWriter} for more details on the format. + * randomly. See {@link Lucene90BlockTreeTermsReader} for more details on the format. *

* * @@ -343,7 +342,7 @@ * * */ -public final class ES812PostingsFormat extends PostingsFormat { +public class ES812PostingsFormat extends PostingsFormat { /** * Filename extension for document number, frequencies, and skip data. See chapter: Writes terms dict and index, block-encoding (column stride) each term's metadata for each set + * of terms between two index terms. + * + *

Files: + * + *

+ * + *

+ * + *

Term Dictionary

+ * + *

The .tim file contains the list of terms in each field along with per-term statistics (such as + * docfreq) and per-term metadata (typically pointers to the postings list for that term in the + * inverted index). + * + *

The .tim is arranged in blocks: with blocks containing a variable number of entries (by + * default 25-48), where each entry is either a term or a reference to a sub-block. + * + *

NOTE: The term dictionary can plug into different postings implementations: the postings + * writer/reader are actually responsible for encoding and decoding the Postings Metadata and Term + * Metadata sections. + * + *

    + *
  • TermsDict (.tim) --> Header, FieldDictNumFields, Footer + *
  • FieldDict --> PostingsHeader, NodeBlockNumBlocks + *
  • NodeBlock --> (OuterNode | InnerNode) + *
  • OuterNode --> EntryCount, SuffixLength, ByteSuffixLength, StatsLength, < + * TermStats >EntryCount, MetaLength, + * <TermMetadata>EntryCount + *
  • InnerNode --> EntryCount, SuffixLength[,Sub?], ByteSuffixLength, StatsLength, + * < TermStats ? >EntryCount, MetaLength, <TermMetadata ? + * >EntryCount + *
  • TermStats --> DocFreq, TotalTermFreq + *
  • Header --> {@link CodecUtil#writeHeader CodecHeader} + *
  • EntryCount,SuffixLength,StatsLength,DocFreq,MetaLength --> {@link DataOutput#writeVInt + * VInt} + *
  • TotalTermFreq --> {@link DataOutput#writeVLong VLong} + *
  • Footer --> {@link CodecUtil#writeFooter CodecFooter} + *
+ * + *

Notes: + * + *

    + *
  • Header is a {@link CodecUtil#writeHeader CodecHeader} storing the version information for + * the BlockTree implementation. + *
  • DocFreq is the count of documents which contain the term. + *
  • TotalTermFreq is the total number of occurrences of the term. This is encoded as the + * difference between the total number of occurrences and the DocFreq. + *
  • PostingsHeader and TermMetadata are plugged into by the specific postings implementation: + * these contain arbitrary per-file data (such as parameters or versioning information) and + * per-term data (such as pointers to inverted files). + *
  • For inner nodes of the tree, every entry will steal one bit to mark whether it points to + * child nodes(sub-block). If so, the corresponding TermStats and TermMetaData are omitted. + *
+ * + *

+ * + *

Term Metadata

+ * + *

The .tmd file contains the list of term metadata (such as FST index metadata) and field level + * statistics (such as sum of total term freq). + * + *

    + *
  • TermsMeta (.tmd) --> Header, NumFields, <FieldStats>NumFields, + * TermIndexLength, TermDictLength, Footer + *
  • FieldStats --> FieldNumber, NumTerms, RootCodeLength, ByteRootCodeLength, + * SumTotalTermFreq?, SumDocFreq, DocCount, MinTerm, MaxTerm, IndexStartFP, FSTHeader, + * FSTMetadata + *
  • Header,FSTHeader --> {@link CodecUtil#writeHeader CodecHeader} + *
  • TermIndexLength, TermDictLength --> {@link DataOutput#writeLong Uint64} + *
  • MinTerm,MaxTerm --> {@link DataOutput#writeVInt VInt} length followed by the byte[] + *
  • NumFields,FieldNumber,RootCodeLength,DocCount --> {@link DataOutput#writeVInt VInt} + *
  • NumTerms,SumTotalTermFreq,SumDocFreq,IndexStartFP --> {@link DataOutput#writeVLong + * VLong} + *
  • Footer --> {@link CodecUtil#writeFooter CodecFooter} + *
+ * + *

Notes: + * + *

    + *
  • FieldNumber is the fields number from {@link FieldInfos}. (.fnm) + *
  • NumTerms is the number of unique terms for the field. + *
  • RootCode points to the root block for the field. + *
  • SumDocFreq is the total number of postings, the number of term-document pairs across the + * entire field. + *
  • DocCount is the number of documents that have at least one posting for this field. + *
  • MinTerm, MaxTerm are the lowest and highest term in this field. + *
+ * + * + * + *

Term Index

+ * + *

The .tip file contains an index into the term dictionary, so that it can be accessed randomly. + * The index is also used to determine when a given term cannot exist on disk (in the .tim file), + * saving a disk seek. + * + *

    + *
  • TermsIndex (.tip) --> Header, FSTIndexNumFieldsFooter + *
  • Header --> {@link CodecUtil#writeHeader CodecHeader} + * + *
  • FSTIndex --> {@link FST FST<byte[]>} + *
  • Footer --> {@link CodecUtil#writeFooter CodecFooter} + *
+ * + *

Notes: + * + *

    + *
  • The .tip file contains a separate FST for each field. The FST maps a term prefix to the + * on-disk block that holds all terms starting with that prefix. Each field's IndexStartFP + * points to its FST. + *
  • It's possible that an on-disk block would contain too many terms (more than the allowed + * maximum (default: 48)). When this happens, the block is sub-divided into new blocks (called + * "floor blocks"), and then the output in the FST for the block's prefix encodes the leading + * byte of each sub-block, and its file pointer. + *
+ * + * @see Lucene90BlockTreeTermsReader + */ +public final class Lucene90BlockTreeTermsWriter extends FieldsConsumer { + + /** + * Suggested default value for the {@code minItemsInBlock} parameter to {@link + * #Lucene90BlockTreeTermsWriter(SegmentWriteState,PostingsWriterBase,int,int)}. + */ + public static final int DEFAULT_MIN_BLOCK_SIZE = 25; + + /** + * Suggested default value for the {@code maxItemsInBlock} parameter to {@link + * #Lucene90BlockTreeTermsWriter(SegmentWriteState,PostingsWriterBase,int,int)}. + */ + public static final int DEFAULT_MAX_BLOCK_SIZE = 48; + + public static final int OUTPUT_FLAGS_NUM_BITS = 2; + public static final int OUTPUT_FLAGS_MASK = 0x3; + public static final int OUTPUT_FLAG_IS_FLOOR = 0x1; + public static final int OUTPUT_FLAG_HAS_TERMS = 0x2; + + /** Extension of terms meta file */ + static final String TERMS_EXTENSION = "tim"; + static final String TERMS_CODEC_NAME = "BlockTreeTermsDict"; + + // public static boolean DEBUG = false; + // public static boolean DEBUG2 = false; + + // private final static boolean SAVE_DOT_FILES = false; + + private final IndexOutput metaOut; + private final IndexOutput termsOut; + private final IndexOutput indexOut; + final int maxDoc; + final int minItemsInBlock; + final int maxItemsInBlock; + final int version; + + final PostingsWriterBase postingsWriter; + final FieldInfos fieldInfos; + + private final List fields = new ArrayList<>(); + + /** + * Create a new writer. The number of items (terms or sub-blocks) per block will aim to be between + * minItemsPerBlock and maxItemsPerBlock, though in some cases the blocks may be smaller than the + * min. 
+ */ + public Lucene90BlockTreeTermsWriter( + SegmentWriteState state, + PostingsWriterBase postingsWriter, + int minItemsInBlock, + int maxItemsInBlock + ) throws IOException { + this(state, postingsWriter, minItemsInBlock, maxItemsInBlock, Lucene90BlockTreeTermsReader.VERSION_CURRENT); + } + + /** Expert constructor that allows configuring the version, used for bw tests. */ + public Lucene90BlockTreeTermsWriter( + SegmentWriteState state, + PostingsWriterBase postingsWriter, + int minItemsInBlock, + int maxItemsInBlock, + int version + ) throws IOException { + validateSettings(minItemsInBlock, maxItemsInBlock); + + this.minItemsInBlock = minItemsInBlock; + this.maxItemsInBlock = maxItemsInBlock; + if (version < Lucene90BlockTreeTermsReader.VERSION_START || version > Lucene90BlockTreeTermsReader.VERSION_CURRENT) { + throw new IllegalArgumentException( + "Expected version in range [" + + Lucene90BlockTreeTermsReader.VERSION_START + + ", " + + Lucene90BlockTreeTermsReader.VERSION_CURRENT + + "], but got " + + version + ); + } + this.version = version; + + this.maxDoc = state.segmentInfo.maxDoc(); + this.fieldInfos = state.fieldInfos; + this.postingsWriter = postingsWriter; + + final String termsName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TERMS_EXTENSION); + termsOut = state.directory.createOutput(termsName, state.context); + boolean success = false; + IndexOutput metaOut = null, indexOut = null; + try { + CodecUtil.writeIndexHeader(termsOut, TERMS_CODEC_NAME, version, state.segmentInfo.getId(), state.segmentSuffix); + + final String indexName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TERMS_INDEX_EXTENSION); + indexOut = state.directory.createOutput(indexName, state.context); + CodecUtil.writeIndexHeader(indexOut, TERMS_INDEX_CODEC_NAME, version, state.segmentInfo.getId(), state.segmentSuffix); + // segment = state.segmentInfo.name; + + final String metaName = 
IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, TERMS_META_EXTENSION); + metaOut = state.directory.createOutput(metaName, state.context); + CodecUtil.writeIndexHeader(metaOut, TERMS_META_CODEC_NAME, version, state.segmentInfo.getId(), state.segmentSuffix); + + postingsWriter.init(metaOut, state); // have consumer write its format/header + + this.metaOut = metaOut; + this.indexOut = indexOut; + success = true; + } finally { + if (success == false) { + IOUtils.closeWhileHandlingException(metaOut, termsOut, indexOut); + } + } + } + + /** Throws {@code IllegalArgumentException} if any of these settings is invalid. */ + public static void validateSettings(int minItemsInBlock, int maxItemsInBlock) { + if (minItemsInBlock <= 1) { + throw new IllegalArgumentException("minItemsInBlock must be >= 2; got " + minItemsInBlock); + } + if (minItemsInBlock > maxItemsInBlock) { + throw new IllegalArgumentException( + "maxItemsInBlock must be >= minItemsInBlock; got maxItemsInBlock=" + maxItemsInBlock + " minItemsInBlock=" + minItemsInBlock + ); + } + if (2 * (minItemsInBlock - 1) > maxItemsInBlock) { + throw new IllegalArgumentException( + "maxItemsInBlock must be at least 2*(minItemsInBlock-1); got maxItemsInBlock=" + + maxItemsInBlock + + " minItemsInBlock=" + + minItemsInBlock + ); + } + } + + @Override + public void write(Fields fields, NormsProducer norms) throws IOException { + // if (DEBUG) System.out.println("\nBTTW.write seg=" + segment); + + String lastField = null; + for (String field : fields) { + assert lastField == null || lastField.compareTo(field) < 0; + lastField = field; + + // if (DEBUG) System.out.println("\nBTTW.write seg=" + segment + " field=" + field); + Terms terms = fields.terms(field); + if (terms == null) { + continue; + } + + TermsEnum termsEnum = terms.iterator(); + TermsWriter termsWriter = new TermsWriter(fieldInfos.fieldInfo(field)); + while (true) { + BytesRef term = termsEnum.next(); + // if (DEBUG) 
System.out.println("BTTW: next term " + term); + + if (term == null) { + break; + } + + // if (DEBUG) System.out.println("write field=" + fieldInfo.name + " term=" + + // ToStringUtils.bytesRefToString(term)); + termsWriter.write(term, termsEnum, norms); + } + + termsWriter.finish(); + + // if (DEBUG) System.out.println("\nBTTW.write done seg=" + segment + " field=" + field); + } + } + + static long encodeOutput(long fp, boolean hasTerms, boolean isFloor) { + assert fp < (1L << 62); + return (fp << 2) | (hasTerms ? OUTPUT_FLAG_HAS_TERMS : 0) | (isFloor ? OUTPUT_FLAG_IS_FLOOR : 0); + } + + private static class PendingEntry { + public final boolean isTerm; + + protected PendingEntry(boolean isTerm) { + this.isTerm = isTerm; + } + } + + private static final class PendingTerm extends PendingEntry { + public final byte[] termBytes; + // stats + metadata + public final BlockTermState state; + + PendingTerm(BytesRef term, BlockTermState state) { + super(true); + this.termBytes = new byte[term.length]; + System.arraycopy(term.bytes, term.offset, termBytes, 0, term.length); + this.state = state; + } + + @Override + public String toString() { + return "TERM: " + ToStringUtils.bytesRefToString(termBytes); + } + } + + /** + * Encodes long value to variable length byte[], in MSB order. Use {@link + * FieldReader readMSBVLong} to decode. + * + *

Package private for testing + */ + static void writeMSBVLong(long l, DataOutput scratchBytes) throws IOException { + assert l >= 0; + // Keep zero bits on most significant byte to have more chance to get prefix bytes shared. + // e.g. we expect 0x7FFF stored as [0x81, 0xFF, 0x7F] but not [0xFF, 0xFF, 0x40] + final int bytesNeeded = (Long.SIZE - Long.numberOfLeadingZeros(l) - 1) / 7 + 1; + l <<= Long.SIZE - bytesNeeded * 7; + for (int i = 1; i < bytesNeeded; i++) { + scratchBytes.writeByte((byte) (((l >>> 57) & 0x7FL) | 0x80)); + l = l << 7; + } + scratchBytes.writeByte((byte) (((l >>> 57) & 0x7FL))); + } + + private final class PendingBlock extends PendingEntry { + public final BytesRef prefix; + public final long fp; + public FST index; + public List> subIndices; + public final boolean hasTerms; + public final boolean isFloor; + public final int floorLeadByte; + + PendingBlock(BytesRef prefix, long fp, boolean hasTerms, boolean isFloor, int floorLeadByte, List> subIndices) { + super(false); + this.prefix = prefix; + this.fp = fp; + this.hasTerms = hasTerms; + this.isFloor = isFloor; + this.floorLeadByte = floorLeadByte; + this.subIndices = subIndices; + } + + @Override + public String toString() { + return "BLOCK: prefix=" + ToStringUtils.bytesRefToString(prefix); + } + + public void compileIndex(List blocks, ByteBuffersDataOutput scratchBytes, IntsRefBuilder scratchIntsRef) + throws IOException { + + assert (isFloor && blocks.size() > 1) || (isFloor == false && blocks.size() == 1) : "isFloor=" + isFloor + " blocks=" + blocks; + assert this == blocks.get(0); + + assert scratchBytes.size() == 0; + + // write the leading vLong in MSB order for better outputs sharing in the FST + if (version >= Lucene90BlockTreeTermsReader.VERSION_MSB_VLONG_OUTPUT) { + writeMSBVLong(encodeOutput(fp, hasTerms, isFloor), scratchBytes); + } else { + scratchBytes.writeVLong(encodeOutput(fp, hasTerms, isFloor)); + } + if (isFloor) { + scratchBytes.writeVInt(blocks.size() - 1); + for (int 
i = 1; i < blocks.size(); i++) { + PendingBlock sub = blocks.get(i); + assert sub.floorLeadByte != -1; + // if (DEBUG) { + // System.out.println(" write floorLeadByte=" + + // Integer.toHexString(sub.floorLeadByte&0xff)); + // } + scratchBytes.writeByte((byte) sub.floorLeadByte); + assert sub.fp > fp; + scratchBytes.writeVLong((sub.fp - fp) << 1 | (sub.hasTerms ? 1 : 0)); + } + } + + long estimateSize = prefix.length; + for (PendingBlock block : blocks) { + if (block.subIndices != null) { + for (FST subIndex : block.subIndices) { + estimateSize += subIndex.numBytes(); + } + } + } + int estimateBitsRequired = PackedInts.bitsRequired(estimateSize); + int pageBits = Math.min(15, Math.max(6, estimateBitsRequired)); + + final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton(); + final int fstVersion; + if (version >= Lucene90BlockTreeTermsReader.VERSION_CURRENT) { + fstVersion = FST.VERSION_CURRENT; + } else { + fstVersion = FST.VERSION_90; + } + final FSTCompiler fstCompiler = new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE1, outputs) + // Disable suffixes sharing for block tree index because suffixes are mostly dropped + // from the FST index and left in the term blocks. 
+ .suffixRAMLimitMB(0d) + .dataOutput(getOnHeapReaderWriter(pageBits)) + .setVersion(fstVersion) + .build(); + // if (DEBUG) { + // System.out.println(" compile index for prefix=" + prefix); + // } + // indexBuilder.DEBUG = false; + final byte[] bytes = scratchBytes.toArrayCopy(); + assert bytes.length > 0; + fstCompiler.add(Util.toIntsRef(prefix, scratchIntsRef), new BytesRef(bytes, 0, bytes.length)); + scratchBytes.reset(); + + // Copy over index for all sub-blocks + for (PendingBlock block : blocks) { + if (block.subIndices != null) { + for (FST subIndex : block.subIndices) { + append(fstCompiler, subIndex, scratchIntsRef); + } + block.subIndices = null; + } + } + + index = FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()); + + assert subIndices == null; + + /* + Writer w = new OutputStreamWriter(new FileOutputStream("out.dot")); + Util.toDot(index, w, false, false); + System.out.println("SAVED to out.dot"); + w.close(); + */ + } + + // TODO: maybe we could add bulk-add method to + // Builder? Takes FST and unions it w/ current + // FST. 
+ private void append(FSTCompiler fstCompiler, FST subIndex, IntsRefBuilder scratchIntsRef) throws IOException { + final BytesRefFSTEnum subIndexEnum = new BytesRefFSTEnum<>(subIndex); + BytesRefFSTEnum.InputOutput indexEnt; + while ((indexEnt = subIndexEnum.next()) != null) { + // if (DEBUG) { + // System.out.println(" add sub=" + indexEnt.input + " " + indexEnt.input + " output=" + // + indexEnt.output); + // } + fstCompiler.add(Util.toIntsRef(indexEnt.input, scratchIntsRef), indexEnt.output); + } + } + } + + private final ByteBuffersDataOutput scratchBytes = ByteBuffersDataOutput.newResettableInstance(); + private final IntsRefBuilder scratchIntsRef = new IntsRefBuilder(); + + private static class StatsWriter { + + private final DataOutput out; + private final boolean hasFreqs; + private int singletonCount; + + StatsWriter(DataOutput out, boolean hasFreqs) { + this.out = out; + this.hasFreqs = hasFreqs; + } + + void add(int df, long ttf) throws IOException { + // Singletons (DF==1, TTF==1) are run-length encoded + if (df == 1 && (hasFreqs == false || ttf == 1)) { + singletonCount++; + } else { + finish(); + out.writeVInt(df << 1); + if (hasFreqs) { + out.writeVLong(ttf - df); + } + } + } + + void finish() throws IOException { + if (singletonCount > 0) { + out.writeVInt(((singletonCount - 1) << 1) | 1); + singletonCount = 0; + } + } + } + + class TermsWriter { + private final FieldInfo fieldInfo; + private long numTerms; + final FixedBitSet docsSeen; + long sumTotalTermFreq; + long sumDocFreq; + + // Records index into pending where the current prefix at that + // length "started"; for example, if current term starts with 't', + // startsByPrefix[0] is the index into pending for the first + // term/sub-block starting with 't'. We use this to figure out when + // to write a new block: + private final BytesRefBuilder lastTerm = new BytesRefBuilder(); + private int[] prefixStarts = new int[8]; + + // Pending stack of terms and blocks. 
As terms arrive (in sorted order) + // we append to this stack, and once the top of the stack has enough + // terms starting with a common prefix, we write a new block with + // those terms and replace those terms in the stack with a new block: + private final List pending = new ArrayList<>(); + + // Reused in writeBlocks: + private final List newBlocks = new ArrayList<>(); + + private PendingTerm firstPendingTerm; + private PendingTerm lastPendingTerm; + + /** Writes the top count entries in pending, using prevTerm to compute the prefix. */ + void writeBlocks(int prefixLength, int count) throws IOException { + + assert count > 0; + + // if (DEBUG2) { + // BytesRef br = new BytesRef(lastTerm.bytes()); + // br.length = prefixLength; + // System.out.println("writeBlocks: seg=" + segment + " prefix=" + + // ToStringUtils.bytesRefToString(br) + " count=" + count); + // } + + // Root block better write all remaining pending entries: + assert prefixLength > 0 || count == pending.size(); + + int lastSuffixLeadLabel = -1; + + // True if we saw at least one term in this block (we record if a block + // only points to sub-blocks in the terms index so we can avoid seeking + // to it when we are looking for a term): + boolean hasTerms = false; + boolean hasSubBlocks = false; + + int start = pending.size() - count; + int end = pending.size(); + int nextBlockStart = start; + int nextFloorLeadLabel = -1; + + for (int i = start; i < end; i++) { + + PendingEntry ent = pending.get(i); + + int suffixLeadLabel; + + if (ent.isTerm) { + PendingTerm term = (PendingTerm) ent; + if (term.termBytes.length == prefixLength) { + // Suffix is 0, i.e. 
prefix 'foo' and term is + // 'foo' so the term has empty string suffix + // in this block + assert lastSuffixLeadLabel == -1 : "i=" + i + " lastSuffixLeadLabel=" + lastSuffixLeadLabel; + suffixLeadLabel = -1; + } else { + suffixLeadLabel = term.termBytes[prefixLength] & 0xff; + } + } else { + PendingBlock block = (PendingBlock) ent; + assert block.prefix.length > prefixLength; + suffixLeadLabel = block.prefix.bytes[block.prefix.offset + prefixLength] & 0xff; + } + // if (DEBUG) System.out.println(" i=" + i + " ent=" + ent + " suffixLeadLabel=" + + // suffixLeadLabel); + + if (suffixLeadLabel != lastSuffixLeadLabel) { + int itemsInBlock = i - nextBlockStart; + if (itemsInBlock >= minItemsInBlock && end - nextBlockStart > maxItemsInBlock) { + // The count is too large for one block, so we must break it into "floor" blocks, where + // we record + // the leading label of the suffix of the first term in each floor block, so at search + // time we can + // jump to the right floor block. We just use a naive greedy segmenter here: make a new + // floor + // block as soon as we have at least minItemsInBlock. 
This is not always best: it often + // produces + // a too-small block as the final block: + boolean isFloor = itemsInBlock < count; + newBlocks.add(writeBlock(prefixLength, isFloor, nextFloorLeadLabel, nextBlockStart, i, hasTerms, hasSubBlocks)); + + hasTerms = false; + hasSubBlocks = false; + nextFloorLeadLabel = suffixLeadLabel; + nextBlockStart = i; + } + + lastSuffixLeadLabel = suffixLeadLabel; + } + + if (ent.isTerm) { + hasTerms = true; + } else { + hasSubBlocks = true; + } + } + + // Write last block, if any: + if (nextBlockStart < end) { + int itemsInBlock = end - nextBlockStart; + boolean isFloor = itemsInBlock < count; + newBlocks.add(writeBlock(prefixLength, isFloor, nextFloorLeadLabel, nextBlockStart, end, hasTerms, hasSubBlocks)); + } + + assert newBlocks.isEmpty() == false; + + PendingBlock firstBlock = newBlocks.get(0); + + assert firstBlock.isFloor || newBlocks.size() == 1; + + firstBlock.compileIndex(newBlocks, scratchBytes, scratchIntsRef); + + // Remove slice from the top of the pending stack, that we just wrote: + pending.subList(pending.size() - count, pending.size()).clear(); + + // Append new block + pending.add(firstBlock); + + newBlocks.clear(); + } + + private boolean allEqual(byte[] b, int startOffset, int endOffset, byte value) { + Objects.checkFromToIndex(startOffset, endOffset, b.length); + for (int i = startOffset; i < endOffset; ++i) { + if (b[i] != value) { + return false; + } + } + return true; + } + + /** + * Writes the specified slice (start is inclusive, end is exclusive) from pending stack as a new + * block. If isFloor is true, there were too many (more than maxItemsInBlock) entries sharing + * the same prefix, and so we broke it into multiple floor blocks where we record the starting + * label of the suffix of each floor block. 
+ */ + private PendingBlock writeBlock( + int prefixLength, + boolean isFloor, + int floorLeadLabel, + int start, + int end, + boolean hasTerms, + boolean hasSubBlocks + ) throws IOException { + + assert end > start; + + long startFP = termsOut.getFilePointer(); + + boolean hasFloorLeadLabel = isFloor && floorLeadLabel != -1; + + final BytesRef prefix = new BytesRef(prefixLength + (hasFloorLeadLabel ? 1 : 0)); + System.arraycopy(lastTerm.get().bytes, 0, prefix.bytes, 0, prefixLength); + prefix.length = prefixLength; + + // if (DEBUG2) System.out.println(" writeBlock field=" + fieldInfo.name + " prefix=" + + // ToStringUtils.bytesRefToString(prefix) + " fp=" + startFP + " isFloor=" + isFloor + + // " isLastInFloor=" + (end == pending.size()) + " floorLeadLabel=" + floorLeadLabel + + // " start=" + start + " end=" + end + " hasTerms=" + hasTerms + " hasSubBlocks=" + + // hasSubBlocks); + + // Write block header: + int numEntries = end - start; + int code = numEntries << 1; + if (end == pending.size()) { + // Last block: + code |= 1; + } + termsOut.writeVInt(code); + + /* + if (DEBUG) { + System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " pending.size()=" + + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + ToStringUtils.bytesRefToString(prefix) + + " entCount="+(end-start+1) +" startFP="+startFP+(isFloor ? (" floorLeadLabel=" + Integer.toHexString(floorLeadLabel)) : "")); + } + */ + + // 1st pass: pack term suffix bytes into byte[] blob + // TODO: cutover to bulk int codec... simple64? 
+ + // We optimize the leaf block case (block has only terms), writing a more + // compact format in this case: + boolean isLeafBlock = hasSubBlocks == false; + + // System.out.println(" isLeaf=" + isLeafBlock); + + final List> subIndices; + + boolean absolute = true; + + if (isLeafBlock) { + // Block contains only ordinary terms: + subIndices = null; + StatsWriter statsWriter = new StatsWriter(this.statsWriter, fieldInfo.getIndexOptions() != IndexOptions.DOCS); + for (int i = start; i < end; i++) { + PendingEntry ent = pending.get(i); + assert ent.isTerm : "i=" + i; + + PendingTerm term = (PendingTerm) ent; + + assert StringHelper.startsWith(term.termBytes, prefix) : term + " prefix=" + prefix; + BlockTermState state = term.state; + final int suffix = term.termBytes.length - prefixLength; + // if (DEBUG2) { + // BytesRef suffixBytes = new BytesRef(suffix); + // System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix); + // suffixBytes.length = suffix; + // System.out.println(" write term suffix=" + + // ToStringUtils.bytesRefToString(suffixBytes)); + // } + + // For leaf block we write suffix straight + suffixLengthsWriter.writeVInt(suffix); + suffixWriter.append(term.termBytes, prefixLength, suffix); + assert floorLeadLabel == -1 || (term.termBytes[prefixLength] & 0xff) >= floorLeadLabel; + + // Write term stats, to separate byte[] blob: + statsWriter.add(state.docFreq, state.totalTermFreq); + + // Write term meta data + postingsWriter.encodeTerm(metaWriter, fieldInfo, state, absolute); + absolute = false; + } + statsWriter.finish(); + } else { + // Block has at least one prefix term or a sub block: + subIndices = new ArrayList<>(); + StatsWriter statsWriter = new StatsWriter(this.statsWriter, fieldInfo.getIndexOptions() != IndexOptions.DOCS); + for (int i = start; i < end; i++) { + PendingEntry ent = pending.get(i); + if (ent.isTerm) { + PendingTerm term = (PendingTerm) ent; + + assert StringHelper.startsWith(term.termBytes, prefix) : term + 
" prefix=" + prefix; + BlockTermState state = term.state; + final int suffix = term.termBytes.length - prefixLength; + // if (DEBUG2) { + // BytesRef suffixBytes = new BytesRef(suffix); + // System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix); + // suffixBytes.length = suffix; + // System.out.println(" write term suffix=" + + // ToStringUtils.bytesRefToString(suffixBytes)); + // } + + // For non-leaf block we borrow 1 bit to record + // if entry is term or sub-block, and 1 bit to record if + // it's a prefix term. Terms cannot be larger than ~32 KB + // so we won't run out of bits: + + suffixLengthsWriter.writeVInt(suffix << 1); + suffixWriter.append(term.termBytes, prefixLength, suffix); + + // Write term stats, to separate byte[] blob: + statsWriter.add(state.docFreq, state.totalTermFreq); + + // TODO: now that terms dict "sees" these longs, + // we can explore better column-stride encodings + // to encode all long[0]s for this block at + // once, all long[1]s, etc., e.g. using + // Simple64. Alternatively, we could interleave + // stats + meta ... 
no reason to have them + // separate anymore: + + // Write term meta data + postingsWriter.encodeTerm(metaWriter, fieldInfo, state, absolute); + absolute = false; + } else { + PendingBlock block = (PendingBlock) ent; + assert StringHelper.startsWith(block.prefix, prefix); + final int suffix = block.prefix.length - prefixLength; + assert StringHelper.startsWith(block.prefix, prefix); + + assert suffix > 0; + + // For non-leaf block we borrow 1 bit to record + // if entry is term or sub-block:f + suffixLengthsWriter.writeVInt((suffix << 1) | 1); + suffixWriter.append(block.prefix.bytes, prefixLength, suffix); + + // if (DEBUG2) { + // BytesRef suffixBytes = new BytesRef(suffix); + // System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix); + // suffixBytes.length = suffix; + // System.out.println(" write sub-block suffix=" + + // ToStringUtils.bytesRefToString(suffixBytes) + " subFP=" + block.fp + " subCode=" + + // (startFP-block.fp) + " floor=" + block.isFloor); + // } + + assert floorLeadLabel == -1 || (block.prefix.bytes[prefixLength] & 0xff) >= floorLeadLabel + : "floorLeadLabel=" + floorLeadLabel + " suffixLead=" + (block.prefix.bytes[prefixLength] & 0xff); + assert block.fp < startFP; + + suffixLengthsWriter.writeVLong(startFP - block.fp); + subIndices.add(block.index); + } + } + statsWriter.finish(); + + assert subIndices.size() != 0; + } + + // Write suffixes byte[] blob to terms dict output, either uncompressed, compressed with LZ4 + // or with LowercaseAsciiCompression. + CompressionAlgorithm compressionAlg = CompressionAlgorithm.NO_COMPRESSION; + // If there are 2 suffix bytes or less per term, then we don't bother compressing as suffix + // are unlikely what + // makes the terms dictionary large, and it also tends to be frequently the case for dense IDs + // like + // auto-increment IDs, so not compressing in that case helps not hurt ID lookups by too much. 
+ // We also only start compressing when the prefix length is greater than 2 since blocks whose + // prefix length is + // 1 or 2 always all get visited when running a fuzzy query whose max number of edits is 2. + if (suffixWriter.length() > 2L * numEntries && prefixLength > 2) { + // LZ4 inserts references whenever it sees duplicate strings of 4 chars or more, so only try + // it out if the + // average suffix length is greater than 6. + if (suffixWriter.length() > 6L * numEntries) { + if (compressionHashTable == null) { + compressionHashTable = new LZ4.HighCompressionHashTable(); + } + LZ4.compress(suffixWriter.bytes(), 0, suffixWriter.length(), spareWriter, compressionHashTable); + if (spareWriter.size() < suffixWriter.length() - (suffixWriter.length() >>> 2)) { + // LZ4 saved more than 25%, go for it + compressionAlg = CompressionAlgorithm.LZ4; + } + } + if (compressionAlg == CompressionAlgorithm.NO_COMPRESSION) { + spareWriter.reset(); + if (spareBytes.length < suffixWriter.length()) { + spareBytes = new byte[ArrayUtil.oversize(suffixWriter.length(), 1)]; + } + if (LowercaseAsciiCompression.compress(suffixWriter.bytes(), suffixWriter.length(), spareBytes, spareWriter)) { + compressionAlg = CompressionAlgorithm.LOWERCASE_ASCII; + } + } + } + long token = ((long) suffixWriter.length()) << 3; + if (isLeafBlock) { + token |= 0x04; + } + token |= compressionAlg.code; + termsOut.writeVLong(token); + if (compressionAlg == CompressionAlgorithm.NO_COMPRESSION) { + termsOut.writeBytes(suffixWriter.bytes(), suffixWriter.length()); + } else { + spareWriter.copyTo(termsOut); + } + suffixWriter.setLength(0); + spareWriter.reset(); + + // Write suffix lengths + final int numSuffixBytes = Math.toIntExact(suffixLengthsWriter.size()); + spareBytes = ArrayUtil.growNoCopy(spareBytes, numSuffixBytes); + suffixLengthsWriter.copyTo(new ByteArrayDataOutput(spareBytes)); + suffixLengthsWriter.reset(); + if (allEqual(spareBytes, 1, numSuffixBytes, spareBytes[0])) { + // Structured 
fields like IDs often have most values of the same length + termsOut.writeVInt((numSuffixBytes << 1) | 1); + termsOut.writeByte(spareBytes[0]); + } else { + termsOut.writeVInt(numSuffixBytes << 1); + termsOut.writeBytes(spareBytes, numSuffixBytes); + } + + // Stats + final int numStatsBytes = Math.toIntExact(statsWriter.size()); + termsOut.writeVInt(numStatsBytes); + statsWriter.copyTo(termsOut); + statsWriter.reset(); + + // Write term meta data byte[] blob + termsOut.writeVInt((int) metaWriter.size()); + metaWriter.copyTo(termsOut); + metaWriter.reset(); + + // if (DEBUG) { + // System.out.println(" fpEnd=" + out.getFilePointer()); + // } + + if (hasFloorLeadLabel) { + // We already allocated to length+1 above: + prefix.bytes[prefix.length++] = (byte) floorLeadLabel; + } + + return new PendingBlock(prefix, startFP, hasTerms, isFloor, floorLeadLabel, subIndices); + } + + TermsWriter(FieldInfo fieldInfo) { + this.fieldInfo = fieldInfo; + assert fieldInfo.getIndexOptions() != IndexOptions.NONE; + docsSeen = new FixedBitSet(maxDoc); + postingsWriter.setField(fieldInfo); + } + + /** Writes one term's worth of postings. 
*/ + public void write(BytesRef text, TermsEnum termsEnum, NormsProducer norms) throws IOException { + /* + if (DEBUG) { + int[] tmp = new int[lastTerm.length]; + System.arraycopy(prefixStarts, 0, tmp, 0, tmp.length); + System.out.println("BTTW: write term=" + ToStringUtils.bytesRefToString(text) + " prefixStarts=" + Arrays.toString(tmp) + + " pending.size()=" + pending.size()); + } + */ + + BlockTermState state = postingsWriter.writeTerm(text, termsEnum, docsSeen, norms); + if (state != null) { + + assert state.docFreq != 0; + assert fieldInfo.getIndexOptions() == IndexOptions.DOCS || state.totalTermFreq >= state.docFreq + : "postingsWriter=" + postingsWriter; + pushTerm(text); + + PendingTerm term = new PendingTerm(text, state); + pending.add(term); + // if (DEBUG) System.out.println(" add pending term = " + text + " pending.size()=" + + // pending.size()); + + sumDocFreq += state.docFreq; + sumTotalTermFreq += state.totalTermFreq; + numTerms++; + if (firstPendingTerm == null) { + firstPendingTerm = term; + } + lastPendingTerm = term; + } + } + + /** Pushes the new term to the top of the stack, and writes new blocks. 
*/ + private void pushTerm(BytesRef text) throws IOException { + // Find common prefix between last term and current term: + int prefixLength = Arrays.mismatch(lastTerm.bytes(), 0, lastTerm.length(), text.bytes, text.offset, text.offset + text.length); + if (prefixLength == -1) { // Only happens for the first term, if it is empty + assert lastTerm.length() == 0; + prefixLength = 0; + } + + // if (DEBUG) System.out.println(" shared=" + pos + " lastTerm.length=" + lastTerm.length); + + // Close the "abandoned" suffix now: + for (int i = lastTerm.length() - 1; i >= prefixLength; i--) { + + // How many items on top of the stack share the current suffix + // we are closing: + int prefixTopSize = pending.size() - prefixStarts[i]; + if (prefixTopSize >= minItemsInBlock) { + // if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize + + // " minItemsInBlock=" + minItemsInBlock); + writeBlocks(i + 1, prefixTopSize); + prefixStarts[i] -= prefixTopSize - 1; + } + } + + if (prefixStarts.length < text.length) { + prefixStarts = ArrayUtil.grow(prefixStarts, text.length); + } + + // Init new tail: + for (int i = prefixLength; i < text.length; i++) { + prefixStarts[i] = pending.size(); + } + + lastTerm.copyBytes(text); + } + + // Finishes all terms in this field + public void finish() throws IOException { + if (numTerms > 0) { + // if (DEBUG) System.out.println("BTTW: finish prefixStarts=" + + // Arrays.toString(prefixStarts)); + + // Add empty term to force closing of all final blocks: + pushTerm(new BytesRef()); + + // TODO: if pending.size() is already 1 with a non-zero prefix length + // we can save writing a "degenerate" root block, but we have to + // fix all the places that assume the root block's prefix is the empty string: + pushTerm(new BytesRef()); + writeBlocks(0, pending.size()); + + // We better have one final "root" block: + assert pending.size() == 1 && pending.get(0).isTerm == false : "pending.size()=" + pending.size() + " pending=" + 
pending; + final PendingBlock root = (PendingBlock) pending.get(0); + assert root.prefix.length == 0; + final BytesRef rootCode = root.index.getEmptyOutput(); + assert rootCode != null; + + ByteBuffersDataOutput metaOut = new ByteBuffersDataOutput(); + fields.add(metaOut); + + metaOut.writeVInt(fieldInfo.number); + metaOut.writeVLong(numTerms); + metaOut.writeVInt(rootCode.length); + metaOut.writeBytes(rootCode.bytes, rootCode.offset, rootCode.length); + assert fieldInfo.getIndexOptions() != IndexOptions.NONE; + if (fieldInfo.getIndexOptions() != IndexOptions.DOCS) { + metaOut.writeVLong(sumTotalTermFreq); + } + metaOut.writeVLong(sumDocFreq); + metaOut.writeVInt(docsSeen.cardinality()); + writeBytesRef(metaOut, new BytesRef(firstPendingTerm.termBytes)); + writeBytesRef(metaOut, new BytesRef(lastPendingTerm.termBytes)); + metaOut.writeVLong(indexOut.getFilePointer()); + // Write FST to index + root.index.save(metaOut, indexOut); + // System.out.println(" write FST " + indexStartFP + " field=" + fieldInfo.name); + + /* + if (DEBUG) { + final String dotFileName = segment + "_" + fieldInfo.name + ".dot"; + Writer w = new OutputStreamWriter(new FileOutputStream(dotFileName)); + Util.toDot(root.index, w, false, false); + System.out.println("SAVED to " + dotFileName); + w.close(); + } + */ + + } else { + assert sumTotalTermFreq == 0 || fieldInfo.getIndexOptions() == IndexOptions.DOCS && sumTotalTermFreq == -1; + assert sumDocFreq == 0; + assert docsSeen.cardinality() == 0; + } + } + + private final ByteBuffersDataOutput suffixLengthsWriter = ByteBuffersDataOutput.newResettableInstance(); + private final BytesRefBuilder suffixWriter = new BytesRefBuilder(); + private final ByteBuffersDataOutput statsWriter = ByteBuffersDataOutput.newResettableInstance(); + private final ByteBuffersDataOutput metaWriter = ByteBuffersDataOutput.newResettableInstance(); + private final ByteBuffersDataOutput spareWriter = ByteBuffersDataOutput.newResettableInstance(); + private byte[] 
spareBytes = BytesRef.EMPTY_BYTES; + private LZ4.HighCompressionHashTable compressionHashTable; + } + + private boolean closed; + + @Override + public void close() throws IOException { + if (closed) { + return; + } + closed = true; + + boolean success = false; + try { + metaOut.writeVInt(fields.size()); + for (ByteBuffersDataOutput fieldMeta : fields) { + fieldMeta.copyTo(metaOut); + } + CodecUtil.writeFooter(indexOut); + metaOut.writeLong(indexOut.getFilePointer()); + CodecUtil.writeFooter(termsOut); + metaOut.writeLong(termsOut.getFilePointer()); + CodecUtil.writeFooter(metaOut); + success = true; + } finally { + if (success) { + IOUtils.close(metaOut, termsOut, indexOut, postingsWriter); + } else { + IOUtils.closeWhileHandlingException(metaOut, termsOut, indexOut, postingsWriter); + } + } + } + + private static void writeBytesRef(DataOutput out, BytesRef bytes) throws IOException { + out.writeVInt(bytes.length); + out.writeBytes(bytes.bytes, bytes.offset, bytes.length); + } +} diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsReader.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsReader.java index 5f290a72ba833..964e7c5f43000 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsReader.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsReader.java @@ -35,9 +35,11 @@ import org.apache.lucene.search.KnnCollector; import org.apache.lucene.search.VectorScorer; import org.apache.lucene.store.ChecksumIndexInput; +import org.apache.lucene.store.DataAccessHint; +import org.apache.lucene.store.FileDataHint; +import org.apache.lucene.store.FileTypeHint; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; -import org.apache.lucene.store.ReadAdvice; import org.apache.lucene.util.Bits; import org.apache.lucene.util.IOUtils; import 
org.apache.lucene.util.RamUsageEstimator; @@ -106,7 +108,7 @@ public class ES816BinaryQuantizedVectorsReader extends FlatVectorsReader { ES816BinaryQuantizedVectorsFormat.VECTOR_DATA_CODEC_NAME, // Quantized vectors are accessed randomly from their node ID stored in the HNSW // graph. - state.context.withReadAdvice(ReadAdvice.RANDOM) + state.context.withHints(FileTypeHint.DATA, FileDataHint.KNN_VECTORS, DataAccessHint.RANDOM) ); success = true; } finally { diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/DirectIOLucene99FlatVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/DirectIOLucene99FlatVectorsFormat.java index a0cd6dbf65688..ebe226bbaa2d0 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/DirectIOLucene99FlatVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/DirectIOLucene99FlatVectorsFormat.java @@ -30,10 +30,9 @@ import org.apache.lucene.store.FlushInfo; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.MergeInfo; -import org.apache.lucene.store.ReadAdvice; +import org.elasticsearch.common.util.set.Sets; import java.io.IOException; -import java.util.Optional; import java.util.Set; /** @@ -67,7 +66,40 @@ public FlatVectorsWriter fieldsWriter(SegmentWriteState state) throws IOExceptio return new Lucene99FlatVectorsWriter(state, vectorsScorer); } - private static final IOContext DIRECT_IO_CONTEXT = new IOContext() { + @Override + public FlatVectorsReader fieldsReader(SegmentReadState state) throws IOException { + // only override the context for the random-access use case + SegmentReadState directIOState = state.context.context() == IOContext.Context.DEFAULT + ? new SegmentReadState( + state.directory, + state.segmentInfo, + state.fieldInfos, + new DirectIOContext(state.context.hints()), + state.segmentSuffix + ) + : state; + // Use mmap for merges and direct I/O for searches. 
+ // TODO: Open the mmap file with sequential access instead of random (current behavior). + return new MergeReaderWrapper( + new Lucene99FlatVectorsReader(directIOState, vectorsScorer), + new Lucene99FlatVectorsReader(state, vectorsScorer) + ); + } + + @Override + public String toString() { + return "Lucene99FlatVectorsFormat(" + "vectorsScorer=" + vectorsScorer + ')'; + } + + static class DirectIOContext implements IOContext { + + final Set hints; + + DirectIOContext(Set hints) { + // always add DirectIOHint to the hints given + this.hints = Sets.union(hints, Set.of(DirectIOHint.INSTANCE)); + } + @Override public Context context() { return Context.DEFAULT; @@ -85,44 +117,12 @@ public FlushInfo flushInfo() { @Override public Set hints() { - return Set.of(DirectIOHint.INSTANCE); + return hints; } @Override public IOContext withHints(FileOpenHint... hints) { - return this; - } - - @Override - public Optional readAdvice() { - return Optional.empty(); - } - - @Override - public IOContext withReadAdvice(ReadAdvice advice) { - return this; + return new DirectIOContext(Set.of(hints)); } - }; - - @Override - public FlatVectorsReader fieldsReader(SegmentReadState state) throws IOException { - SegmentReadState directIOState = new SegmentReadState( - state.directory, - state.segmentInfo, - state.fieldInfos, - DIRECT_IO_CONTEXT, - state.segmentSuffix - ); - // Use mmap for merges and direct I/O for searches. - // TODO: Open the mmap file with sequential access instead of random (current behavior). 
- return new MergeReaderWrapper( - new Lucene99FlatVectorsReader(directIOState, vectorsScorer), - new Lucene99FlatVectorsReader(state, vectorsScorer) - ); - } - - @Override - public String toString() { - return "Lucene99FlatVectorsFormat(" + "vectorsScorer=" + vectorsScorer + ')'; } } diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsReader.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsReader.java index 8e547f9b3fa20..6d7eee650e902 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsReader.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsReader.java @@ -35,9 +35,11 @@ import org.apache.lucene.search.KnnCollector; import org.apache.lucene.search.VectorScorer; import org.apache.lucene.store.ChecksumIndexInput; +import org.apache.lucene.store.DataAccessHint; +import org.apache.lucene.store.FileDataHint; +import org.apache.lucene.store.FileTypeHint; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; -import org.apache.lucene.store.ReadAdvice; import org.apache.lucene.util.Bits; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.RamUsageEstimator; @@ -107,7 +109,7 @@ public class ES818BinaryQuantizedVectorsReader extends FlatVectorsReader { ES818BinaryQuantizedVectorsFormat.VECTOR_DATA_CODEC_NAME, // Quantized vectors are accessed randomly from their node ID stored in the HNSW // graph. 
- state.context.withReadAdvice(ReadAdvice.RANDOM) + state.context.withHints(FileTypeHint.DATA, FileDataHint.KNN_VECTORS, DataAccessHint.RANDOM) ); success = true; } finally { diff --git a/server/src/main/java/org/elasticsearch/index/store/Store.java b/server/src/main/java/org/elasticsearch/index/store/Store.java index af28bc3bb32d3..d4f2fcb23baab 100644 --- a/server/src/main/java/org/elasticsearch/index/store/Store.java +++ b/server/src/main/java/org/elasticsearch/index/store/Store.java @@ -27,13 +27,13 @@ import org.apache.lucene.store.BufferedChecksum; import org.apache.lucene.store.ByteArrayDataInput; import org.apache.lucene.store.ChecksumIndexInput; +import org.apache.lucene.store.DataAccessHint; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.Lock; import org.apache.lucene.store.NIOFSDirectory; -import org.apache.lucene.store.ReadAdvice; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.Version; @@ -153,7 +153,7 @@ public class Store extends AbstractIndexShardComponent implements Closeable, Ref // while equivalent, these different read once contexts are checked by identity in directory implementations private static IOContext createReadOnceContext() { - var context = IOContext.READONCE.withReadAdvice(ReadAdvice.SEQUENTIAL); + var context = IOContext.READONCE.withHints(DataAccessHint.SEQUENTIAL); assert context != IOContext.READONCE; assert context.equals(IOContext.READONCE); return context; diff --git a/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec b/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec index 1fbdaea9c772a..971db6dcc032c 100644 --- a/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec +++ 
b/server/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec @@ -2,3 +2,4 @@ org.elasticsearch.index.codec.Elasticsearch814Codec org.elasticsearch.index.codec.Elasticsearch816Codec org.elasticsearch.index.codec.Elasticsearch900Codec org.elasticsearch.index.codec.Elasticsearch900Lucene101Codec +org.elasticsearch.index.codec.Elasticsearch92Lucene103Codec diff --git a/server/src/test/java/org/elasticsearch/action/admin/indices/diskusage/IndexDiskUsageAnalyzerTests.java b/server/src/test/java/org/elasticsearch/action/admin/indices/diskusage/IndexDiskUsageAnalyzerTests.java index fdcfe5e3720f8..58b847d7a87a1 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/indices/diskusage/IndexDiskUsageAnalyzerTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/indices/diskusage/IndexDiskUsageAnalyzerTests.java @@ -12,7 +12,7 @@ import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.lucene101.Lucene101Codec; +import org.apache.lucene.codecs.lucene103.Lucene103Codec; import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat; import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat; import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat; @@ -326,7 +326,7 @@ public void testTriangle() throws Exception { public void testCompletionField() throws Exception { IndexWriterConfig config = new IndexWriterConfig().setCommitOnClose(true) .setUseCompoundFile(false) - .setCodec(new Lucene101Codec(Lucene101Codec.Mode.BEST_SPEED) { + .setCodec(new Lucene103Codec(Lucene103Codec.Mode.BEST_SPEED) { @Override public PostingsFormat getPostingsFormatForField(String field) { if (field.startsWith("suggest_")) { @@ -432,25 +432,25 @@ private static void addFieldsToDoc(Document doc, IndexableField[] fields) { enum CodecMode { BEST_SPEED { @Override - Lucene101Codec.Mode mode() { - return 
Lucene101Codec.Mode.BEST_SPEED; + Lucene103Codec.Mode mode() { + return Lucene103Codec.Mode.BEST_SPEED; } }, BEST_COMPRESSION { @Override - Lucene101Codec.Mode mode() { - return Lucene101Codec.Mode.BEST_COMPRESSION; + Lucene103Codec.Mode mode() { + return Lucene103Codec.Mode.BEST_COMPRESSION; } }; - abstract Lucene101Codec.Mode mode(); + abstract Lucene103Codec.Mode mode(); } static void indexRandomly(Directory directory, CodecMode codecMode, int numDocs, Consumer addFields) throws IOException { IndexWriterConfig config = new IndexWriterConfig().setCommitOnClose(true) .setUseCompoundFile(randomBoolean()) - .setCodec(new Lucene101Codec(codecMode.mode())); + .setCodec(new Lucene103Codec(codecMode.mode())); try (IndexWriter writer = new IndexWriter(directory, config)) { for (int i = 0; i < numDocs; i++) { final Document doc = new Document(); @@ -662,7 +662,7 @@ static void rewriteIndexWithPerFieldCodec(Directory source, CodecMode mode, Dire try (DirectoryReader reader = DirectoryReader.open(source)) { IndexWriterConfig config = new IndexWriterConfig().setSoftDeletesField(Lucene.SOFT_DELETES_FIELD) .setUseCompoundFile(randomBoolean()) - .setCodec(new Lucene101Codec(mode.mode()) { + .setCodec(new Lucene103Codec(mode.mode()) { @Override public PostingsFormat getPostingsFormatForField(String field) { return new ES812PostingsFormat(); diff --git a/server/src/test/java/org/elasticsearch/common/lucene/search/function/MinScoreScorerTests.java b/server/src/test/java/org/elasticsearch/common/lucene/search/function/MinScoreScorerTests.java index 55ca666d8588b..b44c9e06bc059 100644 --- a/server/src/test/java/org/elasticsearch/common/lucene/search/function/MinScoreScorerTests.java +++ b/server/src/test/java/org/elasticsearch/common/lucene/search/function/MinScoreScorerTests.java @@ -142,7 +142,7 @@ public float getMaxScore(int upTo) throws IOException { random(), new ScoreMode[] { ScoreMode.COMPLETE, ScoreMode.TOP_SCORES, ScoreMode.TOP_DOCS_WITH_SCORES } ); - final Scorer 
assertingScorer = AssertingScorer.wrap(random(), scorer, scoreMode, true); + final Scorer assertingScorer = AssertingScorer.wrap(scorer, scoreMode.needsScores(), true); if (twoPhase && randomBoolean()) { return hideTwoPhaseIterator(assertingScorer); } else { diff --git a/server/src/test/java/org/elasticsearch/index/codec/CodecTests.java b/server/src/test/java/org/elasticsearch/index/codec/CodecTests.java index 23ee616d54231..a2ff440facaf0 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/CodecTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/CodecTests.java @@ -50,7 +50,7 @@ public void testResolveDefaultCodecs() throws Exception { assumeTrue("Only when zstd_stored_fields feature flag is enabled", CodecService.ZSTD_STORED_FIELDS_FEATURE_FLAG); CodecService codecService = createCodecService(); assertThat(codecService.codec("default"), instanceOf(PerFieldMapperCodec.class)); - assertThat(codecService.codec("default"), instanceOf(Elasticsearch900Lucene101Codec.class)); + assertThat(codecService.codec("default"), instanceOf(Elasticsearch92Lucene103Codec.class)); } public void testDefault() throws Exception { diff --git a/server/src/test/java/org/elasticsearch/index/codec/PerFieldMapperCodecTests.java b/server/src/test/java/org/elasticsearch/index/codec/PerFieldMapperCodecTests.java index 0941ac17d6a90..2afddca3f0876 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/PerFieldMapperCodecTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/PerFieldMapperCodecTests.java @@ -10,7 +10,7 @@ package org.elasticsearch.index.codec; import org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.lucene101.Lucene101PostingsFormat; +import org.apache.lucene.codecs.lucene103.Lucene103PostingsFormat; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.common.compress.CompressedXContent; import org.elasticsearch.common.settings.Settings; @@ -94,8 +94,8 @@ public void 
testUseBloomFilter() throws IOException { assertThat(perFieldMapperCodec.getPostingsFormatForField("_id"), instanceOf(ES87BloomFilterPostingsFormat.class)); assertThat(perFieldMapperCodec.useBloomFilter("another_field"), is(false)); - Class expectedPostingsFormat = PerFieldFormatSupplier.USE_LUCENE101_POSTINGS_FORMAT.isEnabled() - && timeSeries == false ? Lucene101PostingsFormat.class : ES812PostingsFormat.class; + Class expectedPostingsFormat = PerFieldFormatSupplier.USE_DEFAULT_LUCENE_POSTINGS_FORMAT.isEnabled() + && timeSeries == false ? Lucene103PostingsFormat.class : ES812PostingsFormat.class; assertThat(perFieldMapperCodec.getPostingsFormatForField("another_field"), instanceOf(expectedPostingsFormat)); } @@ -110,8 +110,8 @@ public void testUseBloomFilterWithTimestampFieldEnabled() throws IOException { public void testUseBloomFilterWithTimestampFieldEnabled_noTimeSeriesMode() throws IOException { PerFieldFormatSupplier perFieldMapperCodec = createFormatSupplier(true, false, false); assertThat(perFieldMapperCodec.useBloomFilter("_id"), is(false)); - Class expectedPostingsFormat = PerFieldFormatSupplier.USE_LUCENE101_POSTINGS_FORMAT.isEnabled() - ? Lucene101PostingsFormat.class + Class expectedPostingsFormat = PerFieldFormatSupplier.USE_DEFAULT_LUCENE_POSTINGS_FORMAT.isEnabled() + ? 
Lucene103PostingsFormat.class : ES812PostingsFormat.class; assertThat(perFieldMapperCodec.getPostingsFormatForField("_id"), instanceOf(expectedPostingsFormat)); } diff --git a/server/src/test/java/org/elasticsearch/index/codec/postings/ES812PostingsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/postings/ES812PostingsFormatTests.java index b11ab47102288..f59e075d6ec5a 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/postings/ES812PostingsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/postings/ES812PostingsFormatTests.java @@ -19,10 +19,10 @@ */ package org.elasticsearch.index.codec.postings; +import org.apache.lucene.backward_codecs.lucene90.blocktree.FieldReader; +import org.apache.lucene.backward_codecs.lucene90.blocktree.Stats; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.CompetitiveImpactAccumulator; -import org.apache.lucene.codecs.lucene90.blocktree.FieldReader; -import org.apache.lucene.codecs.lucene90.blocktree.Stats; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.DirectoryReader; diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesCodecDuelTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesCodecDuelTests.java index f0ce28f11a51a..e711f032a9f68 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesCodecDuelTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesCodecDuelTests.java @@ -26,7 +26,7 @@ import org.apache.lucene.tests.index.RandomIndexWriter; import org.apache.lucene.tests.util.TestUtil; import org.apache.lucene.util.BytesRef; -import org.elasticsearch.index.codec.Elasticsearch900Lucene101Codec; +import org.elasticsearch.index.codec.Elasticsearch92Lucene103Codec; import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormatTests.TestES87TSDBDocValuesFormat; import 
org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat; import org.elasticsearch.test.ESTestCase; @@ -55,7 +55,7 @@ public void testDuel() throws IOException { baselineConfig.setCodec(TestUtil.alwaysDocValuesFormat(new Lucene90DocValuesFormat())); var contenderConf = newIndexWriterConfig(); contenderConf.setMergePolicy(mergePolicy); - Codec codec = new Elasticsearch900Lucene101Codec() { + Codec codec = new Elasticsearch92Lucene103Codec() { final DocValuesFormat docValuesFormat = randomBoolean() ? new ES819TSDBDocValuesFormat() diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesFormatTests.java index a219ebb3740cc..f9e3313d012c4 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesFormatTests.java @@ -12,7 +12,6 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.DocValuesConsumer; -import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.SortedDocValuesField; @@ -34,9 +33,9 @@ import org.apache.lucene.tests.analysis.MockAnalyzer; import org.apache.lucene.tests.index.BaseDocValuesFormatTestCase; import org.apache.lucene.tests.index.RandomIndexWriter; +import org.apache.lucene.tests.util.TestUtil; import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.logging.LogConfigurator; -import org.elasticsearch.index.codec.Elasticsearch900Lucene101Codec; import java.io.IOException; import java.util.ArrayList; @@ -53,7 +52,6 @@ public class ES87TSDBDocValuesFormatTests extends BaseDocValuesFormatTestCase { private static final int NUM_DOCS = 10; static { - // For Elasticsearch900Lucene101Codec: 
LogConfigurator.loadLog4jPlugins(); LogConfigurator.configureESLogging(); } @@ -74,13 +72,7 @@ public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOExcept } } - private final Codec codec = new Elasticsearch900Lucene101Codec() { - - @Override - public DocValuesFormat getDocValuesFormatForField(String field) { - return new TestES87TSDBDocValuesFormat(); - } - }; + private final Codec codec = TestUtil.alwaysDocValuesFormat(new TestES87TSDBDocValuesFormat()); @Override protected Codec getCodec() { diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/TsdbDocValueBwcTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/TsdbDocValueBwcTests.java index 9c41e7a80ed66..f9426347e7ca1 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/TsdbDocValueBwcTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/TsdbDocValueBwcTests.java @@ -33,7 +33,7 @@ import org.elasticsearch.cluster.metadata.DataStream; import org.elasticsearch.core.SuppressForbidden; import org.elasticsearch.index.codec.Elasticsearch816Codec; -import org.elasticsearch.index.codec.Elasticsearch900Lucene101Codec; +import org.elasticsearch.index.codec.Elasticsearch92Lucene103Codec; import org.elasticsearch.index.codec.perfield.XPerFieldDocValuesFormat; import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormatTests.TestES87TSDBDocValuesFormat; import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat; @@ -64,7 +64,7 @@ public DocValuesFormat getDocValuesFormatForField(String field) { return docValuesFormat; } }; - var newCodec = new Elasticsearch900Lucene101Codec() { + var newCodec = new Elasticsearch92Lucene103Codec() { final DocValuesFormat docValuesFormat = new ES819TSDBDocValuesFormat(); diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java index 
368d6f23d0fa1..ff27e1de5b82d 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java @@ -10,7 +10,6 @@ package org.elasticsearch.index.codec.tsdb.es819; import org.apache.lucene.codecs.Codec; -import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.document.BinaryDocValuesField; import org.apache.lucene.document.Document; import org.apache.lucene.document.NumericDocValuesField; @@ -25,9 +24,9 @@ import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortedNumericSortField; +import org.apache.lucene.tests.util.TestUtil; import org.apache.lucene.util.BytesRef; import org.elasticsearch.cluster.metadata.DataStream; -import org.elasticsearch.index.codec.Elasticsearch900Lucene101Codec; import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormatTests; import java.util.Arrays; @@ -35,15 +34,7 @@ public class ES819TSDBDocValuesFormatTests extends ES87TSDBDocValuesFormatTests { - private final Codec codec = new Elasticsearch900Lucene101Codec() { - - final ES819TSDBDocValuesFormat docValuesFormat = new ES819TSDBDocValuesFormat(); - - @Override - public DocValuesFormat getDocValuesFormatForField(String field) { - return docValuesFormat; - } - }; + final Codec codec = TestUtil.alwaysDocValuesFormat(new ES819TSDBDocValuesFormat()); @Override protected Codec getCodec() { diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES813FlatVectorFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/ES813FlatVectorFormatTests.java index 9ea13750d8cd7..6b6c3056f4cb5 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES813FlatVectorFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/ES813FlatVectorFormatTests.java @@ -10,9 +10,7 @@ package 
org.elasticsearch.index.codec.vectors; import org.apache.lucene.codecs.Codec; -import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.KnnVectorsReader; -import org.apache.lucene.codecs.lucene101.Lucene101Codec; import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; import org.apache.lucene.document.Document; import org.apache.lucene.document.KnnFloatVectorField; @@ -23,6 +21,7 @@ import org.apache.lucene.index.LeafReader; import org.apache.lucene.store.Directory; import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase; +import org.apache.lucene.tests.util.TestUtil; import org.elasticsearch.common.logging.LogConfigurator; import java.io.IOException; @@ -36,14 +35,11 @@ public class ES813FlatVectorFormatTests extends BaseKnnVectorsFormatTestCase { LogConfigurator.configureESLogging(); // native access requires logging to be initialized } + final Codec codec = TestUtil.alwaysKnnVectorsFormat(new ES813FlatVectorFormat()); + @Override protected Codec getCodec() { - return new Lucene101Codec() { - @Override - public KnnVectorsFormat getKnnVectorsFormatForField(String field) { - return new ES813FlatVectorFormat(); - } - }; + return codec; } public void testSearchWithVisitedLimit() { diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES813Int8FlatVectorFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/ES813Int8FlatVectorFormatTests.java index 6a29bf08ae9a5..402f793ed7987 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES813Int8FlatVectorFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/ES813Int8FlatVectorFormatTests.java @@ -10,9 +10,7 @@ package org.elasticsearch.index.codec.vectors; import org.apache.lucene.codecs.Codec; -import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.KnnVectorsReader; -import org.apache.lucene.codecs.lucene101.Lucene101Codec; import 
org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; import org.apache.lucene.document.Document; import org.apache.lucene.document.KnnFloatVectorField; @@ -23,6 +21,7 @@ import org.apache.lucene.index.LeafReader; import org.apache.lucene.store.Directory; import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase; +import org.apache.lucene.tests.util.TestUtil; import org.elasticsearch.common.logging.LogConfigurator; import java.io.IOException; @@ -36,14 +35,11 @@ public class ES813Int8FlatVectorFormatTests extends BaseKnnVectorsFormatTestCase LogConfigurator.configureESLogging(); // native access requires logging to be initialized } + final Codec codec = TestUtil.alwaysKnnVectorsFormat(new ES813Int8FlatVectorFormat()); + @Override protected Codec getCodec() { - return new Lucene101Codec() { - @Override - public KnnVectorsFormat getKnnVectorsFormatForField(String field) { - return new ES813Int8FlatVectorFormat(); - } - }; + return codec; } public void testSearchWithVisitedLimit() { diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES814HnswScalarQuantizedVectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/ES814HnswScalarQuantizedVectorsFormatTests.java index b064321a70ae8..355dd07934eca 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES814HnswScalarQuantizedVectorsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/ES814HnswScalarQuantizedVectorsFormatTests.java @@ -10,9 +10,7 @@ package org.elasticsearch.index.codec.vectors; import org.apache.lucene.codecs.Codec; -import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.KnnVectorsReader; -import org.apache.lucene.codecs.lucene101.Lucene101Codec; import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; @@ -29,6 +27,7 @@ import org.apache.lucene.store.Directory; import 
org.apache.lucene.store.MMapDirectory; import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase; +import org.apache.lucene.tests.util.TestUtil; import org.elasticsearch.common.logging.LogConfigurator; import java.io.IOException; @@ -45,14 +44,11 @@ public class ES814HnswScalarQuantizedVectorsFormatTests extends BaseKnnVectorsFo LogConfigurator.configureESLogging(); // native access requires logging to be initialized } + final Codec codec = TestUtil.alwaysKnnVectorsFormat(new ES814HnswScalarQuantizedVectorsFormat()); + @Override protected Codec getCodec() { - return new Lucene101Codec() { - @Override - public KnnVectorsFormat getKnnVectorsFormatForField(String field) { - return new ES814HnswScalarQuantizedVectorsFormat(); - } - }; + return codec; } // The following test scenarios are similar to their superclass namesakes, diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES815BitFlatVectorFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/ES815BitFlatVectorFormatTests.java index c770a62479d38..c65e43e100c8a 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES815BitFlatVectorFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/ES815BitFlatVectorFormatTests.java @@ -10,9 +10,7 @@ package org.elasticsearch.index.codec.vectors; import org.apache.lucene.codecs.Codec; -import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.KnnVectorsReader; -import org.apache.lucene.codecs.lucene101.Lucene101Codec; import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; import org.apache.lucene.document.Document; import org.apache.lucene.document.KnnByteVectorField; @@ -23,20 +21,18 @@ import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.util.TestUtil; import org.junit.Before; import java.io.IOException; public class 
ES815BitFlatVectorFormatTests extends BaseKnnBitVectorsFormatTestCase { + final Codec codec = TestUtil.alwaysKnnVectorsFormat(new ES815BitFlatVectorFormat()); + @Override protected Codec getCodec() { - return new Lucene101Codec() { - @Override - public KnnVectorsFormat getKnnVectorsFormatForField(String field) { - return new ES815BitFlatVectorFormat(); - } - }; + return codec; } @Before diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES815HnswBitVectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/ES815HnswBitVectorsFormatTests.java index 544dccc73c070..fce4fdf748452 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES815HnswBitVectorsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/ES815HnswBitVectorsFormatTests.java @@ -10,9 +10,7 @@ package org.elasticsearch.index.codec.vectors; import org.apache.lucene.codecs.Codec; -import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.KnnVectorsReader; -import org.apache.lucene.codecs.lucene101.Lucene101Codec; import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; import org.apache.lucene.document.Document; import org.apache.lucene.document.KnnByteVectorField; @@ -23,20 +21,18 @@ import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.util.TestUtil; import org.junit.Before; import java.io.IOException; public class ES815HnswBitVectorsFormatTests extends BaseKnnBitVectorsFormatTestCase { + final Codec codec = TestUtil.alwaysKnnVectorsFormat(new ES815HnswBitVectorsFormat()); + @Override protected Codec getCodec() { - return new Lucene101Codec() { - @Override - public KnnVectorsFormat getKnnVectorsFormatForField(String field) { - return new ES815HnswBitVectorsFormat(); - } - }; + return codec; } @Before diff --git 
a/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsFormatTests.java index 6aec7b595ce8b..07e2b0568c6fd 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsFormatTests.java @@ -23,7 +23,6 @@ import org.apache.lucene.codecs.FilterCodec; import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.KnnVectorsReader; -import org.apache.lucene.codecs.lucene101.Lucene101Codec; import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; import org.apache.lucene.document.Document; import org.apache.lucene.document.KnnFloatVectorField; @@ -43,6 +42,7 @@ import org.apache.lucene.search.TotalHits; import org.apache.lucene.store.Directory; import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase; +import org.apache.lucene.tests.util.TestUtil; import org.elasticsearch.common.logging.LogConfigurator; import org.elasticsearch.index.codec.vectors.BQVectorUtils; @@ -62,14 +62,11 @@ public class ES816BinaryQuantizedVectorsFormatTests extends BaseKnnVectorsFormat LogConfigurator.configureESLogging(); // native access requires logging to be initialized } + final Codec codec = TestUtil.alwaysKnnVectorsFormat(new ES816BinaryQuantizedRWVectorsFormat()); + @Override protected Codec getCodec() { - return new Lucene101Codec() { - @Override - public KnnVectorsFormat getKnnVectorsFormatForField(String field) { - return new ES816BinaryQuantizedRWVectorsFormat(); - } - }; + return codec; } public void testSearch() throws Exception { diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816HnswBinaryQuantizedVectorsFormatTests.java 
b/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816HnswBinaryQuantizedVectorsFormatTests.java index a80240e228efc..0bba6e7248068 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816HnswBinaryQuantizedVectorsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816HnswBinaryQuantizedVectorsFormatTests.java @@ -23,7 +23,6 @@ import org.apache.lucene.codecs.FilterCodec; import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.KnnVectorsReader; -import org.apache.lucene.codecs.lucene101.Lucene101Codec; import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader; import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; import org.apache.lucene.document.Document; @@ -39,6 +38,7 @@ import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase; +import org.apache.lucene.tests.util.TestUtil; import org.apache.lucene.util.SameThreadExecutorService; import org.elasticsearch.common.logging.LogConfigurator; @@ -59,14 +59,11 @@ public class ES816HnswBinaryQuantizedVectorsFormatTests extends BaseKnnVectorsFo LogConfigurator.configureESLogging(); // native access requires logging to be initialized } + final Codec codec = TestUtil.alwaysKnnVectorsFormat(new ES816HnswBinaryQuantizedRWVectorsFormat()); + @Override protected Codec getCodec() { - return new Lucene101Codec() { - @Override - public KnnVectorsFormat getKnnVectorsFormatForField(String field) { - return new ES816HnswBinaryQuantizedRWVectorsFormat(); - } - }; + return codec; } public void testToString() { diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormatTests.java index 26bd905270f83..b52924e3f995c 100644 --- 
a/server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsFormatTests.java @@ -23,7 +23,6 @@ import org.apache.lucene.codecs.FilterCodec; import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.KnnVectorsReader; -import org.apache.lucene.codecs.lucene101.Lucene101Codec; import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; import org.apache.lucene.document.Document; import org.apache.lucene.document.KnnFloatVectorField; @@ -43,6 +42,7 @@ import org.apache.lucene.search.TotalHits; import org.apache.lucene.store.Directory; import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase; +import org.apache.lucene.tests.util.TestUtil; import org.elasticsearch.common.logging.LogConfigurator; import org.elasticsearch.index.codec.vectors.BQVectorUtils; import org.elasticsearch.index.codec.vectors.OptimizedScalarQuantizer; @@ -63,14 +63,11 @@ public class ES818BinaryQuantizedVectorsFormatTests extends BaseKnnVectorsFormat LogConfigurator.configureESLogging(); // native access requires logging to be initialized } + final Codec codec = TestUtil.alwaysKnnVectorsFormat(new ES818BinaryQuantizedVectorsFormat()); + @Override protected Codec getCodec() { - return new Lucene101Codec() { - @Override - public KnnVectorsFormat getKnnVectorsFormatForField(String field) { - return new ES818BinaryQuantizedVectorsFormat(); - } - }; + return codec; } public void testSearch() throws Exception { diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818HnswBinaryQuantizedVectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818HnswBinaryQuantizedVectorsFormatTests.java index cf7b710ef3093..3ba7a13b5a59e 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818HnswBinaryQuantizedVectorsFormatTests.java +++ 
b/server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818HnswBinaryQuantizedVectorsFormatTests.java @@ -23,7 +23,6 @@ import org.apache.lucene.codecs.FilterCodec; import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.KnnVectorsReader; -import org.apache.lucene.codecs.lucene101.Lucene101Codec; import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader; import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; import org.apache.lucene.document.Document; @@ -39,6 +38,7 @@ import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase; +import org.apache.lucene.tests.util.TestUtil; import org.apache.lucene.util.SameThreadExecutorService; import org.elasticsearch.common.logging.LogConfigurator; @@ -59,14 +59,11 @@ public class ES818HnswBinaryQuantizedVectorsFormatTests extends BaseKnnVectorsFo LogConfigurator.configureESLogging(); // native access requires logging to be initialized } + final Codec codec = TestUtil.alwaysKnnVectorsFormat(new ES818HnswBinaryQuantizedVectorsFormat()); + @Override protected Codec getCodec() { - return new Lucene101Codec() { - @Override - public KnnVectorsFormat getKnnVectorsFormatForField(String field) { - return new ES818HnswBinaryQuantizedVectorsFormat(); - } - }; + return codec; } public void testToString() { diff --git a/server/src/test/java/org/elasticsearch/index/codec/zstd/StoredFieldCodecDuelTests.java b/server/src/test/java/org/elasticsearch/index/codec/zstd/StoredFieldCodecDuelTests.java index 0e5732ec09e5b..34b360d797930 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/zstd/StoredFieldCodecDuelTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/zstd/StoredFieldCodecDuelTests.java @@ -10,7 +10,7 @@ package org.elasticsearch.index.codec.zstd; import org.apache.lucene.codecs.Codec; -import org.apache.lucene.codecs.lucene101.Lucene101Codec; +import 
org.apache.lucene.codecs.lucene103.Lucene103Codec; import org.apache.lucene.document.Document; import org.apache.lucene.document.StoredField; import org.apache.lucene.index.DirectoryReader; @@ -35,13 +35,13 @@ public class StoredFieldCodecDuelTests extends ESTestCase { private static final String DOUBLE_FIELD = "double_field_5"; public void testDuelBestSpeed() throws IOException { - var baseline = new LegacyPerFieldMapperCodec(Lucene101Codec.Mode.BEST_SPEED, null, BigArrays.NON_RECYCLING_INSTANCE); + var baseline = new LegacyPerFieldMapperCodec(Lucene103Codec.Mode.BEST_SPEED, null, BigArrays.NON_RECYCLING_INSTANCE); var contender = new PerFieldMapperCodec(Zstd814StoredFieldsFormat.Mode.BEST_SPEED, null, BigArrays.NON_RECYCLING_INSTANCE); doTestDuel(baseline, contender); } public void testDuelBestCompression() throws IOException { - var baseline = new LegacyPerFieldMapperCodec(Lucene101Codec.Mode.BEST_COMPRESSION, null, BigArrays.NON_RECYCLING_INSTANCE); + var baseline = new LegacyPerFieldMapperCodec(Lucene103Codec.Mode.BEST_COMPRESSION, null, BigArrays.NON_RECYCLING_INSTANCE); var contender = new PerFieldMapperCodec(Zstd814StoredFieldsFormat.Mode.BEST_COMPRESSION, null, BigArrays.NON_RECYCLING_INSTANCE); doTestDuel(baseline, contender); } diff --git a/server/src/test/java/org/elasticsearch/index/codec/zstd/Zstd814BestCompressionStoredFieldsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/zstd/Zstd814BestCompressionStoredFieldsFormatTests.java index b6fefcb9a4e98..f89fa52256e15 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/zstd/Zstd814BestCompressionStoredFieldsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/zstd/Zstd814BestCompressionStoredFieldsFormatTests.java @@ -11,11 +11,11 @@ import org.apache.lucene.codecs.Codec; import org.apache.lucene.tests.index.BaseStoredFieldsFormatTestCase; -import org.elasticsearch.index.codec.Elasticsearch900Lucene101Codec; +import 
org.elasticsearch.index.codec.Elasticsearch92Lucene103Codec; public class Zstd814BestCompressionStoredFieldsFormatTests extends BaseStoredFieldsFormatTestCase { - private final Codec codec = new Elasticsearch900Lucene101Codec(Zstd814StoredFieldsFormat.Mode.BEST_COMPRESSION); + private final Codec codec = new Elasticsearch92Lucene103Codec(Zstd814StoredFieldsFormat.Mode.BEST_COMPRESSION); @Override protected Codec getCodec() { diff --git a/server/src/test/java/org/elasticsearch/index/codec/zstd/Zstd814BestSpeedStoredFieldsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/zstd/Zstd814BestSpeedStoredFieldsFormatTests.java index 98318707f6c4b..f3d120ed185e7 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/zstd/Zstd814BestSpeedStoredFieldsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/zstd/Zstd814BestSpeedStoredFieldsFormatTests.java @@ -11,11 +11,11 @@ import org.apache.lucene.codecs.Codec; import org.apache.lucene.tests.index.BaseStoredFieldsFormatTestCase; -import org.elasticsearch.index.codec.Elasticsearch900Lucene101Codec; +import org.elasticsearch.index.codec.Elasticsearch92Lucene103Codec; public class Zstd814BestSpeedStoredFieldsFormatTests extends BaseStoredFieldsFormatTestCase { - private final Codec codec = new Elasticsearch900Lucene101Codec(Zstd814StoredFieldsFormat.Mode.BEST_SPEED); + private final Codec codec = new Elasticsearch92Lucene103Codec(Zstd814StoredFieldsFormat.Mode.BEST_SPEED); @Override protected Codec getCodec() { diff --git a/server/src/test/java/org/elasticsearch/index/engine/CompletionStatsCacheTests.java b/server/src/test/java/org/elasticsearch/index/engine/CompletionStatsCacheTests.java index 1343078906d6f..835a6457973fe 100644 --- a/server/src/test/java/org/elasticsearch/index/engine/CompletionStatsCacheTests.java +++ b/server/src/test/java/org/elasticsearch/index/engine/CompletionStatsCacheTests.java @@ -8,8 +8,6 @@ */ package org.elasticsearch.index.engine; -import 
org.apache.lucene.codecs.PostingsFormat; -import org.apache.lucene.codecs.lucene101.Lucene101Codec; import org.apache.lucene.document.Document; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexWriter; @@ -17,6 +15,7 @@ import org.apache.lucene.search.suggest.document.Completion101PostingsFormat; import org.apache.lucene.search.suggest.document.SuggestField; import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.util.TestUtil; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.core.IOUtils; import org.elasticsearch.index.cache.query.TrivialQueryCachingPolicy; @@ -44,13 +43,7 @@ public void testExceptionsAreNotCached() { public void testCompletionStatsCache() throws IOException, InterruptedException { final IndexWriterConfig indexWriterConfig = newIndexWriterConfig(); - final PostingsFormat postingsFormat = new Completion101PostingsFormat(); - indexWriterConfig.setCodec(new Lucene101Codec() { - @Override - public PostingsFormat getPostingsFormatForField(String field) { - return postingsFormat; // all fields are suggest fields - } - }); + indexWriterConfig.setCodec(TestUtil.alwaysPostingsFormat(new Completion101PostingsFormat())); try (Directory directory = newDirectory(); IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig)) { diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DateFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DateFieldTypeTests.java index 16c2c5ca5ddb8..aa444b58bc580 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/DateFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/DateFieldTypeTests.java @@ -317,7 +317,7 @@ public void testRangeQuery() throws IOException { Query expected = new IndexOrDocValuesQuery( LongPoint.newRangeQuery("field", instant1, instant2), SortedNumericDocValuesField.newSlowRangeQuery("field", instant1, instant2) - ); + ).rewrite(newSearcher(new 
MultiReader())); assertEquals(expected, ft.rangeQuery(date1, date2, true, true, null, null, null, context).rewrite(newSearcher(new MultiReader()))); MappedFieldType ft2 = new DateFieldType("field", false); diff --git a/server/src/test/java/org/elasticsearch/search/query/QueryPhaseTests.java b/server/src/test/java/org/elasticsearch/search/query/QueryPhaseTests.java index 0d7f16211aa51..9f2fb0d91a1cc 100644 --- a/server/src/test/java/org/elasticsearch/search/query/QueryPhaseTests.java +++ b/server/src/test/java/org/elasticsearch/search/query/QueryPhaseTests.java @@ -815,7 +815,8 @@ public void testNumericSortOptimization() throws Exception { final SortAndFormats formatsLongDate = new SortAndFormats(sortLongDate, new DocValueFormat[] { DocValueFormat.RAW, dvFormatDate }); final SortAndFormats formatsDateLong = new SortAndFormats(sortDateLong, new DocValueFormat[] { dvFormatDate, DocValueFormat.RAW }); - Query q = LongPoint.newRangeQuery(fieldNameLong, startLongValue, startLongValue + numDocs); + // query all but one doc to avoid optimizations that may rewrite to a MatchAllDocs, which simplifies assertions + Query q = LongPoint.newRangeQuery(fieldNameLong, startLongValue, startLongValue + numDocs - 2); // 1. 
Test sort optimization on long field try (TestSearchContext searchContext = createContext(newContextSearcher(reader), q)) { @@ -883,7 +884,7 @@ public void testNumericSortOptimization() throws Exception { QueryPhase.addCollectorsAndSearch(searchContext); assertTrue(searchContext.sort().sort.getSort()[0].getOptimizeSortWithPoints()); assertThat(searchContext.queryResult().topDocs().topDocs.scoreDocs, arrayWithSize(0)); - assertThat(searchContext.queryResult().topDocs().topDocs.totalHits.value(), equalTo((long) numDocs)); + assertThat(searchContext.queryResult().topDocs().topDocs.totalHits.value(), equalTo((long) numDocs - 1)); assertThat(searchContext.queryResult().topDocs().topDocs.totalHits.relation(), equalTo(TotalHits.Relation.EQUAL_TO)); } @@ -994,8 +995,7 @@ public void testMinScore() throws Exception { QueryPhase.addCollectorsAndSearch(context); TotalHits totalHits = context.queryResult().topDocs().topDocs.totalHits; assertThat(totalHits.value(), greaterThanOrEqualTo(5L)); - var expectedRelation = totalHits.value() == 10 ? 
Relation.EQUAL_TO : Relation.GREATER_THAN_OR_EQUAL_TO; - assertThat(totalHits.relation(), is(expectedRelation)); + assertThat(totalHits.relation(), is(Relation.GREATER_THAN_OR_EQUAL_TO)); } } diff --git a/server/src/test/java/org/elasticsearch/search/vectors/RescoreKnnVectorQueryTests.java b/server/src/test/java/org/elasticsearch/search/vectors/RescoreKnnVectorQueryTests.java index 05b7bc9ef4f82..bab62994e2a06 100644 --- a/server/src/test/java/org/elasticsearch/search/vectors/RescoreKnnVectorQueryTests.java +++ b/server/src/test/java/org/elasticsearch/search/vectors/RescoreKnnVectorQueryTests.java @@ -29,7 +29,7 @@ import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.Weight; import org.apache.lucene.store.Directory; -import org.elasticsearch.index.codec.Elasticsearch900Lucene101Codec; +import org.elasticsearch.index.codec.Elasticsearch92Lucene103Codec; import org.elasticsearch.index.codec.vectors.ES813Int8FlatVectorFormat; import org.elasticsearch.index.codec.vectors.ES814HnswScalarQuantizedVectorsFormat; import org.elasticsearch.index.codec.vectors.es818.ES818BinaryQuantizedVectorsFormat; @@ -211,7 +211,7 @@ private static void addRandomDocuments(int numDocs, Directory d, int numDims) th new ES813Int8FlatVectorFormat(), new ES814HnswScalarQuantizedVectorsFormat() ); - iwc.setCodec(new Elasticsearch900Lucene101Codec(randomFrom(Zstd814StoredFieldsFormat.Mode.values())) { + iwc.setCodec(new Elasticsearch92Lucene103Codec(randomFrom(Zstd814StoredFieldsFormat.Mode.values())) { @Override public KnnVectorsFormat getKnnVectorsFormatForField(String field) { return format; diff --git a/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/FeatureFlag.java b/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/FeatureFlag.java index eea25b85b8548..49b79dc0a1350 100644 --- a/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/FeatureFlag.java +++ 
b/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/FeatureFlag.java @@ -19,7 +19,8 @@ public enum FeatureFlag { TIME_SERIES_MODE("es.index_mode_feature_flag_registered=true", Version.fromString("8.0.0"), null), SUB_OBJECTS_AUTO_ENABLED("es.sub_objects_auto_feature_flag_enabled=true", Version.fromString("8.16.0"), null), DOC_VALUES_SKIPPER("es.doc_values_skipper_feature_flag_enabled=true", Version.fromString("8.18.1"), null), - USE_LUCENE101_POSTINGS_FORMAT("es.use_lucene101_postings_format_feature_flag_enabled=true", Version.fromString("9.1.0"), null); + USE_LUCENE101_POSTINGS_FORMAT("es.use_lucene101_postings_format_feature_flag_enabled=true", Version.fromString("9.1.0"), null), + USE_LUCENE1013POSTINGS_FORMAT("es.use_lucene103_postings_format_feature_flag_enabled=true", Version.fromString("9.2.0"), null); public final String systemProperty; public final Version from; From 09a6f9c8aace73eefff12e5b006026f96be92986 Mon Sep 17 00:00:00 2001 From: Simon Cooper Date: Fri, 23 May 2025 17:26:50 +0100 Subject: [PATCH 044/184] Add an integration test to verify DirectIO is used (#128370) --- .../elasticsearch/index/store/DirectIOIT.java | 85 +++++++++++++++++++ .../index/store/FsDirectoryFactory.java | 1 + 2 files changed, 86 insertions(+) create mode 100644 server/src/internalClusterTest/java/org/elasticsearch/index/store/DirectIOIT.java diff --git a/server/src/internalClusterTest/java/org/elasticsearch/index/store/DirectIOIT.java b/server/src/internalClusterTest/java/org/elasticsearch/index/store/DirectIOIT.java new file mode 100644 index 0000000000000..a968f7d0bb81b --- /dev/null +++ b/server/src/internalClusterTest/java/org/elasticsearch/index/store/DirectIOIT.java @@ -0,0 +1,85 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.index.store; + +import org.apache.logging.log4j.Level; +import org.apache.lucene.tests.util.LuceneTestCase; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.search.vectors.KnnSearchBuilder; +import org.elasticsearch.search.vectors.VectorData; +import org.elasticsearch.test.ESIntegTestCase; +import org.elasticsearch.test.InternalSettingsPlugin; +import org.elasticsearch.test.MockLog; +import org.elasticsearch.test.junit.annotations.TestLogging; + +import java.util.Collection; +import java.util.List; +import java.util.stream.IntStream; + +@LuceneTestCase.SuppressCodecs("*") // only use our own codecs +public class DirectIOIT extends ESIntegTestCase { + + @Override + protected Collection> nodePlugins() { + return List.of(InternalSettingsPlugin.class); + } + + private void indexVectors() { + internalCluster().startNode(); + prepareCreate("vectors").setSettings(Settings.builder().put(InternalSettingsPlugin.USE_COMPOUND_FILE.getKey(), false)) + .setMapping(""" + { + "properties": { + "vector": { + "type": "dense_vector", + "dims": 64, + "element_type": "float", + "index": true, + "similarity": "l2_norm", + "index_options": { + "type": "bbq_flat" + } + } + } + } + """) + .get(); + ensureGreen("vectors"); + + for (int i = 0; i < 1000; i++) { + indexDoc("vectors", Integer.toString(i), "vector", IntStream.range(0, 64).mapToDouble(d -> randomFloat()).toArray()); + } + } + + @TestLogging(value = "org.elasticsearch.index.store.FsDirectoryFactory:DEBUG", reason = "to capture trace logging for direct IO") + public void testDirectIOUsed() { + 
try (MockLog mockLog = MockLog.capture(FsDirectoryFactory.class)) { + // we're just looking for some evidence direct IO is used + mockLog.addExpectation( + new MockLog.PatternSeenEventExpectation( + "Direct IO used", + FsDirectoryFactory.class.getCanonicalName(), + Level.DEBUG, + "Opening .*\\.vec with direct IO" + ) + ); + + indexVectors(); + + // do a search + prepareSearch("vectors").setKnnSearch( + List.of(new KnnSearchBuilder("vector", new VectorData(null, new byte[64]), 10, 20, null, null)) + ).get(); + + mockLog.assertAllExpectationsMatched(); + } + } +} diff --git a/server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java b/server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java index f02c7b6c16158..0561af50cfbd9 100644 --- a/server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java +++ b/server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java @@ -157,6 +157,7 @@ public IndexInput openInput(String name, IOContext context) throws IOException { if (directIODelegate != null && context.hints().contains(DirectIOHint.INSTANCE)) { ensureOpen(); ensureCanRead(name); + Log.debug("Opening {} with direct IO", name); return directIODelegate.openInput(name, context); } else if (useDelegate(name, context)) { // we need to do these checks on the outer directory since the inner doesn't know about pending deletes From 062b47757be8563a2262e7f50718e663e434e87b Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Sat, 24 May 2025 06:13:34 +0000 Subject: [PATCH 045/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-2fa9a91b222 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 9f465bbaace19..2ddf4c0b93a97 100644 --- a/build-tools-internal/version.properties +++ 
b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-b74c6681b5f +lucene = 10.3.0-snapshot-2fa9a91b222 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 10b090e5cf3fd..4656df0d75b6c 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2953,129 +2953,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 09342ab3a74ecefde17b0a8a7e620ea1facef0b9 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Sun, 25 May 2025 06:13:24 +0000 Subject: [PATCH 046/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-2fa9a91b222 --- gradle/verification-metadata.xml | 50 ++++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 4656df0d75b6c..11b8059d29a38 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2955,127 +2955,127 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + From 6eb3ea6da5972352b81184b05969cb2d62452ac6 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Mon, 26 May 2025 06:14:04 +0000 Subject: [PATCH 047/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-ac002e2c4e3 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 2ddf4c0b93a97..a171b5a398f8d 100644 --- 
a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-2fa9a91b222 +lucene = 10.3.0-snapshot-ac002e2c4e3 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 11b8059d29a38..ac38d7d63519e 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2953,129 +2953,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 62283d5a2de8a90d428231f225bc33cc2cc7a35b Mon Sep 17 00:00:00 2001 From: Chris Hegarty <62058229+ChrisHegarty@users.noreply.github.com> Date: Mon, 26 May 2025 04:42:43 -0700 Subject: [PATCH 048/184] Add raw OffHeapByteSize to IVFVectorsReader (#128451) --- .../index/codec/vectors/IVFVectorsReader.java | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsReader.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsReader.java index d8d68569d6159..d130519c7d482 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsReader.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsReader.java @@ -35,6 +35,7 @@ import org.elasticsearch.search.vectors.IVFKnnSearchStrategy; import java.io.IOException; +import java.util.Map; import java.util.function.IntPredicate; import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.SIMILARITY_FUNCTIONS; @@ -323,6 +324,22 @@ abstract NeighborQueue scorePostingLists( int nProbe ) throws IOException; + @Override + public Map getOffHeapByteSize(FieldInfo fieldInfo) { + 
var raw = rawVectorsReader.getOffHeapByteSize(fieldInfo); + FieldEntry fe = fields.get(fieldInfo.number); + if (fe == null) { + assert fieldInfo.getVectorEncoding() == VectorEncoding.BYTE; + return raw; + } + return raw; // for now just return the size of raw + + // TODO: determine desired off-heap requirements + // var centroids = Map.of(EXTENSION, fe.xxxLength()); + // var clusters = Map.of(EXTENSION, fe.yyyLength()); + // return KnnVectorsReader.mergeOffHeapByteSizeMaps(raw, centroids, clusters); + } + @Override public void close() throws IOException { IOUtils.close(rawVectorsReader, ivfCentroids, ivfClusters); From f77b0eef978608f6c13010023b788b6b044b35ce Mon Sep 17 00:00:00 2001 From: ChrisHegarty Date: Mon, 26 May 2025 16:10:25 +0100 Subject: [PATCH 049/184] lucene_snapshot: Mute RangeAggregatorTests.testRuntimeFieldTopLevelQueryNot --- .../search/aggregations/bucket/range/RangeAggregatorTests.java | 1 + 1 file changed, 1 insertion(+) diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorTests.java index ab92ea8593445..837dae1d56293 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorTests.java @@ -596,6 +596,7 @@ public void testOverlappingRanges() throws IOException { * But the union operation overhead that comes with combining the range with * the top level query tends to slow us down more than the standard aggregator.
*/ + @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/128471") public void testRuntimeFieldTopLevelQueryNotOptimized() throws IOException { long totalDocs = (long) RangeAggregator.DOCS_PER_RANGE_TO_USE_FILTERS * 4; SearchLookup lookup = new SearchLookup(s -> null, (ft, l, ftd) -> null, (ctx, doc) -> null); From 6d8ace619df08742b4899a1abffb008cc0d26988 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Tue, 27 May 2025 06:11:50 +0000 Subject: [PATCH 050/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-e865d0076e4 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index a171b5a398f8d..e49ebcd852a61 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-ac002e2c4e3 +lucene = 10.3.0-snapshot-e865d0076e4 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index ac38d7d63519e..0d319e8fa7497 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2953,129 +2953,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 015581e2786e4332e67d72b9e05c80e9d7500e0d Mon Sep 17 00:00:00 2001 From: ChrisHegarty Date: Tue, 27 May 2025 12:16:29 +0100 Subject: [PATCH 051/184] lucene_snapshot: Mute ScriptedSimilarityTests.testInitScript --- .../elasticsearch/index/similarity/ScriptedSimilarityTests.java | 1 + 1 file changed, 1 insertion(+) diff 
--git a/server/src/test/java/org/elasticsearch/index/similarity/ScriptedSimilarityTests.java b/server/src/test/java/org/elasticsearch/index/similarity/ScriptedSimilarityTests.java index fa5f713dfd672..413fa656494e9 100644 --- a/server/src/test/java/org/elasticsearch/index/similarity/ScriptedSimilarityTests.java +++ b/server/src/test/java/org/elasticsearch/index/similarity/ScriptedSimilarityTests.java @@ -152,6 +152,7 @@ public double execute( dir.close(); } + @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/128502") public void testInitScript() throws IOException { final AtomicBoolean initCalled = new AtomicBoolean(); SimilarityWeightScript.Factory weightScriptFactory = () -> { From 53a5f0a819ef4956c0c93d3f47f3e042a011d479 Mon Sep 17 00:00:00 2001 From: Chris Hegarty <62058229+ChrisHegarty@users.noreply.github.com> Date: Tue, 27 May 2025 07:11:15 -0700 Subject: [PATCH 052/184] Fix intermittent failure of DirectIOIT (#128463) --- .../elasticsearch/index/store/DirectIOIT.java | 53 +++++++++++-------- 1 file changed, 30 insertions(+), 23 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/index/store/DirectIOIT.java b/server/src/internalClusterTest/java/org/elasticsearch/index/store/DirectIOIT.java index a968f7d0bb81b..b5ba406b68abf 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/index/store/DirectIOIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/index/store/DirectIOIT.java @@ -24,6 +24,9 @@ import java.util.List; import java.util.stream.IntStream; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; + @LuceneTestCase.SuppressCodecs("*") // only use our own codecs public class DirectIOIT extends ESIntegTestCase { @@ -33,30 +36,31 @@ protected Collection> nodePlugins() { } private void indexVectors() { - internalCluster().startNode(); - 
prepareCreate("vectors").setSettings(Settings.builder().put(InternalSettingsPlugin.USE_COMPOUND_FILE.getKey(), false)) - .setMapping(""" - { - "properties": { - "vector": { - "type": "dense_vector", - "dims": 64, - "element_type": "float", - "index": true, - "similarity": "l2_norm", - "index_options": { - "type": "bbq_flat" + assertAcked( + prepareCreate("foo-vectors").setSettings(Settings.builder().put(InternalSettingsPlugin.USE_COMPOUND_FILE.getKey(), false)) + .setMapping(""" + { + "properties": { + "fooVector": { + "type": "dense_vector", + "dims": 64, + "element_type": "float", + "index": true, + "similarity": "l2_norm", + "index_options": { + "type": "bbq_flat" + } + } } } - } - } - """) - .get(); - ensureGreen("vectors"); + """) + ); + ensureGreen("foo-vectors"); for (int i = 0; i < 1000; i++) { - indexDoc("vectors", Integer.toString(i), "vector", IntStream.range(0, 64).mapToDouble(d -> randomFloat()).toArray()); + indexDoc("foo-vectors", Integer.toString(i), "fooVector", IntStream.range(0, 64).mapToDouble(d -> randomFloat()).toArray()); } + refresh(); } @TestLogging(value = "org.elasticsearch.index.store.FsDirectoryFactory:DEBUG", reason = "to capture trace logging for direct IO") @@ -75,11 +79,14 @@ public void testDirectIOUsed() { indexVectors(); // do a search - prepareSearch("vectors").setKnnSearch( - List.of(new KnnSearchBuilder("vector", new VectorData(null, new byte[64]), 10, 20, null, null)) - ).get(); - + var knn = List.of(new KnnSearchBuilder("fooVector", new VectorData(null, new byte[64]), 10, 20, null, null)); + assertHitCount(prepareSearch("foo-vectors").setKnnSearch(knn), 10); mockLog.assertAllExpectationsMatched(); } } + + @Override + protected boolean addMockFSIndexStore() { + return false; // we require to always use the "real" hybrid directory + } } From 79589b4f91da1e64a0f5c438986de9c4921eac5a Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 28 May 2025 06:12:50 +0000 Subject: [PATCH 053/184] [Automated] Update Lucene 
snapshot to 10.3.0-snapshot-be23f36590a --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index e49ebcd852a61..ed82099e1c121 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-e865d0076e4 +lucene = 10.3.0-snapshot-be23f36590a bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 0d319e8fa7497..b8f2ec9074136 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2953,129 +2953,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 31c08d0b7b86adfc580efa495e02bb43e29db380 Mon Sep 17 00:00:00 2001 From: Chris Hegarty <62058229+ChrisHegarty@users.noreply.github.com> Date: Wed, 28 May 2025 03:19:45 -0700 Subject: [PATCH 054/184] DirectIOIT requires filesystem that supports Direct IO (#128524) DirectIOIT requires a filesystem that supports Direct IO. The Elasticsearch CI builds and tests on a tmpfs, /dev/shm/bk, which does not support O_DIRECT. 
--- .../elasticsearch/index/store/DirectIOIT.java | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/index/store/DirectIOIT.java b/server/src/internalClusterTest/java/org/elasticsearch/index/store/DirectIOIT.java index b5ba406b68abf..cfb7de6c81d88 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/index/store/DirectIOIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/index/store/DirectIOIT.java @@ -10,6 +10,11 @@ package org.elasticsearch.index.store; import org.apache.logging.log4j.Level; +import org.apache.lucene.misc.store.DirectIODirectory; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexOutput; import org.apache.lucene.tests.util.LuceneTestCase; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.plugins.Plugin; @@ -19,9 +24,13 @@ import org.elasticsearch.test.InternalSettingsPlugin; import org.elasticsearch.test.MockLog; import org.elasticsearch.test.junit.annotations.TestLogging; +import org.junit.BeforeClass; +import java.io.IOException; +import java.nio.file.Path; import java.util.Collection; import java.util.List; +import java.util.OptionalLong; import java.util.stream.IntStream; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; @@ -30,6 +39,25 @@ @LuceneTestCase.SuppressCodecs("*") // only use our own codecs public class DirectIOIT extends ESIntegTestCase { + @BeforeClass + public static void checkSupported() throws IOException { + Path path = createTempDir("directIOProbe"); + try (Directory dir = open(path); IndexOutput out = dir.createOutput("out", IOContext.DEFAULT)) { + out.writeString("test"); + } catch (IOException e) { + assumeNoException("test requires filesystem that supports Direct IO", e); + } + } + + static DirectIODirectory open(Path path) throws IOException { + 
return new DirectIODirectory(FSDirectory.open(path)) { + @Override + protected boolean useDirectIO(String name, IOContext context, OptionalLong fileLength) { + return true; + } + }; + } + @Override protected Collection> nodePlugins() { return List.of(InternalSettingsPlugin.class); From be627e93c7e1d243b2516753c5fe68c598307971 Mon Sep 17 00:00:00 2001 From: ChrisHegarty Date: Wed, 28 May 2025 12:03:20 +0100 Subject: [PATCH 055/184] lucene_snapshot: Mute org.elasticsearch.xpack.esql.qa.single_node.PushQueriesIT --- .../elasticsearch/xpack/esql/qa/single_node/PushQueriesIT.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/PushQueriesIT.java b/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/PushQueriesIT.java index aff75d5647589..849d17629a2b0 100644 --- a/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/PushQueriesIT.java +++ b/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/PushQueriesIT.java @@ -11,6 +11,7 @@ import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite; +import org.apache.lucene.tests.util.LuceneTestCase; import org.apache.lucene.tests.util.TimeUnits; import org.elasticsearch.client.Request; import org.elasticsearch.client.Response; @@ -53,6 +54,7 @@ */ @ThreadLeakFilters(filters = TestClustersThreadFilter.class) @TimeoutSuite(millis = 10 * TimeUnits.MINUTE) // semantic_text can take a long, long time to start in CI +@LuceneTestCase.AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/128506") public class PushQueriesIT extends ESRestTestCase { @ClassRule public static ElasticsearchCluster cluster = Clusters.testCluster(); From 46255caa09c3a036e76ca20926c31125638afd63 Mon 
Sep 17 00:00:00 2001 From: Chris Hegarty <62058229+ChrisHegarty@users.noreply.github.com> Date: Wed, 28 May 2025 05:58:15 -0700 Subject: [PATCH 056/184] Fix ScriptedSimilarityTests by allowing for BulkScoreBulkScorer calling TermScorer::nextDocsAndScores (#128561) --- .../index/similarity/ScriptedSimilarityTests.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/index/similarity/ScriptedSimilarityTests.java b/server/src/test/java/org/elasticsearch/index/similarity/ScriptedSimilarityTests.java index 413fa656494e9..47b1b59306019 100644 --- a/server/src/test/java/org/elasticsearch/index/similarity/ScriptedSimilarityTests.java +++ b/server/src/test/java/org/elasticsearch/index/similarity/ScriptedSimilarityTests.java @@ -152,7 +152,6 @@ public double execute( dir.close(); } - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/128502") public void testInitScript() throws IOException { final AtomicBoolean initCalled = new AtomicBoolean(); SimilarityWeightScript.Factory weightScriptFactory = () -> { @@ -189,7 +188,8 @@ public double execute( StackTraceElement[] stackTraceElements = Thread.currentThread().getStackTrace(); if (Arrays.stream(stackTraceElements).anyMatch(ste -> { - return ste.getClassName().endsWith(".TermScorer") && ste.getMethodName().equals("score"); + return ste.getClassName().endsWith(".TermScorer") + && (ste.getMethodName().equals("score") || ste.getMethodName().equals("nextDocsAndScores")); }) == false) { // this might happen when computing max scores return Float.MAX_VALUE; From 9cfe649dce363bea2ab275128a5997d7f5bba2ac Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 29 May 2025 06:15:14 +0000 Subject: [PATCH 057/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-4c9bef5a268 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git 
a/build-tools-internal/version.properties b/build-tools-internal/version.properties index ed82099e1c121..5cd6aa92c67b8 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-be23f36590a +lucene = 10.3.0-snapshot-4c9bef5a268 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index b8f2ec9074136..d1347a5141074 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2953,129 +2953,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From d5997f4949cefc049b5c9c0b7ed6b6e2909ea6fc Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Fri, 30 May 2025 06:12:20 +0000 Subject: [PATCH 058/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-7b014446738 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 5cd6aa92c67b8..5ab10105ea879 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-4c9bef5a268 +lucene = 10.3.0-snapshot-7b014446738 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index d1347a5141074..1c8c787aeb712 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2953,129 +2953,129 @@ - - - + + + - 
- - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From a6551410ad88e0e32ad9e8062f6628d8dc39181a Mon Sep 17 00:00:00 2001 From: ChrisHegarty Date: Fri, 30 May 2025 13:51:47 +0100 Subject: [PATCH 059/184] Update IOContext.READONCE to use the new ReadOnceHint.INSTANCE --- .../index/codec/vectors/es818/MergeReaderWrapper.java | 4 +++- server/src/main/java/org/elasticsearch/index/store/Store.java | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/MergeReaderWrapper.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/MergeReaderWrapper.java index eef9f5cc28b07..e26784ecfdd96 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/MergeReaderWrapper.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/MergeReaderWrapper.java @@ -86,7 +86,9 @@ public Collection getChildResources() { @Override public Map getOffHeapByteSize(FieldInfo fieldInfo) { - return mainReader.getOffHeapByteSize(fieldInfo); + // TODO: https://github.com/elastic/elasticsearch/issues/128672 + // return mainReader.getOffHeapByteSize(fieldInfo); + return Map.of(); // no off-heap when using direct IO } @Override diff --git a/server/src/main/java/org/elasticsearch/index/store/Store.java b/server/src/main/java/org/elasticsearch/index/store/Store.java index d4f2fcb23baab..f7f8d4275156f 100644 --- a/server/src/main/java/org/elasticsearch/index/store/Store.java +++ b/server/src/main/java/org/elasticsearch/index/store/Store.java @@ -34,6 +34,7 @@ import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.Lock; import org.apache.lucene.store.NIOFSDirectory; +import org.apache.lucene.store.ReadOnceHint; import 
org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.Version; @@ -153,7 +154,7 @@ public class Store extends AbstractIndexShardComponent implements Closeable, Ref // while equivalent, these different read once contexts are checked by identity in directory implementations private static IOContext createReadOnceContext() { - var context = IOContext.READONCE.withHints(DataAccessHint.SEQUENTIAL); + var context = IOContext.READONCE.withHints(DataAccessHint.SEQUENTIAL, ReadOnceHint.INSTANCE); assert context != IOContext.READONCE; assert context.equals(IOContext.READONCE); return context; From 816095c72b618d4fee5480737f2b1686e0fa518d Mon Sep 17 00:00:00 2001 From: Chris Hegarty <62058229+ChrisHegarty@users.noreply.github.com> Date: Fri, 30 May 2025 05:55:10 -0700 Subject: [PATCH 060/184] Fix expected queries in PushQueriesIT (#128608) This commit fixes the expected query in PushQueriesIT, when the range query for foo == 1 rewrites to a MatchAllDocsQuery (*:*), when the data in the index can be proven to be within range. 
--- .../xpack/esql/qa/single_node/PushQueriesIT.java | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/PushQueriesIT.java b/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/PushQueriesIT.java index 0aef9a9a9f503..f442654c3aff7 100644 --- a/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/PushQueriesIT.java +++ b/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/PushQueriesIT.java @@ -10,7 +10,6 @@ import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; -import org.apache.lucene.tests.util.LuceneTestCase; import org.elasticsearch.client.Request; import org.elasticsearch.client.Response; import org.elasticsearch.client.ResponseException; @@ -52,7 +51,6 @@ * Tests for pushing queries to lucene. 
*/ @ThreadLeakFilters(filters = TestClustersThreadFilter.class) -@LuceneTestCase.AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/128506") public class PushQueriesIT extends ESRestTestCase { @ClassRule public static ElasticsearchCluster cluster = Clusters.testCluster(spec -> spec.plugin("inference-service-test")); @@ -147,8 +145,8 @@ public void testEqualityAndOther() throws IOException { | WHERE test == "%value" AND foo == 1 """; List luceneQueryOptions = switch (type) { - case "text", "auto" -> List.of("#test.keyword:%value -_ignored:test.keyword #foo:[1 TO 1]"); - case "match_only_text" -> List.of("foo:[1 TO 1]"); + case "text", "auto" -> List.of("#test.keyword:%value -_ignored:test.keyword"); + case "match_only_text" -> List.of("*:*"); case "semantic_text" -> /* * single_value_match is here because there are extra documents hiding in the index From 13cbb83ec1a2fe9d54c816a0ab3e8f1807892dde Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Sat, 31 May 2025 06:12:55 +0000 Subject: [PATCH 061/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-b916623fcfa --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 5ab10105ea879..3069b2340e7a0 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-7b014446738 +lucene = 10.3.0-snapshot-b916623fcfa bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 1c8c787aeb712..a49ab4626577f 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2953,129 +2953,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - 
- + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 693d5bc4d4f74232e2c02894822824b3694c3d56 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Sun, 1 Jun 2025 06:13:46 +0000 Subject: [PATCH 062/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-0386d68349d --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 3069b2340e7a0..5233ac01c399b 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-b916623fcfa +lucene = 10.3.0-snapshot-0386d68349d bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index a49ab4626577f..9946b84245798 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2953,129 +2953,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From a99cb33edddf425cf9303cf4d26be7136e1b6e36 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Mon, 2 Jun 2025 06:13:49 +0000 Subject: [PATCH 063/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-0386d68349d --- gradle/verification-metadata.xml | 50 ++++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml 
index 9946b84245798..4006445e1a10b 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2955,127 +2955,127 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + From c79150f15c9caacaa7fd39f142f6c04ee8f124da Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Tue, 3 Jun 2025 06:13:19 +0000 Subject: [PATCH 064/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-f2eb4d9fe29 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 5233ac01c399b..e2bfa1d9f13dd 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-0386d68349d +lucene = 10.3.0-snapshot-f2eb4d9fe29 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 4006445e1a10b..04ce4cd327f5f 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2953,129 +2953,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From c34de40f5a15e2ad51479639cc66a9c27cfa84e6 Mon Sep 17 00:00:00 2001 From: Chris Hegarty <62058229+ChrisHegarty@users.noreply.github.com> Date: Tue, 3 Jun 2025 08:32:37 +0100 Subject: [PATCH 065/184] Only use direct IO when opening with Default IOContext and using the hybrid directory (#128697) This commit updates the DIO format so that it only use direct IO when opening with Default IOContext and using 
the hybrid directory. --- .../DirectIOLucene99FlatVectorsFormat.java | 29 ++++++++++++------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/DirectIOLucene99FlatVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/DirectIOLucene99FlatVectorsFormat.java index ebe226bbaa2d0..2a542f6b77864 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/DirectIOLucene99FlatVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/DirectIOLucene99FlatVectorsFormat.java @@ -31,6 +31,7 @@ import org.apache.lucene.store.IOContext; import org.apache.lucene.store.MergeInfo; import org.elasticsearch.common.util.set.Sets; +import org.elasticsearch.index.store.FsDirectoryFactory; import java.io.IOException; import java.util.Set; @@ -66,24 +67,30 @@ public FlatVectorsWriter fieldsWriter(SegmentWriteState state) throws IOExceptio return new Lucene99FlatVectorsWriter(state, vectorsScorer); } + static boolean shouldUseDirectIO(SegmentReadState state) { + return FsDirectoryFactory.isHybridFs(state.directory); + } + @Override public FlatVectorsReader fieldsReader(SegmentReadState state) throws IOException { - // only override the context for the random-access use case - SegmentReadState directIOState = state.context.context() == IOContext.Context.DEFAULT - ? new SegmentReadState( + if (shouldUseDirectIO(state) && state.context.context() == IOContext.Context.DEFAULT) { + // only override the context for the random-access use case + SegmentReadState directIOState = new SegmentReadState( state.directory, state.segmentInfo, state.fieldInfos, new DirectIOContext(state.context.hints()), state.segmentSuffix - ) - : state; - // Use mmap for merges and direct I/O for searches. - // TODO: Open the mmap file with sequential access instead of random (current behavior). 
- return new MergeReaderWrapper( - new Lucene99FlatVectorsReader(directIOState, vectorsScorer), - new Lucene99FlatVectorsReader(state, vectorsScorer) - ); + ); + // Use mmap for merges and direct I/O for searches. + // TODO: Open the mmap file with sequential access instead of random (current behavior). + return new MergeReaderWrapper( + new Lucene99FlatVectorsReader(directIOState, vectorsScorer), + new Lucene99FlatVectorsReader(state, vectorsScorer) + ); + } else { + return new Lucene99FlatVectorsReader(state, vectorsScorer); + } } @Override From e99a3a2c4c12f5abc8101d332e9a8265adb5f82e Mon Sep 17 00:00:00 2001 From: Simon Cooper Date: Tue, 3 Jun 2025 13:54:16 +0100 Subject: [PATCH 066/184] Use a hint to indicate reading file footers only (#128816) --- .../index/store/FsDirectoryFactory.java | 4 +--- .../org/elasticsearch/index/store/Store.java | 23 ++++++++++--------- .../store/SearchableSnapshotDirectory.java | 2 +- .../ChecksumBlobContainerIndexInput.java | 2 +- 4 files changed, 15 insertions(+), 16 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java b/server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java index 0561af50cfbd9..3b74a4a49d66d 100644 --- a/server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java +++ b/server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java @@ -163,8 +163,6 @@ public IndexInput openInput(String name, IOContext context) throws IOException { // we need to do these checks on the outer directory since the inner doesn't know about pending deletes ensureOpen(); ensureCanRead(name); - // we switch the context here since mmap checks for the READONCE context by identity - context = context == Store.READONCE_CHECKSUM ? IOContext.READONCE : context; // we only use the mmap to open inputs. 
Everything else is managed by the NIOFSDirectory otherwise // we might run into trouble with files that are pendingDelete in one directory but still // listed in listAll() from the other. We on the other hand don't want to list files from both dirs @@ -191,7 +189,7 @@ private static String getExtension(String name) { } static boolean useDelegate(String name, IOContext ioContext) { - if (ioContext == Store.READONCE_CHECKSUM) { + if (ioContext.hints().contains(Store.FileFooterOnly.INSTANCE)) { // If we're just reading the footer for the checksum then mmap() isn't really necessary, and it's desperately inefficient // if pre-loading is enabled on this file. return false; diff --git a/server/src/main/java/org/elasticsearch/index/store/Store.java b/server/src/main/java/org/elasticsearch/index/store/Store.java index f7f8d4275156f..27d186137958b 100644 --- a/server/src/main/java/org/elasticsearch/index/store/Store.java +++ b/server/src/main/java/org/elasticsearch/index/store/Store.java @@ -146,19 +146,22 @@ public class Store extends AbstractIndexShardComponent implements Closeable, Ref Property.IndexScope ); + /** + * A {@link org.apache.lucene.store.IOContext.FileOpenHint} that we will only read the Lucene file footer + */ + public enum FileFooterOnly implements IOContext.FileOpenHint { + INSTANCE + } + /** * Specific {@link IOContext} indicating that we will read only the Lucene file footer (containing the file checksum) * See {@link MetadataSnapshot#checksumFromLuceneFile}. 
*/ - public static final IOContext READONCE_CHECKSUM = createReadOnceContext(); - - // while equivalent, these different read once contexts are checked by identity in directory implementations - private static IOContext createReadOnceContext() { - var context = IOContext.READONCE.withHints(DataAccessHint.SEQUENTIAL, ReadOnceHint.INSTANCE); - assert context != IOContext.READONCE; - assert context.equals(IOContext.READONCE); - return context; - } + public static final IOContext READONCE_CHECKSUM = IOContext.READONCE.withHints( + DataAccessHint.SEQUENTIAL, + ReadOnceHint.INSTANCE, + FileFooterOnly.INSTANCE + ); private final AtomicBoolean isClosed = new AtomicBoolean(false); private final StoreDirectory directory; @@ -935,8 +938,6 @@ private static void checksumFromLuceneFile( boolean readFileAsHash, BytesRef writerUuid ) throws IOException { - // We select the read once context carefully here since these constants, while equivalent are - // checked by identity in the different directory implementations. var context = file.startsWith(IndexFileNames.SEGMENTS) ? 
IOContext.READONCE : READONCE_CHECKSUM; try (IndexInput in = directory.openInput(file, context)) { final long length = in.length(); diff --git a/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/store/SearchableSnapshotDirectory.java b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/store/SearchableSnapshotDirectory.java index d62443e492605..7c229381a5a7c 100644 --- a/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/store/SearchableSnapshotDirectory.java +++ b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/store/SearchableSnapshotDirectory.java @@ -377,7 +377,7 @@ public IndexInput openInput(final String name, final IOContext context) throws I final BytesRef content = fileInfo.metadata().hash(); return new ByteArrayIndexInput("ByteArrayIndexInput(" + name + ')', content.bytes, content.offset, content.length); } - if (context == Store.READONCE_CHECKSUM) { + if (context.hints().contains(Store.FileFooterOnly.INSTANCE)) { return ChecksumBlobContainerIndexInput.create(fileInfo.physicalName(), fileInfo.length(), fileInfo.checksum(), context); } diff --git a/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/store/input/ChecksumBlobContainerIndexInput.java b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/store/input/ChecksumBlobContainerIndexInput.java index 552c65a6f2550..4636090158988 100644 --- a/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/store/input/ChecksumBlobContainerIndexInput.java +++ b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/store/input/ChecksumBlobContainerIndexInput.java @@ -115,7 +115,7 @@ private int checksumPositionOrThrow(long pos) { } private static void ensureReadOnceChecksumContext(IOContext 
context) { - if (context != Store.READONCE_CHECKSUM) { + if (context.hints().contains(Store.FileFooterOnly.INSTANCE) == false) { assert false : "expected READONCE_CHECKSUM but got " + context; throw new IllegalArgumentException("ChecksumBlobContainerIndexInput should only be used with READONCE_CHECKSUM context"); } From 4f1a2147747b2a0ca0465c59c2437629d865c298 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 4 Jun 2025 06:13:54 +0000 Subject: [PATCH 067/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-f2eb4d9fe29 --- gradle/verification-metadata.xml | 50 ++++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 04ce4cd327f5f..ddd4a4a9a9190 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2955,127 +2955,127 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + From a36d2997089cd26b4819897c27ea092d90cca274 Mon Sep 17 00:00:00 2001 From: Simon Cooper Date: Wed, 4 Jun 2025 14:18:43 +0100 Subject: [PATCH 068/184] Revert changes for lucene workaround #128671 (#128891) --- ...eFieldMapperDocValuesSkipperBenchmark.java | 6 +- .../resources/forbidden/es-all-signatures.txt | 8 - .../extras/ScaledFloatFieldTypeTests.java | 4 +- server/src/main/java/module-info.java | 1 - .../index/mapper/DateFieldMapper.java | 4 +- .../index/mapper/NumberFieldMapper.java | 11 +- .../lucene/document/NumericField.java | 52 -- ...xSortSortedNumericDocValuesRangeQuery.java | 699 ------------------ .../bucket/filter/QueryToFilterAdapter.java | 6 +- .../index/mapper/DateFieldTypeTests.java | 6 +- .../index/mapper/NumberFieldTypeTests.java | 13 +- .../query/MatchPhraseQueryBuilderTests.java | 4 +- .../query/MultiMatchQueryBuilderTests.java | 4 +- .../index/query/TermQueryBuilderTests.java | 4 +- .../lucene/TimeSeriesSourceOperatorTests.java | 4 +- 
.../unsignedlong/UnsignedLongFieldMapper.java | 4 +- 16 files changed, 35 insertions(+), 795 deletions(-) delete mode 100644 server/src/main/java/org/elasticsearch/lucene/document/NumericField.java delete mode 100644 server/src/main/java/org/elasticsearch/lucene/search/XIndexSortSortedNumericDocValuesRangeQuery.java diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/search/query/range/DateFieldMapperDocValuesSkipperBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/search/query/range/DateFieldMapperDocValuesSkipperBenchmark.java index a732986ebcbab..2110d26463260 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/search/query/range/DateFieldMapperDocValuesSkipperBenchmark.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/search/query/range/DateFieldMapperDocValuesSkipperBenchmark.java @@ -21,6 +21,7 @@ import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.search.IndexOrDocValuesQuery; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; @@ -28,7 +29,6 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.BytesRef; -import org.elasticsearch.lucene.search.XIndexSortSortedNumericDocValuesRangeQuery; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Fork; @@ -295,7 +295,7 @@ public void rangeQueryWithDocValuesSkipper(final Blackhole bh) throws IOExceptio /** * Runs the actual Lucene range query, optionally combining a {@link LongPoint} index query * with doc values ({@link SortedNumericDocValuesField}) via {@link IndexOrDocValuesQuery}, - * and then wrapping it with an {@link XIndexSortSortedNumericDocValuesRangeQuery} to utilize the index sort. 
+ * and then wrapping it with an {@link IndexSortSortedNumericDocValuesRangeQuery} to utilize the index sort. * * @param searcher the Lucene {@link IndexSearcher} * @param rangeStartTimestamp lower bound of the timestamp range @@ -316,7 +316,7 @@ private long rangeQuery(final IndexSearcher searcher, long rangeStartTimestamp, ) : SortedNumericDocValuesField.newSlowRangeQuery(TIMESTAMP_FIELD, rangeStartTimestamp, rangeEndTimestamp); - final Query query = new XIndexSortSortedNumericDocValuesRangeQuery( + final Query query = new IndexSortSortedNumericDocValuesRangeQuery( TIMESTAMP_FIELD, rangeStartTimestamp, rangeEndTimestamp, diff --git a/build-tools-internal/src/main/resources/forbidden/es-all-signatures.txt b/build-tools-internal/src/main/resources/forbidden/es-all-signatures.txt index b372f54d63bd8..8da297fda6b61 100644 --- a/build-tools-internal/src/main/resources/forbidden/es-all-signatures.txt +++ b/build-tools-internal/src/main/resources/forbidden/es-all-signatures.txt @@ -61,11 +61,3 @@ org.apache.logging.log4j.message.ParameterizedMessage#(java.lang.String, j @defaultMessage Use WriteLoadForecaster#getForecastedWriteLoad instead org.elasticsearch.cluster.metadata.IndexMetadata#getForecastedWriteLoad() - -# This is a temporary patch as there is a low level Lucene bug in certain scenarios -# this should be fixed in the new Lucene release 10.3+ -org.apache.lucene.document.LongField#newExactQuery(java.lang.String, long) @ Use org.elasticsearch.lucene.document.NumericField#newExactLongQuery(java.lang.String, long) instead. -org.apache.lucene.document.LongField#newRangeQuery(java.lang.String, long, long) @ Use org.elasticsearch.lucene.document.NumericField#newRangeLongQuery(java.lang.String, long, long) instead. -org.apache.lucene.document.IntField#newExactQuery(java.lang.String, int) @ Use org.elasticsearch.lucene.document.NumericField#newExactIntQuery(java.lang.String, int) instead. 
-org.apache.lucene.document.IntField#newRangeQuery(java.lang.String, int, int) @ Use org.elasticsearch.lucene.document.NumericField#newRangeIntQuery(java.lang.String, int, int) instead. -org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery @ use org.elasticsearch.lucene.search.XIndexSortSortedNumericDocValuesRangeQuery instead. diff --git a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/ScaledFloatFieldTypeTests.java b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/ScaledFloatFieldTypeTests.java index 31a150a41b227..f753d4b91f501 100644 --- a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/ScaledFloatFieldTypeTests.java +++ b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/ScaledFloatFieldTypeTests.java @@ -11,6 +11,7 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.DoublePoint; +import org.apache.lucene.document.LongField; import org.apache.lucene.document.LongPoint; import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.index.DirectoryReader; @@ -30,7 +31,6 @@ import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.MapperBuilderContext; import org.elasticsearch.index.mapper.NumberFieldMapper; -import org.elasticsearch.lucene.document.NumericField; import java.io.IOException; import java.util.Arrays; @@ -48,7 +48,7 @@ public void testTermQuery() { ); double value = (randomDouble() * 2 - 1) * 10000; long scaledValue = Math.round(value * ft.getScalingFactor()); - assertEquals(NumericField.newExactLongQuery("scaled_float", scaledValue), ft.termQuery(value, MOCK_CONTEXT)); + assertEquals(LongField.newExactQuery("scaled_float", scaledValue), ft.termQuery(value, MOCK_CONTEXT)); MappedFieldType ft2 = new ScaledFloatFieldMapper.ScaledFloatFieldType("scaled_float", 0.1 + randomDouble() * 100, false); ElasticsearchException e2 = 
expectThrows(ElasticsearchException.class, () -> ft2.termQuery("42", MOCK_CONTEXT_DISALLOW_EXPENSIVE)); diff --git a/server/src/main/java/module-info.java b/server/src/main/java/module-info.java index 3f4d0fc5e2ed6..5f51852934809 100644 --- a/server/src/main/java/module-info.java +++ b/server/src/main/java/module-info.java @@ -479,5 +479,4 @@ exports org.elasticsearch.plugins.internal.rewriter to org.elasticsearch.inference; exports org.elasticsearch.lucene.util.automaton; exports org.elasticsearch.index.codec.perfield; - exports org.elasticsearch.lucene.search; } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java index db3b1e87fb66f..3511c8dc19321 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java @@ -22,6 +22,7 @@ import org.apache.lucene.index.PointValues; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.search.IndexOrDocValuesQuery; +import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery; import org.apache.lucene.search.Query; import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.common.geo.ShapeRelation; @@ -49,7 +50,6 @@ import org.elasticsearch.index.query.DateRangeIncludingNowQuery; import org.elasticsearch.index.query.QueryRewriteContext; import org.elasticsearch.index.query.SearchExecutionContext; -import org.elasticsearch.lucene.search.XIndexSortSortedNumericDocValuesRangeQuery; import org.elasticsearch.script.DateFieldScript; import org.elasticsearch.script.Script; import org.elasticsearch.script.ScriptCompiler; @@ -750,7 +750,7 @@ public Query rangeQuery( query = SortedNumericDocValuesField.newSlowRangeQuery(name(), l, u); } if (hasDocValues() && context.indexSortedOnField(name())) { - query = new XIndexSortSortedNumericDocValuesRangeQuery(name(), l, u, 
query); + query = new IndexSortSortedNumericDocValuesRangeQuery(name(), l, u, query); } return query; }); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java index e263ebcfeced0..cc37f7b3f5082 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java @@ -24,6 +24,7 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.sandbox.document.HalfFloatPoint; import org.apache.lucene.search.IndexOrDocValuesQuery; +import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.util.BytesRef; @@ -46,8 +47,6 @@ import org.elasticsearch.index.fielddata.plain.SortedNumericIndexFieldData; import org.elasticsearch.index.mapper.TimeSeriesParams.MetricType; import org.elasticsearch.index.query.SearchExecutionContext; -import org.elasticsearch.lucene.document.NumericField; -import org.elasticsearch.lucene.search.XIndexSortSortedNumericDocValuesRangeQuery; import org.elasticsearch.script.DoubleFieldScript; import org.elasticsearch.script.LongFieldScript; import org.elasticsearch.script.Script; @@ -1147,7 +1146,7 @@ public Query termQuery(String field, Object value, boolean isIndexed, boolean ha int v = parse(value, true); if (isIndexed && hasDocValues) { - return NumericField.newExactIntQuery(field, v); + return IntField.newExactQuery(field, v); } else if (isIndexed) { return IntPoint.newExactQuery(field, v); } else { @@ -1224,7 +1223,7 @@ public Query rangeQuery( query = SortedNumericDocValuesField.newSlowRangeQuery(field, l, u); } if (hasDocValues && context.indexSortedOnField(field)) { - query = new XIndexSortSortedNumericDocValuesRangeQuery(field, l, u, query); + query = new 
IndexSortSortedNumericDocValuesRangeQuery(field, l, u, query); } return query; } @@ -1331,7 +1330,7 @@ public Query termQuery(String field, Object value, boolean isIndexed, boolean ha long v = parse(value, true); if (isIndexed && hasDocValues) { - return NumericField.newExactLongQuery(field, v); + return LongField.newExactQuery(field, v); } else if (isIndexed) { return LongPoint.newExactQuery(field, v); } else { @@ -1382,7 +1381,7 @@ public Query rangeQuery( query = SortedNumericDocValuesField.newSlowRangeQuery(field, l, u); } if (hasDocValues && context.indexSortedOnField(field)) { - query = new XIndexSortSortedNumericDocValuesRangeQuery(field, l, u, query); + query = new IndexSortSortedNumericDocValuesRangeQuery(field, l, u, query); } return query; }); diff --git a/server/src/main/java/org/elasticsearch/lucene/document/NumericField.java b/server/src/main/java/org/elasticsearch/lucene/document/NumericField.java deleted file mode 100644 index d60244767dd92..0000000000000 --- a/server/src/main/java/org/elasticsearch/lucene/document/NumericField.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". 
- */ - -package org.elasticsearch.lucene.document; - -import org.apache.lucene.document.IntPoint; -import org.apache.lucene.document.LongPoint; -import org.apache.lucene.document.SortedNumericDocValuesField; -import org.apache.lucene.search.IndexOrDocValuesQuery; -import org.apache.lucene.search.PointRangeQuery; -import org.apache.lucene.search.Query; -import org.elasticsearch.lucene.search.XIndexSortSortedNumericDocValuesRangeQuery; - -public final class NumericField { - - private NumericField() { - // Utility class, no instantiation - } - - public static Query newExactLongQuery(String field, long value) { - return newRangeLongQuery(field, value, value); - } - - public static Query newRangeLongQuery(String field, long lowerValue, long upperValue) { - PointRangeQuery.checkArgs(field, lowerValue, upperValue); - Query fallbackQuery = new IndexOrDocValuesQuery( - LongPoint.newRangeQuery(field, lowerValue, upperValue), - SortedNumericDocValuesField.newSlowRangeQuery(field, lowerValue, upperValue) - ); - return new XIndexSortSortedNumericDocValuesRangeQuery(field, lowerValue, upperValue, fallbackQuery); - } - - public static Query newExactIntQuery(String field, int value) { - return newRangeIntQuery(field, value, value); - } - - public static Query newRangeIntQuery(String field, int lowerValue, int upperValue) { - PointRangeQuery.checkArgs(field, lowerValue, upperValue); - Query fallbackQuery = new IndexOrDocValuesQuery( - IntPoint.newRangeQuery(field, lowerValue, upperValue), - SortedNumericDocValuesField.newSlowRangeQuery(field, lowerValue, upperValue) - ); - return new XIndexSortSortedNumericDocValuesRangeQuery(field, lowerValue, upperValue, fallbackQuery); - } - -} diff --git a/server/src/main/java/org/elasticsearch/lucene/search/XIndexSortSortedNumericDocValuesRangeQuery.java b/server/src/main/java/org/elasticsearch/lucene/search/XIndexSortSortedNumericDocValuesRangeQuery.java deleted file mode 100644 index bc892a9c778e5..0000000000000 --- 
a/server/src/main/java/org/elasticsearch/lucene/search/XIndexSortSortedNumericDocValuesRangeQuery.java +++ /dev/null @@ -1,699 +0,0 @@ -/* - * @notice - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * Modifications copyright (C) 2025 Elasticsearch B.V. 
- */ - -package org.elasticsearch.lucene.search; - -import org.apache.lucene.document.IntPoint; -import org.apache.lucene.document.LongPoint; -import org.apache.lucene.index.DocValues; -import org.apache.lucene.index.LeafReader; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.NumericDocValues; -import org.apache.lucene.index.PointValues; -import org.apache.lucene.index.PointValues.IntersectVisitor; -import org.apache.lucene.index.PointValues.PointTree; -import org.apache.lucene.index.PointValues.Relation; -import org.apache.lucene.index.SortedNumericDocValues; -import org.apache.lucene.search.ConstantScoreScorer; -import org.apache.lucene.search.ConstantScoreWeight; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.FieldComparator; -import org.apache.lucene.search.FieldExistsQuery; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.LeafFieldComparator; -import org.apache.lucene.search.MatchAllDocsQuery; -import org.apache.lucene.search.Pruning; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.QueryVisitor; -import org.apache.lucene.search.ScoreMode; -import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.ScorerSupplier; -import org.apache.lucene.search.Sort; -import org.apache.lucene.search.SortField; -import org.apache.lucene.search.SortField.Type; -import org.apache.lucene.search.SortedNumericSortField; -import org.apache.lucene.search.Weight; -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.ArrayUtil.ByteArrayComparator; -import org.apache.lucene.util.Version; - -import java.io.IOException; -import java.util.ArrayDeque; -import java.util.Deque; -import java.util.Objects; - -/** - * copied from Lucene - */ -public class XIndexSortSortedNumericDocValuesRangeQuery extends Query { - - private final String field; - private final long lowerValue; - private final long upperValue; - private final Query 
fallbackQuery; - - /** - * Creates a new {@link XIndexSortSortedNumericDocValuesRangeQuery}. - * - * @param field The field name. - * @param lowerValue The lower end of the range (inclusive). - * @param upperValue The upper end of the range (exclusive). - * @param fallbackQuery A query to fall back to if the optimization cannot be applied. - */ - public XIndexSortSortedNumericDocValuesRangeQuery(String field, long lowerValue, long upperValue, Query fallbackQuery) { - // we should only have this while the apache Lucene version is 10.2 or earlier - assert Version.LATEST.major == 10 && Version.LATEST.minor <= 2 - : "This query should only be used with Lucene 10.2 or earlier, but got version: " + Version.LATEST; - this.field = Objects.requireNonNull(field); - this.lowerValue = lowerValue; - this.upperValue = upperValue; - this.fallbackQuery = fallbackQuery; - } - - public Query getFallbackQuery() { - return fallbackQuery; - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - XIndexSortSortedNumericDocValuesRangeQuery that = (XIndexSortSortedNumericDocValuesRangeQuery) o; - return lowerValue == that.lowerValue - && upperValue == that.upperValue - && Objects.equals(field, that.field) - && Objects.equals(fallbackQuery, that.fallbackQuery); - } - - @Override - public int hashCode() { - return Objects.hash(field, lowerValue, upperValue, fallbackQuery); - } - - @Override - public void visit(QueryVisitor visitor) { - if (visitor.acceptField(field)) { - visitor.visitLeaf(this); - fallbackQuery.visit(visitor); - } - } - - @Override - public String toString(String field) { - StringBuilder b = new StringBuilder(); - if (this.field.equals(field) == false) { - b.append(this.field).append(":"); - } - return b.append("[").append(lowerValue).append(" TO ").append(upperValue).append("]").toString(); - } - - @Override - public Query rewrite(IndexSearcher indexSearcher) throws IOException { - if 
(lowerValue == Long.MIN_VALUE && upperValue == Long.MAX_VALUE) { - return new FieldExistsQuery(field); - } - - Query rewrittenFallback = fallbackQuery.rewrite(indexSearcher); - if (rewrittenFallback.getClass() == MatchAllDocsQuery.class) { - return new MatchAllDocsQuery(); - } - if (rewrittenFallback == fallbackQuery) { - return this; - } else { - return new XIndexSortSortedNumericDocValuesRangeQuery(field, lowerValue, upperValue, rewrittenFallback); - } - } - - @Override - public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { - Weight fallbackWeight = fallbackQuery.createWeight(searcher, scoreMode, boost); - - return new ConstantScoreWeight(this, boost) { - - @Override - public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException { - IteratorAndCount itAndCount = getDocIdSetIteratorOrNull(context); - if (itAndCount != null) { - DocIdSetIterator disi = itAndCount.it; - return new ScorerSupplier() { - @Override - public Scorer get(long leadCost) throws IOException { - return new ConstantScoreScorer(score(), scoreMode, disi); - } - - @Override - public long cost() { - return disi.cost(); - } - }; - } - return fallbackWeight.scorerSupplier(context); - } - - @Override - public boolean isCacheable(LeafReaderContext ctx) { - // Both queries should always return the same values, so we can just check - // if the fallback query is cacheable. 
- return fallbackWeight.isCacheable(ctx); - } - - @Override - public int count(LeafReaderContext context) throws IOException { - if (context.reader().hasDeletions() == false) { - if (lowerValue > upperValue) { - return 0; - } - IteratorAndCount itAndCount = null; - LeafReader reader = context.reader(); - - // first use bkd optimization if possible - SortedNumericDocValues sortedNumericValues = DocValues.getSortedNumeric(reader, field); - NumericDocValues numericValues = DocValues.unwrapSingleton(sortedNumericValues); - PointValues pointValues = reader.getPointValues(field); - if (pointValues != null && pointValues.getDocCount() == reader.maxDoc()) { - itAndCount = getDocIdSetIteratorOrNullFromBkd(context, numericValues); - } - if (itAndCount != null && itAndCount.count != -1) { - return itAndCount.count; - } - - // use index sort optimization if possible - Sort indexSort = reader.getMetaData().sort(); - if (indexSort != null && indexSort.getSort().length > 0 && indexSort.getSort()[0].getField().equals(field)) { - final SortField sortField = indexSort.getSort()[0]; - final SortField.Type sortFieldType = getSortFieldType(sortField); - // The index sort optimization is only supported for Type.INT and Type.LONG - if (sortFieldType == Type.INT || sortFieldType == Type.LONG) { - Object missingValue = sortField.getMissingValue(); - // This is the fix - final long missingLongValue = missingValue == null ? 
0L : ((Number) missingValue).longValue(); - // all documents have docValues or missing value falls outside the range - if ((pointValues != null && pointValues.getDocCount() == reader.maxDoc()) - || (missingLongValue < lowerValue || missingLongValue > upperValue)) { - itAndCount = getDocIdSetIterator(sortField, sortFieldType, context, numericValues); - } - if (itAndCount != null && itAndCount.count != -1) { - return itAndCount.count; - } - } - } - } - return fallbackWeight.count(context); - } - }; - } - - private static class ValueAndDoc { - byte[] value; - int docID; - boolean done; - } - - /** - * Move to the minimum leaf node that has at least one value that is greater than (or equal to if - * {@code allowEqual}) {@code value}, and return the next greater value on this block. Upon - * returning, the {@code pointTree} must be on the leaf node where the value was found. - */ - private static ValueAndDoc findNextValue( - PointTree pointTree, - byte[] value, - boolean allowEqual, - ByteArrayComparator comparator, - boolean lastDoc - ) throws IOException { - int cmp = comparator.compare(pointTree.getMaxPackedValue(), 0, value, 0); - if (cmp < 0 || (cmp == 0 && allowEqual == false)) { - return null; - } - if (pointTree.moveToChild() == false) { - ValueAndDoc vd = new ValueAndDoc(); - pointTree.visitDocValues(new IntersectVisitor() { - - @Override - public void visit(int docID, byte[] packedValue) throws IOException { - if (vd.value == null) { - int cmp = comparator.compare(packedValue, 0, value, 0); - if (cmp > 0 || (cmp == 0 && allowEqual)) { - vd.value = packedValue.clone(); - vd.docID = docID; - } - } else if (lastDoc && vd.done == false) { - int cmp = comparator.compare(packedValue, 0, vd.value, 0); - assert cmp >= 0; - if (cmp > 0) { - vd.done = true; - } else { - vd.docID = docID; - } - } - } - - @Override - public void visit(int docID) throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public Relation compare(byte[] 
minPackedValue, byte[] maxPackedValue) { - return Relation.CELL_CROSSES_QUERY; - } - }); - if (vd.value != null) { - return vd; - } else { - return null; - } - } - - // Recurse - do { - ValueAndDoc vd = findNextValue(pointTree, value, allowEqual, comparator, lastDoc); - if (vd != null) { - return vd; - } - } while (pointTree.moveToSibling()); - - boolean moved = pointTree.moveToParent(); - assert moved; - return null; - } - - /** - * Find the next value that is greater than (or equal to if {@code allowEqual}) and return either - * its first doc ID or last doc ID depending on {@code lastDoc}. This method returns -1 if there - * is no greater value in the dataset. - */ - private static int nextDoc(PointTree pointTree, byte[] value, boolean allowEqual, ByteArrayComparator comparator, boolean lastDoc) - throws IOException { - ValueAndDoc vd = findNextValue(pointTree, value, allowEqual, comparator, lastDoc); - if (vd == null) { - return -1; - } - if (lastDoc == false || vd.done) { - return vd.docID; - } - - // We found the next value, now we need the last doc ID. - int doc = lastDoc(pointTree, vd.value, comparator); - if (doc == -1) { - // vd.docID was actually the last doc ID - return vd.docID; - } else { - return doc; - } - } - - /** - * Compute the last doc ID that matches the given value and is stored on a leaf node that compares - * greater than the current leaf node that the provided {@link PointTree} is positioned on. This - * returns -1 if no other leaf node contains the provided {@code value}. - */ - private static int lastDoc(PointTree pointTree, byte[] value, ByteArrayComparator comparator) throws IOException { - // Create a stack of nodes that may contain value that we'll use to search for the last leaf - // node that contains `value`. - // While the logic looks a bit complicated due to the fact that the PointTree API doesn't allow - // moving back to previous siblings, this effectively performs a binary search. 
- Deque stack = new ArrayDeque<>(); - - outer: while (true) { - - // Move to the next node - while (pointTree.moveToSibling() == false) { - if (pointTree.moveToParent() == false) { - // No next node - break outer; - } - } - - int cmp = comparator.compare(pointTree.getMinPackedValue(), 0, value, 0); - if (cmp > 0) { - // This node doesn't have `value`, so next nodes can't either - break; - } - - stack.push(pointTree.clone()); - } - - while (stack.isEmpty() == false) { - PointTree next = stack.pop(); - if (next.moveToChild() == false) { - int[] lastDoc = { -1 }; - next.visitDocValues(new IntersectVisitor() { - - @Override - public void visit(int docID) throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public void visit(int docID, byte[] packedValue) throws IOException { - int cmp = comparator.compare(value, 0, packedValue, 0); - if (cmp == 0) { - lastDoc[0] = docID; - } - } - - @Override - public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { - return Relation.CELL_CROSSES_QUERY; - } - }); - if (lastDoc[0] != -1) { - return lastDoc[0]; - } - } else { - do { - int cmp = comparator.compare(next.getMinPackedValue(), 0, value, 0); - if (cmp > 0) { - // This node doesn't have `value`, so next nodes can't either - break; - } - stack.push(next.clone()); - } while (next.moveToSibling()); - } - } - - return -1; - } - - private boolean matchNone(PointValues points, byte[] queryLowerPoint, byte[] queryUpperPoint) throws IOException { - assert points.getNumDimensions() == 1; - final ByteArrayComparator comparator = ArrayUtil.getUnsignedComparator(points.getBytesPerDimension()); - return comparator.compare(points.getMinPackedValue(), 0, queryUpperPoint, 0) > 0 - || comparator.compare(points.getMaxPackedValue(), 0, queryLowerPoint, 0) < 0; - } - - private boolean matchAll(PointValues points, byte[] queryLowerPoint, byte[] queryUpperPoint) throws IOException { - assert points.getNumDimensions() == 1; - final ByteArrayComparator 
comparator = ArrayUtil.getUnsignedComparator(points.getBytesPerDimension()); - return comparator.compare(points.getMinPackedValue(), 0, queryLowerPoint, 0) >= 0 - && comparator.compare(points.getMaxPackedValue(), 0, queryUpperPoint, 0) <= 0; - } - - private IteratorAndCount getDocIdSetIteratorOrNullFromBkd(LeafReaderContext context, DocIdSetIterator delegate) throws IOException { - Sort indexSort = context.reader().getMetaData().sort(); - if (indexSort == null || indexSort.getSort().length == 0 || indexSort.getSort()[0].getField().equals(field) == false) { - return null; - } - - final boolean reverse = indexSort.getSort()[0].getReverse(); - - PointValues points = context.reader().getPointValues(field); - if (points == null) { - return null; - } - - if (points.getNumDimensions() != 1) { - return null; - } - - if (points.getBytesPerDimension() != Long.BYTES && points.getBytesPerDimension() != Integer.BYTES) { - return null; - } - - if (points.size() != points.getDocCount()) { - return null; - } - - assert lowerValue <= upperValue; - byte[] queryLowerPoint; - byte[] queryUpperPoint; - if (points.getBytesPerDimension() == Integer.BYTES) { - queryLowerPoint = IntPoint.pack((int) lowerValue).bytes; - queryUpperPoint = IntPoint.pack((int) upperValue).bytes; - } else { - queryLowerPoint = LongPoint.pack(lowerValue).bytes; - queryUpperPoint = LongPoint.pack(upperValue).bytes; - } - if (matchNone(points, queryLowerPoint, queryUpperPoint)) { - return IteratorAndCount.empty(); - } - if (matchAll(points, queryLowerPoint, queryUpperPoint)) { - int maxDoc = context.reader().maxDoc(); - if (points.getDocCount() == maxDoc) { - return IteratorAndCount.all(maxDoc); - } else { - return IteratorAndCount.sparseRange(0, maxDoc, delegate); - } - } - - int minDocId, maxDocId; - final ByteArrayComparator comparator = ArrayUtil.getUnsignedComparator(points.getBytesPerDimension()); - - if (reverse) { - minDocId = nextDoc(points.getPointTree(), queryUpperPoint, false, comparator, true) + 1; - 
} else { - minDocId = nextDoc(points.getPointTree(), queryLowerPoint, true, comparator, false); - if (minDocId == -1) { - // No matches - return IteratorAndCount.empty(); - } - } - - if (reverse) { - maxDocId = nextDoc(points.getPointTree(), queryLowerPoint, true, comparator, true) + 1; - if (maxDocId == 0) { - // No matches - return IteratorAndCount.empty(); - } - } else { - maxDocId = nextDoc(points.getPointTree(), queryUpperPoint, false, comparator, false); - if (maxDocId == -1) { - maxDocId = context.reader().maxDoc(); - } - } - - if (minDocId == maxDocId) { - return IteratorAndCount.empty(); - } - - if ((points.getDocCount() == context.reader().maxDoc())) { - return IteratorAndCount.denseRange(minDocId, maxDocId); - } else { - return IteratorAndCount.sparseRange(minDocId, maxDocId, delegate); - } - } - - private IteratorAndCount getDocIdSetIteratorOrNull(LeafReaderContext context) throws IOException { - if (lowerValue > upperValue) { - return IteratorAndCount.empty(); - } - - SortedNumericDocValues sortedNumericValues = DocValues.getSortedNumeric(context.reader(), field); - NumericDocValues numericValues = DocValues.unwrapSingleton(sortedNumericValues); - if (numericValues != null) { - IteratorAndCount itAndCount = getDocIdSetIteratorOrNullFromBkd(context, numericValues); - if (itAndCount != null) { - return itAndCount; - } - Sort indexSort = context.reader().getMetaData().sort(); - if (indexSort != null && indexSort.getSort().length > 0 && indexSort.getSort()[0].getField().equals(field)) { - - final SortField sortField = indexSort.getSort()[0]; - final SortField.Type sortFieldType = getSortFieldType(sortField); - // The index sort optimization is only supported for Type.INT and Type.LONG - if (sortFieldType == Type.INT || sortFieldType == Type.LONG) { - return getDocIdSetIterator(sortField, sortFieldType, context, numericValues); - } - } - } - return null; - } - - /** - * Computes the document IDs that lie within the range [lowerValue, upperValue] by 
performing - * binary search on the field's doc values. - * - *

Because doc values only allow forward iteration, we need to reload the field comparator - * every time the binary search accesses an earlier element. - * - *

We must also account for missing values when performing the binary search. For this reason, - * we load the {@link FieldComparator} instead of checking the docvalues directly. The returned - * {@link DocIdSetIterator} makes sure to wrap the original docvalues to skip over documents with - * no value. - */ - private IteratorAndCount getDocIdSetIterator( - SortField sortField, - SortField.Type sortFieldType, - LeafReaderContext context, - DocIdSetIterator delegate - ) throws IOException { - long lower = sortField.getReverse() ? upperValue : lowerValue; - long upper = sortField.getReverse() ? lowerValue : upperValue; - int maxDoc = context.reader().maxDoc(); - - // Perform a binary search to find the first document with value >= lower. - ValueComparator comparator = loadComparator(sortField, sortFieldType, lower, context); - int low = 0; - int high = maxDoc - 1; - - while (low <= high) { - int mid = (low + high) >>> 1; - if (comparator.compare(mid) <= 0) { - high = mid - 1; - comparator = loadComparator(sortField, sortFieldType, lower, context); - } else { - low = mid + 1; - } - } - int firstDocIdInclusive = high + 1; - - // Perform a binary search to find the first document with value > upper. - // Since we know that upper >= lower, we can initialize the lower bound - // of the binary search to the result of the previous search. 
- comparator = loadComparator(sortField, sortFieldType, upper, context); - low = firstDocIdInclusive; - high = maxDoc - 1; - - while (low <= high) { - int mid = (low + high) >>> 1; - if (comparator.compare(mid) < 0) { - high = mid - 1; - comparator = loadComparator(sortField, sortFieldType, upper, context); - } else { - low = mid + 1; - } - } - - int lastDocIdExclusive = high + 1; - - if (firstDocIdInclusive == lastDocIdExclusive) { - return IteratorAndCount.empty(); - } - - Object missingValue = sortField.getMissingValue(); - LeafReader reader = context.reader(); - PointValues pointValues = reader.getPointValues(field); - // this is the fix - final long missingLongValue = missingValue == null ? 0L : ((Number) missingValue).longValue(); - // all documents have docValues or missing value falls outside the range - if ((pointValues != null && pointValues.getDocCount() == reader.maxDoc()) - || (missingLongValue < lowerValue || missingLongValue > upperValue)) { - return IteratorAndCount.denseRange(firstDocIdInclusive, lastDocIdExclusive); - } else { - return IteratorAndCount.sparseRange(firstDocIdInclusive, lastDocIdExclusive, delegate); - } - } - - /** Compares the given document's value with a stored reference value. */ - private interface ValueComparator { - int compare(int docID) throws IOException; - } - - private static ValueComparator loadComparator(SortField sortField, SortField.Type type, long topValue, LeafReaderContext context) - throws IOException { - @SuppressWarnings("unchecked") - FieldComparator fieldComparator = (FieldComparator) sortField.getComparator(1, Pruning.NONE); - if (type == Type.INT) { - fieldComparator.setTopValue((int) topValue); - } else { - // Since we support only Type.INT and Type.LONG, assuming LONG for all other cases - fieldComparator.setTopValue(topValue); - } - - LeafFieldComparator leafFieldComparator = fieldComparator.getLeafComparator(context); - int direction = sortField.getReverse() ? 
-1 : 1; - - return doc -> { - int value = leafFieldComparator.compareTop(doc); - return direction * value; - }; - } - - private static SortField.Type getSortFieldType(SortField sortField) { - // We expect the sortField to be SortedNumericSortField - if (sortField instanceof SortedNumericSortField) { - return ((SortedNumericSortField) sortField).getNumericType(); - } else { - return sortField.getType(); - } - } - - /** - * Provides a {@code DocIdSetIterator} along with an accurate count of documents provided by the - * iterator (or {@code -1} if an accurate count is unknown). - */ - private record IteratorAndCount(DocIdSetIterator it, int count) { - - static IteratorAndCount empty() { - return new IteratorAndCount(DocIdSetIterator.empty(), 0); - } - - static IteratorAndCount all(int maxDoc) { - return new IteratorAndCount(DocIdSetIterator.all(maxDoc), maxDoc); - } - - static IteratorAndCount denseRange(int minDoc, int maxDoc) { - return new IteratorAndCount(DocIdSetIterator.range(minDoc, maxDoc), maxDoc - minDoc); - } - - static IteratorAndCount sparseRange(int minDoc, int maxDoc, DocIdSetIterator delegate) { - return new IteratorAndCount(new BoundedDocIdSetIterator(minDoc, maxDoc, delegate), -1); - } - } - - /** - * A doc ID set iterator that wraps a delegate iterator and only returns doc IDs in the range - * [firstDocInclusive, lastDoc). 
- */ - private static class BoundedDocIdSetIterator extends DocIdSetIterator { - private final int firstDoc; - private final int lastDoc; - private final DocIdSetIterator delegate; - - private int docID = -1; - - BoundedDocIdSetIterator(int firstDoc, int lastDoc, DocIdSetIterator delegate) { - assert delegate != null; - this.firstDoc = firstDoc; - this.lastDoc = lastDoc; - this.delegate = delegate; - } - - @Override - public int docID() { - return docID; - } - - @Override - public int nextDoc() throws IOException { - return advance(docID + 1); - } - - @Override - public int advance(int target) throws IOException { - if (target < firstDoc) { - target = firstDoc; - } - - int result = delegate.advance(target); - if (result < lastDoc) { - docID = result; - } else { - docID = NO_MORE_DOCS; - } - return docID; - } - - @Override - public long cost() { - return Math.min(delegate.cost(), lastDoc - firstDoc); - } - } -} diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/QueryToFilterAdapter.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/QueryToFilterAdapter.java index 33403c87a27b1..e8e33655d47c1 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/QueryToFilterAdapter.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/filter/QueryToFilterAdapter.java @@ -17,6 +17,7 @@ import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.IndexOrDocValuesQuery; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery; import org.apache.lucene.search.LeafCollector; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.PointRangeQuery; @@ -27,7 +28,6 @@ import org.apache.lucene.search.Weight; import org.apache.lucene.util.Bits; import org.elasticsearch.common.io.stream.StreamOutput; -import 
org.elasticsearch.lucene.search.XIndexSortSortedNumericDocValuesRangeQuery; import org.elasticsearch.search.aggregations.Aggregator; import org.elasticsearch.xcontent.XContentBuilder; @@ -160,8 +160,8 @@ private static Query unwrap(Query query) { query = ((ConstantScoreQuery) query).getQuery(); continue; } - if (query instanceof XIndexSortSortedNumericDocValuesRangeQuery) { - query = ((XIndexSortSortedNumericDocValuesRangeQuery) query).getFallbackQuery(); + if (query instanceof IndexSortSortedNumericDocValuesRangeQuery) { + query = ((IndexSortSortedNumericDocValuesRangeQuery) query).getFallbackQuery(); continue; } if (query instanceof IndexOrDocValuesQuery) { diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DateFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DateFieldTypeTests.java index 7c27fbc5df5ba..aa444b58bc580 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/DateFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/DateFieldTypeTests.java @@ -20,6 +20,7 @@ import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.IndexOrDocValuesQuery; +import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery; import org.apache.lucene.search.Query; import org.apache.lucene.store.Directory; import org.elasticsearch.ElasticsearchParseException; @@ -41,7 +42,6 @@ import org.elasticsearch.index.query.QueryRewriteContext; import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.index.query.SearchExecutionContextHelper; -import org.elasticsearch.lucene.search.XIndexSortSortedNumericDocValuesRangeQuery; import org.elasticsearch.script.field.DateNanosDocValuesField; import org.elasticsearch.search.aggregations.support.CoreValuesSourceType; @@ -374,7 +374,7 @@ public void testRangeQueryWithIndexSort() { Query pointQuery = LongPoint.newRangeQuery("field", instant1, instant2); Query 
dvQuery = SortedNumericDocValuesField.newSlowRangeQuery("field", instant1, instant2); - Query expected = new XIndexSortSortedNumericDocValuesRangeQuery( + Query expected = new IndexSortSortedNumericDocValuesRangeQuery( "field", instant1, instant2, @@ -383,7 +383,7 @@ public void testRangeQueryWithIndexSort() { assertEquals(expected, ft.rangeQuery(date1, date2, true, true, null, null, null, context)); ft = new DateFieldType("field", false); - expected = new XIndexSortSortedNumericDocValuesRangeQuery("field", instant1, instant2, dvQuery); + expected = new IndexSortSortedNumericDocValuesRangeQuery("field", instant1, instant2, dvQuery); assertEquals(expected, ft.rangeQuery(date1, date2, true, true, null, null, null, context)); } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/NumberFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/NumberFieldTypeTests.java index bd06177e17b93..048e292430c54 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/NumberFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/NumberFieldTypeTests.java @@ -16,7 +16,9 @@ import org.apache.lucene.document.DoublePoint; import org.apache.lucene.document.FloatField; import org.apache.lucene.document.FloatPoint; +import org.apache.lucene.document.IntField; import org.apache.lucene.document.IntPoint; +import org.apache.lucene.document.LongField; import org.apache.lucene.document.LongPoint; import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.index.DirectoryReader; @@ -25,6 +27,7 @@ import org.apache.lucene.sandbox.document.HalfFloatPoint; import org.apache.lucene.search.IndexOrDocValuesQuery; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.Sort; @@ -46,8 +49,6 @@ import 
org.elasticsearch.index.mapper.NumberFieldMapper.NumberType; import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.index.query.SearchExecutionContextHelper; -import org.elasticsearch.lucene.document.NumericField; -import org.elasticsearch.lucene.search.XIndexSortSortedNumericDocValuesRangeQuery; import org.elasticsearch.script.ScriptCompiler; import org.elasticsearch.search.MultiValueMode; import org.elasticsearch.xcontent.XContentBuilder; @@ -157,7 +158,7 @@ private record TermQueryTestCase(NumberType type, Query[] expectedQueries) {} public void testTermQuery() { Query[] expectedIntegerQueries = new Query[] { - NumericField.newExactIntQuery("field", 42), + IntField.newExactQuery("field", 42), IntPoint.newExactQuery("field", 42), SortedNumericDocValuesField.newSlowExactQuery("field", 42) }; List testCases = List.of( @@ -167,7 +168,7 @@ public void testTermQuery() { new TermQueryTestCase( NumberType.LONG, new Query[] { - NumericField.newExactLongQuery("field", 42), + LongField.newExactQuery("field", 42), LongPoint.newExactQuery("field", 42), SortedNumericDocValuesField.newSlowExactQuery("field", 42) } ), @@ -866,8 +867,8 @@ public void doTestIndexSortRangeQueries(NumberType type, Supplier valueS context, isIndexed ); - assertThat(query, instanceOf(XIndexSortSortedNumericDocValuesRangeQuery.class)); - Query fallbackQuery = ((XIndexSortSortedNumericDocValuesRangeQuery) query).getFallbackQuery(); + assertThat(query, instanceOf(IndexSortSortedNumericDocValuesRangeQuery.class)); + Query fallbackQuery = ((IndexSortSortedNumericDocValuesRangeQuery) query).getFallbackQuery(); if (isIndexed) { assertThat(fallbackQuery, instanceOf(IndexOrDocValuesQuery.class)); diff --git a/server/src/test/java/org/elasticsearch/index/query/MatchPhraseQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/MatchPhraseQueryBuilderTests.java index d02c2ce29bfa0..3532751359cfe 100644 --- 
a/server/src/test/java/org/elasticsearch/index/query/MatchPhraseQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/MatchPhraseQueryBuilderTests.java @@ -11,6 +11,7 @@ import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.IndexOrDocValuesQuery; +import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.PhraseQuery; @@ -20,7 +21,6 @@ import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.ParsingException; import org.elasticsearch.core.Strings; -import org.elasticsearch.lucene.search.XIndexSortSortedNumericDocValuesRangeQuery; import org.elasticsearch.test.AbstractQueryTestCase; import java.io.IOException; @@ -107,7 +107,7 @@ protected void doAssertLuceneQuery(MatchPhraseQueryBuilder queryBuilder, Query q .or(instanceOf(PointRangeQuery.class)) .or(instanceOf(IndexOrDocValuesQuery.class)) .or(instanceOf(MatchNoDocsQuery.class)) - .or(instanceOf(XIndexSortSortedNumericDocValuesRangeQuery.class)) + .or(instanceOf(IndexSortSortedNumericDocValuesRangeQuery.class)) ); } diff --git a/server/src/test/java/org/elasticsearch/index/query/MultiMatchQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/MultiMatchQueryBuilderTests.java index 2f89e09b735a0..7f421a9667be5 100644 --- a/server/src/test/java/org/elasticsearch/index/query/MultiMatchQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/MultiMatchQueryBuilderTests.java @@ -16,6 +16,7 @@ import org.apache.lucene.search.DisjunctionMaxQuery; import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.IndexOrDocValuesQuery; +import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.PhraseQuery; @@ 
-28,7 +29,6 @@ import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.core.Strings; import org.elasticsearch.index.query.MultiMatchQueryBuilder.Type; -import org.elasticsearch.lucene.search.XIndexSortSortedNumericDocValuesRangeQuery; import org.elasticsearch.test.AbstractQueryTestCase; import org.hamcrest.Matchers; @@ -172,7 +172,7 @@ protected void doAssertLuceneQuery(MultiMatchQueryBuilder queryBuilder, Query qu instanceOf(PointRangeQuery.class), instanceOf(IndexOrDocValuesQuery.class), instanceOf(PrefixQuery.class), - instanceOf(XIndexSortSortedNumericDocValuesRangeQuery.class) + instanceOf(IndexSortSortedNumericDocValuesRangeQuery.class) ) ) ); diff --git a/server/src/test/java/org/elasticsearch/index/query/TermQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/TermQueryBuilderTests.java index d1428d95788f8..9d5db1b3c32f8 100644 --- a/server/src/test/java/org/elasticsearch/index/query/TermQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/TermQueryBuilderTests.java @@ -13,6 +13,7 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.AutomatonQuery; import org.apache.lucene.search.IndexOrDocValuesQuery; +import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.PointRangeQuery; import org.apache.lucene.search.Query; @@ -21,7 +22,6 @@ import org.elasticsearch.index.mapper.DateFieldMapper; import org.elasticsearch.index.mapper.FieldTypeTestCase; import org.elasticsearch.index.mapper.MappedFieldType; -import org.elasticsearch.lucene.search.XIndexSortSortedNumericDocValuesRangeQuery; import org.elasticsearch.xcontent.json.JsonStringEncoder; import org.hamcrest.CoreMatchers; @@ -96,7 +96,7 @@ protected void doAssertLuceneQuery(TermQueryBuilder queryBuilder, Query query, S .or(instanceOf(MatchNoDocsQuery.class)) .or(instanceOf(AutomatonQuery.class)) 
.or(instanceOf(IndexOrDocValuesQuery.class)) - .or(instanceOf(XIndexSortSortedNumericDocValuesRangeQuery.class)) + .or(instanceOf(IndexSortSortedNumericDocValuesRangeQuery.class)) ); MappedFieldType mapper = context.getFieldType(queryBuilder.fieldName()); if (query instanceof TermQuery termQuery) { diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/TimeSeriesSourceOperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/TimeSeriesSourceOperatorTests.java index a5cd0ded41905..15ae1d506a2fe 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/TimeSeriesSourceOperatorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/TimeSeriesSourceOperatorTests.java @@ -9,6 +9,7 @@ import org.apache.lucene.document.DoubleDocValuesField; import org.apache.lucene.document.FloatDocValuesField; +import org.apache.lucene.document.LongField; import org.apache.lucene.document.LongPoint; import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.SortedDocValuesField; @@ -51,7 +52,6 @@ import org.elasticsearch.index.mapper.NumberFieldMapper; import org.elasticsearch.index.mapper.RoutingPathFields; import org.elasticsearch.index.mapper.TimeSeriesIdFieldMapper; -import org.elasticsearch.lucene.document.NumericField; import org.hamcrest.Matcher; import org.junit.After; @@ -300,7 +300,7 @@ public void testMatchNone() throws Exception { } try (var reader = writer.getReader()) { var ctx = new LuceneSourceOperatorTests.MockShardContext(reader, 0); - Query query = randomFrom(NumericField.newRangeLongQuery("@timestamp", 0, t0), new MatchNoDocsQuery()); + Query query = randomFrom(LongField.newRangeQuery("@timestamp", 0, t0), new MatchNoDocsQuery()); var timeSeriesFactory = TimeSeriesSourceOperatorFactory.create( Integer.MAX_VALUE, randomIntBetween(1, 1024), diff --git 
a/x-pack/plugin/mapper-unsigned-long/src/main/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongFieldMapper.java b/x-pack/plugin/mapper-unsigned-long/src/main/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongFieldMapper.java index 7bd1dd4d64582..5061c8e303514 100644 --- a/x-pack/plugin/mapper-unsigned-long/src/main/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongFieldMapper.java +++ b/x-pack/plugin/mapper-unsigned-long/src/main/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongFieldMapper.java @@ -13,6 +13,7 @@ import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.StoredField; import org.apache.lucene.search.IndexOrDocValuesQuery; +import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.util.BytesRef; @@ -47,7 +48,6 @@ import org.elasticsearch.index.mapper.TimeSeriesParams.MetricType; import org.elasticsearch.index.mapper.ValueFetcher; import org.elasticsearch.index.query.SearchExecutionContext; -import org.elasticsearch.lucene.search.XIndexSortSortedNumericDocValuesRangeQuery; import org.elasticsearch.search.DocValueFormat; import org.elasticsearch.search.aggregations.support.TimeSeriesValuesSourceType; import org.elasticsearch.search.aggregations.support.ValuesSourceType; @@ -364,7 +364,7 @@ public Query rangeQuery( Query dvQuery = SortedNumericDocValuesField.newSlowRangeQuery(name(), l, u); query = new IndexOrDocValuesQuery(query, dvQuery); if (context.indexSortedOnField(name())) { - query = new XIndexSortSortedNumericDocValuesRangeQuery(name(), l, u, query); + query = new IndexSortSortedNumericDocValuesRangeQuery(name(), l, u, query); } } return query; From 724f28771eecb142f102f3ddf2c5b6bd1b052a5c Mon Sep 17 00:00:00 2001 From: Chris Hegarty <62058229+ChrisHegarty@users.noreply.github.com> Date: Thu, 5 Jun 2025 17:32:06 +0100 Subject: [PATCH 069/184] Fix lucene_snapshot 
build - remove XIndexSortSortedNumericDocValuesRangeQuery (#128988) Remove usage of XIndexSortSortedNumericDocValuesRangeQuery - since we don't need it with Lucene 10.3, and it has been removed from the ES codebase. --- .../org/elasticsearch/index/mapper/DateFieldMapper.java | 2 +- .../org/elasticsearch/index/mapper/DateFieldTypeTests.java | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java index 9100b11c0026b..b45f99af8309a 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java @@ -868,7 +868,7 @@ public Query rangeQuery( query = SortedNumericDocValuesField.newSlowRangeQuery(name(), l, u); } if (hasDocValues() && context.indexSortedOnField(name())) { - query = new XIndexSortSortedNumericDocValuesRangeQuery(name(), l, u, query); + query = new IndexSortSortedNumericDocValuesRangeQuery(name(), l, u, query); } return query; } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DateFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DateFieldTypeTests.java index 454da5c88e59d..53880ebcef011 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/DateFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/DateFieldTypeTests.java @@ -412,7 +412,7 @@ public void testRangeQuerySubseconds() throws IOException { Query expected = new IndexOrDocValuesQuery( LongPoint.newRangeQuery("field", instant1, instant2), SortedNumericDocValuesField.newSlowRangeQuery("field", instant1, instant2) - ); + ).rewrite(newSearcher(new MultiReader())); assertEquals(expected, ft.rangeQuery(date1, date2, true, true, null, null, null, context).rewrite(newSearcher(new MultiReader()))); MappedFieldType ft2 = new DateFieldType("field", false); @@ -454,7 +454,7 @@ 
public void testRangeQueryMillis() throws IOException { Query expected = new IndexOrDocValuesQuery( LongPoint.newRangeQuery("field", instant1, instant2), SortedNumericDocValuesField.newSlowRangeQuery("field", instant1, instant2) - ); + ).rewrite(newSearcher(new MultiReader())); assertEquals(expected, ft.rangeQuery(instant1, instant2, true, true, context).rewrite(newSearcher(new MultiReader()))); DateFieldType ft2 = new DateFieldType("field", false); @@ -480,7 +480,7 @@ public void testRangeQueryNanos() throws IOException { Query expected = new IndexOrDocValuesQuery( LongPoint.newRangeQuery("field", instant1, instant2), SortedNumericDocValuesField.newSlowRangeQuery("field", instant1, instant2) - ); + ).rewrite(newSearcher(new MultiReader())); assertEquals(expected, ft.rangeQuery(instant1, instant2, true, true, context).rewrite(newSearcher(new MultiReader()))); DateFieldType ft2 = new DateFieldType("field", false, Resolution.NANOSECONDS); From 3cb80ac60fd92f622818d91bc0b11ad18a577bb8 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Fri, 6 Jun 2025 06:14:12 +0000 Subject: [PATCH 070/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-d03071b81b5 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index e2bfa1d9f13dd..8d1a83ac0f2c9 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-f2eb4d9fe29 +lucene = 10.3.0-snapshot-d03071b81b5 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index ddd4a4a9a9190..83fcf135691e1 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2953,129 +2953,129 @@ - - 
- + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 76a05dd0cf296a8727c2c5f7c6a864fe6965fc4f Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Sat, 7 Jun 2025 06:14:28 +0000 Subject: [PATCH 071/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-58ec9a96ea7 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 8d1a83ac0f2c9..688a28a767b4d 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-d03071b81b5 +lucene = 10.3.0-snapshot-58ec9a96ea7 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 83fcf135691e1..34ede11471135 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2953,129 +2953,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 208b85c6300bee21c7b75ec11d58c6d67c4f3f66 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Sun, 8 Jun 2025 06:14:50 +0000 Subject: [PATCH 072/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-58ec9a96ea7 --- gradle/verification-metadata.xml | 50 ++++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff 
--git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 34ede11471135..6b882721e3ba5 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2955,127 +2955,127 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + From 7d2120ef5b8851c02cc63b108916205e804e8887 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Mon, 9 Jun 2025 06:13:38 +0000 Subject: [PATCH 073/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-58ec9a96ea7 --- gradle/verification-metadata.xml | 50 ++++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 6b882721e3ba5..1fd82cc56da91 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2955,127 +2955,127 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + From 7aba8fbc3ed4a6e6b9b5ddf1763614c1b0d07e33 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Tue, 10 Jun 2025 06:13:43 +0000 Subject: [PATCH 074/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-58ec9a96ea7 --- gradle/verification-metadata.xml | 50 ++++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 1fd82cc56da91..5fc8388d48321 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2955,127 +2955,127 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + From 0b66ad6b796a5b9860b321a340ae868b775636c0 Mon Sep 17 00:00:00 2001 From: Mayya Sharipova Date: Tue, 10 Jun 2025 11:23:35 -0400 Subject: [PATCH 075/184] Track vector disk usage by vectorReader.getOffHeapByteSize (#128326) Currently IndexDiskUsageAnalyzer reports disk usage of vectors by - Iterating through document 
values to access vector data - Performing sample searches to force loading of the index structures - using a sampling approach (only visiting a subset of documents based on log scale) - tracking all bytes read during these operations One problem with this approach is that it is very slow. Another problem is that modifications to search algorithms and different encodings make it difficult to write definitive tests and assert expected results, hence a test failure such as #127689. This modifies IndexDiskUsageAnalyzer for vectors to use vectorReader.getOffHeapByteSize, a method newly introduced in Lucene 10.3. As all vector files are off-heap, we can rely on this method to report the precise disk usage. Closes #127689 --- muted-tests.yml | 3 - .../diskusage/IndexDiskUsageAnalyzer.java | 69 ++++--------------- .../IndexDiskUsageAnalyzerTests.java | 29 ++++++-- 3 files changed, 35 insertions(+), 66 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index 758bfa607e9ef..e8579536675d2 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -399,9 +399,6 @@ tests: - class: org.elasticsearch.xpack.ccr.action.ShardFollowTaskReplicationTests method: testChangeFollowerHistoryUUID issue: https://github.com/elastic/elasticsearch/issues/127680 -- class: org.elasticsearch.action.admin.indices.diskusage.IndexDiskUsageAnalyzerTests - method: testKnnVectors - issue: https://github.com/elastic/elasticsearch/issues/127689 - class: org.elasticsearch.backwards.MixedClusterClientYamlTestSuiteIT method: test {p0=search/350_point_in_time/point-in-time with index filter} issue: https://github.com/elastic/elasticsearch/issues/127741 diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/diskusage/IndexDiskUsageAnalyzer.java b/server/src/main/java/org/elasticsearch/action/admin/indices/diskusage/IndexDiskUsageAnalyzer.java index 9d2595732c585..deae3d9f2610a 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/diskusage/IndexDiskUsageAnalyzer.java +++
b/server/src/main/java/org/elasticsearch/action/admin/indices/diskusage/IndexDiskUsageAnalyzer.java @@ -25,16 +25,13 @@ import org.apache.lucene.codecs.TermVectorsReader; import org.apache.lucene.codecs.lucene103.Lucene103PostingsFormat; import org.apache.lucene.index.BinaryDocValues; -import org.apache.lucene.index.ByteVectorValues; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.Fields; -import org.apache.lucene.index.FloatVectorValues; import org.apache.lucene.index.IndexCommit; import org.apache.lucene.index.IndexOptions; -import org.apache.lucene.index.KnnVectorValues; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.PointValues; @@ -47,8 +44,6 @@ import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.KnnCollector; -import org.apache.lucene.search.TopKnnCollector; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FilterDirectory; import org.apache.lucene.store.IOContext; @@ -553,7 +548,7 @@ void visitField(Fields vectors, String fieldName) throws IOException { } } - void analyzeKnnVectors(SegmentReader reader, IndexDiskUsageStats stats) throws IOException { + void analyzeKnnVectors(SegmentReader reader, IndexDiskUsageStats stats) { KnnVectorsReader vectorReader = reader.getVectorReader(); if (vectorReader == null) { return; @@ -562,57 +557,19 @@ void analyzeKnnVectors(SegmentReader reader, IndexDiskUsageStats stats) throws I cancellationChecker.checkForCancellation(); directory.resetBytesRead(); if (field.getVectorDimension() > 0) { - switch (field.getVectorEncoding()) { - case BYTE -> { - iterateDocValues(reader.maxDoc(), () -> vectorReader.getByteVectorValues(field.name).iterator(), vectors 
-> { - cancellationChecker.logEvent(); - vectors.index(); - }); - - // do a couple of randomized searches to figure out min and max offsets of index file - ByteVectorValues vectorValues = vectorReader.getByteVectorValues(field.name); - KnnVectorValues.DocIndexIterator iterator = vectorValues.iterator(); - final KnnCollector collector = new TopKnnCollector( - Math.max(1, Math.min(100, vectorValues.size() - 1)), - Integer.MAX_VALUE - ); - int numDocsToVisit = reader.maxDoc() < 10 ? reader.maxDoc() : 10 * (int) Math.log10(reader.maxDoc()); - int skipFactor = Math.max(reader.maxDoc() / numDocsToVisit, 1); - for (int i = 0; i < reader.maxDoc(); i += skipFactor) { - if ((i = iterator.advance(i)) == DocIdSetIterator.NO_MORE_DOCS) { - break; - } - cancellationChecker.checkForCancellation(); - vectorReader.search(field.name, vectorValues.vectorValue(iterator.index()), collector, null); - } - stats.addKnnVectors(field.name, directory.getBytesRead()); - } - case FLOAT32 -> { - iterateDocValues(reader.maxDoc(), () -> vectorReader.getFloatVectorValues(field.name).iterator(), vectors -> { - cancellationChecker.logEvent(); - vectors.index(); - }); - - // do a couple of randomized searches to figure out min and max offsets of index file - FloatVectorValues vectorValues = vectorReader.getFloatVectorValues(field.name); - KnnVectorValues.DocIndexIterator iterator = vectorValues.iterator(); - final KnnCollector collector = new TopKnnCollector( - Math.max(1, Math.min(100, vectorValues.size() - 1)), - Integer.MAX_VALUE - ); - int numDocsToVisit = reader.maxDoc() < 10 ? 
reader.maxDoc() : 10 * (int) Math.log10(reader.maxDoc()); - int skipFactor = Math.max(reader.maxDoc() / numDocsToVisit, 1); - for (int i = 0; i < reader.maxDoc(); i += skipFactor) { - if ((i = iterator.advance(i)) == DocIdSetIterator.NO_MORE_DOCS) { - break; - } - cancellationChecker.checkForCancellation(); - vectorReader.search(field.name, vectorValues.vectorValue(iterator.index()), collector, null); - } - stats.addKnnVectors(field.name, directory.getBytesRead()); - } + Map offHeap = vectorReader.getOffHeapByteSize(field); + long totalSize = 0; + for (var entry : offHeap.entrySet()) { + totalSize += entry.getValue(); } - + long vectorsSize = offHeap.getOrDefault("vec", 0L); + if (vectorsSize == 0L) { + // This can happen if .vec file is opened with directIO + // calculate the size of vectors manually + vectorsSize = field.getVectorDimension() * field.getVectorEncoding().byteSize; + totalSize += vectorsSize; + } + stats.addKnnVectors(field.name, totalSize); } } } diff --git a/server/src/test/java/org/elasticsearch/action/admin/indices/diskusage/IndexDiskUsageAnalyzerTests.java b/server/src/test/java/org/elasticsearch/action/admin/indices/diskusage/IndexDiskUsageAnalyzerTests.java index 58b847d7a87a1..57de001ef90e5 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/indices/diskusage/IndexDiskUsageAnalyzerTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/indices/diskusage/IndexDiskUsageAnalyzerTests.java @@ -24,6 +24,7 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.IntPoint; +import org.apache.lucene.document.KnnByteVectorField; import org.apache.lucene.document.KnnFloatVectorField; import org.apache.lucene.document.LatLonShape; import org.apache.lucene.document.LongPoint; @@ -67,6 +68,7 @@ import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.core.IOUtils; import org.elasticsearch.index.codec.postings.ES812PostingsFormat; +import 
org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.index.store.LuceneFilesExtensions; import org.elasticsearch.test.ESTestCase; @@ -254,15 +256,27 @@ public void testKnnVectors() throws Exception { VectorSimilarityFunction similarity = randomFrom(VectorSimilarityFunction.values()); int numDocs = between(1000, 5000); int dimension = between(10, 200); + DenseVectorFieldMapper.ElementType elementType = randomFrom(DenseVectorFieldMapper.ElementType.values()); - indexRandomly(dir, codec, numDocs, doc -> { - float[] vector = randomVector(dimension); - doc.add(new KnnFloatVectorField("vector", vector, similarity)); - }); + if (elementType == DenseVectorFieldMapper.ElementType.FLOAT) { + indexRandomly(dir, codec, numDocs, doc -> { + float[] vector = randomVector(dimension); + doc.add(new KnnFloatVectorField("vector", vector, similarity)); + }); + } else { + indexRandomly(dir, codec, numDocs, doc -> { + byte[] vector = new byte[dimension]; + random().nextBytes(vector); + doc.add(new KnnByteVectorField("vector", vector, similarity)); + }); + } final IndexDiskUsageStats stats = IndexDiskUsageAnalyzer.analyze(testShardId(), lastCommit(dir), () -> {}); logger.info("--> stats {}", stats); - long dataBytes = (long) numDocs * dimension * Float.BYTES; // size of flat vector data + // expected size of flat vector data + long dataBytes = elementType == DenseVectorFieldMapper.ElementType.FLOAT + ? 
((long) numDocs * dimension * Float.BYTES) + : ((long) numDocs * dimension); long indexBytesEstimate = (long) numDocs * (Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN / 4); // rough size of HNSW graph assertThat("numDocs=" + numDocs + ";dimension=" + dimension, stats.total().getKnnVectorsBytes(), greaterThan(dataBytes)); long connectionOverhead = stats.total().getKnnVectorsBytes() - dataBytes; @@ -762,8 +776,9 @@ private static void assertStats(IndexDiskUsageStats actualStats, IndexDiskUsageS 0.01, 2048 ); - - assertFieldStats(field, "knn vectors", actualField.getKnnVectorsBytes(), expectedField.getKnnVectorsBytes(), 0.01, 1024); + // Allow difference of a file block size for knn vectors + // we get knn data usage from getOffHeapByteSize but when written on disk it can be rounded to the next block size + assertFieldStats(field, "knn vectors", actualField.getKnnVectorsBytes(), expectedField.getKnnVectorsBytes(), 0.01, 4096); } // We are not able to collect per field stats for stored, vector, points, and norms IndexDiskUsageStats.PerFieldDiskUsage actualTotal = actualStats.total(); From 1f39d3480de2b53c5766b9b5b25f87c55f7b2072 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 11 Jun 2025 06:14:19 +0000 Subject: [PATCH 076/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-0c75f3f6256 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 688a28a767b4d..e33b0490282bf 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-58ec9a96ea7 +lucene = 10.3.0-snapshot-0c75f3f6256 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 
5fc8388d48321..0319db061fa9c 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2953,129 +2953,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From ded400f4f308d34f73051ccf190b7bd3951474da Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 12 Jun 2025 06:14:01 +0000 Subject: [PATCH 077/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-44b58bee764 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index e33b0490282bf..52f4a38540dff 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-0c75f3f6256 +lucene = 10.3.0-snapshot-44b58bee764 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 0319db061fa9c..a1316be143aff 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2953,129 +2953,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 83f370f191c5daeafdbec3e70e54c0e2785c59c7 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Fri, 13 Jun 2025 06:14:05 +0000 Subject: [PATCH 078/184] [Automated] Update Lucene snapshot to 
10.3.0-snapshot-57051690a8d --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 52f4a38540dff..d6bea9c4c1e00 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-44b58bee764 +lucene = 10.3.0-snapshot-57051690a8d bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 9e43f961c040d..0d71d812e04c8 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2973,129 +2973,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 8e74111d5ebb71bbbf0cc96aa66e7f4b653744ad Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Sat, 14 Jun 2025 06:13:57 +0000 Subject: [PATCH 079/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-57051690a8d --- gradle/verification-metadata.xml | 50 ++++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 0d71d812e04c8..7f3f8fa7f269b 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2975,127 +2975,127 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + From 7fe3299430541278dd37f845a0b7b8464ee991a3 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Sun, 15 Jun 2025 06:17:32 +0000 Subject: [PATCH 080/184] [Automated] 
Update Lucene snapshot to 10.3.0-snapshot-02f5dc30e2f --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index d6bea9c4c1e00..d04ba64eb954c 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-57051690a8d +lucene = 10.3.0-snapshot-02f5dc30e2f bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 7f3f8fa7f269b..2159f520abc7f 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2973,129 +2973,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From f65477e7264f752da8871f127c4281fcee59c639 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Mon, 16 Jun 2025 06:14:08 +0000 Subject: [PATCH 081/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-2503e832c17 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index d04ba64eb954c..5e0afb699a4cb 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-02f5dc30e2f +lucene = 10.3.0-snapshot-2503e832c17 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff 
--git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 2159f520abc7f..5b4d27f463cc5 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -2973,129 +2973,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 5a2d69b4407ec069924be8034278792d482a6419 Mon Sep 17 00:00:00 2001 From: ChrisHegarty Date: Mon, 16 Jun 2025 11:10:40 +0100 Subject: [PATCH 082/184] Fix PushQueriesIT for new query rewrite optimizations in Lucene 10.3 --- .../xpack/esql/qa/single_node/PushQueriesIT.java | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/PushQueriesIT.java b/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/PushQueriesIT.java index da3314310d5f5..667ef38b3c0fd 100644 --- a/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/PushQueriesIT.java +++ b/x-pack/plugin/esql/qa/server/single-node/src/javaRestTest/java/org/elasticsearch/xpack/esql/qa/single_node/PushQueriesIT.java @@ -146,9 +146,10 @@ public void testEqualityOrOther() throws IOException { FROM test | WHERE test == "%value" OR foo == 2 """; + // query rewrite optimizations apply to foo, since it's query value is always outside the range of indexed values String luceneQuery = switch (type) { - case AUTO, TEXT_WITH_KEYWORD -> "(#test.keyword:%value -_ignored:test.keyword) foo:[2 TO 2]"; - case KEYWORD -> "test:%value foo:[2 TO 2]"; + case AUTO, TEXT_WITH_KEYWORD -> "#test.keyword:%value -_ignored:test.keyword"; + case KEYWORD -> "test:%value"; case CONSTANT_KEYWORD, 
MATCH_ONLY_TEXT_WITH_KEYWORD -> "*:*"; case SEMANTIC_TEXT_WITH_KEYWORD -> "FieldExistsQuery [field=_primary_term]"; }; @@ -165,16 +166,17 @@ public void testEqualityAndOther() throws IOException { FROM test | WHERE test == "%value" AND foo == 1 """; + // query rewrite optimizations apply to foo, since its query value is always within the range of indexed values List luceneQueryOptions = switch (type) { - case AUTO, TEXT_WITH_KEYWORD -> List.of("#test.keyword:%value -_ignored:test.keyword #foo:[1 TO 1]"); - case KEYWORD -> List.of("#test:%value #foo:[1 TO 1]"); - case CONSTANT_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD -> List.of("foo:[1 TO 1]"); + case AUTO, TEXT_WITH_KEYWORD -> List.of("#test.keyword:%value -_ignored:test.keyword"); + case KEYWORD -> List.of("test:%value"); + case CONSTANT_KEYWORD, MATCH_ONLY_TEXT_WITH_KEYWORD -> List.of("*:*"); case SEMANTIC_TEXT_WITH_KEYWORD -> /* * single_value_match is here because there are extra documents hiding in the index * that don't have the `foo` field.
*/ - List.of("#foo:[1 TO 1] #single_value_match(foo)", "foo:[1 TO 1]"); + List.of("#FieldExistsQuery [field=foo] #single_value_match(foo)", "foo:[1 TO 1]"); }; ComputeSignature dataNodeSignature = switch (type) { case AUTO, CONSTANT_KEYWORD, KEYWORD, TEXT_WITH_KEYWORD -> ComputeSignature.FILTER_IN_QUERY; From eb5a22d931299ad801dcc82f82ff012fec435c18 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Tue, 17 Jun 2025 06:13:18 +0000 Subject: [PATCH 083/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-c92e06c099d --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 160 ++++++++++++------------ 2 files changed, 81 insertions(+), 81 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 5e0afb699a4cb..cde5f68061969 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-2503e832c17 +lucene = 10.3.0-snapshot-c92e06c099d bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index e06fd6ba1408b..25c8c94ebfd80 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -1153,16 +1153,16 @@ - - - - - + + + + + @@ -3083,129 +3083,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From e36c5e36cfb260cb4509c685b6bb93d1d9d413e4 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 18 Jun 2025 06:14:07 +0000 Subject: [PATCH 084/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-6e2f1e6608b --- build-tools-internal/version.properties | 2 +- 
gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index cde5f68061969..5aeea05d76ad6 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-c92e06c099d +lucene = 10.3.0-snapshot-6e2f1e6608b bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 25c8c94ebfd80..f0ca5c270acce 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3083,129 +3083,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 5d08a4b66587d81ad7de40bd5ed835e19e3681cb Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 19 Jun 2025 06:14:04 +0000 Subject: [PATCH 085/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-74705f78343 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 5aeea05d76ad6..19604bb01d3e5 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-6e2f1e6608b +lucene = 10.3.0-snapshot-74705f78343 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 
f0ca5c270acce..f00e9ebc54ea0 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3083,129 +3083,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 1557bcb00e98830615df7e5d7169582ac0735fbf Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Fri, 20 Jun 2025 06:15:04 +0000 Subject: [PATCH 086/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-f96afd7390f --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 19604bb01d3e5..85b8e216fdb74 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-74705f78343 +lucene = 10.3.0-snapshot-f96afd7390f bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index f00e9ebc54ea0..110d7b8641415 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3083,129 +3083,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 70bb11a1c74f6a7124d95747a8aa3bbf98541c53 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Sat, 21 Jun 2025 06:13:46 +0000 Subject: [PATCH 087/184] [Automated] Update Lucene snapshot to 
10.3.0-snapshot-5ef689e1028 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 85b8e216fdb74..5589bf48225f5 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-f96afd7390f +lucene = 10.3.0-snapshot-5ef689e1028 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 110d7b8641415..4df3538eecbdc 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3083,129 +3083,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From fce65d7bc4967670191d38a97d092b1183961e91 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Sun, 22 Jun 2025 06:13:45 +0000 Subject: [PATCH 088/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-20a34933748 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 5589bf48225f5..2ac998fe04f01 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-5ef689e1028 +lucene = 10.3.0-snapshot-20a34933748 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git 
a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 4df3538eecbdc..a2913179e4b98 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3083,129 +3083,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 33afbacd9ff80576640b5b7bef22257a3831941c Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Mon, 23 Jun 2025 06:13:24 +0000 Subject: [PATCH 089/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-0461da4b056 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 2ac998fe04f01..1ee1cbee11e46 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-20a34933748 +lucene = 10.3.0-snapshot-0461da4b056 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index a2913179e4b98..a1355315e9614 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3083,129 +3083,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 91215247275009511b6ca3fbc20739d606545584 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Tue, 24 Jun 2025 06:14:07 +0000 
Subject: [PATCH 090/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-f93a0ed318b --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 1ee1cbee11e46..bd6d61cab6d29 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-0461da4b056 +lucene = 10.3.0-snapshot-f93a0ed318b bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index a1355315e9614..24cd23dd700e8 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3083,129 +3083,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From ecc1e825d62308c36aefef0db31cd003e9c3e706 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 25 Jun 2025 06:15:41 +0000 Subject: [PATCH 091/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-f93a0ed318b --- gradle/verification-metadata.xml | 50 ++++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 24cd23dd700e8..875312a6300c0 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3085,127 +3085,127 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + From bb53952f5ee4255afd8f0b58b8d6d74c8daa2bba Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 26 
Jun 2025 06:14:46 +0000 Subject: [PATCH 092/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-52597fa4cac --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index bd6d61cab6d29..7bb424aa6b071 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-f93a0ed318b +lucene = 10.3.0-snapshot-52597fa4cac bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 875312a6300c0..dcea9aff14c24 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3083,129 +3083,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 90c39b18caf7c0ad91687f9e54d6b80a1c72841b Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Fri, 27 Jun 2025 06:14:26 +0000 Subject: [PATCH 093/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-360eeb68ee0 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 7bb424aa6b071..c231263fba8d8 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.1.0 -lucene = 10.3.0-snapshot-52597fa4cac +lucene = 10.3.0-snapshot-360eeb68ee0 bundled_jdk_vendor = openjdk 
bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index dcea9aff14c24..1bedfcccac659 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3083,129 +3083,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From aaaee99caeba3ac38f239291cef37f972ce45f74 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Sat, 28 Jun 2025 06:15:20 +0000 Subject: [PATCH 094/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-360eeb68ee0 --- gradle/verification-metadata.xml | 50 ++++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index d8db3c42b16ec..c5a6b15a4d83e 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3085,127 +3085,127 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + From 6f708396bb6b681d08b383c7aa8ce2014f8acd93 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Sun, 29 Jun 2025 06:12:53 +0000 Subject: [PATCH 095/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-b6fb10d5f93 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index f3904096b471a..e4abaf09c905e 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.2.0 -lucene = 10.3.0-snapshot-360eeb68ee0 +lucene = 10.3.0-snapshot-b6fb10d5f93 
bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index c5a6b15a4d83e..f6e7adf305c19 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3083,129 +3083,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 906f2079504908ffe9aa82df56f668971df35a0f Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Mon, 30 Jun 2025 06:15:44 +0000 Subject: [PATCH 096/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-4fcb6d6d50d --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index e4abaf09c905e..366ef5c80e921 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.2.0 -lucene = 10.3.0-snapshot-b6fb10d5f93 +lucene = 10.3.0-snapshot-4fcb6d6d50d bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index f6e7adf305c19..99d3f4fd21f1a 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3083,129 +3083,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 105f93c43613ccaa2198db5661a38d3e9a802a6a 
Mon Sep 17 00:00:00 2001 From: Simon Cooper Date: Mon, 30 Jun 2025 11:04:19 +0100 Subject: [PATCH 097/184] Catch FileSystemException when opening direct IO (#128834) --- .../elasticsearch/index/store/DirectIOIT.java | 19 +++++--- .../index/store/FsDirectoryFactory.java | 44 +++++++++++++------ 2 files changed, 45 insertions(+), 18 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/index/store/DirectIOIT.java b/server/src/internalClusterTest/java/org/elasticsearch/index/store/DirectIOIT.java index 02e17e3395760..600555320dc02 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/index/store/DirectIOIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/index/store/DirectIOIT.java @@ -43,6 +43,8 @@ @LuceneTestCase.SuppressCodecs("*") // only use our own codecs public class DirectIOIT extends ESIntegTestCase { + private static boolean SUPPORTED; + @BeforeClass public static void checkSupported() { assumeTrue("Direct IO is not enabled", ES818BinaryQuantizedVectorsFormat.USE_DIRECT_IO); @@ -50,8 +52,9 @@ public static void checkSupported() { Path path = createTempDir("directIOProbe"); try (Directory dir = open(path); IndexOutput out = dir.createOutput("out", IOContext.DEFAULT)) { out.writeString("test"); + SUPPORTED = true; } catch (IOException e) { - assumeNoException("test requires filesystem that supports Direct IO", e); + SUPPORTED = false; } } @@ -109,15 +112,21 @@ static void assertBBQIndexType(String type) { @TestLogging(value = "org.elasticsearch.index.store.FsDirectoryFactory:DEBUG", reason = "to capture trace logging for direct IO") public void testDirectIOUsed() { try (MockLog mockLog = MockLog.capture(FsDirectoryFactory.class)) { - // we're just looking for some evidence direct IO is used - mockLog.addExpectation( - new MockLog.PatternSeenEventExpectation( + // we're just looking for some evidence direct IO is used (or not) + MockLog.LoggingExpectation expectation = SUPPORTED + ? 
new MockLog.PatternSeenEventExpectation( "Direct IO used", FsDirectoryFactory.class.getCanonicalName(), Level.DEBUG, "Opening .*\\.vec with direct IO" ) - ); + : new MockLog.PatternSeenEventExpectation( + "Direct IO not used", + FsDirectoryFactory.class.getCanonicalName(), + Level.DEBUG, + "Could not open .*\\.vec with direct IO" + ); + mockLog.addExpectation(expectation); indexVectors(); diff --git a/server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java b/server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java index a5b35b64e7664..bcc09dbe65f9e 100644 --- a/server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java +++ b/server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java @@ -22,6 +22,7 @@ import org.apache.lucene.store.NativeFSLockFactory; import org.apache.lucene.store.ReadAdvice; import org.apache.lucene.store.SimpleFSLockFactory; +import org.elasticsearch.common.Strings; import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Setting.Property; import org.elasticsearch.common.util.FeatureFlag; @@ -36,6 +37,7 @@ import org.elasticsearch.plugins.IndexStorePlugin; import java.io.IOException; +import java.nio.file.FileSystemException; import java.nio.file.Files; import java.nio.file.Path; import java.util.HashSet; @@ -157,22 +159,38 @@ protected boolean useDirectIO(String name, IOContext context, OptionalLong fileL @Override public IndexInput openInput(String name, IOContext context) throws IOException { + Throwable directIOException = null; if (directIODelegate != null && context.hints().contains(DirectIOHint.INSTANCE)) { ensureOpen(); ensureCanRead(name); - Log.debug("Opening {} with direct IO", name); - return directIODelegate.openInput(name, context); - } else if (useDelegate(name, context)) { - // we need to do these checks on the outer directory since the inner doesn't know about pending deletes - ensureOpen(); - ensureCanRead(name); - // we only 
use the mmap to open inputs. Everything else is managed by the NIOFSDirectory otherwise - // we might run into trouble with files that are pendingDelete in one directory but still - // listed in listAll() from the other. We on the other hand don't want to list files from both dirs - // and intersect for perf reasons. - return delegate.openInput(name, context); - } else { - return super.openInput(name, context); + try { + Log.debug("Opening {} with direct IO", name); + return directIODelegate.openInput(name, context); + } catch (FileSystemException e) { + Log.debug(() -> Strings.format("Could not open %s with direct IO", name), e); + directIOException = e; + // and fallthrough to normal opening below + } + } + + try { + if (useDelegate(name, context)) { + // we need to do these checks on the outer directory since the inner doesn't know about pending deletes + ensureOpen(); + ensureCanRead(name); + // we only use the mmap to open inputs. Everything else is managed by the NIOFSDirectory otherwise + // we might run into trouble with files that are pendingDelete in one directory but still + // listed in listAll() from the other. We on the other hand don't want to list files from both dirs + // and intersect for perf reasons. 
+ return delegate.openInput(name, context); + } else { + return super.openInput(name, context); + } + } catch (Throwable t) { + if (directIOException != null) { + t.addSuppressed(directIOException); + } + throw t; } } From f8389727821d0602579b889150797303e67cc6ec Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Tue, 1 Jul 2025 06:15:51 +0000 Subject: [PATCH 098/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-8d2f36302f8 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 366ef5c80e921..a264ed748e728 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.2.0 -lucene = 10.3.0-snapshot-4fcb6d6d50d +lucene = 10.3.0-snapshot-8d2f36302f8 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 99d3f4fd21f1a..e0e9d85e69167 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3083,129 +3083,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 0a900eda88924aaced091acee39e26732634f1b6 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 2 Jul 2025 06:13:56 +0000 Subject: [PATCH 099/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-8d2f36302f8 --- gradle/verification-metadata.xml | 50 ++++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 
e0e9d85e69167..c218f08caa808 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3085,127 +3085,127 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + From e6aec447af4bb0ef4559546d06ae68b8388cd265 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 3 Jul 2025 06:13:25 +0000 Subject: [PATCH 100/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-8d2f36302f8 --- gradle/verification-metadata.xml | 50 ++++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index c218f08caa808..117639333ee87 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3085,127 +3085,127 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + From 007dedf2641968e3ec0be9ee74eb31125dd57e49 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Fri, 4 Jul 2025 06:15:17 +0000 Subject: [PATCH 101/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-d0a4b104214 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index a264ed748e728..2bc5cfecbece4 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.2.0 -lucene = 10.3.0-snapshot-8d2f36302f8 +lucene = 10.3.0-snapshot-d0a4b104214 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 117639333ee87..aa477de734238 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3083,129 +3083,129 @@ - - - + + + - - - + + + - - - + + + - 
- - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 9d07a112883bda22d85dbbf08a14f4460526d539 Mon Sep 17 00:00:00 2001 From: Simon Cooper Date: Fri, 4 Jul 2025 15:17:55 +0100 Subject: [PATCH 102/184] IVFVectorsFormat doesn't report off-heap stats at the moment (#130553) --- .../codec/vectors/IVFVectorsFormatTests.java | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/IVFVectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/IVFVectorsFormatTests.java index 63b0b7e344b84..60d346ef21585 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/IVFVectorsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/IVFVectorsFormatTests.java @@ -38,6 +38,8 @@ import static java.lang.String.format; import static org.elasticsearch.index.codec.vectors.IVFVectorsFormat.MAX_VECTORS_PER_CLUSTER; import static org.elasticsearch.index.codec.vectors.IVFVectorsFormat.MIN_VECTORS_PER_CLUSTER; +import static org.hamcrest.Matchers.anEmptyMap; +import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.oneOf; @@ -83,6 +85,25 @@ protected Codec getCodec() { return TestUtil.alwaysKnnVectorsFormat(format); } + @Override + protected void assertOffHeapByteSize(LeafReader r, String fieldName) throws IOException { + var fieldInfo = r.getFieldInfos().fieldInfo(fieldName); + + if (r instanceof CodecReader codecReader) { + KnnVectorsReader knnVectorsReader = codecReader.getVectorReader(); + if (knnVectorsReader instanceof PerFieldKnnVectorsFormat.FieldsReader fieldsReader) { + knnVectorsReader = fieldsReader.getFieldReader(fieldName); + } + var offHeap = 
knnVectorsReader.getOffHeapByteSize(fieldInfo); + long totalByteSize = offHeap.values().stream().mapToLong(Long::longValue).sum(); + // IVF doesn't report stats at the moment + assertThat(offHeap, anEmptyMap()); + assertThat(totalByteSize, equalTo(0L)); + } else { + throw new AssertionError("unexpected:" + r.getClass()); + } + } + @Override public void testAdvance() throws Exception { // TODO re-enable with hierarchical IVF, clustering as it is is flaky From 415c05e04f88d4f8a993560b57dec9dd736c062b Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Sat, 5 Jul 2025 06:18:06 +0000 Subject: [PATCH 103/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-d0a4b104214 --- gradle/verification-metadata.xml | 50 ++++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index aa477de734238..1d180b66084cd 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3085,127 +3085,127 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + From 0b9b7ed3b0cac4d7e7ea6793613564d6e6c5b6fd Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Sun, 6 Jul 2025 06:13:52 +0000 Subject: [PATCH 104/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-d0a4b104214 --- gradle/verification-metadata.xml | 50 ++++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 1d180b66084cd..3d9473ae5f837 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3085,127 +3085,127 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + From 6d398941e7c0aa462cacda1af93e0e50dbe6fd20 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Mon, 7 Jul 2025 06:14:21 +0000 Subject: [PATCH 105/184] [Automated] Update Lucene 
snapshot to 10.3.0-snapshot-d0a4b104214 --- gradle/verification-metadata.xml | 50 ++++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 3d9473ae5f837..08351cd64e811 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3085,127 +3085,127 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + From 5a007986f4b03cea0055055d35c3ef7267cd9a3e Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Tue, 8 Jul 2025 06:14:14 +0000 Subject: [PATCH 106/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-b77259c0cd4 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 2bc5cfecbece4..d17432d18729b 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.2.0 -lucene = 10.3.0-snapshot-d0a4b104214 +lucene = 10.3.0-snapshot-b77259c0cd4 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 08351cd64e811..7e5be38d120e8 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3083,129 +3083,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 3e475040d8a91cc080034c8b56b1d633ce6067cd Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 9 Jul 2025 06:13:37 +0000 Subject: [PATCH 107/184] 
[Automated] Update Lucene snapshot to 10.3.0-snapshot-7de24ce973d --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index d17432d18729b..025392c2f7f9f 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.2.0 -lucene = 10.3.0-snapshot-b77259c0cd4 +lucene = 10.3.0-snapshot-7de24ce973d bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 7e5be38d120e8..7850098a97938 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3083,129 +3083,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 3c2cc69bb16f03b4d2cf49aa8bdea721009c553a Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 10 Jul 2025 06:13:44 +0000 Subject: [PATCH 108/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-50a4a2c0eb9 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 025392c2f7f9f..e4eeecdfd795b 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.2.0 -lucene = 10.3.0-snapshot-7de24ce973d +lucene = 10.3.0-snapshot-50a4a2c0eb9 bundled_jdk_vendor = openjdk bundled_jdk = 
24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 7850098a97938..e6dbf82e355ff 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3083,129 +3083,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From f2ec3e84d77076137e96c18494c62e215722775b Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Fri, 11 Jul 2025 06:13:37 +0000 Subject: [PATCH 109/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-50a4a2c0eb9 --- gradle/verification-metadata.xml | 50 ++++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index e6dbf82e355ff..bfbebb5750ff8 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3085,127 +3085,127 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + From 09aaebe7f7603972baa7c1c26633b65fafcab90c Mon Sep 17 00:00:00 2001 From: Simon Cooper Date: Fri, 11 Jul 2025 16:21:36 +0100 Subject: [PATCH 110/184] Fix class reference --- .../index/codec/TrackingPostingsInMemoryBytesCodec.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/TrackingPostingsInMemoryBytesCodec.java b/server/src/main/java/org/elasticsearch/index/codec/TrackingPostingsInMemoryBytesCodec.java index 92aebd83398ce..87f88996d9595 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/TrackingPostingsInMemoryBytesCodec.java +++ b/server/src/main/java/org/elasticsearch/index/codec/TrackingPostingsInMemoryBytesCodec.java @@ -31,7 +31,7 @@ /** * A codec 
that tracks the length of the min and max written terms. Used to improve memory usage estimates in serverless, since - * {@link org.apache.lucene.codecs.lucene90.blocktree.FieldReader} keeps an in-memory reference to the min and max term. + * {@link org.apache.lucene.codecs.lucene103.blocktree.FieldReader} keeps an in-memory reference to the min and max term. */ public class TrackingPostingsInMemoryBytesCodec extends FilterCodec { public static final FeatureFlag TRACK_POSTINGS_IN_MEMORY_BYTES = new FeatureFlag("track_postings_in_memory_bytes"); From 7ff74fc35c4fe15c649f73b5ba7d3cbd5527e315 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Sat, 12 Jul 2025 06:14:35 +0000 Subject: [PATCH 111/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-50a4a2c0eb9 --- gradle/verification-metadata.xml | 50 ++++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index bfbebb5750ff8..1d0215866e1c2 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3085,127 +3085,127 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + From 3f710b0757fa5b4ad9cd46ce46e25e5bbbd527fc Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Sun, 13 Jul 2025 06:13:45 +0000 Subject: [PATCH 112/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-50a4a2c0eb9 --- gradle/verification-metadata.xml | 50 ++++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 1d0215866e1c2..f12bb9da06191 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3085,127 +3085,127 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + From 8ba65fdbabb6f6c24045c2a3ff58380a581ec782 Mon Sep 17 00:00:00 2001 From: 
elasticsearchmachine Date: Mon, 14 Jul 2025 06:16:48 +0000 Subject: [PATCH 113/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-78a52b92b72 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index e4eeecdfd795b..05a88f3928e59 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.2.0 -lucene = 10.3.0-snapshot-50a4a2c0eb9 +lucene = 10.3.0-snapshot-78a52b92b72 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index f12bb9da06191..106e1ab7dd37d 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3083,129 +3083,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 64ce557cb2312197a18f5c1b94ec943dde53e061 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Tue, 15 Jul 2025 06:13:01 +0000 Subject: [PATCH 114/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-78a52b92b72 --- gradle/verification-metadata.xml | 50 ++++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 106e1ab7dd37d..6a0b15dcb73b6 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3085,127 +3085,127 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + From 08b69fa6f11935c657017129eee90784bb4ea0a8 Mon 
Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 16 Jul 2025 06:15:12 +0000 Subject: [PATCH 115/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-78a52b92b72 --- gradle/verification-metadata.xml | 50 ++++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 6a0b15dcb73b6..a43497596300e 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3085,127 +3085,127 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + From 6401040cf2d0c4f6e66aae47a6ceaf7d670379e4 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 17 Jul 2025 06:15:34 +0000 Subject: [PATCH 116/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-e32b39fa0a2 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index a9acb4f3bdf00..473b2b7d563d5 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.2.0 -lucene = 10.3.0-snapshot-78a52b92b72 +lucene = 10.3.0-snapshot-e32b39fa0a2 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index d3124929641a0..f536318ddb882 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3083,129 +3083,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 
0d357ae6283a0a8ea60b5ae73a44b63b727c3cc7 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Fri, 18 Jul 2025 06:16:52 +0000 Subject: [PATCH 117/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-7ac27d10648 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 473b2b7d563d5..d5a769e1c295a 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.2.0 -lucene = 10.3.0-snapshot-e32b39fa0a2 +lucene = 10.3.0-snapshot-7ac27d10648 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index f536318ddb882..8486ab98d5a49 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3083,129 +3083,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From ad8e5f2692102695933149a3ff923adafec12153 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Sat, 19 Jul 2025 06:14:20 +0000 Subject: [PATCH 118/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-88405648fa6 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index d5a769e1c295a..9cdea2072a666 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.2.0 
-lucene = 10.3.0-snapshot-7ac27d10648 +lucene = 10.3.0-snapshot-88405648fa6 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 8486ab98d5a49..729f19d13f88e 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3083,129 +3083,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From cfe9768b8a6fa134e60553339593a41ff44eb428 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Sun, 20 Jul 2025 06:13:23 +0000 Subject: [PATCH 119/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-88405648fa6 --- gradle/verification-metadata.xml | 50 ++++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index b70cc4717dd28..2757a2fa0fb01 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3085,127 +3085,127 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + From 15e5384a8b01454ac8aafd0bb6eb387b0759e274 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Mon, 21 Jul 2025 06:19:38 +0000 Subject: [PATCH 120/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-88405648fa6 --- gradle/verification-metadata.xml | 50 ++++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 2757a2fa0fb01..ab767474f2b28 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3085,127 +3085,127 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + 
- + - + - + - + - + - + - + - + - + - + - + From 26402b2a6c9ccdb588c46a6fc158cdd1343126b4 Mon Sep 17 00:00:00 2001 From: Chris Hegarty <62058229+ChrisHegarty@users.noreply.github.com> Date: Mon, 21 Jul 2025 15:53:48 +0100 Subject: [PATCH 121/184] Specialise the wrapping of vector scorer iterator for KnnVectorValues.DocIndexIterator in ExitableDirectoryReader (#131623) This commit specialises the wrapping of vector scorer iterator for KnnVectorValues.DocIndexIterator in ExitableDirectoryReader. This change is necessary now since a new assert in Lucene catches this scenario, since DocIndexIterator is expected in several places which we were previously not exposing. DocIndexIterator can be optimized more than DocIdSetIterator. closes #131610 --- .../search/internal/ExitableDirectoryReader.java | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/internal/ExitableDirectoryReader.java b/server/src/main/java/org/elasticsearch/search/internal/ExitableDirectoryReader.java index 9c998eb920dc9..fa50317353556 100644 --- a/server/src/main/java/org/elasticsearch/search/internal/ExitableDirectoryReader.java +++ b/server/src/main/java/org/elasticsearch/search/internal/ExitableDirectoryReader.java @@ -586,8 +586,9 @@ public VectorScorer scorer(byte[] bytes) throws IOException { if (scorer == null) { return null; } + DocIdSetIterator scorerIterator = scorer.iterator(); return new VectorScorer() { - private final DocIdSetIterator iterator = new ExitableDocSetIterator(scorer.iterator(), queryCancellation); + private final DocIdSetIterator iterator = exitableIterator(scorerIterator, queryCancellation); @Override public float score() throws IOException { @@ -637,8 +638,9 @@ public VectorScorer scorer(float[] target) throws IOException { if (scorer == null) { return null; } + DocIdSetIterator scorerIterator = scorer.iterator(); return new VectorScorer() { - private final DocIdSetIterator iterator = new 
ExitableDocSetIterator(scorer.iterator(), queryCancellation); + private final DocIdSetIterator iterator = exitableIterator(scorerIterator, queryCancellation); @Override public float score() throws IOException { @@ -663,6 +665,15 @@ public FloatVectorValues copy() throws IOException { } } + /** Wraps the iterator in an exitable iterator, specializing for KnnVectorValues.DocIndexIterator. */ + static DocIdSetIterator exitableIterator(DocIdSetIterator iterator, QueryCancellation queryCancellation) { + if (iterator instanceof KnnVectorValues.DocIndexIterator docIndexIterator) { + return createExitableIterator(docIndexIterator, queryCancellation); + } else { + return new ExitableDocSetIterator(iterator, queryCancellation); + } + } + private static KnnVectorValues.DocIndexIterator createExitableIterator( KnnVectorValues.DocIndexIterator delegate, QueryCancellation queryCancellation From fe6c3fd86c31d6b0e8d7838f837ee0d693b337c1 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Tue, 22 Jul 2025 06:15:01 +0000 Subject: [PATCH 122/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-047009d32c0 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 9cdea2072a666..98e7977d6d8f5 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.2.0 -lucene = 10.3.0-snapshot-88405648fa6 +lucene = 10.3.0-snapshot-047009d32c0 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index ab767474f2b28..fcf6d2a5a43c6 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3083,129 +3083,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - 
+ + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 998b18164ff5a88d0363a3b963e2a37b110ca8ee Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 23 Jul 2025 06:15:43 +0000 Subject: [PATCH 123/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-7dd51f8c83c --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 98e7977d6d8f5..57a8edfb6b4c2 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.2.0 -lucene = 10.3.0-snapshot-047009d32c0 +lucene = 10.3.0-snapshot-7dd51f8c83c bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index fcf6d2a5a43c6..2bbddb4459054 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3083,129 +3083,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From a6a8454a1051b8f2df8ec8ce79e4540748f4f825 Mon Sep 17 00:00:00 2001 From: Chris Hegarty <62058229+ChrisHegarty@users.noreply.github.com> Date: Wed, 23 Jul 2025 11:20:40 +0100 Subject: [PATCH 124/184] Handle IndexedDISI in exitableIterator (#131648) --- .../elasticsearch/search/internal/ExitableDirectoryReader.java | 3 +++ 1 file changed, 3 insertions(+) diff --git 
a/server/src/main/java/org/elasticsearch/search/internal/ExitableDirectoryReader.java b/server/src/main/java/org/elasticsearch/search/internal/ExitableDirectoryReader.java index fa50317353556..1df7dcbf7e164 100644 --- a/server/src/main/java/org/elasticsearch/search/internal/ExitableDirectoryReader.java +++ b/server/src/main/java/org/elasticsearch/search/internal/ExitableDirectoryReader.java @@ -10,6 +10,7 @@ package org.elasticsearch.search.internal; import org.apache.lucene.codecs.StoredFieldsReader; +import org.apache.lucene.codecs.lucene90.IndexedDISI; import org.apache.lucene.index.ByteVectorValues; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.FilterDirectoryReader; @@ -669,6 +670,8 @@ public FloatVectorValues copy() throws IOException { static DocIdSetIterator exitableIterator(DocIdSetIterator iterator, QueryCancellation queryCancellation) { if (iterator instanceof KnnVectorValues.DocIndexIterator docIndexIterator) { return createExitableIterator(docIndexIterator, queryCancellation); + } else if (iterator instanceof IndexedDISI indexedDISI) { + return createExitableIterator(IndexedDISI.asDocIndexIterator(indexedDISI), queryCancellation); } else { return new ExitableDocSetIterator(iterator, queryCancellation); } From 36643c0e6cb418cd82d29bb607f81fba6188d29d Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 24 Jul 2025 06:13:07 +0000 Subject: [PATCH 125/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-b960806d1e4 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 57a8edfb6b4c2..c656d458aa277 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.2.0 -lucene = 10.3.0-snapshot-7dd51f8c83c +lucene = 
10.3.0-snapshot-b960806d1e4 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 2bbddb4459054..d6da17e2e9ddd 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3083,129 +3083,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 2448b9a33ba18c11114282648ddef1190e94f5f1 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Fri, 25 Jul 2025 06:12:58 +0000 Subject: [PATCH 126/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-65638aeabdd --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index c656d458aa277..7d5cc0e5b6e51 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.2.0 -lucene = 10.3.0-snapshot-b960806d1e4 +lucene = 10.3.0-snapshot-65638aeabdd bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index d6da17e2e9ddd..37c3422a8d452 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3083,129 +3083,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 
ca4abcc778da5f4b7a2eae9d52501c34ac371e18 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Sat, 26 Jul 2025 06:13:39 +0000 Subject: [PATCH 127/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-65638aeabdd --- gradle/verification-metadata.xml | 50 ++++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 37c3422a8d452..c03691079b4c0 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3085,127 +3085,127 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + From 68ec42a63d1b0e39a31a2a5fc6732d446caf8c91 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Sun, 27 Jul 2025 06:15:07 +0000 Subject: [PATCH 128/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-65638aeabdd --- gradle/verification-metadata.xml | 50 ++++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index c03691079b4c0..69ecc0c8e0d8f 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3085,127 +3085,127 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + From e0ef3053511ec53551f33b90cb70bc064ffa5c06 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Mon, 28 Jul 2025 06:13:11 +0000 Subject: [PATCH 129/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-65638aeabdd --- gradle/verification-metadata.xml | 50 ++++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 69ecc0c8e0d8f..abb634586d4cc 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3085,127 +3085,127 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + 
- + - + - + - + - + - + - + - + From 89f124d885900603ed9e272cd929687cab1510e4 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Tue, 29 Jul 2025 06:18:50 +0000 Subject: [PATCH 130/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-3e42687248e --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 7d5cc0e5b6e51..277d25d8cf5dd 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.2.0 -lucene = 10.3.0-snapshot-65638aeabdd +lucene = 10.3.0-snapshot-3e42687248e bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index abb634586d4cc..e5fbba0a13657 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3083,129 +3083,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From cc0c3944e159014e1120d87703b203e6bad20904 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 30 Jul 2025 06:16:46 +0000 Subject: [PATCH 131/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-3e42687248e --- gradle/verification-metadata.xml | 50 ++++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index e5fbba0a13657..6a9d4e6341c4c 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3085,127 +3085,127 @@ - + - + - + - + - + - + - + - + - + - + - + 
- + - + - + - + - + - + - + - + - + - + - + - + - + - + From 9714077e06b9218ee0c10442d67302173df24b1a Mon Sep 17 00:00:00 2001 From: Carlos Delgado <6339205+carlosdelest@users.noreply.github.com> Date: Wed, 30 Jul 2025 17:48:40 +0200 Subject: [PATCH 132/184] Mutes knn nested tests - #131749 (#132087) --- gradle/verification-metadata.xml | 100 +++++++++++++++++++++++-------- muted-tests.yml | 29 +++++++++ 2 files changed, 104 insertions(+), 25 deletions(-) diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 6a9d4e6341c4c..48a5b03315783 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3085,127 +3085,177 @@ - + + + - + + + - + + + - + + + - + + + - + + + - + + + - + + + - + + + - + + + - + + + - + + + - + + + - + + + - + + + - + + + - + + + - + + + - + + + - + + + - + + + - + + + - + + + - + + + - + + + diff --git a/muted-tests.yml b/muted-tests.yml index c2015b106d160..d1c0336eba6bf 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -280,6 +280,9 @@ tests: - class: org.elasticsearch.xpack.ccr.action.ShardFollowTaskReplicationTests method: testChangeFollowerHistoryUUID issue: https://github.com/elastic/elasticsearch/issues/127680 +- class: org.elasticsearch.action.admin.indices.diskusage.IndexDiskUsageAnalyzerTests + method: testKnnVectors + issue: https://github.com/elastic/elasticsearch/issues/127689 - class: org.elasticsearch.backwards.MixedClusterClientYamlTestSuiteIT method: test {p0=search/350_point_in_time/point-in-time with index filter} issue: https://github.com/elastic/elasticsearch/issues/127741 @@ -617,6 +620,32 @@ tests: - class: org.elasticsearch.index.mapper.vectors.DenseVectorFieldIndexTypeUpdateIT method: testDenseVectorMappingUpdate {initialType=int4_hnsw updateType=bbq_disk} issue: https://github.com/elastic/elasticsearch/issues/132165 +- class: org.elasticsearch.*YamlTestSuiteIT + methods: + - test {yaml=search.vectors/130_knn_query_nested_search/*} + - test 
{p0=search.vectors/130_knn_query_nested_search/*} + issue: https://github.com/elastic/elasticsearch/issues/131749 +- class: org.elasticsearch.*YamlTestSuiteIT + methods: + - test {yaml=search.vectors/100_knn_nested_search/*} + - test {p0=search.vectors/100_knn_nested_search/*} + issue: https://github.com/elastic/elasticsearch/issues/131749 +- class: org.elasticsearch.*YamlTestSuiteIT + methods: + - test {yaml=search.vectors/101_knn_nested_search_bits/*} + - test {p0=search.vectors/101_knn_nested_search_bits/*} +- class: org.elasticsearch.*YamlTestSuiteIT + methods: + - test {yaml=search.vectors/135_knn_query_nested_search_ivf/*} + - test {p0=search.vectors/135_knn_query_nested_search_ivf/*} + issue: https://github.com/elastic/elasticsearch/issues/131749 +- class: org.elasticsearch.search.nested.VectorNestedIT + method: testNestedKNnnSearchWithMultipleSegments + issue: https://github.com/elastic/elasticsearch/issues/131749 +- class: org.elasticsearch.upgrades.SemanticTextUpgradeIT + method: testSemanticTextOperations* + issue: https://github.com/elastic/elasticsearch/issues/131749 + # Examples: # From cb97fc7d0f86e15dc58b035e955340dd10f1d8ae Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 31 Jul 2025 06:15:01 +0000 Subject: [PATCH 133/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-54002a4e6b5 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 200 +++++++++--------------- 2 files changed, 76 insertions(+), 126 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 277d25d8cf5dd..61bf4d0883823 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.2.0 -lucene = 10.3.0-snapshot-3e42687248e +lucene = 10.3.0-snapshot-54002a4e6b5 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml 
b/gradle/verification-metadata.xml index 48a5b03315783..b4c79c0e12caa 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3083,179 +3083,129 @@ - - - - - + + + - - - - - + + + - - - - - + + + - - - - - + + + - - - - - + + + - - - - - + + + - - - - - + + + - - - - - + + + - - - - - + + + - - - - - + + + - - - - - + + + - - - - - + + + - - - - - + + + - - - - - + + + - - - - - + + + - - - - - + + + - - - - - + + + - - - - - + + + - - - - - + + + - - - - - + + + - - - - - + + + - - - - - + + + - - - - - + + + - - - - - + + + - - - - - + + + From 3bce8fe6b21700c09a0d94c114812c476a310d46 Mon Sep 17 00:00:00 2001 From: Chris Hegarty <62058229+ChrisHegarty@users.noreply.github.com> Date: Thu, 31 Jul 2025 12:44:32 +0100 Subject: [PATCH 134/184] Mute a couple of ValuesSourceReaderOperatorTests in lucene_snapshot (#132259) --- muted-tests.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/muted-tests.yml b/muted-tests.yml index d1c0336eba6bf..528398aeb462a 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -479,6 +479,12 @@ tests: - class: org.elasticsearch.compute.lucene.read.SortedSetOrdinalsBuilderTests method: testReader issue: https://github.com/elastic/elasticsearch/issues/131573 +- class: org.elasticsearch.compute.lucene.read.ValuesSourceReaderOperatorTests + method: testLoadLongShuffledManySegments + issue: https://github.com/elastic/elasticsearch/issues/132258 +- class: org.elasticsearch.compute.lucene.read.ValuesSourceReaderOperatorTests + method: testLoadLongManySegments + issue: https://github.com/elastic/elasticsearch/issues/132258 - class: org.elasticsearch.search.SearchWithIndexBlocksIT method: testSearchShardsOnIndicesWithIndexRefreshBlocks issue: https://github.com/elastic/elasticsearch/issues/131662 From e7c945cc041b62011562e5260ee69a19b83eb32a Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Fri, 1 Aug 2025 06:14:56 +0000 Subject: [PATCH 135/184] [Automated] Update Lucene snapshot to 
10.3.0-snapshot-a48cd205e16 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 61bf4d0883823..091f0c2c679c7 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.2.0 -lucene = 10.3.0-snapshot-54002a4e6b5 +lucene = 10.3.0-snapshot-a48cd205e16 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 041ea15d7f6ca..3bdf733d1c157 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3083,129 +3083,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 12225bf7dab3083392db2d29f8043797167dc3b3 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Fri, 1 Aug 2025 16:39:44 +0000 Subject: [PATCH 136/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-7a97c6357cb --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 091f0c2c679c7..c8b792bd335dd 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.2.0 -lucene = 10.3.0-snapshot-a48cd205e16 +lucene = 10.3.0-snapshot-7a97c6357cb bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git 
a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 3bdf733d1c157..7846ab85e0e4d 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3083,129 +3083,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From bb617250cbde0928198cb7a9115bca156a1a8d24 Mon Sep 17 00:00:00 2001 From: Chris Hegarty <62058229+ChrisHegarty@users.noreply.github.com> Date: Fri, 1 Aug 2025 19:41:57 +0100 Subject: [PATCH 137/184] Remove nested knn search mutes (#132351) --- muted-tests.yml | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index c2af5e82d7828..e3d2f742ff0b1 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -583,29 +583,3 @@ tests: # - class: "org.elasticsearch.xpack.esql.**" # method: "test {union_types.MultiIndexIpStringStatsInline *}" # issue: "https://github.com/elastic/elasticsearch/..." 
- -- class: org.elasticsearch.*YamlTestSuiteIT - methods: - - test {yaml=search.vectors/130_knn_query_nested_search/*} - - test {p0=search.vectors/130_knn_query_nested_search/*} - issue: https://github.com/elastic/elasticsearch/issues/131749 -- class: org.elasticsearch.*YamlTestSuiteIT - methods: - - test {yaml=search.vectors/100_knn_nested_search/*} - - test {p0=search.vectors/100_knn_nested_search/*} - issue: https://github.com/elastic/elasticsearch/issues/131749 -- class: org.elasticsearch.*YamlTestSuiteIT - methods: - - test {yaml=search.vectors/101_knn_nested_search_bits/*} - - test {p0=search.vectors/101_knn_nested_search_bits/*} -- class: org.elasticsearch.*YamlTestSuiteIT - methods: - - test {yaml=search.vectors/135_knn_query_nested_search_ivf/*} - - test {p0=search.vectors/135_knn_query_nested_search_ivf/*} - issue: https://github.com/elastic/elasticsearch/issues/131749 -- class: org.elasticsearch.search.nested.VectorNestedIT - method: testNestedKNnnSearchWithMultipleSegments - issue: https://github.com/elastic/elasticsearch/issues/131749 -- class: org.elasticsearch.upgrades.SemanticTextUpgradeIT - method: testSemanticTextOperations* - issue: https://github.com/elastic/elasticsearch/issues/131749 From 5b87cae2f8ecd063a2d7e5344ef56470c1d7e4ce Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Sat, 2 Aug 2025 06:12:38 +0000 Subject: [PATCH 138/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-7a97c6357cb --- gradle/verification-metadata.xml | 50 ++++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 7846ab85e0e4d..dd1e3fd42169c 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3085,127 +3085,127 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + From 28fb03ff92c03dd30652e25bc664d5a8d7b05238 Mon Sep 17 00:00:00 2001 From: 
elasticsearchmachine Date: Sun, 3 Aug 2025 06:15:44 +0000 Subject: [PATCH 139/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-6a21a06c87a --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index c8b792bd335dd..13474857cbc9b 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.2.0 -lucene = 10.3.0-snapshot-7a97c6357cb +lucene = 10.3.0-snapshot-6a21a06c87a bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index dd1e3fd42169c..02a5c86f20976 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3083,129 +3083,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From d1090a0db759d9c1e1062cb6378f75072b8cd20e Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Mon, 4 Aug 2025 06:17:18 +0000 Subject: [PATCH 140/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-46f352146f3 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 13474857cbc9b..021b8e9df6508 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.2.0 -lucene = 10.3.0-snapshot-6a21a06c87a +lucene = 
10.3.0-snapshot-46f352146f3 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 02a5c86f20976..65c0f50b614b1 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3083,129 +3083,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From ce987aadb664ba99c71f7bd6cb290b8c88d4864b Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Tue, 5 Aug 2025 06:14:12 +0000 Subject: [PATCH 141/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-5f72fb05bcd --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 021b8e9df6508..535403d62a360 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.2.0 -lucene = 10.3.0-snapshot-46f352146f3 +lucene = 10.3.0-snapshot-5f72fb05bcd bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 65c0f50b614b1..67c8a9b490c49 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3083,129 +3083,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 
18cedeaacf8f96f808de0028eb19ee9f5bbe74c5 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 6 Aug 2025 06:15:17 +0000 Subject: [PATCH 142/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-5f72fb05bcd --- gradle/verification-metadata.xml | 50 ++++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 67c8a9b490c49..c1f6ea52fae03 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3085,127 +3085,127 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + From 9c09d704b18c3f28e7f63a1f0acb6c694ba2ba1c Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 7 Aug 2025 06:16:23 +0000 Subject: [PATCH 143/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-cac1a31b6a3 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 535403d62a360..da0dbdaab7d69 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.2.0 -lucene = 10.3.0-snapshot-5f72fb05bcd +lucene = 10.3.0-snapshot-cac1a31b6a3 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index c1f6ea52fae03..c6633c95c96e6 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3083,129 +3083,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - 
+ + + - - - + + + From 9c3738aaaa6ab485de4db68629c0ca96c5306fde Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Fri, 8 Aug 2025 06:12:44 +0000 Subject: [PATCH 144/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-9b680820d0c --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index da0dbdaab7d69..431981015cde1 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.2.0 -lucene = 10.3.0-snapshot-cac1a31b6a3 +lucene = 10.3.0-snapshot-9b680820d0c bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index c6633c95c96e6..b4dc078ff8c7b 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3083,129 +3083,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 1951984ae3a7a682e847a129af6f3cd0cdf2ac1e Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Sat, 9 Aug 2025 06:12:09 +0000 Subject: [PATCH 145/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-79a9ddafcb7 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 431981015cde1..d11b222764d45 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ 
elasticsearch = 9.2.0 -lucene = 10.3.0-snapshot-9b680820d0c +lucene = 10.3.0-snapshot-79a9ddafcb7 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index b4dc078ff8c7b..8ebd9a7b81465 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3083,129 +3083,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 2ad97ce40025e7025858ddee659622b59b05d4f9 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Sun, 10 Aug 2025 06:12:05 +0000 Subject: [PATCH 146/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-79a9ddafcb7 --- gradle/verification-metadata.xml | 50 ++++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 8ebd9a7b81465..568d0866be298 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3085,127 +3085,127 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + From 99789162b0d722d6f5a4f7f2fd631b326cae21bc Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Mon, 11 Aug 2025 06:12:29 +0000 Subject: [PATCH 147/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-79a9ddafcb7 --- gradle/verification-metadata.xml | 50 ++++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 568d0866be298..0f4a616742a61 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3085,127 +3085,127 @@ - + - + - + - + - + - + - + - + - 
+ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + From f1bb39a81b750854043fb12910bdd98fef80b0ad Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Tue, 12 Aug 2025 06:12:42 +0000 Subject: [PATCH 148/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-20a189f1c24 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index d11b222764d45..b19e9a27472d5 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.2.0 -lucene = 10.3.0-snapshot-79a9ddafcb7 +lucene = 10.3.0-snapshot-20a189f1c24 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index e4a004a034c79..dd5fa8d841d01 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3088,129 +3088,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 80ec7ab1e7bff061ec1263876b02a4ec55c99308 Mon Sep 17 00:00:00 2001 From: Simon Cooper Date: Tue, 12 Aug 2025 10:58:51 +0100 Subject: [PATCH 149/184] Update MMapDirectory ReadAdvice function (#132710) This has changed in lucene branch_10x --- .../index/store/FsDirectoryFactory.java | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java b/server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java index 46b11cc1edd4b..ffb87bba11590 100644 --- 
a/server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java +++ b/server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java @@ -44,6 +44,7 @@ import java.util.Optional; import java.util.OptionalLong; import java.util.Set; +import java.util.function.BiFunction; import java.util.function.BiPredicate; public class FsDirectoryFactory implements IndexStorePlugin.DirectoryFactory { @@ -96,18 +97,11 @@ protected Directory newFSDirectory(Path location, LockFactory lockFactory, Index } } - private static Optional overrideReadAdvice(String name, IOContext context) { - if (context.hints().contains(StandardIOBehaviorHint.INSTANCE)) { - return Optional.of(ReadAdvice.NORMAL); - } - return Optional.empty(); - } - /** Sets the preload, if any, on the given directory based on the extensions. Returns the same directory instance. */ // visibility and extensibility for testing public MMapDirectory setMMapFunctions(MMapDirectory mMapDirectory, Set preLoadExtensions) { mMapDirectory.setPreload(getPreloadFunc(preLoadExtensions)); - mMapDirectory.setReadAdviceOverride(FsDirectoryFactory::overrideReadAdvice); + mMapDirectory.setReadAdvice(getReadAdviceFunc()); return mMapDirectory; } @@ -123,6 +117,15 @@ static BiPredicate getPreloadFunc(Set preLoadExtensio return MMapDirectory.NO_FILES; } + private static BiFunction> getReadAdviceFunc() { + return (name, context) -> { + if (context.hints().contains(StandardIOBehaviorHint.INSTANCE)) { + return Optional.of(ReadAdvice.NORMAL); + } + return MMapDirectory.ADVISE_BY_CONTEXT.apply(name, context); + }; + } + /** * Returns true iff the directory is a hybrid fs directory */ From 8964a34ab7551e56010f828739f9910b5ab998ba Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 13 Aug 2025 06:12:47 +0000 Subject: [PATCH 150/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-6f1a9398595 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 
files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index b19e9a27472d5..13c38635e4653 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.2.0 -lucene = 10.3.0-snapshot-20a189f1c24 +lucene = 10.3.0-snapshot-6f1a9398595 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 44ca1605d2626..c4e82ea05c7dd 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3088,129 +3088,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 349f4c030295c1dd405eecd74f654ea1037e42d6 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 14 Aug 2025 06:13:19 +0000 Subject: [PATCH 151/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-37db8b00d0b --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 13c38635e4653..e880e837f2143 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.2.0 -lucene = 10.3.0-snapshot-6f1a9398595 +lucene = 10.3.0-snapshot-37db8b00d0b bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index c4e82ea05c7dd..12e902914ec35 100644 --- a/gradle/verification-metadata.xml +++ 
b/gradle/verification-metadata.xml @@ -3088,129 +3088,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 2040851a8d5a471c0f35f8e6e3224f8c585c3caa Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Fri, 15 Aug 2025 06:12:08 +0000 Subject: [PATCH 152/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-3a99fae956f --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index e880e837f2143..dbd60e1c4d6e0 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.2.0 -lucene = 10.3.0-snapshot-37db8b00d0b +lucene = 10.3.0-snapshot-3a99fae956f bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index c91608af1b53b..e5f4ad2212ea5 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3123,129 +3123,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From c453ff24dbf7147a1bb7136d19ce7e815382e555 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Fri, 15 Aug 2025 12:11:47 +0100 Subject: [PATCH 153/184] FlatVectorsReader.getMergeInstance() now throws IOException (#132972) --- .../codec/vectors/es818/ES818BinaryQuantizedVectorsReader.java | 
2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsReader.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsReader.java index e59b9ba7e6535..1feee9f642b32 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsReader.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsReader.java @@ -130,7 +130,7 @@ private ES818BinaryQuantizedVectorsReader(ES818BinaryQuantizedVectorsReader clon } @Override - public FlatVectorsReader getMergeInstance() { + public FlatVectorsReader getMergeInstance() throws IOException { return new ES818BinaryQuantizedVectorsReader(this, rawVectorsReader.getMergeInstance()); } From 5076681d465d006475243c5bb7f6fe80c4aa5501 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Fri, 15 Aug 2025 14:47:12 +0100 Subject: [PATCH 154/184] Implement docIDRunEnd() on ES819TSDBDocValuesProducer (#132939) This method allows consumers to quickly check if a DocIdSetIterator matches a large run of documents; it was missing from our custom Codec DocValues implementations. 
--- .../es819/ES819TSDBDocValuesProducer.java | 50 +++++++ .../es819/ES819TSDBDocValuesFormatTests.java | 131 ++++++++++++++++++ 2 files changed, 181 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java index 163e4c729bb95..b5cab3973bea5 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java @@ -294,6 +294,11 @@ public boolean advanceExact(int target) throws IOException { doc = target; return true; } + + @Override + public int docIDRunEnd() throws IOException { + return maxDoc; + } } private abstract static class SparseBinaryDocValues extends BinaryDocValues { @@ -328,6 +333,11 @@ public int advance(int target) throws IOException { public boolean advanceExact(int target) throws IOException { return disi.advanceExact(target); } + + @Override + public int docIDRunEnd() throws IOException { + return disi.docIDRunEnd(); + } } @Override @@ -369,6 +379,11 @@ public int advance(int target) throws IOException { public long cost() { return ords.cost(); } + + @Override + public int docIDRunEnd() throws IOException { + return ords.docIDRunEnd(); + } }; } @@ -750,6 +765,11 @@ public int advance(int target) throws IOException { public long cost() { return ords.cost(); } + + @Override + public int docIDRunEnd() throws IOException { + return ords.docIDRunEnd(); + } }; } @@ -1086,6 +1106,11 @@ public boolean advanceExact(int target) { public long cost() { return maxDoc; } + + @Override + public int docIDRunEnd() { + return maxDoc; + } }; } else { final IndexedDISI disi = new IndexedDISI( @@ -1127,6 +1152,11 @@ public long cost() { public long longValue() { return 0L; } + + @Override + public int docIDRunEnd() throws IOException { + return disi.docIDRunEnd(); + } }; } } 
@@ -1178,6 +1208,11 @@ public long cost() { return maxDoc; } + @Override + public int docIDRunEnd() { + return maxDoc; + } + @Override public long longValue() throws IOException { final int index = doc; @@ -1286,6 +1321,11 @@ public long cost() { return disi.cost(); } + @Override + public int docIDRunEnd() throws IOException { + return disi.docIDRunEnd(); + } + @Override public long longValue() throws IOException { final int index = disi.index(); @@ -1406,6 +1446,11 @@ public long nextValue() throws IOException { public int docValueCount() { return count; } + + @Override + public int docIDRunEnd() { + return maxDoc; + } }; } else { // sparse @@ -1463,6 +1508,11 @@ public int docValueCount() { return count; } + @Override + public int docIDRunEnd() throws IOException { + return disi.docIDRunEnd(); + } + private void set() { if (set == false) { final int index = disi.index(); diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java index 19118c7ac3270..2ea3791164cfd 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java @@ -27,6 +27,7 @@ import org.apache.lucene.index.LogByteSizeMergePolicy; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; @@ -49,6 +50,8 @@ import java.util.function.Supplier; import java.util.stream.IntStream; +import static org.elasticsearch.test.ESTestCase.randomFrom; + public class ES819TSDBDocValuesFormatTests extends ES87TSDBDocValuesFormatTests { final Codec codec = TestUtil.alwaysDocValuesFormat(new 
ES819TSDBDocValuesFormat()); @@ -959,6 +962,134 @@ private static BulkNumericDocValues getBulkNumericDocValues(LeafReader leafReade return (BulkNumericDocValues) DocValues.unwrapSingleton(leafReader.getSortedNumericDocValues(counterField)); } + public void testDocIDEndRun() throws IOException { + String timestampField = "@timestamp"; + String hostnameField = "host.name"; + long baseTimestamp = 1704067200000L; + + var config = getTimeSeriesIndexWriterConfig(hostnameField, timestampField); + try (var dir = newDirectory(); var iw = new IndexWriter(dir, config)) { + long counter1 = 0; + + long[] gauge2Values = new long[] { -2, -4, -6, -8, -10, -12, -14, -16 }; + String[] tags = new String[] { "tag_1", "tag_2", "tag_3", "tag_4", "tag_5", "tag_6", "tag_7", "tag_8" }; + + // IndexedDISI stores ids in blocks of 4096. To test sparse end runs, we want a mixture of + // dense and sparse blocks, so we need the gap frequency to be larger than + // this value, but smaller than two blocks, and to index at least three blocks + int gap_frequency = 4500 + random().nextInt(2048); + int numDocs = 10000 + random().nextInt(10000); + int numHosts = numDocs / 20; + + for (int i = 0; i < numDocs; i++) { + var d = new Document(); + + int batchIndex = i / numHosts; + String hostName = String.format(Locale.ROOT, "host-%03d", batchIndex); + long timestamp = baseTimestamp + (1000L * i); + + d.add(new SortedDocValuesField(hostnameField, new BytesRef(hostName))); + // Index sorting doesn't work with NumericDocValuesField: + d.add(new SortedNumericDocValuesField(timestampField, timestamp)); + d.add(new NumericDocValuesField("counter", counter1++)); + if (i % gap_frequency != 0) { + d.add(new NumericDocValuesField("sparse_counter", counter1)); + } + + int numGauge2 = 1 + random().nextInt(8); + for (int j = 0; j < numGauge2; j++) { + d.add(new SortedNumericDocValuesField("gauge", gauge2Values[(i + j) % gauge2Values.length])); + if (i % gap_frequency != 0) { + d.add(new 
SortedNumericDocValuesField("sparse_gauge", gauge2Values[(i + j) % gauge2Values.length])); + } + } + + d.add(new SortedDocValuesField("tag", new BytesRef(randomFrom(tags)))); + if (i % gap_frequency != 0) { + d.add(new SortedDocValuesField("sparse_tag", new BytesRef(randomFrom(tags)))); + } + + int numTags = 1 + random().nextInt(8); + for (int j = 0; j < numTags; j++) { + d.add(new SortedSetDocValuesField("tags", new BytesRef(tags[(i + j) % tags.length]))); + if (i % gap_frequency != 0) { + d.add(new SortedSetDocValuesField("sparse_tags", new BytesRef(tags[(i + j) % tags.length]))); + } + } + + d.add(new BinaryDocValuesField("tags_as_bytes", new BytesRef(tags[i % tags.length]))); + if (i % gap_frequency != 0) { + d.add(new BinaryDocValuesField("sparse_tags_as_bytes", new BytesRef(tags[i % tags.length]))); + } + + iw.addDocument(d); + if (i % 100 == 0) { + iw.commit(); + } + } + iw.commit(); + + iw.forceMerge(1); + + try (var reader = DirectoryReader.open(iw)) { + assertEquals(1, reader.leaves().size()); + assertEquals(numDocs, reader.maxDoc()); + var leaf = reader.leaves().get(0).reader(); + var hostNameDV = leaf.getSortedDocValues(hostnameField); + assertNotNull(hostNameDV); + validateRunEnd(hostNameDV); + var timestampDV = DocValues.unwrapSingleton(leaf.getSortedNumericDocValues(timestampField)); + assertNotNull(timestampDV); + validateRunEnd(timestampDV); + var counterOneDV = leaf.getNumericDocValues("counter"); + assertNotNull(counterOneDV); + validateRunEnd(counterOneDV); + var sparseCounter = leaf.getNumericDocValues("sparse_counter"); + assertNotNull(sparseCounter); + validateRunEnd(sparseCounter); + var gaugeOneDV = leaf.getSortedNumericDocValues("gauge"); + assertNotNull(gaugeOneDV); + validateRunEnd(gaugeOneDV); + var sparseGaugeDV = leaf.getSortedNumericDocValues("sparse_gauge"); + assertNotNull(sparseGaugeDV); + validateRunEnd(sparseGaugeDV); + var tagDV = leaf.getSortedDocValues("tag"); + assertNotNull(tagDV); + validateRunEnd(tagDV); + var sparseTagDV 
= leaf.getSortedDocValues("sparse_tag"); + assertNotNull(sparseTagDV); + validateRunEnd(sparseTagDV); + var tagsDV = leaf.getSortedSetDocValues("tags"); + assertNotNull(tagsDV); + validateRunEnd(tagsDV); + var sparseTagsDV = leaf.getSortedSetDocValues("sparse_tags"); + assertNotNull(sparseTagsDV); + validateRunEnd(sparseTagsDV); + var tagBytesDV = leaf.getBinaryDocValues("tags_as_bytes"); + assertNotNull(tagBytesDV); + validateRunEnd(tagBytesDV); + var sparseTagBytesDV = leaf.getBinaryDocValues("sparse_tags_as_bytes"); + assertNotNull(sparseTagBytesDV); + validateRunEnd(sparseTagBytesDV); + } + } + } + + private void validateRunEnd(DocIdSetIterator iterator) throws IOException { + int runCount = 0; + while (iterator.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { + int runLength = iterator.docIDRunEnd() - iterator.docID() - 1; + if (runLength > 1) { + runCount++; + for (int i = 0; i < runLength; i++) { + int expected = iterator.docID() + 1; + assertEquals(expected, iterator.advance(expected)); + } + } + } + assertTrue("Expected docid runs of greater than 1", runCount > 0); + } + private IndexWriterConfig getTimeSeriesIndexWriterConfig(String hostnameField, String timestampField) { var config = new IndexWriterConfig(); if (hostnameField != null) { From 45f92486f0724eed28cec07716a35c08ab1cbce1 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Sat, 16 Aug 2025 06:12:18 +0000 Subject: [PATCH 155/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-82b8ad1baa5 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index dbd60e1c4d6e0..38aa616d38f9e 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.2.0 -lucene = 10.3.0-snapshot-3a99fae956f +lucene = 
10.3.0-snapshot-82b8ad1baa5 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index e5f4ad2212ea5..42ae54a65cf7e 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3123,129 +3123,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From c413153d811807e59b6f691f3574f7119c87af6b Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Sun, 17 Aug 2025 06:12:56 +0000 Subject: [PATCH 156/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-4e6f855592c --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 38aa616d38f9e..5e0a356a12040 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.2.0 -lucene = 10.3.0-snapshot-82b8ad1baa5 +lucene = 10.3.0-snapshot-4e6f855592c bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 42ae54a65cf7e..dcfd6d090730a 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3123,129 +3123,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 
80f5844a98d282eba814e7f19c4fa45e9dcb67e2 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Mon, 18 Aug 2025 06:13:20 +0000 Subject: [PATCH 157/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-c7dc0281910 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 5e0a356a12040..9f5be89915e81 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.2.0 -lucene = 10.3.0-snapshot-4e6f855592c +lucene = 10.3.0-snapshot-c7dc0281910 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index dcfd6d090730a..5810a90da72d4 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3123,129 +3123,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 3d141484e2dbd115960a1054ea3f092ef397562d Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Tue, 19 Aug 2025 06:12:18 +0000 Subject: [PATCH 158/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-efb4df54eb1 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 9f5be89915e81..90df3af0bdc1e 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.2.0 
-lucene = 10.3.0-snapshot-c7dc0281910 +lucene = 10.3.0-snapshot-efb4df54eb1 bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 5810a90da72d4..563aa22618b83 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3123,129 +3123,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 4adc3e0ee8cb2989362c9a5fbcc4524a0178f316 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 20 Aug 2025 06:14:04 +0000 Subject: [PATCH 159/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-3ab1f03d18c --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 152 ++++++++++++------------ 2 files changed, 77 insertions(+), 77 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 90df3af0bdc1e..2b3ea46e0cf9a 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.2.0 -lucene = 10.3.0-snapshot-efb4df54eb1 +lucene = 10.3.0-snapshot-3ab1f03d18c bundled_jdk_vendor = openjdk bundled_jdk = 24+36@1f9ff9062db4449d8ca828c504ffae90 diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index c5d32299ae073..874d971e0a003 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -9,13 +9,13 @@ - + @@ -3124,129 +3124,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + 
+ + - - - + + + - - - + + + - - - + + + - - - + + + From fe2874b914f6dae9b4e8e5fd956c2217afaceb1e Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Wed, 20 Aug 2025 11:58:53 +0100 Subject: [PATCH 160/184] Implement docIDRunEnd on new DocValues implementations; fix expectations in range query rewrites --- .../codec/tsdb/es819/ES819TSDBDocValuesProducer.java | 10 ++++++++++ .../tsdb/es819/ES819TSDBDocValuesFormatTests.java | 8 ++------ .../elasticsearch/index/mapper/DateFieldTypeTests.java | 8 ++++---- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java index 4c415193c4b9e..f4ac4b0571cb6 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java @@ -1304,6 +1304,11 @@ long lookAheadValueAt(int targetDoc) throws IOException { public long longValue() throws IOException { return 0L; // Only one ordinal! 
} + + @Override + public int docIDRunEnd() throws IOException { + return disi.docIDRunEnd(); + } }; } } else if (entry.sortedOrdinals != null) { @@ -1326,6 +1331,11 @@ long lookAheadValueAt(int targetDoc) { public long longValue() { return ordinalsReader.readValueAndAdvance(doc); } + + @Override + public int docIDRunEnd() throws IOException { + return maxDoc; + } }; } else { final var disi = new IndexedDISI( diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java index b39a08a492b79..462842c8dcb9a 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java @@ -34,13 +34,13 @@ import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortedNumericSortField; import org.apache.lucene.store.Directory; -import org.apache.lucene.tests.util.TestUtil; import org.apache.lucene.util.BytesRef; import org.elasticsearch.cluster.metadata.DataStream; import org.elasticsearch.common.Randomness; import org.elasticsearch.common.lucene.BytesRefs; import org.elasticsearch.common.util.CollectionUtils; import org.elasticsearch.index.codec.Elasticsearch900Lucene101Codec; +import org.elasticsearch.index.codec.Elasticsearch92Lucene103Codec; import org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormatTests; import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesProducer.BaseDenseNumericValues; import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesProducer.BaseSortedDocValues; @@ -66,10 +66,7 @@ public class ES819TSDBDocValuesFormatTests extends ES87TSDBDocValuesFormatTests { -<<<<<<< HEAD - final Codec codec = TestUtil.alwaysDocValuesFormat(new ES819TSDBDocValuesFormat()); -======= - private final Codec codec = new 
Elasticsearch900Lucene101Codec() { + private final Codec codec = new Elasticsearch92Lucene103Codec() { final ES819TSDBDocValuesFormat docValuesFormat = new ES819TSDBDocValuesFormat( ESTestCase.randomIntBetween(1, 4096), @@ -82,7 +79,6 @@ public DocValuesFormat getDocValuesFormatForField(String field) { return docValuesFormat; } }; ->>>>>>> origin/main @Override protected Codec getCodec() { diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DateFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DateFieldTypeTests.java index 53880ebcef011..b736e6ca6337d 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/DateFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/DateFieldTypeTests.java @@ -376,7 +376,7 @@ public void testRangeQuery() throws IOException { Query expected2 = SortedNumericDocValuesField.newSlowRangeQuery("field", instant1, instant2); assertEquals( expected2, - ft2.rangeQuery(date1, date2, true, true, null, null, null, context).rewrite(newSearcher(new MultiReader())) + ft2.rangeQuery(date1, date2, true, true, null, null, null, context) ); instant1 = nowInMillis; @@ -419,7 +419,7 @@ public void testRangeQuerySubseconds() throws IOException { Query expected2 = SortedNumericDocValuesField.newSlowRangeQuery("field", instant1, instant2); assertEquals( expected2, - ft2.rangeQuery(date1, date2, true, true, null, null, null, context).rewrite(newSearcher(new MultiReader())) + ft2.rangeQuery(date1, date2, true, true, null, null, null, context) ); instant1 = nowInMillis; @@ -459,7 +459,7 @@ public void testRangeQueryMillis() throws IOException { DateFieldType ft2 = new DateFieldType("field", false); Query expected2 = SortedNumericDocValuesField.newSlowRangeQuery("field", instant1, instant2); - assertEquals(expected2, ft2.rangeQuery(instant1, instant2, true, true, context).rewrite(newSearcher(new MultiReader()))); + assertEquals(expected2, ft2.rangeQuery(instant1, instant2, true, true, 
context)); assertIndexUnsearchable( Resolution.MILLISECONDS, @@ -487,7 +487,7 @@ public void testRangeQueryNanos() throws IOException { Query expected2 = SortedNumericDocValuesField.newSlowRangeQuery("field", instant1, instant2); assertEquals( expected2, - ft2.rangeQuery(date1, date2, true, true, null, null, null, context).rewrite(newSearcher(new MultiReader())) + ft2.rangeQuery(date1, date2, true, true, null, null, null, context) ); assertIndexUnsearchable(Resolution.NANOSECONDS, (unsearchable) -> unsearchable.rangeQuery(instant1, instant2, true, true, context)); From fefb3c581d6c44eb343002607ceaaf4fc293f152 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Wed, 20 Aug 2025 14:12:53 +0100 Subject: [PATCH 161/184] spotless --- .../index/mapper/DateFieldTypeTests.java | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DateFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DateFieldTypeTests.java index b736e6ca6337d..735fdd4b9cb3b 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/DateFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/DateFieldTypeTests.java @@ -374,10 +374,7 @@ public void testRangeQuery() throws IOException { MappedFieldType ft2 = new DateFieldType("field", false); Query expected2 = SortedNumericDocValuesField.newSlowRangeQuery("field", instant1, instant2); - assertEquals( - expected2, - ft2.rangeQuery(date1, date2, true, true, null, null, null, context) - ); + assertEquals(expected2, ft2.rangeQuery(date1, date2, true, true, null, null, null, context)); instant1 = nowInMillis; instant2 = instant1 + 100; @@ -417,10 +414,7 @@ public void testRangeQuerySubseconds() throws IOException { MappedFieldType ft2 = new DateFieldType("field", false); Query expected2 = SortedNumericDocValuesField.newSlowRangeQuery("field", instant1, instant2); - assertEquals( - expected2, - ft2.rangeQuery(date1, date2, true, true, 
null, null, null, context) - ); + assertEquals(expected2, ft2.rangeQuery(date1, date2, true, true, null, null, null, context)); instant1 = nowInMillis; instant2 = instant1 + 100; @@ -485,10 +479,7 @@ public void testRangeQueryNanos() throws IOException { DateFieldType ft2 = new DateFieldType("field", false, Resolution.NANOSECONDS); Query expected2 = SortedNumericDocValuesField.newSlowRangeQuery("field", instant1, instant2); - assertEquals( - expected2, - ft2.rangeQuery(date1, date2, true, true, null, null, null, context) - ); + assertEquals(expected2, ft2.rangeQuery(date1, date2, true, true, null, null, null, context)); assertIndexUnsearchable(Resolution.NANOSECONDS, (unsearchable) -> unsearchable.rangeQuery(instant1, instant2, true, true, context)); } From b38137053432762c5ca3b91c9726f719ffbc8a85 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Fri, 22 Aug 2025 06:13:58 +0000 Subject: [PATCH 162/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-1ff9537a207 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 4a18ffa8eea53..a4c8535383c08 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.2.0 -lucene = 10.3.0-snapshot-3ab1f03d18c +lucene = 10.3.0-snapshot-1ff9537a207 bundled_jdk_vendor = openjdk bundled_jdk = 24.0.2+12@fdc5d0102fe0414db21410ad5834341f diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 6616f260ea84c..24ed14abcd428 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3045,129 +3045,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + 
+ + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From ed774f4fefe4c7fe834a0ac32c67b36700db5e84 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Sat, 23 Aug 2025 06:12:17 +0000 Subject: [PATCH 163/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-a71a3e2d1de --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index a4c8535383c08..f5da8595a32d4 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.2.0 -lucene = 10.3.0-snapshot-1ff9537a207 +lucene = 10.3.0-snapshot-a71a3e2d1de bundled_jdk_vendor = openjdk bundled_jdk = 24.0.2+12@fdc5d0102fe0414db21410ad5834341f diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 24ed14abcd428..ac3607e2381d1 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3045,129 +3045,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From e3bcfa89c0a6aa2ae3f0c523abdd69fcd4891670 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Mon, 25 Aug 2025 06:14:14 +0000 Subject: [PATCH 164/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-df641166392 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index f5da8595a32d4..90698e13b6f2d 100644 --- 
a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.2.0 -lucene = 10.3.0-snapshot-a71a3e2d1de +lucene = 10.3.0-snapshot-df641166392 bundled_jdk_vendor = openjdk bundled_jdk = 24.0.2+12@fdc5d0102fe0414db21410ad5834341f diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 9aa2cc0e3ea3f..00c9a204d9130 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3040,129 +3040,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From e40fb2f55ae1c04a6835c31af1cf3b3bbcc46f2e Mon Sep 17 00:00:00 2001 From: Benjamin Trent Date: Mon, 25 Aug 2025 20:40:39 -0400 Subject: [PATCH 165/184] Adjust knn reader interfaces to use new AcceptDocs api (#133501) --- .../codec/vectors/ES813FlatVectorFormat.java | 10 +++--- .../vectors/ES813Int8FlatVectorFormat.java | 10 +++--- .../index/codec/vectors/IVFVectorsReader.java | 16 ++++----- .../index/codec/vectors/IVFVectorsWriter.java | 6 ++-- .../ES816BinaryQuantizedVectorsReader.java | 7 ++-- .../ES818BinaryQuantizedVectorsReader.java | 7 ++-- .../vectors/es818/MergeReaderWrapper.java | 6 ++-- .../index/engine/TranslogDirectoryReader.java | 5 +-- .../index/mapper/DocumentLeafReader.java | 5 +-- .../internal/ExitableDirectoryReader.java | 6 ++-- .../FieldUsageTrackingDirectoryReader.java | 6 ++-- .../vectors/AbstractIVFKnnVectorQuery.java | 32 ++++-------------- .../vectors/IVFKnnFloatVectorQuery.java | 4 +-- ...HnswScalarQuantizedVectorsFormatTests.java | 9 ++++- .../codec/vectors/IVFVectorsFormatTests.java | 25 ++++++++++++-- ...HnswBinaryQuantizedVectorsFormatTests.java | 9 ++++- ...HnswBinaryQuantizedVectorsFormatTests.java | 9 ++++- 
.../search/SearchCancellationTests.java | 9 ++++- .../CompositeValuesCollectorQueueTests.java | 6 ++-- .../frozen/RewriteCachingDirectoryReader.java | 5 +-- .../accesscontrol/FieldSubsetReader.java | 6 ++-- .../accesscontrol/FieldSubsetReaderTests.java | 33 ++++++++++++++++--- .../compute/lucene/LuceneSliceQueueTests.java | 6 ++-- 23 files changed, 150 insertions(+), 87 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES813FlatVectorFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES813FlatVectorFormat.java index 325188624a2f4..ef043422a37be 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES813FlatVectorFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES813FlatVectorFormat.java @@ -25,6 +25,7 @@ import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.Sorter; +import org.apache.lucene.search.AcceptDocs; import org.apache.lucene.search.KnnCollector; import org.apache.lucene.util.Bits; import org.apache.lucene.util.hnsw.OrdinalTranslatedKnnCollector; @@ -128,13 +129,14 @@ public ByteVectorValues getByteVectorValues(String field) throws IOException { } @Override - public void search(String field, float[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException { + public void search(String field, float[] target, KnnCollector knnCollector, AcceptDocs acceptDocs) throws IOException { collectAllMatchingDocs(knnCollector, acceptDocs, reader.getRandomVectorScorer(field, target)); } - private void collectAllMatchingDocs(KnnCollector knnCollector, Bits acceptDocs, RandomVectorScorer scorer) throws IOException { + private void collectAllMatchingDocs(KnnCollector knnCollector, AcceptDocs acceptDocs, RandomVectorScorer scorer) + throws IOException { OrdinalTranslatedKnnCollector collector = new OrdinalTranslatedKnnCollector(knnCollector, scorer::ordToDoc); - Bits acceptedOrds = 
scorer.getAcceptOrds(acceptDocs); + Bits acceptedOrds = scorer.getAcceptOrds(acceptDocs.bits()); for (int i = 0; i < scorer.maxOrd(); i++) { if (acceptedOrds == null || acceptedOrds.get(i)) { collector.collect(i, scorer.score(i)); @@ -145,7 +147,7 @@ private void collectAllMatchingDocs(KnnCollector knnCollector, Bits acceptDocs, } @Override - public void search(String field, byte[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException { + public void search(String field, byte[] target, KnnCollector knnCollector, AcceptDocs acceptDocs) throws IOException { collectAllMatchingDocs(knnCollector, acceptDocs, reader.getRandomVectorScorer(field, target)); } diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES813Int8FlatVectorFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES813Int8FlatVectorFormat.java index 4636a37d14f53..dd9c1dced748d 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES813Int8FlatVectorFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES813Int8FlatVectorFormat.java @@ -23,6 +23,7 @@ import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.Sorter; +import org.apache.lucene.search.AcceptDocs; import org.apache.lucene.search.KnnCollector; import org.apache.lucene.util.Bits; import org.apache.lucene.util.hnsw.OrdinalTranslatedKnnCollector; @@ -136,13 +137,14 @@ public ByteVectorValues getByteVectorValues(String field) throws IOException { } @Override - public void search(String field, float[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException { + public void search(String field, float[] target, KnnCollector knnCollector, AcceptDocs acceptDocs) throws IOException { collectAllMatchingDocs(knnCollector, acceptDocs, reader.getRandomVectorScorer(field, target)); } - private void collectAllMatchingDocs(KnnCollector knnCollector, Bits acceptDocs, 
RandomVectorScorer scorer) throws IOException { + private void collectAllMatchingDocs(KnnCollector knnCollector, AcceptDocs acceptDocs, RandomVectorScorer scorer) + throws IOException { OrdinalTranslatedKnnCollector collector = new OrdinalTranslatedKnnCollector(knnCollector, scorer::ordToDoc); - Bits acceptedOrds = scorer.getAcceptOrds(acceptDocs); + Bits acceptedOrds = scorer.getAcceptOrds(acceptDocs.bits()); for (int i = 0; i < scorer.maxOrd(); i++) { if (acceptedOrds == null || acceptedOrds.get(i)) { collector.collect(i, scorer.score(i)); @@ -153,7 +155,7 @@ private void collectAllMatchingDocs(KnnCollector knnCollector, Bits acceptDocs, } @Override - public void search(String field, byte[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException { + public void search(String field, byte[] target, KnnCollector knnCollector, AcceptDocs acceptDocs) throws IOException { collectAllMatchingDocs(knnCollector, acceptDocs, reader.getRandomVectorScorer(field, target)); } diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsReader.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsReader.java index 8082d72482c0a..5fb7dd76ad3e5 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsReader.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsReader.java @@ -22,12 +22,12 @@ import org.apache.lucene.index.VectorEncoding; import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.lucene.internal.hppc.IntObjectHashMap; +import org.apache.lucene.search.AcceptDocs; import org.apache.lucene.search.KnnCollector; import org.apache.lucene.store.ChecksumIndexInput; import org.apache.lucene.store.DataInput; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; -import org.apache.lucene.util.BitSet; import org.apache.lucene.util.Bits; import org.elasticsearch.core.IOUtils; import 
org.elasticsearch.search.vectors.IVFKnnSearchStrategy; @@ -212,7 +212,7 @@ public final ByteVectorValues getByteVectorValues(String field) throws IOExcepti } @Override - public final void search(String field, float[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException { + public final void search(String field, float[] target, KnnCollector knnCollector, AcceptDocs acceptDocs) throws IOException { final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field); if (fieldInfo.getVectorEncoding().equals(VectorEncoding.FLOAT32) == false) { rawVectorsReader.search(field, target, knnCollector, acceptDocs); @@ -223,11 +223,8 @@ public final void search(String field, float[] target, KnnCollector knnCollector "vector query dimension: " + target.length + " differs from field dimension: " + fieldInfo.getVectorDimension() ); } - float percentFiltered = 1f; - if (acceptDocs instanceof BitSet bitSet) { - percentFiltered = Math.max(0f, Math.min(1f, (float) bitSet.approximateCardinality() / bitSet.length())); - } int numVectors = rawVectorsReader.getFloatVectorValues(field).size(); + float percentFiltered = Math.max(0f, Math.min(1f, (float) acceptDocs.cost() / numVectors)); float visitRatio = DYNAMIC_VISIT_RATIO; // Search strategy may be null if this is being called from checkIndex (e.g. 
from a test) if (knnCollector.getSearchStrategy() instanceof IVFKnnSearchStrategy ivfSearchStrategy) { @@ -255,7 +252,8 @@ public final void search(String field, float[] target, KnnCollector knnCollector target, postListSlice ); - PostingVisitor scorer = getPostingVisitor(fieldInfo, postListSlice, target, acceptDocs); + Bits acceptDocsBits = acceptDocs.bits(); + PostingVisitor scorer = getPostingVisitor(fieldInfo, postListSlice, target, acceptDocsBits); long expectedDocs = 0; long actualDocs = 0; // initially we visit only the "centroids to search" @@ -271,7 +269,7 @@ public final void search(String field, float[] target, KnnCollector knnCollector expectedDocs += scorer.resetPostingsScorer(offsetAndLength.offset()); actualDocs += scorer.visit(knnCollector); } - if (acceptDocs != null) { + if (acceptDocsBits != null) { float unfilteredRatioVisited = (float) expectedDocs / numVectors; int filteredVectors = (int) Math.ceil(numVectors * percentFiltered); float expectedScored = Math.min(2 * filteredVectors * unfilteredRatioVisited, expectedDocs / 2f); @@ -284,7 +282,7 @@ public final void search(String field, float[] target, KnnCollector knnCollector } @Override - public final void search(String field, byte[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException { + public final void search(String field, byte[] target, KnnCollector knnCollector, AcceptDocs acceptDocs) throws IOException { final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field); final ByteVectorValues values = rawVectorsReader.getByteVectorValues(field); for (int i = 0; i < values.size(); i++) { diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsWriter.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsWriter.java index 82f369662592f..661aefb3491fb 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsWriter.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsWriter.java 
@@ -24,11 +24,11 @@ import org.apache.lucene.index.VectorEncoding; import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.store.DataAccessHint; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.RandomAccessInput; -import org.apache.lucene.store.ReadAdvice; import org.apache.lucene.util.LongValues; import org.apache.lucene.util.VectorUtil; import org.elasticsearch.core.IOUtils; @@ -302,11 +302,11 @@ private void mergeOneFieldIVF(FieldInfo fieldInfo, MergeState mergeState) throws try ( IndexInput vectors = mergeState.segmentInfo.dir.openInput( tempRawVectorsFileName, - IOContext.DEFAULT.withReadAdvice(ReadAdvice.SEQUENTIAL) + IOContext.DEFAULT.withHints(DataAccessHint.SEQUENTIAL) ); IndexInput docs = docsFileName == null ? null - : mergeState.segmentInfo.dir.openInput(docsFileName, IOContext.DEFAULT.withReadAdvice(ReadAdvice.SEQUENTIAL)) + : mergeState.segmentInfo.dir.openInput(docsFileName, IOContext.DEFAULT.withHints(DataAccessHint.SEQUENTIAL)) ) { final FloatVectorValues floatVectorValues = getFloatVectorValues(fieldInfo, docs, vectors, numVectors); diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsReader.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsReader.java index 9d3b678c56d47..9dde74bad43ed 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsReader.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es816/ES816BinaryQuantizedVectorsReader.java @@ -32,6 +32,7 @@ import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.VectorEncoding; import org.apache.lucene.index.VectorSimilarityFunction; +import org.apache.lucene.search.AcceptDocs; import 
org.apache.lucene.search.KnnCollector; import org.apache.lucene.search.VectorScorer; import org.apache.lucene.store.ChecksumIndexInput; @@ -226,17 +227,17 @@ public ByteVectorValues getByteVectorValues(String field) throws IOException { } @Override - public void search(String field, byte[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException { + public void search(String field, byte[] target, KnnCollector knnCollector, AcceptDocs acceptDocs) throws IOException { rawVectorsReader.search(field, target, knnCollector, acceptDocs); } @Override - public void search(String field, float[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException { + public void search(String field, float[] target, KnnCollector knnCollector, AcceptDocs acceptDocs) throws IOException { if (knnCollector.k() == 0) return; final RandomVectorScorer scorer = getRandomVectorScorer(field, target); if (scorer == null) return; OrdinalTranslatedKnnCollector collector = new OrdinalTranslatedKnnCollector(knnCollector, scorer::ordToDoc); - Bits acceptedOrds = scorer.getAcceptOrds(acceptDocs); + Bits acceptedOrds = scorer.getAcceptOrds(acceptDocs.bits()); for (int i = 0; i < scorer.maxOrd(); i++) { if (acceptedOrds == null || acceptedOrds.get(i)) { collector.collect(i, scorer.score(i)); diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsReader.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsReader.java index 1feee9f642b32..1082faaca4256 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsReader.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/ES818BinaryQuantizedVectorsReader.java @@ -32,6 +32,7 @@ import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.VectorEncoding; import org.apache.lucene.index.VectorSimilarityFunction; +import org.apache.lucene.search.AcceptDocs; 
import org.apache.lucene.search.KnnCollector; import org.apache.lucene.search.VectorScorer; import org.apache.lucene.store.ChecksumIndexInput; @@ -240,17 +241,17 @@ public ByteVectorValues getByteVectorValues(String field) throws IOException { } @Override - public void search(String field, byte[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException { + public void search(String field, byte[] target, KnnCollector knnCollector, AcceptDocs acceptDocs) throws IOException { rawVectorsReader.search(field, target, knnCollector, acceptDocs); } @Override - public void search(String field, float[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException { + public void search(String field, float[] target, KnnCollector knnCollector, AcceptDocs acceptDocs) throws IOException { if (knnCollector.k() == 0) return; final RandomVectorScorer scorer = getRandomVectorScorer(field, target); if (scorer == null) return; OrdinalTranslatedKnnCollector collector = new OrdinalTranslatedKnnCollector(knnCollector, scorer::ordToDoc); - Bits acceptedOrds = scorer.getAcceptOrds(acceptDocs); + Bits acceptedOrds = scorer.getAcceptOrds(acceptDocs.bits()); for (int i = 0; i < scorer.maxOrd(); i++) { if (acceptedOrds == null || acceptedOrds.get(i)) { collector.collect(i, scorer.score(i)); diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/MergeReaderWrapper.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/MergeReaderWrapper.java index e26784ecfdd96..0492d54ba57f3 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/MergeReaderWrapper.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/es818/MergeReaderWrapper.java @@ -13,9 +13,9 @@ import org.apache.lucene.index.ByteVectorValues; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FloatVectorValues; +import org.apache.lucene.search.AcceptDocs; import org.apache.lucene.search.KnnCollector; import 
org.apache.lucene.util.Accountable; -import org.apache.lucene.util.Bits; import org.apache.lucene.util.hnsw.RandomVectorScorer; import org.elasticsearch.core.IOUtils; @@ -60,12 +60,12 @@ public ByteVectorValues getByteVectorValues(String field) throws IOException { } @Override - public void search(String field, float[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException { + public void search(String field, float[] target, KnnCollector knnCollector, AcceptDocs acceptDocs) throws IOException { mainReader.search(field, target, knnCollector, acceptDocs); } @Override - public void search(String field, byte[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException { + public void search(String field, byte[] target, KnnCollector knnCollector, AcceptDocs acceptDocs) throws IOException { mainReader.search(field, target, knnCollector, acceptDocs); } diff --git a/server/src/main/java/org/elasticsearch/index/engine/TranslogDirectoryReader.java b/server/src/main/java/org/elasticsearch/index/engine/TranslogDirectoryReader.java index 598fb076ba222..24b0512d27598 100644 --- a/server/src/main/java/org/elasticsearch/index/engine/TranslogDirectoryReader.java +++ b/server/src/main/java/org/elasticsearch/index/engine/TranslogDirectoryReader.java @@ -40,6 +40,7 @@ import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.VectorEncoding; import org.apache.lucene.index.VectorSimilarityFunction; +import org.apache.lucene.search.AcceptDocs; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.KnnCollector; import org.apache.lucene.store.ByteBuffersDirectory; @@ -447,12 +448,12 @@ public ByteVectorValues getByteVectorValues(String field) throws IOException { } @Override - public void searchNearestVectors(String field, float[] target, KnnCollector collector, Bits acceptDocs) throws IOException { + public void searchNearestVectors(String field, float[] target, KnnCollector collector, AcceptDocs acceptDocs) throws 
IOException { getDelegate().searchNearestVectors(field, target, collector, acceptDocs); } @Override - public void searchNearestVectors(String field, byte[] target, KnnCollector collector, Bits acceptDocs) throws IOException { + public void searchNearestVectors(String field, byte[] target, KnnCollector collector, AcceptDocs acceptDocs) throws IOException { getDelegate().searchNearestVectors(field, target, collector, acceptDocs); } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentLeafReader.java b/server/src/main/java/org/elasticsearch/index/mapper/DocumentLeafReader.java index d37f6c51d288d..5a540785bd9fc 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentLeafReader.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentLeafReader.java @@ -34,6 +34,7 @@ import org.apache.lucene.index.VectorEncoding; import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.lucene.index.memory.MemoryIndex; +import org.apache.lucene.search.AcceptDocs; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.KnnCollector; import org.apache.lucene.util.Bits; @@ -210,7 +211,7 @@ public FloatVectorValues getFloatVectorValues(String field) throws IOException { } @Override - public void searchNearestVectors(String field, float[] target, KnnCollector knnCollector, Bits acceptDocs) { + public void searchNearestVectors(String field, float[] target, KnnCollector knnCollector, AcceptDocs acceptDocs) { throw new UnsupportedOperationException(); } @@ -255,7 +256,7 @@ public ByteVectorValues getByteVectorValues(String field) { } @Override - public void searchNearestVectors(String field, byte[] target, KnnCollector knnCollector, Bits acceptDocs) { + public void searchNearestVectors(String field, byte[] target, KnnCollector knnCollector, AcceptDocs acceptDocs) { throw new UnsupportedOperationException(); } diff --git 
a/server/src/main/java/org/elasticsearch/search/internal/ExitableDirectoryReader.java b/server/src/main/java/org/elasticsearch/search/internal/ExitableDirectoryReader.java index d2adb248ad019..7ce82290e206d 100644 --- a/server/src/main/java/org/elasticsearch/search/internal/ExitableDirectoryReader.java +++ b/server/src/main/java/org/elasticsearch/search/internal/ExitableDirectoryReader.java @@ -23,11 +23,11 @@ import org.apache.lucene.index.QueryTimeout; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.search.AcceptDocs; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.KnnCollector; import org.apache.lucene.search.VectorScorer; import org.apache.lucene.search.suggest.document.CompletionTerms; -import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.automaton.CompiledAutomaton; import org.elasticsearch.common.lucene.index.SequentialStoredFieldsLeafReader; @@ -141,7 +141,7 @@ public ByteVectorValues getByteVectorValues(String field) throws IOException { } @Override - public void searchNearestVectors(String field, byte[] target, KnnCollector collector, Bits acceptDocs) throws IOException { + public void searchNearestVectors(String field, byte[] target, KnnCollector collector, AcceptDocs acceptDocs) throws IOException { if (queryCancellation.isEnabled() == false) { in.searchNearestVectors(field, target, collector, acceptDocs); return; @@ -159,7 +159,7 @@ public FloatVectorValues getFloatVectorValues(String field) throws IOException { } @Override - public void searchNearestVectors(String field, float[] target, KnnCollector collector, Bits acceptDocs) throws IOException { + public void searchNearestVectors(String field, float[] target, KnnCollector collector, AcceptDocs acceptDocs) throws IOException { if (queryCancellation.isEnabled() == false) { in.searchNearestVectors(field, target, collector, acceptDocs); return; diff --git 
a/server/src/main/java/org/elasticsearch/search/internal/FieldUsageTrackingDirectoryReader.java b/server/src/main/java/org/elasticsearch/search/internal/FieldUsageTrackingDirectoryReader.java index f03be3f09b7d2..8143e6ef28053 100644 --- a/server/src/main/java/org/elasticsearch/search/internal/FieldUsageTrackingDirectoryReader.java +++ b/server/src/main/java/org/elasticsearch/search/internal/FieldUsageTrackingDirectoryReader.java @@ -30,9 +30,9 @@ import org.apache.lucene.index.TermVectors; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.search.AcceptDocs; import org.apache.lucene.search.KnnCollector; import org.apache.lucene.search.suggest.document.CompletionTerms; -import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.automaton.CompiledAutomaton; import org.elasticsearch.common.lucene.index.SequentialStoredFieldsLeafReader; @@ -221,7 +221,7 @@ public ByteVectorValues getByteVectorValues(String field) throws IOException { } @Override - public void searchNearestVectors(String field, byte[] target, KnnCollector collector, Bits acceptDocs) throws IOException { + public void searchNearestVectors(String field, byte[] target, KnnCollector collector, AcceptDocs acceptDocs) throws IOException { super.searchNearestVectors(field, target, collector, acceptDocs); if (collector.visitedCount() > 0) { notifier.onKnnVectorsUsed(field); @@ -229,7 +229,7 @@ public void searchNearestVectors(String field, byte[] target, KnnCollector colle } @Override - public void searchNearestVectors(String field, float[] target, KnnCollector collector, Bits acceptDocs) throws IOException { + public void searchNearestVectors(String field, float[] target, KnnCollector collector, AcceptDocs acceptDocs) throws IOException { super.searchNearestVectors(field, target, collector, acceptDocs); if (collector.visitedCount() > 0) { notifier.onKnnVectorsUsed(field); diff --git 
a/server/src/main/java/org/elasticsearch/search/vectors/AbstractIVFKnnVectorQuery.java b/server/src/main/java/org/elasticsearch/search/vectors/AbstractIVFKnnVectorQuery.java index 50d94541fe666..e0b9be18d3980 100644 --- a/server/src/main/java/org/elasticsearch/search/vectors/AbstractIVFKnnVectorQuery.java +++ b/server/src/main/java/org/elasticsearch/search/vectors/AbstractIVFKnnVectorQuery.java @@ -15,11 +15,10 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.AcceptDocs; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.FieldExistsQuery; -import org.apache.lucene.search.FilteredDocIdSetIterator; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.KnnCollector; import org.apache.lucene.search.MatchNoDocsQuery; @@ -35,9 +34,6 @@ import org.apache.lucene.search.Weight; import org.apache.lucene.search.knn.KnnCollectorManager; import org.apache.lucene.search.knn.KnnSearchStrategy; -import org.apache.lucene.util.BitSet; -import org.apache.lucene.util.BitSetIterator; -import org.apache.lucene.util.Bits; import org.elasticsearch.search.profile.query.QueryProfiler; import java.io.IOException; @@ -185,10 +181,10 @@ private TopDocs searchLeaf(LeafReaderContext ctx, Weight filterWeight, KnnCollec TopDocs getLeafResults(LeafReaderContext ctx, Weight filterWeight, KnnCollectorManager knnCollectorManager, float visitRatio) throws IOException { final LeafReader reader = ctx.reader(); - final Bits liveDocs = reader.getLiveDocs(); if (filterWeight == null) { - return approximateSearch(ctx, liveDocs, Integer.MAX_VALUE, knnCollectorManager, visitRatio); + AcceptDocs acceptDocs = AcceptDocs.fromLiveDocs(reader.getLiveDocs(), reader.maxDoc()); + return approximateSearch(ctx, acceptDocs, Integer.MAX_VALUE, 
knnCollectorManager, visitRatio); } Scorer scorer = filterWeight.scorer(ctx); @@ -196,14 +192,14 @@ TopDocs getLeafResults(LeafReaderContext ctx, Weight filterWeight, KnnCollectorM return TopDocsCollector.EMPTY_TOPDOCS; } - BitSet acceptDocs = createBitSet(scorer.iterator(), liveDocs, reader.maxDoc()); - final int cost = acceptDocs.cardinality(); + AcceptDocs acceptDocs = AcceptDocs.fromIteratorSupplier(scorer::iterator, reader.getLiveDocs(), reader.maxDoc()); + final int cost = acceptDocs.cost(); return approximateSearch(ctx, acceptDocs, cost + 1, knnCollectorManager, visitRatio); } abstract TopDocs approximateSearch( LeafReaderContext context, - Bits acceptDocs, + AcceptDocs acceptDocs, int visitedLimit, KnnCollectorManager knnCollectorManager, float visitRatio @@ -218,22 +214,6 @@ public final void profile(QueryProfiler queryProfiler) { queryProfiler.addVectorOpsCount(vectorOpsCount); } - BitSet createBitSet(DocIdSetIterator iterator, Bits liveDocs, int maxDoc) throws IOException { - if (liveDocs == null && iterator instanceof BitSetIterator bitSetIterator) { - // If we already have a BitSet and no deletions, reuse the BitSet - return bitSetIterator.getBitSet(); - } else { - // Create a new BitSet from matching and live docs - FilteredDocIdSetIterator filterIterator = new FilteredDocIdSetIterator(iterator) { - @Override - protected boolean match(int doc) { - return liveDocs == null || liveDocs.get(doc); - } - }; - return BitSet.of(filterIterator, maxDoc); - } - } - static class IVFCollectorManager implements KnnCollectorManager { private final int k; diff --git a/server/src/main/java/org/elasticsearch/search/vectors/IVFKnnFloatVectorQuery.java b/server/src/main/java/org/elasticsearch/search/vectors/IVFKnnFloatVectorQuery.java index 30b37b11005b3..a711bb9b3c5ee 100644 --- a/server/src/main/java/org/elasticsearch/search/vectors/IVFKnnFloatVectorQuery.java +++ b/server/src/main/java/org/elasticsearch/search/vectors/IVFKnnFloatVectorQuery.java @@ -11,12 +11,12 @@ 
import org.apache.lucene.index.FloatVectorValues; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.AcceptDocs; import org.apache.lucene.search.KnnCollector; import org.apache.lucene.search.Query; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.knn.KnnCollectorManager; import org.apache.lucene.search.knn.KnnSearchStrategy; -import org.apache.lucene.util.Bits; import java.io.IOException; import java.util.Arrays; @@ -76,7 +76,7 @@ public int hashCode() { @Override protected TopDocs approximateSearch( LeafReaderContext context, - Bits acceptDocs, + AcceptDocs acceptDocs, int visitedLimit, KnnCollectorManager knnCollectorManager, float visitRatio diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES814HnswScalarQuantizedVectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/ES814HnswScalarQuantizedVectorsFormatTests.java index d9450b1332f7a..fdbf4679e6ab5 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/ES814HnswScalarQuantizedVectorsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/ES814HnswScalarQuantizedVectorsFormatTests.java @@ -24,6 +24,7 @@ import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.StoredFields; import org.apache.lucene.index.VectorSimilarityFunction; +import org.apache.lucene.search.AcceptDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.MMapDirectory; import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase; @@ -170,7 +171,13 @@ private void testSingleVectorPerSegment(VectorSimilarityFunction sim) throws Exc LeafReader leafReader = getOnlyLeafReader(reader); StoredFields storedFields = reader.storedFields(); float[] queryVector = new float[] { 0.6f, 0.8f }; - var hits = leafReader.searchNearestVectors("field", queryVector, 3, null, 100); + var hits = leafReader.searchNearestVectors( + 
"field", + queryVector, + 3, + AcceptDocs.fromLiveDocs(leafReader.getLiveDocs(), leafReader.maxDoc()), + 100 + ); assertEquals(hits.scoreDocs.length, 3); assertEquals("B", storedFields.document(hits.scoreDocs[0].doc).get("id")); assertEquals("A", storedFields.document(hits.scoreDocs[1].doc).get("id")); diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/IVFVectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/IVFVectorsFormatTests.java index 7fdd2a7313e48..a4224c3d3e79a 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/IVFVectorsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/IVFVectorsFormatTests.java @@ -25,6 +25,7 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.VectorEncoding; import org.apache.lucene.index.VectorSimilarityFunction; +import org.apache.lucene.search.AcceptDocs; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase; @@ -191,7 +192,13 @@ public void testFewVectorManyTimes() throws IOException { for (LeafReaderContext r : subReaders) { LeafReader leafReader = r.reader(); float[] vector = randomVector(dimensions); - TopDocs topDocs = leafReader.searchNearestVectors("f", vector, 10, leafReader.getLiveDocs(), Integer.MAX_VALUE); + TopDocs topDocs = leafReader.searchNearestVectors( + "f", + vector, + 10, + AcceptDocs.fromLiveDocs(leafReader.getLiveDocs(), leafReader.maxDoc()), + Integer.MAX_VALUE + ); assertEquals(Math.min(leafReader.maxDoc(), 10), topDocs.scoreDocs.length); } @@ -219,7 +226,13 @@ public void testOneRepeatedVector() throws IOException { for (LeafReaderContext r : subReaders) { LeafReader leafReader = r.reader(); float[] vector = randomVector(dimensions); - TopDocs topDocs = leafReader.searchNearestVectors("f", vector, 10, leafReader.getLiveDocs(), Integer.MAX_VALUE); + TopDocs topDocs = 
leafReader.searchNearestVectors( + "f", + vector, + 10, + AcceptDocs.fromLiveDocs(leafReader.getLiveDocs(), leafReader.maxDoc()), + Integer.MAX_VALUE + ); assertEquals(Math.min(leafReader.maxDoc(), 10), topDocs.scoreDocs.length); } @@ -250,7 +263,13 @@ public void testWithThreads() throws Exception { for (; totSearch < numSearches && failed.get() == false; totSearch++) { float[] vector = randomVector(dimensions); LeafReader leafReader = getOnlyLeafReader(reader); - leafReader.searchNearestVectors("f", vector, 10, leafReader.getLiveDocs(), Integer.MAX_VALUE); + leafReader.searchNearestVectors( + "f", + vector, + 10, + AcceptDocs.fromLiveDocs(leafReader.getLiveDocs(), leafReader.maxDoc()), + Integer.MAX_VALUE + ); } assertTrue(totSearch > 0); } catch (Exception exc) { diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816HnswBinaryQuantizedVectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816HnswBinaryQuantizedVectorsFormatTests.java index c6573398d2db6..c10fa9428bc13 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816HnswBinaryQuantizedVectorsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/es816/ES816HnswBinaryQuantizedVectorsFormatTests.java @@ -35,6 +35,7 @@ import org.apache.lucene.index.KnnVectorValues; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.VectorSimilarityFunction; +import org.apache.lucene.search.AcceptDocs; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase; @@ -99,7 +100,13 @@ public void testSingleVectorCase() throws Exception { while (docIndexIterator.nextDoc() != NO_MORE_DOCS) { assertArrayEquals(vector, vectorValues.vectorValue(docIndexIterator.index()), 0.00001f); } - TopDocs td = r.searchNearestVectors("f", randomVector(vector.length), 1, null, Integer.MAX_VALUE); + TopDocs td = 
r.searchNearestVectors( + "f", + randomVector(vector.length), + 1, + AcceptDocs.fromLiveDocs(r.getLiveDocs(), r.maxDoc()), + Integer.MAX_VALUE + ); assertEquals(1, td.totalHits.value()); assertTrue(td.scoreDocs[0].score >= 0); } diff --git a/server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818HnswBinaryQuantizedVectorsFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818HnswBinaryQuantizedVectorsFormatTests.java index 35bac97013487..bd1ef28351981 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818HnswBinaryQuantizedVectorsFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/vectors/es818/ES818HnswBinaryQuantizedVectorsFormatTests.java @@ -37,6 +37,7 @@ import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.lucene.misc.store.DirectIODirectory; +import org.apache.lucene.search.AcceptDocs; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; @@ -118,7 +119,13 @@ public void testSingleVectorCase() throws Exception { } float[] randomVector = randomVector(vector.length); float trueScore = similarityFunction.compare(vector, randomVector); - TopDocs td = r.searchNearestVectors("f", randomVector, 1, null, Integer.MAX_VALUE); + TopDocs td = r.searchNearestVectors( + "f", + randomVector, + 1, + AcceptDocs.fromLiveDocs(r.getLiveDocs(), r.maxDoc()), + Integer.MAX_VALUE + ); assertEquals(1, td.totalHits.value()); assertTrue(td.scoreDocs[0].score >= 0); // When it's the only vector in a segment, the score should be very close to the true score diff --git a/server/src/test/java/org/elasticsearch/search/SearchCancellationTests.java b/server/src/test/java/org/elasticsearch/search/SearchCancellationTests.java index aa2e76f512cc8..329f8806b552c 100644 --- a/server/src/test/java/org/elasticsearch/search/SearchCancellationTests.java +++ 
b/server/src/test/java/org/elasticsearch/search/SearchCancellationTests.java @@ -21,6 +21,7 @@ import org.apache.lucene.index.PointValues; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.search.AcceptDocs; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.TotalHitCountCollectorManager; @@ -201,7 +202,13 @@ public void testExitableDirectoryReaderVectors() throws IOException { expectThrows(TaskCancelledException.class, () -> leaf.getFloatVectorValues(KNN_FIELD_NAME)); expectThrows( TaskCancelledException.class, - () -> leaf.searchNearestVectors(KNN_FIELD_NAME, new float[] { 1f, 1f, 1f }, 2, leaf.getLiveDocs(), Integer.MAX_VALUE) + () -> leaf.searchNearestVectors( + KNN_FIELD_NAME, + new float[] { 1f, 1f, 1f }, + 2, + AcceptDocs.fromLiveDocs(leaf.getLiveDocs(), leaf.maxDoc()), + Integer.MAX_VALUE + ) ); cancelled.set(false); // Avoid exception during construction of the wrapper objects diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeValuesCollectorQueueTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeValuesCollectorQueueTests.java index 06600441b0a44..505e4c09aba1a 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeValuesCollectorQueueTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/composite/CompositeValuesCollectorQueueTests.java @@ -35,6 +35,7 @@ import org.apache.lucene.index.StoredFields; import org.apache.lucene.index.TermVectors; import org.apache.lucene.index.Terms; +import org.apache.lucene.search.AcceptDocs; import org.apache.lucene.search.CollectionTerminatedException; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.KnnCollector; @@ -490,12 +491,13 @@ public ByteVectorValues getByteVectorValues(String field) throws 
IOException { } @Override - public void searchNearestVectors(String field, float[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException { + public void searchNearestVectors(String field, float[] target, KnnCollector knnCollector, AcceptDocs acceptDocs) + throws IOException { } @Override - public void searchNearestVectors(String field, byte[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException { + public void searchNearestVectors(String field, byte[] target, KnnCollector knnCollector, AcceptDocs acceptDocs) throws IOException { } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/index/engine/frozen/RewriteCachingDirectoryReader.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/index/engine/frozen/RewriteCachingDirectoryReader.java index 12864dd66a857..65dff572f9037 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/index/engine/frozen/RewriteCachingDirectoryReader.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/index/engine/frozen/RewriteCachingDirectoryReader.java @@ -26,6 +26,7 @@ import org.apache.lucene.index.StoredFields; import org.apache.lucene.index.TermVectors; import org.apache.lucene.index.Terms; +import org.apache.lucene.search.AcceptDocs; import org.apache.lucene.search.KnnCollector; import org.apache.lucene.store.Directory; import org.apache.lucene.util.Bits; @@ -229,12 +230,12 @@ public ByteVectorValues getByteVectorValues(String field) throws IOException { } @Override - public void searchNearestVectors(String field, float[] target, KnnCollector collector, Bits acceptDocs) throws IOException { + public void searchNearestVectors(String field, float[] target, KnnCollector collector, AcceptDocs acceptDocs) throws IOException { throw new UnsupportedOperationException(); } @Override - public void searchNearestVectors(String field, byte[] target, KnnCollector collector, Bits acceptDocs) throws IOException { + public void searchNearestVectors(String field, byte[] target, 
KnnCollector collector, AcceptDocs acceptDocs) throws IOException { throw new UnsupportedOperationException(); } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/FieldSubsetReader.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/FieldSubsetReader.java index adf12490a7d90..d96dce01e25fe 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/FieldSubsetReader.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/FieldSubsetReader.java @@ -28,8 +28,8 @@ import org.apache.lucene.index.TermVectors; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.search.AcceptDocs; import org.apache.lucene.search.KnnCollector; -import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.FilterIterator; import org.apache.lucene.util.automaton.CharacterRunAutomaton; @@ -299,7 +299,7 @@ public FloatVectorValues getFloatVectorValues(String field) throws IOException { } @Override - public void searchNearestVectors(String field, float[] target, KnnCollector collector, Bits acceptDocs) throws IOException { + public void searchNearestVectors(String field, float[] target, KnnCollector collector, AcceptDocs acceptDocs) throws IOException { if (hasField(field)) { super.searchNearestVectors(field, target, collector, acceptDocs); } @@ -311,7 +311,7 @@ public ByteVectorValues getByteVectorValues(String field) throws IOException { } @Override - public void searchNearestVectors(String field, byte[] target, KnnCollector collector, Bits acceptDocs) throws IOException { + public void searchNearestVectors(String field, byte[] target, KnnCollector collector, AcceptDocs acceptDocs) throws IOException { if (hasField(field)) { super.searchNearestVectors(field, target, collector, acceptDocs); } diff --git 
a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/FieldSubsetReaderTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/FieldSubsetReaderTests.java index 0a072a685fd1a..b5918f0b388bd 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/FieldSubsetReaderTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/FieldSubsetReaderTests.java @@ -46,6 +46,7 @@ import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TermsEnum.SeekStatus; +import org.apache.lucene.search.AcceptDocs; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.tests.analysis.MockAnalyzer; @@ -211,13 +212,25 @@ public void testKnnVectors() throws Exception { assertEquals(0, iterator.nextDoc()); assertNotNull(vectorValues.vectorValue(iterator.index())); - TopDocs topDocs = leafReader.searchNearestVectors("fieldA", new float[] { 1.0f, 1.0f, 1.0f }, 5, null, Integer.MAX_VALUE); + TopDocs topDocs = leafReader.searchNearestVectors( + "fieldA", + new float[] { 1.0f, 1.0f, 1.0f }, + 5, + AcceptDocs.fromLiveDocs(leafReader.getLiveDocs(), leafReader.maxDoc()), + Integer.MAX_VALUE + ); assertNotNull(topDocs); assertEquals(1, topDocs.scoreDocs.length); // Check that we can't see fieldB assertNull(leafReader.getFloatVectorValues("fieldB")); - topDocs = leafReader.searchNearestVectors("fieldB", new float[] { 1.0f, 1.0f, 1.0f }, 5, null, Integer.MAX_VALUE); + topDocs = leafReader.searchNearestVectors( + "fieldB", + new float[] { 1.0f, 1.0f, 1.0f }, + 5, + AcceptDocs.fromLiveDocs(leafReader.getLiveDocs(), leafReader.maxDoc()), + Integer.MAX_VALUE + ); assertEquals(0, topDocs.totalHits.value()); assertEquals(0, topDocs.scoreDocs.length); @@ -246,13 +259,25 @@ public void testKnnByteVectors() throws Exception { 
assertEquals(0, iterator.nextDoc()); assertNotNull(vectorValues.vectorValue(iterator.index())); - TopDocs topDocs = leafReader.searchNearestVectors("fieldA", new byte[] { 1, 1, 1 }, 5, null, Integer.MAX_VALUE); + TopDocs topDocs = leafReader.searchNearestVectors( + "fieldA", + new byte[] { 1, 1, 1 }, + 5, + AcceptDocs.fromLiveDocs(leafReader.getLiveDocs(), leafReader.maxDoc()), + Integer.MAX_VALUE + ); assertNotNull(topDocs); assertEquals(1, topDocs.scoreDocs.length); // Check that we can't see fieldB assertNull(leafReader.getByteVectorValues("fieldB")); - topDocs = leafReader.searchNearestVectors("fieldB", new byte[] { 1, 1, 1 }, 5, null, Integer.MAX_VALUE); + topDocs = leafReader.searchNearestVectors( + "fieldB", + new byte[] { 1, 1, 1 }, + 5, + AcceptDocs.fromLiveDocs(leafReader.getLiveDocs(), leafReader.maxDoc()), + Integer.MAX_VALUE + ); assertEquals(0, topDocs.totalHits.value()); assertEquals(0, topDocs.scoreDocs.length); diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneSliceQueueTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneSliceQueueTests.java index 6054401fb58d5..79f1fcc86e2c3 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneSliceQueueTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/LuceneSliceQueueTests.java @@ -23,6 +23,7 @@ import org.apache.lucene.index.StoredFields; import org.apache.lucene.index.TermVectors; import org.apache.lucene.index.Terms; +import org.apache.lucene.search.AcceptDocs; import org.apache.lucene.search.KnnCollector; import org.apache.lucene.util.Bits; import org.elasticsearch.common.util.concurrent.ConcurrentCollections; @@ -303,12 +304,13 @@ public ByteVectorValues getByteVectorValues(String field) throws IOException { } @Override - public void searchNearestVectors(String field, float[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException 
{ + public void searchNearestVectors(String field, float[] target, KnnCollector knnCollector, AcceptDocs acceptDocs) + throws IOException { throw new UnsupportedOperationException(); } @Override - public void searchNearestVectors(String field, byte[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException { + public void searchNearestVectors(String field, byte[] target, KnnCollector knnCollector, AcceptDocs acceptDocs) throws IOException { throw new UnsupportedOperationException(); } From c83f4e9bc995946ddb125342999568f8e3213aea Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Tue, 26 Aug 2025 06:12:51 +0000 Subject: [PATCH 166/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-586dc103361 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 90698e13b6f2d..a4769f26a99c8 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.2.0 -lucene = 10.3.0-snapshot-df641166392 +lucene = 10.3.0-snapshot-586dc103361 bundled_jdk_vendor = openjdk bundled_jdk = 24.0.2+12@fdc5d0102fe0414db21410ad5834341f diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 00c9a204d9130..418e2ec137fdf 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3040,129 +3040,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 050319791ae6a44a181a069675c7158df9ad69d4 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Wed, 27 Aug 2025 06:13:05 +0000 Subject: 
[PATCH 167/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-7982423a6d3 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index a4769f26a99c8..5be43a1635a6a 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.2.0 -lucene = 10.3.0-snapshot-586dc103361 +lucene = 10.3.0-snapshot-7982423a6d3 bundled_jdk_vendor = openjdk bundled_jdk = 24.0.2+12@fdc5d0102fe0414db21410ad5834341f diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 34f16840da051..ac3a12b99800c 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3097,129 +3097,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 095c2353e0371841a97f2900dfc43464c69fcfc1 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Thu, 28 Aug 2025 06:13:45 +0000 Subject: [PATCH 168/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-fa706145062 --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 5be43a1635a6a..9d081979f9193 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.2.0 -lucene = 10.3.0-snapshot-7982423a6d3 +lucene = 10.3.0-snapshot-fa706145062 bundled_jdk_vendor = openjdk bundled_jdk = 
24.0.2+12@fdc5d0102fe0414db21410ad5834341f diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index ac3a12b99800c..e6ab8198c8c9d 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3097,129 +3097,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 0be667cf751298fe34613b8a7520362b57473632 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Fri, 29 Aug 2025 06:13:26 +0000 Subject: [PATCH 169/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-de669618c3c --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 9d081979f9193..1b90af9476ad2 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.2.0 -lucene = 10.3.0-snapshot-fa706145062 +lucene = 10.3.0-snapshot-de669618c3c bundled_jdk_vendor = openjdk bundled_jdk = 24.0.2+12@fdc5d0102fe0414db21410ad5834341f diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index c7f0a4983c361..a2ee1a7c339d7 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3157,129 +3157,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 65cf4d29dad82736d7ec7b85ff14ea86955f530d Mon Sep 17 00:00:00 2001 From: 
elasticsearchmachine Date: Sat, 30 Aug 2025 06:13:10 +0000 Subject: [PATCH 170/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-d664abb181a --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 1b90af9476ad2..70b45f1186a97 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.2.0 -lucene = 10.3.0-snapshot-de669618c3c +lucene = 10.3.0-snapshot-d664abb181a bundled_jdk_vendor = openjdk bundled_jdk = 24.0.2+12@fdc5d0102fe0414db21410ad5834341f diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index a2ee1a7c339d7..9fd60481e298f 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3157,129 +3157,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From e6bc7803e84e25023ab35f1593fa6cda8ec5fd28 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Sun, 31 Aug 2025 06:12:41 +0000 Subject: [PATCH 171/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-878a3db9c2d --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 70b45f1186a97..8f7ef45226e43 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.2.0 -lucene = 10.3.0-snapshot-d664abb181a +lucene = 
10.3.0-snapshot-878a3db9c2d bundled_jdk_vendor = openjdk bundled_jdk = 24.0.2+12@fdc5d0102fe0414db21410ad5834341f diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 9fd60481e298f..d5467a7cc9831 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3157,129 +3157,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 4b3b2f73df3576e6dc5a5d70c964404ae169d34b Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Mon, 1 Sep 2025 06:14:14 +0000 Subject: [PATCH 172/184] [Automated] Update Lucene snapshot to 10.3.0-snapshot-878a3db9c2d --- gradle/verification-metadata.xml | 50 ++++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index d5467a7cc9831..8c1b64b3f71e8 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3159,127 +3159,127 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + From 876ea52ce411931724f7dbdd50d2cf331c47e470 Mon Sep 17 00:00:00 2001 From: Simon Cooper Date: Mon, 1 Sep 2025 10:13:44 +0100 Subject: [PATCH 173/184] Update ValuesSourceReaderOperatorTests test condition for changes in lucene (#133807) --- muted-tests.yml | 6 ------ .../lucene/read/ValuesSourceReaderOperatorTests.java | 2 +- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index 6c932c6dc484c..3cea1dd5afd9e 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -393,12 +393,6 @@ tests: - class: org.elasticsearch.packaging.test.DockerTests method: test010Install issue: https://github.com/elastic/elasticsearch/issues/131376 -- 
class: org.elasticsearch.compute.lucene.read.ValuesSourceReaderOperatorTests - method: testLoadLongShuffledManySegments - issue: https://github.com/elastic/elasticsearch/issues/132258 -- class: org.elasticsearch.compute.lucene.read.ValuesSourceReaderOperatorTests - method: testLoadLongManySegments - issue: https://github.com/elastic/elasticsearch/issues/132258 - class: org.elasticsearch.packaging.test.DockerTests method: test151MachineDependentHeapWithSizeOverride issue: https://github.com/elastic/elasticsearch/issues/123437 diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java index 3291ef4b5a2e2..05981ed3803da 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java @@ -943,7 +943,7 @@ private void testLoadLong(boolean shuffle, boolean manySegments) throws IOExcept DriverContext driverContext = driverContext(); List input = CannedSourceOperator.collectPages(sourceOperator(driverContext, numDocs)); - assertThat(reader.leaves(), hasSize(manySegments ? greaterThan(5) : equalTo(1))); + assertThat(reader.leaves(), hasSize(manySegments ? 
greaterThan(1) : equalTo(1))); assertThat(input, hasSize(reader.leaves().size())); if (manySegments) { input = List.of(CannedSourceOperator.mergePages(input)); From ad3bd375beddbe0224f6e5fdf1518537b9217bf3 Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Tue, 2 Sep 2025 06:12:07 +0000 Subject: [PATCH 174/184] [Automated] Update Lucene snapshot to 10.4.0-snapshot-28bd476b1a1 --- build-tools-internal/version.properties | 2 +- docs/Versions.asciidoc | 4 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 3 files changed, 78 insertions(+), 78 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 8f7ef45226e43..f4c9aa7a422b3 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.2.0 -lucene = 10.3.0-snapshot-878a3db9c2d +lucene = 10.4.0-snapshot-28bd476b1a1 bundled_jdk_vendor = openjdk bundled_jdk = 24.0.2+12@fdc5d0102fe0414db21410ad5834341f diff --git a/docs/Versions.asciidoc b/docs/Versions.asciidoc index aa02432b42b2a..738069856228f 100644 --- a/docs/Versions.asciidoc +++ b/docs/Versions.asciidoc @@ -1,8 +1,8 @@ include::{docs-root}/shared/versions/stack/{source_branch}.asciidoc[] -:lucene_version: 10.3.0 -:lucene_version_path: 10_3_0 +:lucene_version: 10.4.0 +:lucene_version_path: 10_4_0 :jdk: 11.0.2 :jdk_major: 11 :build_type: tar diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index 8c1b64b3f71e8..b08d028fd5c06 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3157,129 +3157,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From cf9052a370aa9c65e6c759b0d7636d554f059552 Mon Sep 17 
00:00:00 2001 From: Alan Woodward Date: Tue, 2 Sep 2025 14:05:41 +0100 Subject: [PATCH 175/184] Use last lucene 10.3 snapshot --- build-tools-internal/version.properties | 2 +- gradle/verification-metadata.xml | 150 ++++++++++++------------ 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index f6f4f81589ad3..8f7ef45226e43 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ elasticsearch = 9.2.0 -lucene = 10.2.2 +lucene = 10.3.0-snapshot-878a3db9c2d bundled_jdk_vendor = openjdk bundled_jdk = 24.0.2+12@fdc5d0102fe0414db21410ad5834341f diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index c31b3b420b812..8c1b64b3f71e8 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3157,129 +3157,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From dc1e276b9b569a4ab27a7ec40d25e619119b894b Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Tue, 2 Sep 2025 14:22:50 +0100 Subject: [PATCH 176/184] correct docs version --- docs/Versions.asciidoc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/Versions.asciidoc b/docs/Versions.asciidoc index 738069856228f..aa02432b42b2a 100644 --- a/docs/Versions.asciidoc +++ b/docs/Versions.asciidoc @@ -1,8 +1,8 @@ include::{docs-root}/shared/versions/stack/{source_branch}.asciidoc[] -:lucene_version: 10.4.0 -:lucene_version_path: 10_4_0 +:lucene_version: 10.3.0 +:lucene_version_path: 10_3_0 :jdk: 11.0.2 :jdk_major: 11 :build_type: tar From d994c940f341e4aadf99b566d9087ba4149b615e Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Mon, 8 Sep 2025 
12:56:41 -0400 Subject: [PATCH 177/184] Aggs: Fix test (#134308) Make sure that data lands in both ranges so we properly emulate the non-optimized case. Closes #128471 --- .../aggregations/bucket/range/RangeAggregatorTests.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorTests.java index 837dae1d56293..aa7e312f601af 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/range/RangeAggregatorTests.java @@ -592,11 +592,10 @@ public void testOverlappingRanges() throws IOException { /** * If the top level query is a runtime field we use the standard aggregator * because it's marginally faster. You'd expect it to be a *ton* faster but - * usually the ranges drive the iteration and they are still fairly fast. + * usually the ranges drive the iteration, and they are still fairly fast. * But the union operation overhead that comes with combining the range with * the top level query tends to slow us down more than the standard aggregator. 
*/ - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/128471") public void testRuntimeFieldTopLevelQueryNotOptimized() throws IOException { long totalDocs = (long) RangeAggregator.DOCS_PER_RANGE_TO_USE_FILTERS * 4; SearchLookup lookup = new SearchLookup(s -> null, (ft, l, ftd) -> null, (ctx, doc) -> null); @@ -609,7 +608,8 @@ public void execute() { Query query = new StringScriptFieldTermQuery(new Script("dummy"), scriptFactory, "dummy", "cat", false); debugTestCase(new RangeAggregationBuilder("r").field(NUMBER_FIELD_NAME).addRange(0, 1).addRange(1, 2).addRange(2, 3), query, iw -> { for (int d = 0; d < totalDocs; d++) { - iw.addDocument(List.of(new IntPoint(NUMBER_FIELD_NAME, 0), new SortedNumericDocValuesField(NUMBER_FIELD_NAME, 0))); + int v = d % 2; + iw.addDocument(List.of(new IntPoint(NUMBER_FIELD_NAME, v), new SortedNumericDocValuesField(NUMBER_FIELD_NAME, v))); } }, (InternalRange r, Class impl, Map> debug) -> { assertThat( @@ -620,7 +620,7 @@ public void execute() { assertThat(r.getBuckets().stream().map(InternalRange.Bucket::getTo).collect(toList()), equalTo(List.of(1.0, 2.0, 3.0))); assertThat( r.getBuckets().stream().map(InternalRange.Bucket::getDocCount).collect(toList()), - equalTo(List.of(totalDocs, 0L, 0L)) + equalTo(List.of(totalDocs / 2, totalDocs / 2, 0L)) ); assertThat(impl, equalTo(RangeAggregator.NoOverlap.class)); assertMap( From de796ab3907aec6def7474a5c96cb76b187784bc Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Tue, 9 Sep 2025 16:04:12 +0100 Subject: [PATCH 178/184] Update to Lucene 10.3.0 RC2 --- build-tools-internal/version.properties | 2 +- build.gradle | 5 + gradle/verification-metadata.xml | 150 ++++++++++++------------ 3 files changed, 81 insertions(+), 76 deletions(-) diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index bba2dc37463e5..5fce6461ae799 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,5 +1,5 @@ 
elasticsearch = 9.2.0 -lucene = 10.3.0-snapshot-878a3db9c2d +lucene = 10.3.0 bundled_jdk_vendor = openjdk bundled_jdk = 24.0.2+12@fdc5d0102fe0414db21410ad5834341f diff --git a/build.gradle b/build.gradle index d26d4d73748e4..2b6a839c1b5a5 100644 --- a/build.gradle +++ b/build.gradle @@ -300,6 +300,11 @@ if (project.gradle.startParameter.taskNames.any { it.startsWith("checkPart") || subprojects { proj -> apply plugin: 'elasticsearch.base' + +repositories { + // TODO: Temporary for Lucene RC builds. REMOVE + maven { url = "https://dist.apache.org/repos/dist/dev/lucene/lucene-10.3.0-RC2-rev-e2871287e4a378739f0b74081d124e3668347875/lucene/maven" } + } } allprojects { diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index c595f28e53230..43345e18939b3 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -3172,129 +3172,129 @@ - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + - - - + + + From 087f46842ad0ee0db4c37cc5bcb2e9a08704511e Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Mon, 15 Sep 2025 08:43:32 +0100 Subject: [PATCH 179/184] Use released lucene 10.3.0 --- build.gradle | 5 ----- 1 file changed, 5 deletions(-) diff --git a/build.gradle b/build.gradle index 2b6a839c1b5a5..d26d4d73748e4 100644 --- a/build.gradle +++ b/build.gradle @@ -300,11 +300,6 @@ if (project.gradle.startParameter.taskNames.any { it.startsWith("checkPart") || subprojects { proj -> apply plugin: 'elasticsearch.base' - -repositories { - // TODO: Temporary for Lucene RC builds. 
REMOVE - maven { url = "https://dist.apache.org/repos/dist/dev/lucene/lucene-10.3.0-RC2-rev-e2871287e4a378739f0b74081d124e3668347875/lucene/maven" } - } } allprojects { From b4e0da79b28fdfaeda13f5b81d968a061a9dcf3e Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Mon, 15 Sep 2025 12:26:03 +0100 Subject: [PATCH 180/184] Update docs/changelog/133980.yaml --- docs/changelog/133980.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/133980.yaml diff --git a/docs/changelog/133980.yaml b/docs/changelog/133980.yaml new file mode 100644 index 0000000000000..70797285c5062 --- /dev/null +++ b/docs/changelog/133980.yaml @@ -0,0 +1,5 @@ +pr: 133980 +summary: Upgrade elasticsearch to lucene 10.3.0 +area: Search +type: upgrade +issues: [] From 3142bb946a55b55a15ff29e2daee5ad4f83b425b Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Mon, 15 Sep 2025 12:32:29 +0100 Subject: [PATCH 181/184] Update 133980.yaml --- docs/changelog/133980.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/changelog/133980.yaml b/docs/changelog/133980.yaml index 70797285c5062..fb31d16e3e49c 100644 --- a/docs/changelog/133980.yaml +++ b/docs/changelog/133980.yaml @@ -3,3 +3,7 @@ summary: Upgrade elasticsearch to lucene 10.3.0 area: Search type: upgrade issues: [] +highlight: + title: "New lucene 10.3.0 release" + body: |- + - Improved performance for lexical, vector and primary-key searches From 97a0d15100a031f750f06bb50f24baff3afb5b1c Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Mon, 15 Sep 2025 13:24:30 +0100 Subject: [PATCH 182/184] Adjust commit sizes to reflect change in TieredMergePolicy --- .../lucene/read/ValuesSourceReaderOperatorTests.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java 
index 05981ed3803da..2ff63697f19b1 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/read/ValuesSourceReaderOperatorTests.java @@ -178,7 +178,7 @@ protected SourceOperator simpleInput(BlockFactory blockFactory, int size) { } private int commitEvery(int numDocs) { - return Math.max(1, (int) Math.ceil((double) numDocs / 10)); + return Math.max(1, (int) Math.ceil((double) numDocs / 8)); } private SourceOperator simpleInput(DriverContext context, int size, int commitEvery, int pageSize) { @@ -723,8 +723,8 @@ private void testLoadAllStatus(boolean allInOnePage) { DriverContext driverContext = driverContext(); int numDocs = between(100, 5000); List input = CannedSourceOperator.collectPages(simpleInput(driverContext, numDocs, commitEvery(numDocs), numDocs)); - assertThat(reader.leaves(), hasSize(10)); - assertThat(input, hasSize(10)); + assertThat(reader.leaves(), hasSize(8)); + assertThat(input, hasSize(8)); List cases = infoAndChecksForEachType( Block.MvOrdering.DEDUPLICATED_AND_SORTED_ASCENDING, Block.MvOrdering.DEDUPLICATED_AND_SORTED_ASCENDING From 94cdb01e5fb0f8d650e48b145cee4945e0281e6f Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Tue, 16 Sep 2025 09:43:17 +0100 Subject: [PATCH 183/184] Small fixes --- .buildkite/pipelines/lucene-snapshot/build-snapshot.yml | 4 +--- .../src/main/java/org/elasticsearch/index/IndexVersions.java | 3 +-- .../main/java/org/elasticsearch/test/cluster/FeatureFlag.java | 3 +-- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/.buildkite/pipelines/lucene-snapshot/build-snapshot.yml b/.buildkite/pipelines/lucene-snapshot/build-snapshot.yml index 428a471e77faa..d4e066bd45f31 100644 --- a/.buildkite/pipelines/lucene-snapshot/build-snapshot.yml +++ b/.buildkite/pipelines/lucene-snapshot/build-snapshot.yml @@ -2,9 +2,7 @@ steps: - trigger: apache-lucene-build-snapshot 
label: Trigger pipeline to build lucene snapshot key: lucene-build - if: (build.env("LUCENE_BUILD_ID") == null || build.env("LUCENE_BUILD_ID") == "") - build: - branch: branch_10x + if: build.env("LUCENE_BUILD_ID") == null || build.env("LUCENE_BUILD_ID") == "" - wait - label: Upload and update lucene snapshot command: .buildkite/scripts/lucene-snapshot/upload-snapshot.sh diff --git a/server/src/main/java/org/elasticsearch/index/IndexVersions.java b/server/src/main/java/org/elasticsearch/index/IndexVersions.java index c1d69fa75effb..539ff9be56379 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexVersions.java +++ b/server/src/main/java/org/elasticsearch/index/IndexVersions.java @@ -183,8 +183,7 @@ private static Version parseUnchecked(String version) { public static final IndexVersion IGNORED_SOURCE_FIELDS_PER_ENTRY_WITH_FF = def(9_034_0_00, Version.LUCENE_10_2_2); public static final IndexVersion EXCLUDE_SOURCE_VECTORS_DEFAULT = def(9_035_0_00, Version.LUCENE_10_2_2); public static final IndexVersion DISABLE_NORMS_BY_DEFAULT_FOR_LOGSDB_AND_TSDB = def(9_036_0_00, Version.LUCENE_10_2_2); - - public static final IndexVersion UPGRADE_TO_LUCENE_10_3_0 = def(9_050_00_0, Version.LUCENE_10_3_0); + public static final IndexVersion UPGRADE_TO_LUCENE_10_3_0 = def(9_037_0_00, Version.LUCENE_10_3_0); /* * STOP! READ THIS FIRST! 
No, really, diff --git a/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/FeatureFlag.java b/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/FeatureFlag.java index 5f3cf0a6463d7..f6492c1e19f03 100644 --- a/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/FeatureFlag.java +++ b/test/test-clusters/src/main/java/org/elasticsearch/test/cluster/FeatureFlag.java @@ -20,8 +20,7 @@ public enum FeatureFlag { SUB_OBJECTS_AUTO_ENABLED("es.sub_objects_auto_feature_flag_enabled=true", Version.fromString("8.16.0"), null), DOC_VALUES_SKIPPER("es.doc_values_skipper_feature_flag_enabled=true", Version.fromString("8.18.1"), null), USE_LUCENE101_POSTINGS_FORMAT("es.use_lucene101_postings_format_feature_flag_enabled=true", Version.fromString("9.1.0"), null), - USE_LUCENE1013POSTINGS_FORMAT("es.use_lucene103_postings_format_feature_flag_enabled=true", Version.fromString("9.2.0"), null), - INFERENCE_CUSTOM_SERVICE_ENABLED("es.inference_custom_service_feature_flag_enabled=true", Version.fromString("8.19.0"), null), + USE_LUCENE103_POSTINGS_FORMAT("es.use_lucene103_postings_format_feature_flag_enabled=true", Version.fromString("9.2.0"), null), IVF_FORMAT("es.ivf_format_feature_flag_enabled=true", Version.fromString("9.1.0"), null), LOGS_STREAM("es.logs_stream_feature_flag_enabled=true", Version.fromString("9.1.0"), null), PATTERNED_TEXT("es.patterned_text_feature_flag_enabled=true", Version.fromString("9.1.0"), null), From da8c2bdc9163fb5beb572124d22d3fa8c1b9a77b Mon Sep 17 00:00:00 2001 From: Benjamin Trent Date: Tue, 16 Sep 2025 09:27:19 -0400 Subject: [PATCH 184/184] Update 133980.yaml --- docs/changelog/133980.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/changelog/133980.yaml b/docs/changelog/133980.yaml index fb31d16e3e49c..b0248c2f5bc06 100644 --- a/docs/changelog/133980.yaml +++ b/docs/changelog/133980.yaml @@ -7,3 +7,6 @@ highlight: title: "New lucene 10.3.0 release" body: |- - Improved performance for lexical, vector 
and primary-key searches + - Use optimistic-with-checking KNN Query execution strategy in place of cross-thread global queue min-score checking. Improves performance and consistency. + - Bulk scoring added for floating point vectors in HNSW, improving query latency and indexing throughput + - Multiple improvements to HNSW graph traversal and storage