elastic · ChrisHegarty · May 23, 2025 · May 21, 2025 · May 21, 2025 · May 22, 2025
diff --git a/server/src/main/java/module-info.java b/server/src/main/java/module-info.java
@@ -461,7 +461,8 @@
             org.elasticsearch.index.codec.Elasticsearch814Codec,
             org.elasticsearch.index.codec.Elasticsearch816Codec,
             org.elasticsearch.index.codec.Elasticsearch900Codec,
-            org.elasticsearch.index.codec.Elasticsearch900Lucene101Codec;
+            org.elasticsearch.index.codec.Elasticsearch900Lucene101Codec,
+            org.elasticsearch.index.codec.Elasticsearch902Lucene103Codec;
 
     provides org.apache.logging.log4j.core.util.ContextDataProvider with org.elasticsearch.common.logging.DynamicContextDataProvider;
 

diff --git a/...rc/main/java/org/elasticsearch/action/admin/indices/diskusage/IndexDiskUsageAnalyzer.java b/...rc/main/java/org/elasticsearch/action/admin/indices/diskusage/IndexDiskUsageAnalyzer.java
@@ -10,6 +10,7 @@
 package org.elasticsearch.action.admin.indices.diskusage;
 
 import org.apache.logging.log4j.Logger;
+import org.apache.lucene.backward_codecs.lucene101.Lucene101PostingsFormat;
 import org.apache.lucene.backward_codecs.lucene50.Lucene50PostingsFormat;
 import org.apache.lucene.backward_codecs.lucene84.Lucene84PostingsFormat;
 import org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat;
@@ -22,7 +23,7 @@
 import org.apache.lucene.codecs.PointsReader;
 import org.apache.lucene.codecs.StoredFieldsReader;
 import org.apache.lucene.codecs.TermVectorsReader;
-import org.apache.lucene.codecs.lucene101.Lucene101PostingsFormat;
+import org.apache.lucene.codecs.lucene103.Lucene103PostingsFormat;
 import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.index.ByteVectorValues;
 import org.apache.lucene.index.DirectoryReader;
@@ -318,6 +319,9 @@ private static void readProximity(Terms terms, PostingsEnum postings) throws IOE
     private static BlockTermState getBlockTermState(TermsEnum termsEnum, BytesRef term) throws IOException {
         if (term != null && termsEnum.seekExact(term)) {
             final TermState termState = termsEnum.termState();
+            if (termState instanceof final Lucene103PostingsFormat.IntBlockTermState blockTermState) {
+                return new BlockTermState(blockTermState.docStartFP, blockTermState.posStartFP, blockTermState.payStartFP);
+            }
             if (termState instanceof final Lucene101PostingsFormat.IntBlockTermState blockTermState) {
                 return new BlockTermState(blockTermState.docStartFP, blockTermState.posStartFP, blockTermState.payStartFP);
             }

diff --git a/server/src/main/java/org/elasticsearch/common/lucene/Lucene.java b/server/src/main/java/org/elasticsearch/common/lucene/Lucene.java
@@ -93,7 +93,7 @@
 
 public class Lucene {
 
-    public static final String LATEST_CODEC = "Lucene101";
+    public static final String LATEST_CODEC = "Lucene103";
 
     public static final String SOFT_DELETES_FIELD = "__soft_deletes";
 

diff --git a/server/src/main/java/org/elasticsearch/index/codec/CodecService.java b/server/src/main/java/org/elasticsearch/index/codec/CodecService.java
@@ -12,7 +12,7 @@
 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.codecs.FieldInfosFormat;
 import org.apache.lucene.codecs.FilterCodec;
-import org.apache.lucene.codecs.lucene101.Lucene101Codec;
+import org.apache.lucene.codecs.lucene103.Lucene103Codec;
 import org.elasticsearch.common.util.BigArrays;
 import org.elasticsearch.common.util.FeatureFlag;
 import org.elasticsearch.core.Nullable;
@@ -46,7 +46,7 @@ public class CodecService implements CodecProvider {
     public CodecService(@Nullable MapperService mapperService, BigArrays bigArrays) {
         final var codecs = new HashMap<String, Codec>();
 
-        Codec legacyBestSpeedCodec = new LegacyPerFieldMapperCodec(Lucene101Codec.Mode.BEST_SPEED, mapperService, bigArrays);
+        Codec legacyBestSpeedCodec = new LegacyPerFieldMapperCodec(Lucene103Codec.Mode.BEST_SPEED, mapperService, bigArrays);
         if (ZSTD_STORED_FIELDS_FEATURE_FLAG) {
             codecs.put(DEFAULT_CODEC, new PerFieldMapperCodec(Zstd814StoredFieldsFormat.Mode.BEST_SPEED, mapperService, bigArrays));
         } else {
@@ -58,7 +58,7 @@ public CodecService(@Nullable MapperService mapperService, BigArrays bigArrays)
             BEST_COMPRESSION_CODEC,
             new PerFieldMapperCodec(Zstd814StoredFieldsFormat.Mode.BEST_COMPRESSION, mapperService, bigArrays)
         );
-        Codec legacyBestCompressionCodec = new LegacyPerFieldMapperCodec(Lucene101Codec.Mode.BEST_COMPRESSION, mapperService, bigArrays);
+        Codec legacyBestCompressionCodec = new LegacyPerFieldMapperCodec(Lucene103Codec.Mode.BEST_COMPRESSION, mapperService, bigArrays);
         codecs.put(LEGACY_BEST_COMPRESSION_CODEC, legacyBestCompressionCodec);
 
         codecs.put(LUCENE_DEFAULT_CODEC, Codec.getDefault());

diff --git a/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch900Lucene101Codec.java b/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch900Lucene101Codec.java
@@ -9,12 +9,12 @@
 
 package org.elasticsearch.index.codec;
 
+import org.apache.lucene.backward_codecs.lucene101.Lucene101Codec;
+import org.apache.lucene.backward_codecs.lucene101.Lucene101PostingsFormat;
 import org.apache.lucene.codecs.DocValuesFormat;
 import org.apache.lucene.codecs.KnnVectorsFormat;
 import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.codecs.StoredFieldsFormat;
-import org.apache.lucene.codecs.lucene101.Lucene101Codec;
-import org.apache.lucene.codecs.lucene101.Lucene101PostingsFormat;
 import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
 import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
 import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;

diff --git a/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch902Lucene103Codec.java b/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch902Lucene103Codec.java
@@ -0,0 +1,131 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.index.codec;
+
+import org.apache.lucene.codecs.DocValuesFormat;
+import org.apache.lucene.codecs.KnnVectorsFormat;
+import org.apache.lucene.codecs.PostingsFormat;
+import org.apache.lucene.codecs.StoredFieldsFormat;
+import org.apache.lucene.codecs.lucene103.Lucene103Codec;
+import org.apache.lucene.codecs.lucene103.Lucene103PostingsFormat;
+import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
+import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
+import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
+import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
+import org.elasticsearch.index.codec.perfield.XPerFieldDocValuesFormat;
+import org.elasticsearch.index.codec.zstd.Zstd814StoredFieldsFormat;
+
+/**
+ * Elasticsearch codec as of 9.2 relying on Lucene 10.3. This extends the Lucene 10.3 codec to compressed
+ * stored fields with ZSTD instead of LZ4/DEFLATE. See {@link Zstd814StoredFieldsFormat}.
+ */
+public class Elasticsearch902Lucene103Codec extends CodecService.DeduplicateFieldInfosCodec {
+
+    private final StoredFieldsFormat storedFieldsFormat;
+
+    private final PostingsFormat defaultPostingsFormat;
+    private final PostingsFormat postingsFormat = new PerFieldPostingsFormat() {
+        @Override
+        public PostingsFormat getPostingsFormatForField(String field) {
+            return Elasticsearch902Lucene103Codec.this.getPostingsFormatForField(field);
+        }
+    };
+
+    private final DocValuesFormat defaultDVFormat;
+    private final DocValuesFormat docValuesFormat = new XPerFieldDocValuesFormat() {
+        @Override
+        public DocValuesFormat getDocValuesFormatForField(String field) {
+            return Elasticsearch902Lucene103Codec.this.getDocValuesFormatForField(field);
+        }
+    };
+
+    private final KnnVectorsFormat defaultKnnVectorsFormat;
+    private final KnnVectorsFormat knnVectorsFormat = new PerFieldKnnVectorsFormat() {
+        @Override
+        public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
+            return Elasticsearch902Lucene103Codec.this.getKnnVectorsFormatForField(field);
+        }
+    };
+
+    /** Public no-arg constructor, needed for SPI loading at read-time. */
+    public Elasticsearch902Lucene103Codec() {
+        this(Zstd814StoredFieldsFormat.Mode.BEST_SPEED);
+    }
+
+    /**
+     * Constructor. Takes a {@link Zstd814StoredFieldsFormat.Mode} that describes whether to optimize for retrieval speed at the expense of
+     * worse space-efficiency or vice-versa.
+     */
+    public Elasticsearch902Lucene103Codec(Zstd814StoredFieldsFormat.Mode mode) {
+        super("Elasticsearch902Lucene103", new Lucene103Codec());
+        this.storedFieldsFormat = mode.getFormat();
+        this.defaultPostingsFormat = new Lucene103PostingsFormat();
+        this.defaultDVFormat = new Lucene90DocValuesFormat();
+        this.defaultKnnVectorsFormat = new Lucene99HnswVectorsFormat();
+    }
+
+    @Override
+    public StoredFieldsFormat storedFieldsFormat() {
+        return storedFieldsFormat;
+    }
+
+    @Override
+    public final PostingsFormat postingsFormat() {
+        return postingsFormat;
+    }
+
+    @Override
+    public final DocValuesFormat docValuesFormat() {
+        return docValuesFormat;
+    }
+
+    @Override
+    public final KnnVectorsFormat knnVectorsFormat() {
+        return knnVectorsFormat;
+    }
+
+    /**
+     * Returns the postings format that should be used for writing new segments of <code>field</code>.
+     *
+     * <p>The default implementation always returns "Lucene912".
+     *
+     * <p><b>WARNING:</b> if you subclass, you are responsible for index backwards compatibility:
+     * future version of Lucene are only guaranteed to be able to read the default implementation,
+     */
+    public PostingsFormat getPostingsFormatForField(String field) {
+        return defaultPostingsFormat;
+    }
+
+    /**
+     * Returns the docvalues format that should be used for writing new segments of <code>field</code>
+     * .
+     *
+     * <p>The default implementation always returns "Lucene912".
+     *
+     * <p><b>WARNING:</b> if you subclass, you are responsible for index backwards compatibility:
+     * future version of Lucene are only guaranteed to be able to read the default implementation.
+     */
+    public DocValuesFormat getDocValuesFormatForField(String field) {
+        return defaultDVFormat;
+    }
+
+    /**
+     * Returns the vectors format that should be used for writing new segments of <code>field</code>
+     *
+     * <p>The default implementation always returns "Lucene912".
+     *
+     * <p><b>WARNING:</b> if you subclass, you are responsible for index backwards compatibility:
+     * future version of Lucene are only guaranteed to be able to read the default implementation.
+     */
+    public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
+        return defaultKnnVectorsFormat;
+    }
+
+}
diff --git a/server/src/main/java/org/elasticsearch/index/codec/LegacyPerFieldMapperCodec.java b/server/src/main/java/org/elasticsearch/index/codec/LegacyPerFieldMapperCodec.java
@@ -13,7 +13,7 @@
 import org.apache.lucene.codecs.DocValuesFormat;
 import org.apache.lucene.codecs.KnnVectorsFormat;
 import org.apache.lucene.codecs.PostingsFormat;
-import org.apache.lucene.codecs.lucene101.Lucene101Codec;
+import org.apache.lucene.codecs.lucene103.Lucene103Codec;
 import org.elasticsearch.common.lucene.Lucene;
 import org.elasticsearch.common.util.BigArrays;
 import org.elasticsearch.index.mapper.MapperService;
@@ -22,11 +22,11 @@
  * Legacy version of {@link PerFieldMapperCodec}. This codec is preserved to give an escape hatch in case we encounter issues with new
  * changes in {@link PerFieldMapperCodec}.
  */
-public final class LegacyPerFieldMapperCodec extends Lucene101Codec {
+public final class LegacyPerFieldMapperCodec extends Lucene103Codec {
 
     private final PerFieldFormatSupplier formatSupplier;
 
-    public LegacyPerFieldMapperCodec(Lucene101Codec.Mode compressionMode, MapperService mapperService, BigArrays bigArrays) {
+    public LegacyPerFieldMapperCodec(Lucene103Codec.Mode compressionMode, MapperService mapperService, BigArrays bigArrays) {
         super(compressionMode);
         this.formatSupplier = new PerFieldFormatSupplier(mapperService, bigArrays);
         // If the below assertion fails, it is a sign that Lucene released a new codec. You must create a copy of the current Elasticsearch

diff --git a/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java b/server/src/main/java/org/elasticsearch/index/codec/PerFieldFormatSupplier.java
@@ -9,10 +9,11 @@
 
 package org.elasticsearch.index.codec;
 
+import org.apache.lucene.backward_codecs.lucene101.Lucene101PostingsFormat;
 import org.apache.lucene.codecs.DocValuesFormat;
 import org.apache.lucene.codecs.KnnVectorsFormat;
 import org.apache.lucene.codecs.PostingsFormat;
-import org.apache.lucene.codecs.lucene101.Lucene101PostingsFormat;
+import org.apache.lucene.codecs.lucene103.Lucene103PostingsFormat;
 import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
 import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
 import org.elasticsearch.common.util.BigArrays;
@@ -35,12 +36,12 @@
  */
 public class PerFieldFormatSupplier {
     public static final FeatureFlag USE_LUCENE101_POSTINGS_FORMAT = new FeatureFlag("use_lucene101_postings_format");
+    public static final FeatureFlag USE_LUCENE103_POSTINGS_FORMAT = new FeatureFlag("use_lucene103_postings_format");
 
     private static final DocValuesFormat docValuesFormat = new Lucene90DocValuesFormat();
     private static final KnnVectorsFormat knnVectorsFormat = new Lucene99HnswVectorsFormat();
     private static final ES819TSDBDocValuesFormat tsdbDocValuesFormat = new ES819TSDBDocValuesFormat();
     private static final ES812PostingsFormat es812PostingsFormat = new ES812PostingsFormat();
-    private static final Lucene101PostingsFormat lucene101PostingsFormat = new Lucene101PostingsFormat();
     private static final PostingsFormat completionPostingsFormat = PostingsFormat.forName("Completion101");
 
     private final ES87BloomFilterPostingsFormat bloomFilterPostingsFormat;
@@ -53,14 +54,19 @@ public PerFieldFormatSupplier(MapperService mapperService, BigArrays bigArrays)
         this.bloomFilterPostingsFormat = new ES87BloomFilterPostingsFormat(bigArrays, this::internalGetPostingsFormatForField);
 
         if (mapperService != null
+            && USE_LUCENE103_POSTINGS_FORMAT.isEnabled()
+            && mapperService.getIndexSettings().getIndexVersionCreated().onOrAfter(IndexVersions.UPGRADE_TO_LUCENE_10_3_0)
+            && mapperService.getIndexSettings().getMode() == IndexMode.STANDARD) {
+            defaultPostingsFormat = new Lucene103PostingsFormat();
+        } else if (mapperService != null
             && USE_LUCENE101_POSTINGS_FORMAT.isEnabled()
             && mapperService.getIndexSettings().getIndexVersionCreated().onOrAfter(IndexVersions.USE_LUCENE101_POSTINGS_FORMAT)
             && mapperService.getIndexSettings().getMode() == IndexMode.STANDARD) {
-            defaultPostingsFormat = lucene101PostingsFormat;
-        } else {
-            // our own posting format using PFOR
-            defaultPostingsFormat = es812PostingsFormat;
-        }
+                defaultPostingsFormat = new Lucene101PostingsFormat();
+            } else {
+                // our own posting format using PFOR
+                defaultPostingsFormat = es812PostingsFormat;
+            }
     }
 
     public PostingsFormat getPostingsFormatForField(String field) {

diff --git a/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java b/server/src/main/java/org/elasticsearch/index/codec/PerFieldMapperCodec.java
@@ -26,7 +26,7 @@
  * per index in real time via the mapping API. If no specific postings format or vector format is
  * configured for a specific field the default postings or vector format is used.
  */
-public final class PerFieldMapperCodec extends Elasticsearch900Lucene101Codec {
+public final class PerFieldMapperCodec extends Elasticsearch902Lucene103Codec {
 
     private final PerFieldFormatSupplier formatSupplier;
 

diff --git a/server/src/main/java/org/elasticsearch/index/codec/postings/CompressionAlgorithm.java b/server/src/main/java/org/elasticsearch/index/codec/postings/CompressionAlgorithm.java
@@ -0,0 +1,76 @@
+/*
+ * @notice
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Modifications copyright (C) 2025 Elasticsearch B.V.
+ */
+package org.elasticsearch.index.codec.postings;
+
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.util.compress.LowercaseAsciiCompression;
+
+import java.io.IOException;
+
+/** Compression algorithm used for suffixes of a block of terms. */
+public enum CompressionAlgorithm {
+    NO_COMPRESSION(0x00) {
+
+        @Override
+        void read(DataInput in, byte[] out, int len) throws IOException {
+            in.readBytes(out, 0, len);
+        }
+    },
+
+    LOWERCASE_ASCII(0x01) {
+
+        @Override
+        void read(DataInput in, byte[] out, int len) throws IOException {
+            LowercaseAsciiCompression.decompress(in, out, len);
+        }
+    },
+
+    LZ4(0x02) {
+
+        @Override
+        void read(DataInput in, byte[] out, int len) throws IOException {
+            org.apache.lucene.util.compress.LZ4.decompress(in, len, out, 0);
+        }
+    };
+
+    private static final CompressionAlgorithm[] BY_CODE = new CompressionAlgorithm[3];
+
+    static {
+        for (CompressionAlgorithm alg : CompressionAlgorithm.values()) {
+            BY_CODE[alg.code] = alg;
+        }
+    }
+
+    /** Look up a {@link CompressionAlgorithm} by its {@link CompressionAlgorithm#code}. */
+    static CompressionAlgorithm byCode(int code) {
+        if (code < 0 || code >= BY_CODE.length) {
+            throw new IllegalArgumentException("Illegal code for a compression algorithm: " + code);
+        }
+        return BY_CODE[code];
+    }
+
+    public final int code;
+
+    CompressionAlgorithm(int code) {
+        this.code = code;
+    }
+
+    abstract void read(DataInput in, byte[] out, int len) throws IOException;
+}