Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion server/src/main/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,8 @@
org.elasticsearch.index.codec.Elasticsearch814Codec,
org.elasticsearch.index.codec.Elasticsearch816Codec,
org.elasticsearch.index.codec.Elasticsearch900Codec,
org.elasticsearch.index.codec.Elasticsearch900Lucene101Codec;
org.elasticsearch.index.codec.Elasticsearch900Lucene101Codec,
org.elasticsearch.index.codec.Elasticsearch902Lucene103Codec;

provides org.apache.logging.log4j.core.util.ContextDataProvider with org.elasticsearch.common.logging.DynamicContextDataProvider;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
package org.elasticsearch.action.admin.indices.diskusage;

import org.apache.logging.log4j.Logger;
import org.apache.lucene.backward_codecs.lucene101.Lucene101PostingsFormat;
import org.apache.lucene.backward_codecs.lucene50.Lucene50PostingsFormat;
import org.apache.lucene.backward_codecs.lucene84.Lucene84PostingsFormat;
import org.apache.lucene.backward_codecs.lucene90.Lucene90PostingsFormat;
Expand All @@ -22,7 +23,7 @@
import org.apache.lucene.codecs.PointsReader;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.codecs.lucene101.Lucene101PostingsFormat;
import org.apache.lucene.codecs.lucene103.Lucene103PostingsFormat;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.ByteVectorValues;
import org.apache.lucene.index.DirectoryReader;
Expand Down Expand Up @@ -318,6 +319,9 @@ private static void readProximity(Terms terms, PostingsEnum postings) throws IOE
private static BlockTermState getBlockTermState(TermsEnum termsEnum, BytesRef term) throws IOException {
if (term != null && termsEnum.seekExact(term)) {
final TermState termState = termsEnum.termState();
if (termState instanceof final Lucene103PostingsFormat.IntBlockTermState blockTermState) {
return new BlockTermState(blockTermState.docStartFP, blockTermState.posStartFP, blockTermState.payStartFP);
}
if (termState instanceof final Lucene101PostingsFormat.IntBlockTermState blockTermState) {
return new BlockTermState(blockTermState.docStartFP, blockTermState.posStartFP, blockTermState.payStartFP);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@

public class Lucene {

public static final String LATEST_CODEC = "Lucene101";
public static final String LATEST_CODEC = "Lucene103";

public static final String SOFT_DELETES_FIELD = "__soft_deletes";

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.lucene101.Lucene101Codec;
import org.apache.lucene.codecs.lucene103.Lucene103Codec;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.common.util.FeatureFlag;
import org.elasticsearch.core.Nullable;
Expand Down Expand Up @@ -46,7 +46,7 @@ public class CodecService implements CodecProvider {
public CodecService(@Nullable MapperService mapperService, BigArrays bigArrays) {
final var codecs = new HashMap<String, Codec>();

Codec legacyBestSpeedCodec = new LegacyPerFieldMapperCodec(Lucene101Codec.Mode.BEST_SPEED, mapperService, bigArrays);
Codec legacyBestSpeedCodec = new LegacyPerFieldMapperCodec(Lucene103Codec.Mode.BEST_SPEED, mapperService, bigArrays);
if (ZSTD_STORED_FIELDS_FEATURE_FLAG) {
codecs.put(DEFAULT_CODEC, new PerFieldMapperCodec(Zstd814StoredFieldsFormat.Mode.BEST_SPEED, mapperService, bigArrays));
} else {
Expand All @@ -58,7 +58,7 @@ public CodecService(@Nullable MapperService mapperService, BigArrays bigArrays)
BEST_COMPRESSION_CODEC,
new PerFieldMapperCodec(Zstd814StoredFieldsFormat.Mode.BEST_COMPRESSION, mapperService, bigArrays)
);
Codec legacyBestCompressionCodec = new LegacyPerFieldMapperCodec(Lucene101Codec.Mode.BEST_COMPRESSION, mapperService, bigArrays);
Codec legacyBestCompressionCodec = new LegacyPerFieldMapperCodec(Lucene103Codec.Mode.BEST_COMPRESSION, mapperService, bigArrays);
codecs.put(LEGACY_BEST_COMPRESSION_CODEC, legacyBestCompressionCodec);

codecs.put(LUCENE_DEFAULT_CODEC, Codec.getDefault());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@

package org.elasticsearch.index.codec;

import org.apache.lucene.backward_codecs.lucene101.Lucene101Codec;
import org.apache.lucene.backward_codecs.lucene101.Lucene101PostingsFormat;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.lucene101.Lucene101Codec;
import org.apache.lucene.codecs.lucene101.Lucene101PostingsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.index.codec;

import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.lucene103.Lucene103Codec;
import org.apache.lucene.codecs.lucene103.Lucene103PostingsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
import org.elasticsearch.index.codec.perfield.XPerFieldDocValuesFormat;
import org.elasticsearch.index.codec.zstd.Zstd814StoredFieldsFormat;

/**
* Elasticsearch codec as of 9.2 relying on Lucene 10.3. This extends the Lucene 10.3 codec to compress
* stored fields with ZSTD instead of LZ4/DEFLATE. See {@link Zstd814StoredFieldsFormat}.
*
* <p>The postings, doc-values and KNN-vectors formats exposed by this codec are per-field wrappers that
* delegate to {@link #getPostingsFormatForField(String)}, {@link #getDocValuesFormatForField(String)} and
* {@link #getKnnVectorsFormatForField(String)} respectively, so subclasses can pick a format per field.
*/
public class Elasticsearch902Lucene103Codec extends CodecService.DeduplicateFieldInfosCodec {

// Stored fields format obtained from the Zstd814StoredFieldsFormat.Mode passed to the constructor.
private final StoredFieldsFormat storedFieldsFormat;

// Format returned by the default getPostingsFormatForField implementation.
private final PostingsFormat defaultPostingsFormat;
// Per-field wrapper returned by postingsFormat(); routes every field through getPostingsFormatForField.
private final PostingsFormat postingsFormat = new PerFieldPostingsFormat() {
@Override
public PostingsFormat getPostingsFormatForField(String field) {
return Elasticsearch902Lucene103Codec.this.getPostingsFormatForField(field);
}
};

// Format returned by the default getDocValuesFormatForField implementation.
private final DocValuesFormat defaultDVFormat;
// Per-field wrapper returned by docValuesFormat(); routes every field through getDocValuesFormatForField.
private final DocValuesFormat docValuesFormat = new XPerFieldDocValuesFormat() {
@Override
public DocValuesFormat getDocValuesFormatForField(String field) {
return Elasticsearch902Lucene103Codec.this.getDocValuesFormatForField(field);
}
};

// Format returned by the default getKnnVectorsFormatForField implementation.
private final KnnVectorsFormat defaultKnnVectorsFormat;
// Per-field wrapper returned by knnVectorsFormat(); routes every field through getKnnVectorsFormatForField.
private final KnnVectorsFormat knnVectorsFormat = new PerFieldKnnVectorsFormat() {
@Override
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
return Elasticsearch902Lucene103Codec.this.getKnnVectorsFormatForField(field);
}
};

/**
* Public no-arg constructor, needed for SPI loading at read-time. Uses
* {@link Zstd814StoredFieldsFormat.Mode#BEST_SPEED}.
*/
public Elasticsearch902Lucene103Codec() {
this(Zstd814StoredFieldsFormat.Mode.BEST_SPEED);
}

/**
* Constructor. Takes a {@link Zstd814StoredFieldsFormat.Mode} that describes whether to optimize for retrieval speed at the expense of
* worse space-efficiency or vice-versa.
*
* @param mode the mode whose stored fields format this codec will use
*/
public Elasticsearch902Lucene103Codec(Zstd814StoredFieldsFormat.Mode mode) {
// The codec is registered under the name "Elasticsearch902Lucene103" and wraps a stock Lucene103Codec.
super("Elasticsearch902Lucene103", new Lucene103Codec());
this.storedFieldsFormat = mode.getFormat();
this.defaultPostingsFormat = new Lucene103PostingsFormat();
this.defaultDVFormat = new Lucene90DocValuesFormat();
this.defaultKnnVectorsFormat = new Lucene99HnswVectorsFormat();
}

/** Returns the stored fields format selected by the mode given at construction time. */
@Override
public StoredFieldsFormat storedFieldsFormat() {
return storedFieldsFormat;
}

/** Returns the per-field postings format wrapper; see {@link #getPostingsFormatForField(String)}. */
@Override
public final PostingsFormat postingsFormat() {
return postingsFormat;
}

/** Returns the per-field doc-values format wrapper; see {@link #getDocValuesFormatForField(String)}. */
@Override
public final DocValuesFormat docValuesFormat() {
return docValuesFormat;
}

/** Returns the per-field KNN-vectors format wrapper; see {@link #getKnnVectorsFormatForField(String)}. */
@Override
public final KnnVectorsFormat knnVectorsFormat() {
return knnVectorsFormat;
}

/**
* Returns the postings format that should be used for writing new segments of <code>field</code>.
*
* <p>The default implementation always returns the {@link Lucene103PostingsFormat} instance created by the constructor.
*
* <p><b>WARNING:</b> if you subclass, you are responsible for index backwards compatibility:
* future versions of Lucene are only guaranteed to be able to read the default implementation.
*/
public PostingsFormat getPostingsFormatForField(String field) {
return defaultPostingsFormat;
}

/**
* Returns the docvalues format that should be used for writing new segments of <code>field</code>.
*
* <p>The default implementation always returns the {@link Lucene90DocValuesFormat} instance created by the constructor.
*
* <p><b>WARNING:</b> if you subclass, you are responsible for index backwards compatibility:
* future versions of Lucene are only guaranteed to be able to read the default implementation.
*/
public DocValuesFormat getDocValuesFormatForField(String field) {
return defaultDVFormat;
}

/**
* Returns the vectors format that should be used for writing new segments of <code>field</code>.
*
* <p>The default implementation always returns the {@link Lucene99HnswVectorsFormat} instance created by the constructor.
*
* <p><b>WARNING:</b> if you subclass, you are responsible for index backwards compatibility:
* future versions of Lucene are only guaranteed to be able to read the default implementation.
*/
public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
return defaultKnnVectorsFormat;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene101.Lucene101Codec;
import org.apache.lucene.codecs.lucene103.Lucene103Codec;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.index.mapper.MapperService;
Expand All @@ -22,11 +22,11 @@
* Legacy version of {@link PerFieldMapperCodec}. This codec is preserved to give an escape hatch in case we encounter issues with new
* changes in {@link PerFieldMapperCodec}.
*/
public final class LegacyPerFieldMapperCodec extends Lucene101Codec {
public final class LegacyPerFieldMapperCodec extends Lucene103Codec {

private final PerFieldFormatSupplier formatSupplier;

public LegacyPerFieldMapperCodec(Lucene101Codec.Mode compressionMode, MapperService mapperService, BigArrays bigArrays) {
public LegacyPerFieldMapperCodec(Lucene103Codec.Mode compressionMode, MapperService mapperService, BigArrays bigArrays) {
super(compressionMode);
this.formatSupplier = new PerFieldFormatSupplier(mapperService, bigArrays);
// If the below assertion fails, it is a sign that Lucene released a new codec. You must create a copy of the current Elasticsearch
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@

package org.elasticsearch.index.codec;

import org.apache.lucene.backward_codecs.lucene101.Lucene101PostingsFormat;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene101.Lucene101PostingsFormat;
import org.apache.lucene.codecs.lucene103.Lucene103PostingsFormat;
import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
import org.elasticsearch.common.util.BigArrays;
Expand All @@ -35,12 +36,12 @@
*/
public class PerFieldFormatSupplier {
public static final FeatureFlag USE_LUCENE101_POSTINGS_FORMAT = new FeatureFlag("use_lucene101_postings_format");
public static final FeatureFlag USE_LUCENE103_POSTINGS_FORMAT = new FeatureFlag("use_lucene103_postings_format");

private static final DocValuesFormat docValuesFormat = new Lucene90DocValuesFormat();
private static final KnnVectorsFormat knnVectorsFormat = new Lucene99HnswVectorsFormat();
private static final ES819TSDBDocValuesFormat tsdbDocValuesFormat = new ES819TSDBDocValuesFormat();
private static final ES812PostingsFormat es812PostingsFormat = new ES812PostingsFormat();
private static final Lucene101PostingsFormat lucene101PostingsFormat = new Lucene101PostingsFormat();
private static final PostingsFormat completionPostingsFormat = PostingsFormat.forName("Completion101");

private final ES87BloomFilterPostingsFormat bloomFilterPostingsFormat;
Expand All @@ -53,14 +54,19 @@ public PerFieldFormatSupplier(MapperService mapperService, BigArrays bigArrays)
this.bloomFilterPostingsFormat = new ES87BloomFilterPostingsFormat(bigArrays, this::internalGetPostingsFormatForField);

if (mapperService != null
&& USE_LUCENE103_POSTINGS_FORMAT.isEnabled()
&& mapperService.getIndexSettings().getIndexVersionCreated().onOrAfter(IndexVersions.UPGRADE_TO_LUCENE_10_3_0)
&& mapperService.getIndexSettings().getMode() == IndexMode.STANDARD) {
defaultPostingsFormat = new Lucene103PostingsFormat();
} else if (mapperService != null
&& USE_LUCENE101_POSTINGS_FORMAT.isEnabled()
&& mapperService.getIndexSettings().getIndexVersionCreated().onOrAfter(IndexVersions.USE_LUCENE101_POSTINGS_FORMAT)
&& mapperService.getIndexSettings().getMode() == IndexMode.STANDARD) {
defaultPostingsFormat = lucene101PostingsFormat;
} else {
// our own posting format using PFOR
defaultPostingsFormat = es812PostingsFormat;
}
defaultPostingsFormat = new Lucene101PostingsFormat();
} else {
// our own posting format using PFOR
defaultPostingsFormat = es812PostingsFormat;
}
}

public PostingsFormat getPostingsFormatForField(String field) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
* per index in real time via the mapping API. If no specific postings format or vector format is
* configured for a specific field the default postings or vector format is used.
*/
public final class PerFieldMapperCodec extends Elasticsearch900Lucene101Codec {
public final class PerFieldMapperCodec extends Elasticsearch902Lucene103Codec {

private final PerFieldFormatSupplier formatSupplier;

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/*
* @notice
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Modifications copyright (C) 2025 Elasticsearch B.V.
*/
package org.elasticsearch.index.codec.postings;

import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.compress.LowercaseAsciiCompression;

import java.io.IOException;

/**
 * Compression schemes that can be applied to the suffixes of a block of terms. Every constant carries
 * an integer {@link #code} and knows how to read data that was written with its scheme.
 */
public enum CompressionAlgorithm {

    /** Bytes are copied from the input as-is. */
    NO_COMPRESSION(0x00) {
        @Override
        void read(DataInput in, byte[] out, int len) throws IOException {
            in.readBytes(out, 0, len);
        }
    },

    /** Bytes are decoded with {@link LowercaseAsciiCompression}. */
    LOWERCASE_ASCII(0x01) {
        @Override
        void read(DataInput in, byte[] out, int len) throws IOException {
            LowercaseAsciiCompression.decompress(in, out, len);
        }
    },

    /** Bytes are decoded with Lucene's LZ4 implementation. */
    LZ4(0x02) {
        @Override
        void read(DataInput in, byte[] out, int len) throws IOException {
            // Fully qualified: inside this enum the simple name LZ4 refers to the constant itself.
            org.apache.lucene.util.compress.LZ4.decompress(in, len, out, 0);
        }
    };

    /** Integer code of this algorithm; it is also the algorithm's slot in the lookup table. */
    public final int code;

    CompressionAlgorithm(int code) {
        this.code = code;
    }

    /**
     * Reads data from {@code in} into {@code out}, starting at the beginning of {@code out}.
     *
     * @param in  source of the (possibly compressed) bytes
     * @param out destination buffer
     * @param len length forwarded to the underlying read/decompress call
     * @throws IOException if reading from {@code in} fails
     */
    abstract void read(DataInput in, byte[] out, int len) throws IOException;

    /** Lookup table indexed by {@link #code}. */
    private static final CompressionAlgorithm[] BY_CODE = indexByCode();

    /** Builds the code-indexed lookup table from all declared constants. */
    private static CompressionAlgorithm[] indexByCode() {
        final CompressionAlgorithm[] all = values();
        final CompressionAlgorithm[] table = new CompressionAlgorithm[all.length];
        for (int i = 0; i < all.length; i++) {
            table[all[i].code] = all[i];
        }
        return table;
    }

    /**
     * Look up a {@link CompressionAlgorithm} by its {@link CompressionAlgorithm#code}.
     *
     * @throws IllegalArgumentException if {@code code} is outside the range of the lookup table
     */
    static CompressionAlgorithm byCode(int code) {
        if (code >= 0 && code < BY_CODE.length) {
            return BY_CODE[code];
        }
        throw new IllegalArgumentException("Illegal code for a compression algorithm: " + code);
    }
}
Loading