diff --git a/docs/changelog/125389.yaml b/docs/changelog/125389.yaml new file mode 100644 index 0000000000000..a57524ff40db5 --- /dev/null +++ b/docs/changelog/125389.yaml @@ -0,0 +1,6 @@ +pr: 125389 +summary: Support indices created in ESv6 and updated in ESv7 using different Lucene codecs + as archive in current version. +area: Search +type: bug +issues: [] diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/BWCCodec.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/BWCCodec.java index 25b4b685ac50f..3ed1e1877d9e9 100644 --- a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/BWCCodec.java +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/BWCCodec.java @@ -7,7 +7,6 @@ package org.elasticsearch.xpack.lucene.bwc.codecs; -import org.apache.lucene.backward_codecs.lucene70.Lucene70Codec; import org.apache.lucene.codecs.Codec; import org.apache.lucene.codecs.FieldInfosFormat; import org.apache.lucene.codecs.FieldsConsumer; @@ -18,6 +17,7 @@ import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.SegmentInfoFormat; import org.apache.lucene.codecs.TermVectorsFormat; +import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.Fields; @@ -27,7 +27,12 @@ import org.apache.lucene.index.Terms; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; +import org.apache.lucene.util.Version; import org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.BWCLucene70Codec; +import org.elasticsearch.xpack.lucene.bwc.codecs.lucene80.BWCLucene80Codec; +import org.elasticsearch.xpack.lucene.bwc.codecs.lucene84.BWCLucene84Codec; +import org.elasticsearch.xpack.lucene.bwc.codecs.lucene86.BWCLucene86Codec; +import 
org.elasticsearch.xpack.lucene.bwc.codecs.lucene87.BWCLucene87Codec; import java.io.IOException; import java.util.ArrayList; @@ -39,55 +44,122 @@ */ public abstract class BWCCodec extends Codec { + private final FieldInfosFormat fieldInfosFormat; + private final SegmentInfoFormat segmentInfosFormat; + private final PostingsFormat postingsFormat; + protected BWCCodec(String name) { super(name); - } - @Override - public NormsFormat normsFormat() { - throw new UnsupportedOperationException(); - } + this.fieldInfosFormat = new FieldInfosFormat() { + final FieldInfosFormat wrappedFormat = originalFieldInfosFormat(); - @Override - public TermVectorsFormat termVectorsFormat() { - throw new UnsupportedOperationException(); - } + @Override + public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext iocontext) + throws IOException { + return filterFields(wrappedFormat.read(directory, segmentInfo, segmentSuffix, iocontext)); + } - @Override - public KnnVectorsFormat knnVectorsFormat() { - throw new UnsupportedOperationException(); - } + @Override + public void write(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos infos, IOContext context) + throws IOException { + wrappedFormat.write(directory, segmentInfo, segmentSuffix, infos, context); + } + }; + + this.segmentInfosFormat = new SegmentInfoFormat() { + final SegmentInfoFormat wrappedFormat = originalSegmentInfoFormat(); - protected static SegmentInfoFormat wrap(SegmentInfoFormat wrapped) { - return new SegmentInfoFormat() { @Override public SegmentInfo read(Directory directory, String segmentName, byte[] segmentID, IOContext context) throws IOException { - return wrap(wrapped.read(directory, segmentName, segmentID, context)); + return wrap(wrappedFormat.read(directory, segmentName, segmentID, context)); } @Override public void write(Directory dir, SegmentInfo info, IOContext ioContext) throws IOException { - wrapped.write(dir, info, ioContext); + 
wrappedFormat.write(dir, info, ioContext); } }; - } - protected static FieldInfosFormat wrap(FieldInfosFormat wrapped) { - return new FieldInfosFormat() { + this.postingsFormat = new PerFieldPostingsFormat() { @Override - public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext iocontext) - throws IOException { - return filterFields(wrapped.read(directory, segmentInfo, segmentSuffix, iocontext)); - } - - @Override - public void write(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos infos, IOContext context) - throws IOException { - wrapped.write(directory, segmentInfo, segmentSuffix, infos, context); + public PostingsFormat getPostingsFormatForField(String field) { + throw new UnsupportedOperationException("Old codecs can't be used for writing"); } }; } + @Override + public final FieldInfosFormat fieldInfosFormat() { + return fieldInfosFormat; + } + + @Override + public final SegmentInfoFormat segmentInfoFormat() { + return segmentInfosFormat; + } + + @Override + public PostingsFormat postingsFormat() { + return postingsFormat; + } + + /** + * This method is not supported for archive indices and older codecs and will always throw an {@link UnsupportedOperationException}. + * This method is never called in practice, as we rewrite field infos to override the info about which features are present in + * the index. Even if norms are present, field info lies about it. + * + * @return nothing, as this method always throws an exception + * @throws UnsupportedOperationException always thrown to indicate that this method is not supported + */ + @Override + public final NormsFormat normsFormat() { + throw new UnsupportedOperationException(); + } + + /** + * This method is not supported for archive indices and older codecs and will always throw an {@link UnsupportedOperationException}. 
+ * This method is never called in practice, as we rewrite field infos to override the info about which features are present in + * the index. Even if term vectors are present, field info lies about it. + * + * @return nothing, as this method always throws an exception + * @throws UnsupportedOperationException always thrown to indicate that this method is not supported + */ + @Override + public final TermVectorsFormat termVectorsFormat() { + throw new UnsupportedOperationException(); + } + + /** + * This method is not supported for archive indices and older codecs and will always throw an {@link UnsupportedOperationException}. + * Knn vectors can't be present because they are not supported in any of the Lucene versions that we support for archive indices. + * + * @return nothing, as this method always throws an exception + * @throws UnsupportedOperationException always thrown to indicate that this method is not supported + */ + @Override + public final KnnVectorsFormat knnVectorsFormat() { + throw new UnsupportedOperationException(); + } + + /** + * Returns the original {@link SegmentInfoFormat} used by this codec. + * This method should be implemented by subclasses to provide the specific + * {@link SegmentInfoFormat} that this codec is intended to use. + * + * @return the original {@link SegmentInfoFormat} used by this codec + */ + protected abstract SegmentInfoFormat originalSegmentInfoFormat(); + + /** + * Returns the original {@link FieldInfosFormat} used by this codec. + * This method should be implemented by subclasses to provide the specific + * {@link FieldInfosFormat} that this codec is intended to use. + * + * @return the original {@link FieldInfosFormat} used by this codec + */ + protected abstract FieldInfosFormat originalFieldInfosFormat(); + // mark all fields as no term vectors, no norms, no payloads, and no vectors. 
private static FieldInfos filterFields(FieldInfos fieldInfos) { List fieldInfoCopy = new ArrayList<>(fieldInfos.size()); @@ -119,15 +191,14 @@ private static FieldInfos filterFields(FieldInfos fieldInfos) { } public static SegmentInfo wrap(SegmentInfo segmentInfo) { - // special handling for Lucene70Codec (which is currently bundled with Lucene) - // Use BWCLucene70Codec instead as that one extends BWCCodec (similar to all other older codecs) - final Codec codec = segmentInfo.getCodec() instanceof Lucene70Codec ? new BWCLucene70Codec() : segmentInfo.getCodec(); + Codec codec = getBackwardCompatibleCodec(segmentInfo.getCodec()); + final SegmentInfo segmentInfo1 = new SegmentInfo( segmentInfo.dir, // Use Version.LATEST instead of original version, otherwise SegmentCommitInfo will bark when processing (N-1 limitation) // TODO: perhaps store the original version information in attributes so that we can retrieve it later when needed? - org.apache.lucene.util.Version.LATEST, - org.apache.lucene.util.Version.LATEST, + Version.LATEST, + Version.LATEST, segmentInfo.name, segmentInfo.maxDoc(), segmentInfo.getUseCompoundFile(), @@ -142,6 +213,28 @@ public static SegmentInfo wrap(SegmentInfo segmentInfo) { return segmentInfo1; } + /** + * Returns a backward-compatible codec for the given codec. If the codec is one of the known Lucene 7.0 or 8.x codecs, + * it returns a corresponding read-only backward-compatible codec. Otherwise, it returns the original codec. + * Lucene 8.x codecs are still shipped with the current version of Lucene. + * Earlier codecs, which we provide directly, are also read-only backward-compatible, but they don't require the renaming. + * + * This switch is only for indices created in ES 6.x, later written to in ES 7.x (Lucene 8.x). Indices created + * in ES 7.x can be read directly by ES if marked read-only, without going through archive indices. 
+ */ + private static Codec getBackwardCompatibleCodec(Codec codec) { + if (codec == null) return null; + + return switch (codec.getClass().getSimpleName()) { + case "Lucene70Codec" -> new BWCLucene70Codec(); + case "Lucene80Codec" -> new BWCLucene80Codec(); + case "Lucene84Codec" -> new BWCLucene84Codec(); + case "Lucene86Codec" -> new BWCLucene86Codec(); + case "Lucene87Codec" -> new BWCLucene87Codec(); + default -> codec; + }; + } + /** * In-memory postings format that shows no postings available. */ diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene60/Lucene60Codec.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene60/Lucene60Codec.java index 31973c8dd4e3e..20ef64c87cb91 100644 --- a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene60/Lucene60Codec.java +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene60/Lucene60Codec.java @@ -44,8 +44,7 @@ */ @Deprecated public class Lucene60Codec extends BWCCodec { - private final FieldInfosFormat fieldInfosFormat = wrap(new Lucene60FieldInfosFormat()); - private final SegmentInfoFormat segmentInfosFormat = wrap(new Lucene50SegmentInfoFormat()); + private final LiveDocsFormat liveDocsFormat = new Lucene50LiveDocsFormat(); private final CompoundFormat compoundFormat = new Lucene50CompoundFormat(); private final StoredFieldsFormat storedFieldsFormat; @@ -68,18 +67,18 @@ public Lucene60Codec() { } @Override - public final StoredFieldsFormat storedFieldsFormat() { - return storedFieldsFormat; + protected FieldInfosFormat originalFieldInfosFormat() { + return new Lucene60FieldInfosFormat(); } @Override - public final FieldInfosFormat fieldInfosFormat() { - return fieldInfosFormat; + protected SegmentInfoFormat originalSegmentInfoFormat() { + return new Lucene50SegmentInfoFormat(); } @Override - public SegmentInfoFormat 
segmentInfoFormat() { - return segmentInfosFormat; + public final StoredFieldsFormat storedFieldsFormat() { + return storedFieldsFormat; } @Override diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene60/Lucene60MetadataOnlyPointsFormat.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene60/Lucene60MetadataOnlyPointsFormat.java index fc90a3e14b944..6499f8af72bb2 100644 --- a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene60/Lucene60MetadataOnlyPointsFormat.java +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene60/Lucene60MetadataOnlyPointsFormat.java @@ -28,6 +28,7 @@ import java.io.IOException; /** + * This is a fork of {@link org.apache.lucene.backward_codecs.lucene60.Lucene60PointsFormat} * Allows reading metadata only from Lucene 6.0 point format **/ public class Lucene60MetadataOnlyPointsFormat extends PointsFormat { diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene60/Lucene60MetadataOnlyPointsReader.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene60/Lucene60MetadataOnlyPointsReader.java index 2e796a04200fe..8a5ca4acd16cb 100644 --- a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene60/Lucene60MetadataOnlyPointsReader.java +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene60/Lucene60MetadataOnlyPointsReader.java @@ -34,7 +34,10 @@ import java.util.HashMap; import java.util.Map; -/** Reads the metadata of point values previously written with Lucene60PointsWriter */ +/** + * This is a fork of {@link org.apache.lucene.backward_codecs.lucene60.Lucene60PointsReader} + * Reads the metadata of point values previously written with Lucene60PointsWriter + 
*/ public final class Lucene60MetadataOnlyPointsReader extends PointsReader { final IndexInput dataIn; final SegmentReadState readState; @@ -105,7 +108,7 @@ public Lucene60MetadataOnlyPointsReader(SegmentReadState readState) throws IOExc int fieldNumber = ent.getKey(); long fp = ent.getValue(); dataIn.seek(fp); - PointValues reader = new MetadataOnlyBKDReader(dataIn); + PointValues reader = new MetadataOnlyBKDReader(dataIn, false); readers.put(fieldNumber, reader); } diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene60/MetadataOnlyBKDReader.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene60/MetadataOnlyBKDReader.java index f3ce3ea0755e1..2a43f070b3616 100644 --- a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene60/MetadataOnlyBKDReader.java +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene60/MetadataOnlyBKDReader.java @@ -47,7 +47,7 @@ public class MetadataOnlyBKDReader extends PointValues { final int docCount; final int version; - public MetadataOnlyBKDReader(IndexInput metaIn) throws IOException { + public MetadataOnlyBKDReader(IndexInput metaIn, boolean isVersionPost86) throws IOException { version = CodecUtil.checkHeader(metaIn, "BKD", VERSION_START, VERSION_CURRENT); final int numDims = metaIn.readVInt(); final int numIndexDims; @@ -85,6 +85,23 @@ public MetadataOnlyBKDReader(IndexInput metaIn) throws IOException { pointCount = metaIn.readVLong(); docCount = metaIn.readVInt(); + + // The pre-8.6 code does not read the following fields that its standard Lucene counterpart does. After experimenting with the + // code, we got to the conclusion that these are the last fields being read, which are not needed in the metadata-only reader, and + // we can safely ignore them when loading the file. 
By coincidence, nothing breaks if we read a couple of VLongs, as long + // as some bytes are available to read. + // + // The extra reads have been introduced to process IndexInput created with Lucene86Codec+, where a new BKD format has been + // introduced. We have stricter checks around the header and footer starting from the 86 formats, hence we need to + // consume all the data input there, but not in previous formats. + // + // For correctness, we added version checking here. If and only if the version is 8.6 or higher, we read the additional fields. + if (isVersionPost86) { + metaIn.readVInt(); + metaIn.readLong(); + // The following fields are not used in this class, but we need to read them to advance the pointer + metaIn.readLong(); + } } @Override diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene62/Lucene62Codec.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene62/Lucene62Codec.java index 02eb75e2437c5..54d36add17159 100644 --- a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene62/Lucene62Codec.java +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene62/Lucene62Codec.java @@ -44,8 +44,7 @@ */ @Deprecated public class Lucene62Codec extends BWCCodec { - private final FieldInfosFormat fieldInfosFormat = wrap(new Lucene60FieldInfosFormat()); - private final SegmentInfoFormat segmentInfosFormat = wrap(new Lucene62SegmentInfoFormat()); + private final LiveDocsFormat liveDocsFormat = new Lucene50LiveDocsFormat(); private final CompoundFormat compoundFormat = new Lucene50CompoundFormat(); private final StoredFieldsFormat storedFieldsFormat; @@ -68,18 +67,18 @@ public Lucene62Codec() { } @Override - public final StoredFieldsFormat storedFieldsFormat() { - return storedFieldsFormat; + protected FieldInfosFormat originalFieldInfosFormat() { + return 
new Lucene60FieldInfosFormat(); } @Override - public final FieldInfosFormat fieldInfosFormat() { - return fieldInfosFormat; + protected SegmentInfoFormat originalSegmentInfoFormat() { + return new Lucene62SegmentInfoFormat(); } @Override - public SegmentInfoFormat segmentInfoFormat() { - return segmentInfosFormat; + public final StoredFieldsFormat storedFieldsFormat() { + return storedFieldsFormat; } @Override diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/BWCLucene70Codec.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/BWCLucene70Codec.java index 0e689138acd8f..8648ac859c386 100644 --- a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/BWCLucene70Codec.java +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene70/BWCLucene70Codec.java @@ -11,54 +11,58 @@ import org.apache.lucene.backward_codecs.lucene50.Lucene50LiveDocsFormat; import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat; import org.apache.lucene.backward_codecs.lucene60.Lucene60FieldInfosFormat; +import org.apache.lucene.backward_codecs.lucene70.Lucene70DocValuesFormat; import org.apache.lucene.backward_codecs.lucene70.Lucene70SegmentInfoFormat; import org.apache.lucene.codecs.CompoundFormat; import org.apache.lucene.codecs.DocValuesFormat; import org.apache.lucene.codecs.FieldInfosFormat; import org.apache.lucene.codecs.LiveDocsFormat; import org.apache.lucene.codecs.PointsFormat; -import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.SegmentInfoFormat; import org.apache.lucene.codecs.StoredFieldsFormat; import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat; -import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat; import org.elasticsearch.xpack.lucene.bwc.codecs.BWCCodec; import 
org.elasticsearch.xpack.lucene.bwc.codecs.lucene60.Lucene60MetadataOnlyPointsFormat; +/** + * Implements the Lucene 7.0 index format. Loaded via SPI for indices created/written with Lucene 7.x (Elasticsearch 6.x) mounted + * as archive indices first in Elasticsearch 8.x. Lucene 9.12 retained Lucene70Codec in its classpath which required overriding the + * codec name and version in the segment infos. This codec is still needed after upgrading to Elasticsearch 9.x because its codec + * name has been written to disk. + */ public class BWCLucene70Codec extends BWCCodec { - private final FieldInfosFormat fieldInfosFormat = wrap(new Lucene60FieldInfosFormat()); - private final SegmentInfoFormat segmentInfosFormat = wrap(new Lucene70SegmentInfoFormat()); private final LiveDocsFormat liveDocsFormat = new Lucene50LiveDocsFormat(); private final CompoundFormat compoundFormat = new Lucene50CompoundFormat(); private final StoredFieldsFormat storedFieldsFormat; - private final DocValuesFormat defaultDVFormat = DocValuesFormat.forName("Lucene70"); + private final DocValuesFormat defaultDVFormat = new Lucene70DocValuesFormat(); private final DocValuesFormat docValuesFormat = new PerFieldDocValuesFormat() { @Override public DocValuesFormat getDocValuesFormatForField(String field) { return defaultDVFormat; } }; - private final PostingsFormat postingsFormat = new PerFieldPostingsFormat() { - @Override - public PostingsFormat getPostingsFormatForField(String field) { - throw new IllegalStateException("This codec should only be used for reading, not writing"); - } - }; + private final PointsFormat pointsFormat = new Lucene60MetadataOnlyPointsFormat(); + // Needed for SPI loading + @SuppressWarnings("unused") public BWCLucene70Codec() { - super("BWCLucene70Codec"); + this("BWCLucene70Codec"); + } + + protected BWCLucene70Codec(String name) { + super(name); storedFieldsFormat = new Lucene50StoredFieldsFormat(Lucene50StoredFieldsFormat.Mode.BEST_SPEED); } @Override - public 
FieldInfosFormat fieldInfosFormat() { - return fieldInfosFormat; + protected FieldInfosFormat originalFieldInfosFormat() { + return new Lucene60FieldInfosFormat(); } @Override - public SegmentInfoFormat segmentInfoFormat() { - return segmentInfosFormat; + protected SegmentInfoFormat originalSegmentInfoFormat() { + return new Lucene70SegmentInfoFormat(); } @Override @@ -81,13 +85,8 @@ public final DocValuesFormat docValuesFormat() { return docValuesFormat; } - @Override - public PostingsFormat postingsFormat() { - return postingsFormat; - } - @Override public PointsFormat pointsFormat() { - return new Lucene60MetadataOnlyPointsFormat(); + return pointsFormat; } } diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene80/BWCLucene80Codec.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene80/BWCLucene80Codec.java new file mode 100644 index 0000000000000..9537b4e6f7fa0 --- /dev/null +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene80/BWCLucene80Codec.java @@ -0,0 +1,89 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.lucene.bwc.codecs.lucene80; + +import org.apache.lucene.backward_codecs.lucene50.Lucene50CompoundFormat; +import org.apache.lucene.backward_codecs.lucene50.Lucene50LiveDocsFormat; +import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat; +import org.apache.lucene.backward_codecs.lucene60.Lucene60FieldInfosFormat; +import org.apache.lucene.backward_codecs.lucene70.Lucene70SegmentInfoFormat; +import org.apache.lucene.backward_codecs.lucene80.Lucene80DocValuesFormat; +import org.apache.lucene.codecs.CompoundFormat; +import org.apache.lucene.codecs.DocValuesFormat; +import org.apache.lucene.codecs.FieldInfosFormat; +import org.apache.lucene.codecs.LiveDocsFormat; +import org.apache.lucene.codecs.PointsFormat; +import org.apache.lucene.codecs.SegmentInfoFormat; +import org.apache.lucene.codecs.StoredFieldsFormat; +import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat; +import org.elasticsearch.xpack.lucene.bwc.codecs.BWCCodec; +import org.elasticsearch.xpack.lucene.bwc.codecs.lucene60.Lucene60MetadataOnlyPointsFormat; + +/** + * This is a fork of {@link org.apache.lucene.backward_codecs.lucene80.Lucene80Codec} + * Implements the Lucene 8.0 index format. Loaded via SPI for indices created/written with Lucene 8.0.0-8.3.0 + * (Elasticsearch [7.0.0-7.5.2]), mounted as archive indices in Elasticsearch 8.x / 9.x. 
+ */ +public class BWCLucene80Codec extends BWCCodec { + + private final LiveDocsFormat liveDocsFormat = new Lucene50LiveDocsFormat(); + private final CompoundFormat compoundFormat = new Lucene50CompoundFormat(); + + private final DocValuesFormat docValuesFormat = new PerFieldDocValuesFormat() { + @Override + public DocValuesFormat getDocValuesFormatForField(String field) { + return defaultDVFormat; + } + }; + private final DocValuesFormat defaultDVFormat = new Lucene80DocValuesFormat(); + + private final StoredFieldsFormat storedFieldsFormat; + private final PointsFormat pointsFormat = new Lucene60MetadataOnlyPointsFormat(); + + // Needed for SPI loading + @SuppressWarnings("unused") + public BWCLucene80Codec() { + super("BWCLucene80Codec"); + this.storedFieldsFormat = new Lucene50StoredFieldsFormat(Lucene50StoredFieldsFormat.Mode.BEST_SPEED); + } + + @Override + protected FieldInfosFormat originalFieldInfosFormat() { + return new Lucene60FieldInfosFormat(); + } + + @Override + protected SegmentInfoFormat originalSegmentInfoFormat() { + return new Lucene70SegmentInfoFormat(); + } + + @Override + public final StoredFieldsFormat storedFieldsFormat() { + return storedFieldsFormat; + } + + @Override + public final LiveDocsFormat liveDocsFormat() { + return liveDocsFormat; + } + + @Override + public final CompoundFormat compoundFormat() { + return compoundFormat; + } + + @Override + public final PointsFormat pointsFormat() { + return pointsFormat; + } + + @Override + public final DocValuesFormat docValuesFormat() { + return docValuesFormat; + } +} diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene84/BWCLucene84Codec.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene84/BWCLucene84Codec.java new file mode 100644 index 0000000000000..6771f4b3130c1 --- /dev/null +++ 
b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene84/BWCLucene84Codec.java @@ -0,0 +1,90 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.lucene.bwc.codecs.lucene84; + +import org.apache.lucene.backward_codecs.lucene50.Lucene50CompoundFormat; +import org.apache.lucene.backward_codecs.lucene50.Lucene50LiveDocsFormat; +import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat; +import org.apache.lucene.backward_codecs.lucene60.Lucene60FieldInfosFormat; +import org.apache.lucene.backward_codecs.lucene70.Lucene70SegmentInfoFormat; +import org.apache.lucene.backward_codecs.lucene80.Lucene80DocValuesFormat; +import org.apache.lucene.codecs.CompoundFormat; +import org.apache.lucene.codecs.DocValuesFormat; +import org.apache.lucene.codecs.FieldInfosFormat; +import org.apache.lucene.codecs.LiveDocsFormat; +import org.apache.lucene.codecs.PointsFormat; +import org.apache.lucene.codecs.SegmentInfoFormat; +import org.apache.lucene.codecs.StoredFieldsFormat; +import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat; +import org.elasticsearch.xpack.lucene.bwc.codecs.BWCCodec; +import org.elasticsearch.xpack.lucene.bwc.codecs.lucene60.Lucene60MetadataOnlyPointsFormat; + +/** + * This is a fork of {@link org.apache.lucene.backward_codecs.lucene84.Lucene84Codec} + * Implements the Lucene 8.4 index format. Loaded via SPI for indices created/written with Lucene 8.4.0-8.5.1 + * (Elasticsearch [7.6.0-7.8.1]), mounted as archive indices in Elasticsearch 8.x / 9.x. 
+ */ +public class BWCLucene84Codec extends BWCCodec { + + private final LiveDocsFormat liveDocsFormat = new Lucene50LiveDocsFormat(); + private final CompoundFormat compoundFormat = new Lucene50CompoundFormat(); + private final DocValuesFormat defaultDVFormat; + + private final DocValuesFormat docValuesFormat = new PerFieldDocValuesFormat() { + @Override + public DocValuesFormat getDocValuesFormatForField(String field) { + return defaultDVFormat; + } + }; + + private final StoredFieldsFormat storedFieldsFormat; + private final PointsFormat pointsFormat = new Lucene60MetadataOnlyPointsFormat(); + + // Needed for SPI loading + @SuppressWarnings("unused") + public BWCLucene84Codec() { + super("BWCLucene84Codec"); + this.storedFieldsFormat = new Lucene50StoredFieldsFormat(Lucene50StoredFieldsFormat.Mode.BEST_SPEED); + this.defaultDVFormat = new Lucene80DocValuesFormat(); + } + + @Override + protected FieldInfosFormat originalFieldInfosFormat() { + return new Lucene60FieldInfosFormat(); + } + + @Override + protected SegmentInfoFormat originalSegmentInfoFormat() { + return new Lucene70SegmentInfoFormat(); + } + + @Override + public StoredFieldsFormat storedFieldsFormat() { + return storedFieldsFormat; + } + + @Override + public final LiveDocsFormat liveDocsFormat() { + return liveDocsFormat; + } + + @Override + public CompoundFormat compoundFormat() { + return compoundFormat; + } + + @Override + public PointsFormat pointsFormat() { + return pointsFormat; + } + + @Override + public final DocValuesFormat docValuesFormat() { + return docValuesFormat; + } +} diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene86/BWCLucene86Codec.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene86/BWCLucene86Codec.java new file mode 100644 index 0000000000000..1949285118aed --- /dev/null +++ 
b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene86/BWCLucene86Codec.java @@ -0,0 +1,89 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.lucene.bwc.codecs.lucene86; + +import org.apache.lucene.backward_codecs.lucene50.Lucene50CompoundFormat; +import org.apache.lucene.backward_codecs.lucene50.Lucene50LiveDocsFormat; +import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat; +import org.apache.lucene.backward_codecs.lucene60.Lucene60FieldInfosFormat; +import org.apache.lucene.backward_codecs.lucene80.Lucene80DocValuesFormat; +import org.apache.lucene.backward_codecs.lucene86.Lucene86SegmentInfoFormat; +import org.apache.lucene.codecs.CompoundFormat; +import org.apache.lucene.codecs.DocValuesFormat; +import org.apache.lucene.codecs.FieldInfosFormat; +import org.apache.lucene.codecs.LiveDocsFormat; +import org.apache.lucene.codecs.PointsFormat; +import org.apache.lucene.codecs.SegmentInfoFormat; +import org.apache.lucene.codecs.StoredFieldsFormat; +import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat; +import org.elasticsearch.xpack.lucene.bwc.codecs.BWCCodec; + +/** + * This is a fork of {@link org.apache.lucene.backward_codecs.lucene86.Lucene86Codec} + * Implements the Lucene 8.6 index format. Loaded via SPI for indices created/written with Lucene 8.6.0-8.6.2 + * (Elasticsearch [7.9.0-7.9.3]), mounted as archive indices in Elasticsearch 8.x / 9.x. 
+ */ +public class BWCLucene86Codec extends BWCCodec { + + private final LiveDocsFormat liveDocsFormat = new Lucene50LiveDocsFormat(); + private final CompoundFormat compoundFormat = new Lucene50CompoundFormat(); + private final PointsFormat pointsFormat = new Lucene86MetadataOnlyPointsFormat(); + private final DocValuesFormat defaultDVFormat; + + private final DocValuesFormat docValuesFormat = new PerFieldDocValuesFormat() { + @Override + public DocValuesFormat getDocValuesFormatForField(String field) { + return defaultDVFormat; + } + }; + + private final StoredFieldsFormat storedFieldsFormat; + + // Needed for SPI loading + @SuppressWarnings("unused") + public BWCLucene86Codec() { + super("BWCLucene86Codec"); + this.storedFieldsFormat = new Lucene50StoredFieldsFormat(Lucene50StoredFieldsFormat.Mode.BEST_SPEED); + this.defaultDVFormat = new Lucene80DocValuesFormat(); + } + + @Override + protected FieldInfosFormat originalFieldInfosFormat() { + return new Lucene60FieldInfosFormat(); + } + + @Override + protected SegmentInfoFormat originalSegmentInfoFormat() { + return new Lucene86SegmentInfoFormat(); + } + + @Override + public StoredFieldsFormat storedFieldsFormat() { + return storedFieldsFormat; + } + + @Override + public final LiveDocsFormat liveDocsFormat() { + return liveDocsFormat; + } + + @Override + public CompoundFormat compoundFormat() { + return compoundFormat; + } + + @Override + public PointsFormat pointsFormat() { + return pointsFormat; + } + + @Override + public final DocValuesFormat docValuesFormat() { + return docValuesFormat; + } +} diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene86/Lucene86MetadataOnlyPointsFormat.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene86/Lucene86MetadataOnlyPointsFormat.java new file mode 100644 index 0000000000000..f7902c5c9e2a0 --- /dev/null +++ 
b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene86/Lucene86MetadataOnlyPointsFormat.java @@ -0,0 +1,56 @@ +/* + * @notice + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modifications copyright (C) 2021 Elasticsearch B.V. 
+ */ +package org.elasticsearch.xpack.lucene.bwc.codecs.lucene86; + +import org.apache.lucene.codecs.PointsFormat; +import org.apache.lucene.codecs.PointsReader; +import org.apache.lucene.codecs.PointsWriter; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SegmentWriteState; + +import java.io.IOException; + +/** + * This is a fork of {@link org.apache.lucene.backward_codecs.lucene86.Lucene86PointsFormat}. + * Allows reading only the metadata of the Lucene 8.6 points format. + **/ +public class Lucene86MetadataOnlyPointsFormat extends PointsFormat { + + static final String META_CODEC_NAME = "Lucene86PointsFormatMeta"; /* codec name expected in the metadata file header */ + + /** Filename extension of the per-field metadata file */ + public static final String META_EXTENSION = "kdm"; + + static final int VERSION_START = 0; + static final int VERSION_CURRENT = VERSION_START; + + /** Sole constructor */ + public Lucene86MetadataOnlyPointsFormat() {} + + @Override + public PointsWriter fieldsWriter(SegmentWriteState state) { + throw new UnsupportedOperationException("Old codecs may only be used for reading"); + } + + @Override + public PointsReader fieldsReader(SegmentReadState state) throws IOException { + return new Lucene86MetadataOnlyPointsReader(state); /* opens only the metadata file */ + } +} diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene86/Lucene86MetadataOnlyPointsReader.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene86/Lucene86MetadataOnlyPointsReader.java new file mode 100644 index 0000000000000..55671828b4dcd --- /dev/null +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene86/Lucene86MetadataOnlyPointsReader.java @@ -0,0 +1,121 @@ +/* + * @notice + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Modifications copyright (C) 2021 Elasticsearch B.V. + */ +package org.elasticsearch.xpack.lucene.bwc.codecs.lucene86; + +import org.apache.lucene.backward_codecs.store.EndiannessReverserUtil; +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.PointsReader; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.PointValues; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.store.ChecksumIndexInput; +import org.elasticsearch.core.IOUtils; +import org.elasticsearch.xpack.lucene.bwc.codecs.lucene60.MetadataOnlyBKDReader; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +/** + * This is a fork of {@link org.apache.lucene.backward_codecs.lucene86.Lucene86PointsReader}. + * Reads the metadata of point values previously written with Lucene86PointsWriter. + */ +public final class Lucene86MetadataOnlyPointsReader extends PointsReader { + final SegmentReadState readState; + final Map<Integer, PointValues> readers = new HashMap<>(); /* per-field metadata readers, keyed by field number */ + + public Lucene86MetadataOnlyPointsReader(SegmentReadState readState) throws IOException { + this.readState = readState; + + String metaFileName = IndexFileNames.segmentFileName( + readState.segmentInfo.name, + readState.segmentSuffix,
+ Lucene86MetadataOnlyPointsFormat.META_EXTENSION + ); + + boolean success = false; + try { + try ( + ChecksumIndexInput metaIn = EndiannessReverserUtil.openChecksumInput(readState.directory, metaFileName, readState.context) + ) { + Throwable priorE = null; + try { + CodecUtil.checkIndexHeader( + metaIn, + Lucene86MetadataOnlyPointsFormat.META_CODEC_NAME, + Lucene86MetadataOnlyPointsFormat.VERSION_START, + Lucene86MetadataOnlyPointsFormat.VERSION_CURRENT, + readState.segmentInfo.getId(), + readState.segmentSuffix + ); + + while (true) { + int fieldNumber = metaIn.readInt(); + if (fieldNumber == -1) { /* -1 marks the end of the per-field entries */ + break; + } else if (fieldNumber < 0) { + throw new CorruptIndexException("Illegal field number: " + fieldNumber, metaIn); + } + PointValues reader = new MetadataOnlyBKDReader(metaIn, true); + readers.put(fieldNumber, reader); + } + metaIn.readLong(); /* two trailing longs are consumed but unused (index/data file lengths in upstream Lucene86PointsReader) */ + metaIn.readLong(); + } catch (Throwable t) { + priorE = t; /* handed to checkFooter below */ + } finally { + CodecUtil.checkFooter(metaIn, priorE); + } + } + + success = true; + } finally { + if (success == false) { + IOUtils.closeWhileHandlingException(this); + } + } + } + + @Override + public PointValues getValues(String fieldName) { + FieldInfo fieldInfo = readState.fieldInfos.fieldInfo(fieldName); + if (fieldInfo == null) { + throw new IllegalArgumentException("field=\"" + fieldName + "\" is unrecognized"); + } + if (fieldInfo.getPointDimensionCount() == 0) { + throw new IllegalArgumentException("field=\"" + fieldName + "\" did not index point values"); + } + + return readers.get(fieldInfo.number); /* null when the metadata file had no entry for this field */ + } + + // We only open the metadata file, and do nothing with the other two files (index/data), + // for which Lucene checks integrity but we don't need to.
+ @Override + public void checkIntegrity() {} + + @Override + public void close() throws IOException { + // Free up heap: + readers.clear(); + } +} diff --git a/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene87/BWCLucene87Codec.java b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene87/BWCLucene87Codec.java new file mode 100644 index 0000000000000..f461bdee8864d --- /dev/null +++ b/x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene87/BWCLucene87Codec.java @@ -0,0 +1,90 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.lucene.bwc.codecs.lucene87; + +import org.apache.lucene.backward_codecs.lucene50.Lucene50CompoundFormat; +import org.apache.lucene.backward_codecs.lucene50.Lucene50LiveDocsFormat; +import org.apache.lucene.backward_codecs.lucene60.Lucene60FieldInfosFormat; +import org.apache.lucene.backward_codecs.lucene80.Lucene80DocValuesFormat; +import org.apache.lucene.backward_codecs.lucene86.Lucene86SegmentInfoFormat; +import org.apache.lucene.backward_codecs.lucene87.Lucene87StoredFieldsFormat; +import org.apache.lucene.codecs.CompoundFormat; +import org.apache.lucene.codecs.DocValuesFormat; +import org.apache.lucene.codecs.FieldInfosFormat; +import org.apache.lucene.codecs.LiveDocsFormat; +import org.apache.lucene.codecs.PointsFormat; +import org.apache.lucene.codecs.SegmentInfoFormat; +import org.apache.lucene.codecs.StoredFieldsFormat; +import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat; +import org.elasticsearch.xpack.lucene.bwc.codecs.BWCCodec; +import org.elasticsearch.xpack.lucene.bwc.codecs.lucene86.Lucene86MetadataOnlyPointsFormat; + +/** + * This is a fork 
of {@link org.apache.lucene.backward_codecs.lucene87.Lucene87Codec}. + * Implements the Lucene 8.7 index format. Loaded via SPI for indices created/written with Lucene 8.7.0-8.11.3 + * (Elasticsearch [7.10.0-7.17.26]), mounted as archive indices in Elasticsearch 8.x / 9.x. + */ +public class BWCLucene87Codec extends BWCCodec { + + private final LiveDocsFormat liveDocsFormat = new Lucene50LiveDocsFormat(); + private final CompoundFormat compoundFormat = new Lucene50CompoundFormat(); + private final PointsFormat pointsFormat = new Lucene86MetadataOnlyPointsFormat(); /* exposes point metadata only */ + private final DocValuesFormat defaultDVFormat; /* assigned in the constructor */ + + private final DocValuesFormat docValuesFormat = new PerFieldDocValuesFormat() { + @Override + public DocValuesFormat getDocValuesFormatForField(String field) { + return defaultDVFormat; /* the same format for every field */ + } + }; + + private final StoredFieldsFormat storedFieldsFormat; + + // Needed for SPI loading + @SuppressWarnings("unused") + public BWCLucene87Codec() { + super("BWCLucene87Codec"); + this.storedFieldsFormat = new Lucene87StoredFieldsFormat(Lucene87StoredFieldsFormat.Mode.BEST_COMPRESSION); + this.defaultDVFormat = new Lucene80DocValuesFormat(Lucene80DocValuesFormat.Mode.BEST_COMPRESSION); + } + + @Override + protected FieldInfosFormat originalFieldInfosFormat() { + return new Lucene60FieldInfosFormat(); /* wrapped by BWCCodec, which filters the fields it reads */ + } + + @Override + protected SegmentInfoFormat originalSegmentInfoFormat() { + return new Lucene86SegmentInfoFormat(); /* wrapped by BWCCodec before use */ + } + + @Override + public StoredFieldsFormat storedFieldsFormat() { + return storedFieldsFormat; + } + + @Override + public final LiveDocsFormat liveDocsFormat() { + return liveDocsFormat; + } + + @Override + public CompoundFormat compoundFormat() { + return compoundFormat; + } + + @Override + public PointsFormat pointsFormat() { + return pointsFormat; + } + + @Override + public final DocValuesFormat docValuesFormat() { + return docValuesFormat; + } +} diff --git
a/x-pack/plugin/old-lucene-versions/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec b/x-pack/plugin/old-lucene-versions/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec index 6e5205d664f2d..abc3ed8dd5323 100644 --- a/x-pack/plugin/old-lucene-versions/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec +++ b/x-pack/plugin/old-lucene-versions/src/main/resources/META-INF/services/org.apache.lucene.codecs.Codec @@ -5,6 +5,10 @@ # 2.0. # +org.elasticsearch.xpack.lucene.bwc.codecs.lucene87.BWCLucene87Codec +org.elasticsearch.xpack.lucene.bwc.codecs.lucene86.BWCLucene86Codec +org.elasticsearch.xpack.lucene.bwc.codecs.lucene84.BWCLucene84Codec +org.elasticsearch.xpack.lucene.bwc.codecs.lucene80.BWCLucene80Codec org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.BWCLucene70Codec org.elasticsearch.xpack.lucene.bwc.codecs.lucene62.Lucene62Codec org.elasticsearch.xpack.lucene.bwc.codecs.lucene60.Lucene60Codec diff --git a/x-pack/plugin/old-lucene-versions/src/test/java/org/elasticsearch/xpack/lucene/bwc/codecs/BWCCodecTests.java b/x-pack/plugin/old-lucene-versions/src/test/java/org/elasticsearch/xpack/lucene/bwc/codecs/BWCCodecTests.java new file mode 100644 index 0000000000000..219cfa29f13ce --- /dev/null +++ b/x-pack/plugin/old-lucene-versions/src/test/java/org/elasticsearch/xpack/lucene/bwc/codecs/BWCCodecTests.java @@ -0,0 +1,92 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.lucene.bwc.codecs; + +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.CompoundFormat; +import org.apache.lucene.codecs.DocValuesFormat; +import org.apache.lucene.codecs.FieldInfosFormat; +import org.apache.lucene.codecs.LiveDocsFormat; +import org.apache.lucene.codecs.PointsFormat; +import org.apache.lucene.codecs.PostingsFormat; +import org.apache.lucene.codecs.SegmentInfoFormat; +import org.apache.lucene.codecs.StoredFieldsFormat; +import org.elasticsearch.test.ESTestCase; + +/** + * Unit tests for {@link BWCCodec}, exercised through a stub subclass whose format overrides all return {@code null}. + */ +public class BWCCodecTests extends ESTestCase { + + private final Codec codec; + + public BWCCodecTests() { /* builds the stub codec; the tests only call accessors it does not override */ + this.codec = new BWCCodec("WrapperCodec") { + @Override + protected SegmentInfoFormat originalSegmentInfoFormat() { + return null; + } + + @Override + protected FieldInfosFormat originalFieldInfosFormat() { + return null; + } + + @Override + public PostingsFormat postingsFormat() { + return null; + } + + @Override + public DocValuesFormat docValuesFormat() { + return null; + } + + @Override + public StoredFieldsFormat storedFieldsFormat() { + return null; + } + + @Override + public LiveDocsFormat liveDocsFormat() { + return null; + } + + @Override + public CompoundFormat compoundFormat() { + return null; + } + + @Override + public PointsFormat pointsFormat() { + return null; + } + }; + } + + /** + * Tests that the {@link Codec#normsFormat()} method throws an {@link UnsupportedOperationException}. + */ + public void testNormsFormatUnsupportedOperation() { + assertThrows(UnsupportedOperationException.class, codec::normsFormat); + } + + /** + * Tests that the {@link Codec#termVectorsFormat()} method throws an {@link UnsupportedOperationException}.
+ */ + public void testTermVectorsFormatUnsupportedOperation() { + assertThrows(UnsupportedOperationException.class, codec::termVectorsFormat); + } + + /** + * Tests that the {@link Codec#knnVectorsFormat()} method throws an {@link UnsupportedOperationException}. + */ + public void testKnnVectorsFormatUnsupportedOperation() { + assertThrows(UnsupportedOperationException.class, codec::knnVectorsFormat); + } +} diff --git a/x-pack/plugin/old-lucene-versions/src/test/java/org/elasticsearch/xpack/lucene/bwc/codecs/OldCodecsAvailableTests.java b/x-pack/plugin/old-lucene-versions/src/test/java/org/elasticsearch/xpack/lucene/bwc/codecs/OldCodecsAvailableTests.java index bf1538b4e5dd8..5674504b4eddc 100644 --- a/x-pack/plugin/old-lucene-versions/src/test/java/org/elasticsearch/xpack/lucene/bwc/codecs/OldCodecsAvailableTests.java +++ b/x-pack/plugin/old-lucene-versions/src/test/java/org/elasticsearch/xpack/lucene/bwc/codecs/OldCodecsAvailableTests.java @@ -7,17 +7,61 @@ package org.elasticsearch.xpack.lucene.bwc.codecs; +import org.apache.lucene.codecs.Codec; import org.elasticsearch.Version; import org.elasticsearch.test.ESTestCase; +import java.util.ServiceLoader; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + public class OldCodecsAvailableTests extends ESTestCase { /** + * This test verifies that, for each Lucene codec available via SPI, we also provide a corresponding BWC codec counterpart. + * Using a ServiceLoader, we fetch all classes matching the codecPathRegex (currently applied to Lucene8xCodec classes only). + * For each entry of the returned list, we try to load the BWC counterpart reflectively. + * * Reminder to add Lucene BWC codecs under {@link org.elasticsearch.xpack.lucene.bwc.codecs} whenever Elasticsearch is upgraded * to the next major Lucene version.
+ */ public void testLuceneBWCCodecsAvailable() { assertEquals("Add Lucene BWC codecs for Elasticsearch version 7", 8, Version.CURRENT.major); + + String codecPathRegex = ".*[\\\\.](Lucene(8[0-9])Codec)"; + Pattern codecPathPattern = Pattern.compile(codecPathRegex); + + String codecClassNameRegex = "Lucene(\\d+)Codec"; + Pattern classNamePattern = Pattern.compile(codecClassNameRegex); + + for (Codec codec : ServiceLoader.load(Codec.class)) { + Matcher codecPathMatcher = codecPathPattern.matcher(codec.getClass().getName()); + if (codecPathMatcher.matches()) { + String pathName = codec.getClass().getName(); + int lastDotIndex = pathName.lastIndexOf('.'); + String className = pathName.substring(lastDotIndex + 1); /* simple class name, e.g. Lucene87Codec */ + + Matcher classNameMatcher = classNamePattern.matcher(className); + if (classNameMatcher.matches()) { + String codecVersion = classNameMatcher.group(1); + String wrappedCodecClassPath = "org.elasticsearch.xpack.lucene.bwc.codecs.lucene" + + codecVersion + + ".BWCLucene" + + codecVersion + + "Codec"; + assertTrue("Missing BWC codec class [" + wrappedCodecClassPath + "]", isClassPresent(wrappedCodecClassPath)); + } + } + } + } + + private static boolean isClassPresent(String className) { /* true when the named class can be loaded */ + try { + Class.forName(className); + return true; + } catch (ClassNotFoundException e) { + return false; + } } }