- 
                Notifications
    You must be signed in to change notification settings 
- Fork 25.6k
Support 7x segments as archive in 8x / 9x #119503
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 134 commits
34f2f34
              a867385
              69b6b6e
              81c9fce
              30e2cb4
              000a7aa
              2c53ff6
              d3f18a5
              b3afec7
              5fef3eb
              1596495
              1ec0501
              394dae3
              a279780
              ecb162d
              2448af7
              9d9b6af
              02ddf82
              bf79ef3
              590091c
              528f220
              f1c91bd
              ba67bff
              6d1c65a
              2c3654a
              58d4762
              bc38090
              87f790f
              4334bc4
              ef0447b
              a7a8a84
              d91844d
              fe009d7
              a747a40
              f3e47ae
              7fdc027
              5b831ec
              338dbb1
              25b216e
              50530a2
              7033ed0
              9be8cea
              420e646
              48ab5b4
              ad70e72
              6b479e2
              2bc0107
              99e816f
              3196523
              f3b3d00
              f52789e
              ec243b7
              900fcb8
              46d408f
              2abea64
              f93eb9b
              f7e0012
              2777916
              7d71f93
              623bd7b
              13806d1
              3fee6af
              008d767
              af3fff9
              a41bbad
              6010cc9
              023e8ea
              8115a61
              0609411
              11ab217
              d9dedc7
              4f85349
              6b2361e
              41dcc1c
              2343576
              2df7f62
              de31af3
              4c3f29a
              6b95b0c
              74cb102
              6f8ff50
              3ebc9f4
              eadf8cf
              41dc557
              2ca8a9f
              e0e1740
              1ba2eaa
              6820c35
              8280559
              806d442
              3e0de6d
              a57c0d1
              bb561e2
              03bbc25
              e82375e
              e98aac7
              b39ef53
              ce4c1c9
              69fc6f6
              8a66843
              ff92f92
              c0f4d18
              02ac377
              7941cbc
              bb93eac
              42aa647
              9c9dc66
              b93bf88
              1cf1afc
              6e31623
              f155836
              c668a3a
              bd258ab
              c96cd31
              6b26b66
              b20d791
              58be89d
              1539844
              80919b6
              5a4ef59
              d42aa51
              668ccb3
              952ec01
              b636061
              ca586f9
              6783285
              e911ed2
              7770291
              44cd893
              b9eb516
              bb4458d
              6fec899
              ca0f256
              b3bbc6a
              36f62ca
              36edf83
              2952e62
              43465fc
              File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| pr: 119503 | ||
| summary: Support indices created in ES 6.x and updated in ES 7.x using different Lucene codecs as archive in the current version. | ||
| area: Search | ||
| type: bug | ||
| issues: | ||
| - 117042 | 
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
|  | @@ -17,6 +17,7 @@ | |
| import org.apache.lucene.codecs.PostingsFormat; | ||
| import org.apache.lucene.codecs.SegmentInfoFormat; | ||
| import org.apache.lucene.codecs.TermVectorsFormat; | ||
| import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat; | ||
| import org.apache.lucene.index.FieldInfo; | ||
| import org.apache.lucene.index.FieldInfos; | ||
| import org.apache.lucene.index.Fields; | ||
|  | @@ -26,6 +27,13 @@ | |
| import org.apache.lucene.index.Terms; | ||
| import org.apache.lucene.store.Directory; | ||
| import org.apache.lucene.store.IOContext; | ||
| import org.apache.lucene.util.Version; | ||
| import org.elasticsearch.core.UpdateForV10; | ||
| import org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.BWCLucene70Codec; | ||
| import org.elasticsearch.xpack.lucene.bwc.codecs.lucene80.BWCLucene80Codec; | ||
| import org.elasticsearch.xpack.lucene.bwc.codecs.lucene84.BWCLucene84Codec; | ||
| import org.elasticsearch.xpack.lucene.bwc.codecs.lucene86.BWCLucene86Codec; | ||
| import org.elasticsearch.xpack.lucene.bwc.codecs.lucene87.BWCLucene87Codec; | ||
|  | ||
| import java.io.IOException; | ||
| import java.util.ArrayList; | ||
|  | @@ -37,55 +45,122 @@ | |
| */ | ||
| public abstract class BWCCodec extends Codec { | ||
|  | ||
| private final FieldInfosFormat fieldInfosFormat; | ||
| private final SegmentInfoFormat segmentInfosFormat; | ||
| private final PostingsFormat postingsFormat; | ||
|  | ||
| protected BWCCodec(String name) { | ||
| super(name); | ||
| } | ||
|  | ||
| @Override | ||
| public NormsFormat normsFormat() { | ||
| throw new UnsupportedOperationException(); | ||
| } | ||
| this.fieldInfosFormat = new FieldInfosFormat() { | ||
| final FieldInfosFormat wrappedFormat = originalFieldInfosFormat(); | ||
|  | ||
| @Override | ||
| public TermVectorsFormat termVectorsFormat() { | ||
| throw new UnsupportedOperationException(); | ||
| } | ||
| @Override | ||
| public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext iocontext) | ||
| throws IOException { | ||
| return filterFields(wrappedFormat.read(directory, segmentInfo, segmentSuffix, iocontext)); | ||
| } | ||
|  | ||
| @Override | ||
| public KnnVectorsFormat knnVectorsFormat() { | ||
| throw new UnsupportedOperationException(); | ||
| } | ||
| @Override | ||
| public void write(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos infos, IOContext context) | ||
| throws IOException { | ||
| wrappedFormat.write(directory, segmentInfo, segmentSuffix, infos, context); | ||
| } | ||
| }; | ||
|  | ||
| this.segmentInfosFormat = new SegmentInfoFormat() { | ||
| final SegmentInfoFormat wrappedFormat = originalSegmentInfoFormat(); | ||
|  | ||
| protected static SegmentInfoFormat wrap(SegmentInfoFormat wrapped) { | ||
| return new SegmentInfoFormat() { | ||
| @Override | ||
| public SegmentInfo read(Directory directory, String segmentName, byte[] segmentID, IOContext context) throws IOException { | ||
| return wrap(wrapped.read(directory, segmentName, segmentID, context)); | ||
| return wrap(wrappedFormat.read(directory, segmentName, segmentID, context)); | ||
| } | ||
|  | ||
| @Override | ||
| public void write(Directory dir, SegmentInfo info, IOContext ioContext) throws IOException { | ||
| wrapped.write(dir, info, ioContext); | ||
| wrappedFormat.write(dir, info, ioContext); | ||
| } | ||
| }; | ||
| } | ||
|  | ||
| protected static FieldInfosFormat wrap(FieldInfosFormat wrapped) { | ||
| return new FieldInfosFormat() { | ||
| this.postingsFormat = new PerFieldPostingsFormat() { | ||
| @Override | ||
| public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext iocontext) | ||
| throws IOException { | ||
| return filterFields(wrapped.read(directory, segmentInfo, segmentSuffix, iocontext)); | ||
| } | ||
|  | ||
| @Override | ||
| public void write(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos infos, IOContext context) | ||
| throws IOException { | ||
| wrapped.write(directory, segmentInfo, segmentSuffix, infos, context); | ||
| public PostingsFormat getPostingsFormatForField(String field) { | ||
| throw new UnsupportedOperationException("Old codecs can't be used for writing"); | ||
| } | ||
| }; | ||
| } | ||
|  | ||
| @Override | ||
| public final FieldInfosFormat fieldInfosFormat() { | ||
| return fieldInfosFormat; | ||
| } | ||
|  | ||
| @Override | ||
| public final SegmentInfoFormat segmentInfoFormat() { | ||
| return segmentInfosFormat; | ||
| } | ||
|  | ||
| @Override | ||
| public PostingsFormat postingsFormat() { | ||
| return postingsFormat; | ||
| } | ||
|  | ||
| /** | ||
| * This method is not supported for archive indices and older codecs and will always throw an {@link UnsupportedOperationException}. | ||
| * This method is never called in practice, as we rewrite field infos to override the info about which features are present in | ||
| * the index. Even if norms are present, field info lies about it. | ||
| * | ||
| * @return nothing, as this method always throws an exception | ||
| * @throws UnsupportedOperationException always thrown to indicate that this method is not supported | ||
| */ | ||
| @Override | ||
| public final NormsFormat normsFormat() { | ||
| throw new UnsupportedOperationException(); | ||
| } | ||
|  | ||
| /** | ||
| * This method is not supported for archive indices and older codecs and will always throw an {@link UnsupportedOperationException}. | ||
| * This method is never called in practice, as we rewrite field infos to override the info about which features are present in | ||
| * the index. Even if term vectors are present, field info lies about it. | ||
| * | ||
| * @return nothing, as this method always throws an exception | ||
| * @throws UnsupportedOperationException always thrown to indicate that this method is not supported | ||
| */ | ||
| @Override | ||
| public final TermVectorsFormat termVectorsFormat() { | ||
| throw new UnsupportedOperationException(); | ||
| } | ||
|  | ||
| /** | ||
| * This method is not supported for archive indices and older codecs and will always throw an {@link UnsupportedOperationException}. | ||
| * KNN vectors can't be present because they are not supported in any of the Lucene versions that we support for archive indices. | ||
| * | ||
| * @return nothing, as this method always throws an exception | ||
| * @throws UnsupportedOperationException always thrown to indicate that this method is not supported | ||
| */ | ||
| @Override | ||
| public final KnnVectorsFormat knnVectorsFormat() { | ||
| throw new UnsupportedOperationException(); | ||
| } | ||
|  | ||
| /** | ||
| * Returns the original {@link SegmentInfoFormat} used by this codec. | ||
| * This method should be implemented by subclasses to provide the specific | ||
| * {@link SegmentInfoFormat} that this codec is intended to use. | ||
| * | ||
| * @return the original {@link SegmentInfoFormat} used by this codec | ||
| */ | ||
| protected abstract SegmentInfoFormat originalSegmentInfoFormat(); | ||
|  | ||
| /** | ||
| * Returns the original {@link FieldInfosFormat} used by this codec. | ||
| * This method should be implemented by subclasses to provide the specific | ||
| * {@link FieldInfosFormat} that this codec is intended to use. | ||
| * | ||
| * @return the original {@link FieldInfosFormat} used by this codec | ||
| */ | ||
| protected abstract FieldInfosFormat originalFieldInfosFormat(); | ||
|         
                  javanna marked this conversation as resolved.
              Show resolved
            Hide resolved | ||
|  | ||
| // mark all fields as no term vectors, no norms, no payloads, and no vectors. | ||
| private static FieldInfos filterFields(FieldInfos fieldInfos) { | ||
| List<FieldInfo> fieldInfoCopy = new ArrayList<>(fieldInfos.size()); | ||
|  | @@ -118,13 +193,14 @@ private static FieldInfos filterFields(FieldInfos fieldInfos) { | |
| } | ||
|  | ||
| public static SegmentInfo wrap(SegmentInfo segmentInfo) { | ||
| final Codec codec = segmentInfo.getCodec(); | ||
| Codec codec = getBackwardCompatibleCodec(segmentInfo.getCodec()); | ||
|  | ||
| final SegmentInfo segmentInfo1 = new SegmentInfo( | ||
| segmentInfo.dir, | ||
| // Use Version.LATEST instead of original version, otherwise SegmentCommitInfo will bark when processing (N-1 limitation) | ||
| // TODO: perhaps store the original version information in attributes so that we can retrieve it later when needed? | ||
| org.apache.lucene.util.Version.LATEST, | ||
| org.apache.lucene.util.Version.LATEST, | ||
| Version.LATEST, | ||
| Version.LATEST, | ||
| segmentInfo.name, | ||
| segmentInfo.maxDoc(), | ||
| segmentInfo.getUseCompoundFile(), | ||
|  | @@ -139,6 +215,29 @@ public static SegmentInfo wrap(SegmentInfo segmentInfo) { | |
| return segmentInfo1; | ||
| } | ||
|  | ||
| /** | ||
| * Returns a backward-compatible codec for the given codec. If the codec is one of the known Lucene 7.0 or 8.x codecs, | ||
| * it returns a corresponding read-only backward-compatible codec. Otherwise, it returns the original codec | ||
| * (a {@code null} codec is returned as {@code null}). | ||
|         
                  javanna marked this conversation as resolved.
              Show resolved
            Hide resolved | ||
| * Lucene 8.x codecs are still shipped with the current version of Lucene. | ||
| * Earlier codecs are provided directly by us; they are also read-only and backward-compatible, but they don't require the renaming. | ||
| * | ||
| * This switch is only for indices created in ES 6.x and later written to in ES 7.x (Lucene 8.x). Indices created | ||
| * in ES 7.x can be read directly by ES if marked read-only, without going through archive indices. | ||
| */ | ||
| @UpdateForV10(owner = UpdateForV10.Owner.SEARCH_FOUNDATIONS) | ||
| private static Codec getBackwardCompatibleCodec(Codec codec) { | ||
| if (codec == null) return null; | ||
|  | ||
| return switch (codec.getClass().getSimpleName()) { | ||
| case "Lucene70Codec" -> new BWCLucene70Codec(); | ||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hey @drempapis I missed this during review, not a huge deal, but wrapping Lucene70Codec is not required here. Lucene70Codec is no longer shipped with Lucene 10.0, we rather ship it, and it's already extending BWCCodec. I would instead check that the codec we get extends from BWCCodec. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's true; I passed it from the debugger to verify it. I'll create a pr to adjust this. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ++ thanks, ping me for review, it'll be quicker this time :) | ||
| case "Lucene80Codec" -> new BWCLucene80Codec(); | ||
| case "Lucene84Codec" -> new BWCLucene84Codec(); | ||
| case "Lucene86Codec" -> new BWCLucene86Codec(); | ||
| case "Lucene87Codec" -> new BWCLucene87Codec(); | ||
| default -> codec; | ||
| }; | ||
| } | ||
|  | ||
| /** | ||
| * In-memory postings format that shows no postings available. | ||
| */ | ||
|  | ||
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
|  | @@ -47,7 +47,7 @@ public class MetadataOnlyBKDReader extends PointValues { | |
| final int docCount; | ||
| final int version; | ||
|  | ||
| public MetadataOnlyBKDReader(IndexInput metaIn) throws IOException { | ||
| public MetadataOnlyBKDReader(IndexInput metaIn, boolean isVersionPost86) throws IOException { | ||
| version = CodecUtil.checkHeader(metaIn, "BKD", VERSION_START, VERSION_CURRENT); | ||
| final int numDims = metaIn.readVInt(); | ||
| final int numIndexDims; | ||
|  | @@ -85,6 +85,14 @@ public MetadataOnlyBKDReader(IndexInput metaIn) throws IOException { | |
|  | ||
| pointCount = metaIn.readVLong(); | ||
| docCount = metaIn.readVInt(); | ||
|  | ||
| // This code has been introduced to process IndexInput created with Lucene86Codec+ | ||
|          | ||
| if (isVersionPost86) { | ||
| metaIn.readVInt(); | ||
| metaIn.readLong(); | ||
| // The following fields are not used in this class, but we need to read them to advance the pointer | ||
| metaIn.readLong(); | ||
| } | ||
|         
                  drempapis marked this conversation as resolved.
              Show resolved
            Hide resolved | ||
| } | ||
|  | ||
| @Override | ||
|  | ||
Uh oh!
There was an error while loading. Please reload this page.