- 
                Notifications
    You must be signed in to change notification settings 
- Fork 25.6k
Support 7x segments as archive in 8x / 9x #119503
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 62 commits
34f2f34
              a867385
              69b6b6e
              81c9fce
              30e2cb4
              000a7aa
              2c53ff6
              d3f18a5
              b3afec7
              5fef3eb
              1596495
              1ec0501
              394dae3
              a279780
              ecb162d
              2448af7
              9d9b6af
              02ddf82
              bf79ef3
              590091c
              528f220
              f1c91bd
              ba67bff
              6d1c65a
              2c3654a
              58d4762
              bc38090
              87f790f
              4334bc4
              ef0447b
              a7a8a84
              d91844d
              fe009d7
              a747a40
              f3e47ae
              7fdc027
              5b831ec
              338dbb1
              25b216e
              50530a2
              7033ed0
              9be8cea
              420e646
              48ab5b4
              ad70e72
              6b479e2
              2bc0107
              99e816f
              3196523
              f3b3d00
              f52789e
              ec243b7
              900fcb8
              46d408f
              2abea64
              f93eb9b
              f7e0012
              2777916
              7d71f93
              623bd7b
              13806d1
              3fee6af
              008d767
              af3fff9
              a41bbad
              6010cc9
              023e8ea
              8115a61
              0609411
              11ab217
              d9dedc7
              4f85349
              6b2361e
              41dcc1c
              2343576
              2df7f62
              de31af3
              4c3f29a
              6b95b0c
              74cb102
              6f8ff50
              3ebc9f4
              eadf8cf
              41dc557
              2ca8a9f
              e0e1740
              1ba2eaa
              6820c35
              8280559
              806d442
              3e0de6d
              a57c0d1
              bb561e2
              03bbc25
              e82375e
              e98aac7
              b39ef53
              ce4c1c9
              69fc6f6
              8a66843
              ff92f92
              c0f4d18
              02ac377
              7941cbc
              bb93eac
              42aa647
              9c9dc66
              b93bf88
              1cf1afc
              6e31623
              f155836
              c668a3a
              bd258ab
              c96cd31
              6b26b66
              b20d791
              58be89d
              1539844
              80919b6
              5a4ef59
              d42aa51
              668ccb3
              952ec01
              b636061
              ca586f9
              6783285
              e911ed2
              7770291
              44cd893
              b9eb516
              bb4458d
              6fec899
              ca0f256
              b3bbc6a
              36f62ca
              36edf83
              2952e62
              43465fc
              File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| pr: 119503 | ||
| summary: Support 7x segments as archive in 8x / 9x | ||
| area: Search | ||
| type: bug | ||
| issues: | ||
| - 117042 | ||
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
|  | @@ -17,6 +17,7 @@ | |
| import org.apache.lucene.codecs.PostingsFormat; | ||
| import org.apache.lucene.codecs.SegmentInfoFormat; | ||
| import org.apache.lucene.codecs.TermVectorsFormat; | ||
| import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat; | ||
| import org.apache.lucene.index.FieldInfo; | ||
| import org.apache.lucene.index.FieldInfos; | ||
| import org.apache.lucene.index.Fields; | ||
|  | @@ -26,6 +27,11 @@ | |
| import org.apache.lucene.index.Terms; | ||
| import org.apache.lucene.store.Directory; | ||
| import org.apache.lucene.store.IOContext; | ||
| import org.apache.lucene.util.Version; | ||
| import org.elasticsearch.xpack.lucene.bwc.codecs.lucene80.BWCLucene80Codec; | ||
| import org.elasticsearch.xpack.lucene.bwc.codecs.lucene84.BWCLucene84Codec; | ||
| import org.elasticsearch.xpack.lucene.bwc.codecs.lucene86.BWCLucene86Codec; | ||
| import org.elasticsearch.xpack.lucene.bwc.codecs.lucene87.BWCLucene87Codec; | ||
|  | ||
| import java.io.IOException; | ||
| import java.util.ArrayList; | ||
|  | @@ -41,18 +47,25 @@ protected BWCCodec(String name) { | |
| super(name); | ||
| } | ||
|  | ||
| protected final PostingsFormat postingsFormat = new PerFieldPostingsFormat() { | ||
|         
                  javanna marked this conversation as resolved.
              Outdated
          
            Show resolved
            Hide resolved | ||
| @Override | ||
| public PostingsFormat getPostingsFormatForField(String field) { | ||
| throw new UnsupportedOperationException("Old codecs can't be used for writing"); | ||
|          | ||
| } | ||
| }; | ||
|  | ||
| @Override | ||
| public NormsFormat normsFormat() { | ||
| public final NormsFormat normsFormat() { | ||
| throw new UnsupportedOperationException(); | ||
| } | ||
|  | ||
| @Override | ||
| public TermVectorsFormat termVectorsFormat() { | ||
| public final TermVectorsFormat termVectorsFormat() { | ||
| throw new UnsupportedOperationException(); | ||
| } | ||
|  | ||
| @Override | ||
| public KnnVectorsFormat knnVectorsFormat() { | ||
| public final KnnVectorsFormat knnVectorsFormat() { | ||
| throw new UnsupportedOperationException(); | ||
| } | ||
|         
                  javanna marked this conversation as resolved.
              Show resolved
            Hide resolved | ||
|  | ||
|  | @@ -118,13 +131,14 @@ private static FieldInfos filterFields(FieldInfos fieldInfos) { | |
| } | ||
|  | ||
| public static SegmentInfo wrap(SegmentInfo segmentInfo) { | ||
| final Codec codec = segmentInfo.getCodec(); | ||
| Codec codec = getBackwardCompatibleCodec(segmentInfo.getCodec()); | ||
|  | ||
| final SegmentInfo segmentInfo1 = new SegmentInfo( | ||
| segmentInfo.dir, | ||
| // Use Version.LATEST instead of original version, otherwise SegmentCommitInfo will bark when processing (N-1 limitation) | ||
| // TODO: perhaps store the original version information in attributes so that we can retrieve it later when needed? | ||
| org.apache.lucene.util.Version.LATEST, | ||
| org.apache.lucene.util.Version.LATEST, | ||
| Version.LATEST, | ||
| Version.LATEST, | ||
| segmentInfo.name, | ||
| segmentInfo.maxDoc(), | ||
| segmentInfo.getUseCompoundFile(), | ||
|  | @@ -139,6 +153,20 @@ public static SegmentInfo wrap(SegmentInfo segmentInfo) { | |
| return segmentInfo1; | ||
| } | ||
|  | ||
| // Special handling for Lucene8xCodecs (which are currently bundled with Lucene) | ||
| // Use BWCLucene8xCodec instead as that one extends BWCCodec (similar to all other older codecs) | ||
|         
                  javanna marked this conversation as resolved.
              Outdated
          
            Show resolved
            Hide resolved | ||
| private static Codec getBackwardCompatibleCodec(Codec codec) { | ||
| if (codec == null) return null; | ||
|  | ||
| return switch (codec.getClass().getSimpleName()) { | ||
| case "Lucene80Codec" -> new BWCLucene80Codec(); | ||
| case "Lucene84Codec" -> new BWCLucene84Codec(); | ||
| case "Lucene86Codec" -> new BWCLucene86Codec(); | ||
| case "Lucene87Codec" -> new BWCLucene87Codec(); | ||
| default -> codec; | ||
| }; | ||
| } | ||
|  | ||
| /** | ||
| * In-memory postings format that shows no postings available. | ||
| */ | ||
|  | ||
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
|  | @@ -47,6 +47,10 @@ public class MetadataOnlyBKDReader extends PointValues { | |
| final int docCount; | ||
| final int version; | ||
|  | ||
| final int numIndexBytes; | ||
| final long minLeafBlockFP; | ||
| private final long indexStartPointer; | ||
|         
                  javanna marked this conversation as resolved.
              Outdated
          
            Show resolved
            Hide resolved | ||
|  | ||
| public MetadataOnlyBKDReader(IndexInput metaIn) throws IOException { | ||
| version = CodecUtil.checkHeader(metaIn, "BKD", VERSION_START, VERSION_CURRENT); | ||
| final int numDims = metaIn.readVInt(); | ||
|  | @@ -85,6 +89,16 @@ public MetadataOnlyBKDReader(IndexInput metaIn) throws IOException { | |
|  | ||
| pointCount = metaIn.readVLong(); | ||
| docCount = metaIn.readVInt(); | ||
|  | ||
| numIndexBytes = metaIn.readVInt(); | ||
| if (version >= VERSION_META_FILE) { | ||
|          | ||
| minLeafBlockFP = metaIn.readLong(); | ||
| indexStartPointer = metaIn.readLong(); | ||
| } else { | ||
| indexStartPointer = metaIn.getFilePointer(); | ||
| minLeafBlockFP = metaIn.readVLong(); | ||
| metaIn.seek(indexStartPointer); | ||
| } | ||
|         
                  drempapis marked this conversation as resolved.
              Show resolved
            Hide resolved | ||
| } | ||
|  | ||
| @Override | ||
|  | ||
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
| @@ -0,0 +1,100 @@ | ||
| /* | ||
| * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
| * or more contributor license agreements. Licensed under the Elastic License | ||
| * 2.0; you may not use this file except in compliance with the Elastic License | ||
| * 2.0. | ||
| */ | ||
|  | ||
| package org.elasticsearch.xpack.lucene.bwc.codecs.lucene80; | ||
|  | ||
| import org.apache.lucene.backward_codecs.lucene50.Lucene50CompoundFormat; | ||
| import org.apache.lucene.backward_codecs.lucene50.Lucene50LiveDocsFormat; | ||
| import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat; | ||
| import org.apache.lucene.backward_codecs.lucene60.Lucene60FieldInfosFormat; | ||
| import org.apache.lucene.backward_codecs.lucene70.Lucene70SegmentInfoFormat; | ||
| import org.apache.lucene.backward_codecs.lucene80.Lucene80DocValuesFormat; | ||
| import org.apache.lucene.codecs.CompoundFormat; | ||
| import org.apache.lucene.codecs.DocValuesFormat; | ||
| import org.apache.lucene.codecs.FieldInfosFormat; | ||
| import org.apache.lucene.codecs.LiveDocsFormat; | ||
| import org.apache.lucene.codecs.PointsFormat; | ||
| import org.apache.lucene.codecs.PostingsFormat; | ||
| import org.apache.lucene.codecs.SegmentInfoFormat; | ||
| import org.apache.lucene.codecs.StoredFieldsFormat; | ||
| import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat; | ||
| import org.elasticsearch.xpack.lucene.bwc.codecs.BWCCodec; | ||
| import org.elasticsearch.xpack.lucene.bwc.codecs.lucene60.Lucene60MetadataOnlyPointsFormat; | ||
|  | ||
| /** | ||
| * Implements the Lucene 8.0 index format. Loaded via SPI for indices created/written with Lucene 8.0.0-8.3.0 | ||
| * (Elasticsearch [7.0.0-7.5.2]), mounted as archive indices in Elasticsearch 8.x / 9.x. | ||
| */ | ||
| public class BWCLucene80Codec extends BWCCodec { | ||
|  | ||
| private final FieldInfosFormat fieldInfos = wrap(new Lucene60FieldInfosFormat()); | ||
| private final SegmentInfoFormat segmentInfos = wrap(new Lucene70SegmentInfoFormat()); | ||
| private final LiveDocsFormat liveDocs = new Lucene50LiveDocsFormat(); | ||
| private final CompoundFormat compound = new Lucene50CompoundFormat(); | ||
|  | ||
| // The single doc values format that the per-field wrapper below hands out for every field. | ||
| private final DocValuesFormat lucene80DocValues = new Lucene80DocValuesFormat(); | ||
| private final DocValuesFormat perFieldDocValues = new PerFieldDocValuesFormat() { | ||
| @Override | ||
| public DocValuesFormat getDocValuesFormatForField(String field) { | ||
| return lucene80DocValues; | ||
| } | ||
| }; | ||
|  | ||
| private final StoredFieldsFormat storedFields = new Lucene50StoredFieldsFormat(Lucene50StoredFieldsFormat.Mode.BEST_SPEED); | ||
| private final PointsFormat points = new Lucene60MetadataOnlyPointsFormat(); | ||
|  | ||
| /** | ||
| * No-arg constructor, needed for SPI loading. Delegates to {@link #BWCLucene80Codec(String)} with the name | ||
| * {@code "BWCLucene80Codec"}. | ||
| */ | ||
| @SuppressWarnings("unused") | ||
| public BWCLucene80Codec() { | ||
| this("BWCLucene80Codec"); | ||
| } | ||
|  | ||
| /** | ||
| * @param name the codec name handed to the {@link BWCCodec} constructor | ||
| */ | ||
| public BWCLucene80Codec(String name) { | ||
| super(name); | ||
| } | ||
|  | ||
| /** Returns the {@link Lucene50StoredFieldsFormat} created in {@code BEST_SPEED} mode. */ | ||
| @Override | ||
| public final StoredFieldsFormat storedFieldsFormat() { | ||
| return storedFields; | ||
| } | ||
|  | ||
| /** Returns the {@code postingsFormat} field inherited from {@link BWCCodec}; this class declares none of its own. */ | ||
| @Override | ||
| public final PostingsFormat postingsFormat() { | ||
| return postingsFormat; | ||
| } | ||
|  | ||
| /** Returns the wrapped {@link Lucene60FieldInfosFormat}. */ | ||
| @Override | ||
| public final FieldInfosFormat fieldInfosFormat() { | ||
| return fieldInfos; | ||
| } | ||
|  | ||
| /** Returns the wrapped {@link Lucene70SegmentInfoFormat}. */ | ||
| @Override | ||
| public final SegmentInfoFormat segmentInfoFormat() { | ||
| return segmentInfos; | ||
| } | ||
|  | ||
| /** Returns the {@link Lucene50LiveDocsFormat}. */ | ||
| @Override | ||
| public final LiveDocsFormat liveDocsFormat() { | ||
| return liveDocs; | ||
| } | ||
|  | ||
| /** Returns the {@link Lucene50CompoundFormat}. */ | ||
| @Override | ||
| public final CompoundFormat compoundFormat() { | ||
| return compound; | ||
| } | ||
|  | ||
| /** Returns the {@link Lucene60MetadataOnlyPointsFormat}. */ | ||
| @Override | ||
| public final PointsFormat pointsFormat() { | ||
| return points; | ||
| } | ||
|  | ||
| /** Returns the per-field wrapper, which resolves every field to the {@link Lucene80DocValuesFormat}. */ | ||
| @Override | ||
| public final DocValuesFormat docValuesFormat() { | ||
| return perFieldDocValues; | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
| @@ -0,0 +1,137 @@ | ||
| /* | ||
| * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one | ||
| * or more contributor license agreements. Licensed under the Elastic License | ||
| * 2.0; you may not use this file except in compliance with the Elastic License | ||
| * 2.0. | ||
| */ | ||
|  | ||
| package org.elasticsearch.xpack.lucene.bwc.codecs.lucene84; | ||
|  | ||
| import org.apache.lucene.backward_codecs.lucene50.Lucene50CompoundFormat; | ||
| import org.apache.lucene.backward_codecs.lucene50.Lucene50LiveDocsFormat; | ||
| import org.apache.lucene.backward_codecs.lucene50.Lucene50StoredFieldsFormat; | ||
| import org.apache.lucene.backward_codecs.lucene60.Lucene60FieldInfosFormat; | ||
| import org.apache.lucene.backward_codecs.lucene70.Lucene70SegmentInfoFormat; | ||
| import org.apache.lucene.backward_codecs.lucene80.Lucene80DocValuesFormat; | ||
| import org.apache.lucene.backward_codecs.lucene84.Lucene84PostingsFormat; | ||
| import org.apache.lucene.codecs.CompoundFormat; | ||
| import org.apache.lucene.codecs.DocValuesFormat; | ||
| import org.apache.lucene.codecs.FieldInfosFormat; | ||
| import org.apache.lucene.codecs.LiveDocsFormat; | ||
| import org.apache.lucene.codecs.PointsFormat; | ||
| import org.apache.lucene.codecs.PostingsFormat; | ||
| import org.apache.lucene.codecs.SegmentInfoFormat; | ||
| import org.apache.lucene.codecs.StoredFieldsFormat; | ||
| import org.apache.lucene.codecs.perfield.PerFieldDocValuesFormat; | ||
| import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat; | ||
| import org.elasticsearch.xpack.lucene.bwc.codecs.BWCCodec; | ||
| import org.elasticsearch.xpack.lucene.bwc.codecs.lucene60.Lucene60MetadataOnlyPointsFormat; | ||
|  | ||
| /** | ||
| * Implements the Lucene 8.4 index format. Loaded via SPI for indices created/written with Lucene 8.4.0-8.5.1 | ||
| * (Elasticsearch [7.6.0-7.8.1]), mounted as archive indices in Elasticsearch 8.x / 9.x. | ||
| */ | ||
| public class BWCLucene84Codec extends BWCCodec { | ||
|  | ||
| private final FieldInfosFormat fieldInfosFormat = wrap(new Lucene60FieldInfosFormat()); | ||
| private final SegmentInfoFormat segmentInfosFormat = wrap(new Lucene70SegmentInfoFormat()); | ||
| private final LiveDocsFormat liveDocsFormat = new Lucene50LiveDocsFormat(); | ||
| private final CompoundFormat compoundFormat = new Lucene50CompoundFormat(); | ||
| private final PostingsFormat defaultFormat; | ||
|  | ||
| private final PostingsFormat postingsFormat = new PerFieldPostingsFormat() { | ||
| @Override | ||
| public PostingsFormat getPostingsFormatForField(String field) { | ||
| return BWCLucene84Codec.this.getPostingsFormatForField(field); | ||
| } | ||
| }; | ||
|          | ||
|  | ||
| private final DocValuesFormat docValuesFormat = new PerFieldDocValuesFormat() { | ||
| @Override | ||
| public DocValuesFormat getDocValuesFormatForField(String field) { | ||
| return BWCLucene84Codec.this.getDocValuesFormatForField(field); | ||
| } | ||
| }; | ||
|  | ||
| private final StoredFieldsFormat storedFieldsFormat; | ||
| private final PointsFormat pointsFormat = new Lucene60MetadataOnlyPointsFormat(); | ||
|  | ||
| // Needed for SPI loading | ||
| @SuppressWarnings("unused") | ||
| public BWCLucene84Codec() { | ||
| this("BWCLucene84Codec"); | ||
| } | ||
|  | ||
| public BWCLucene84Codec(String name) { | ||
| super(name); | ||
| this.storedFieldsFormat = new Lucene50StoredFieldsFormat(Lucene50StoredFieldsFormat.Mode.BEST_SPEED); | ||
| this.defaultFormat = new Lucene84PostingsFormat(); | ||
| } | ||
|  | ||
| @Override | ||
| public StoredFieldsFormat storedFieldsFormat() { | ||
| return storedFieldsFormat; | ||
| } | ||
|  | ||
| @Override | ||
| public PostingsFormat postingsFormat() { | ||
| return postingsFormat; | ||
| } | ||
|  | ||
| @Override | ||
| public final FieldInfosFormat fieldInfosFormat() { | ||
| return fieldInfosFormat; | ||
| } | ||
|  | ||
| @Override | ||
| public SegmentInfoFormat segmentInfoFormat() { | ||
| return segmentInfosFormat; | ||
| } | ||
|  | ||
| @Override | ||
| public final LiveDocsFormat liveDocsFormat() { | ||
| return liveDocsFormat; | ||
| } | ||
|  | ||
| @Override | ||
| public CompoundFormat compoundFormat() { | ||
| return compoundFormat; | ||
| } | ||
|  | ||
| @Override | ||
| public PointsFormat pointsFormat() { | ||
| return pointsFormat; | ||
| } | ||
|  | ||
| /** | ||
| * Returns the postings format that should be used for writing new segments of <code>field</code>. | ||
| * | ||
| * <p>The default implementation always returns "Lucene84". | ||
| * | ||
| * <p><b>WARNING:</b> if you subclass, you are responsible for index backwards compatibility: | ||
| * future version of Lucene are only guaranteed to be able to read the default implementation. | ||
| */ | ||
| public PostingsFormat getPostingsFormatForField(String field) { | ||
| return defaultFormat; | ||
| } | ||
|  | ||
| /** | ||
| * Returns the docvalues format that should be used for writing new segments of <code>field</code> | ||
| * . | ||
| * | ||
| * <p>The default implementation always returns "Lucene80". | ||
| * | ||
| * <p><b>WARNING:</b> if you subclass, you are responsible for index backwards compatibility: | ||
| * future version of Lucene are only guaranteed to be able to read the default implementation. | ||
| */ | ||
| public DocValuesFormat getDocValuesFormatForField(String field) { | ||
| return defaultDVFormat; | ||
| } | ||
|         
                  javanna marked this conversation as resolved.
              Outdated
          
            Show resolved
            Hide resolved | ||
|  | ||
| @Override | ||
| public final DocValuesFormat docValuesFormat() { | ||
| return docValuesFormat; | ||
| } | ||
|  | ||
| private final DocValuesFormat defaultDVFormat = new Lucene80DocValuesFormat(); | ||
|          | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This could probably use some more explanation of the cases that this fixes (i.e. archive indices that were mounted in 7x).
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you, @cbuescher, for the feedback. I added a more explanatory summary description. This is still short, but it better describes what the PR does.