Skip to content

Commit d61d01a

Browse files
drempapisomricohenn
authored and committed
Support 7x segments as archive in 8x / 9x (elastic#119503)
Added BWCLucene8*Codecs wrapper classes for the lucene8* equivalents. A BWC wrapper is initialized for archive indices and provides read-only capabilities for an index.
1 parent fc1030d commit d61d01a

File tree

41 files changed

+1162
-641
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+1162
-641
lines changed

docs/changelog/119503.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 119503
2+
summary: Support indices created in ES v6 and updated in ES v7 using different Lucene codecs as archive in the current version.
3+
area: Search
4+
type: bug
5+
issues:
6+
- 117042

x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/BWCCodec.java

Lines changed: 131 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import org.apache.lucene.codecs.PostingsFormat;
1818
import org.apache.lucene.codecs.SegmentInfoFormat;
1919
import org.apache.lucene.codecs.TermVectorsFormat;
20+
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
2021
import org.apache.lucene.index.FieldInfo;
2122
import org.apache.lucene.index.FieldInfos;
2223
import org.apache.lucene.index.Fields;
@@ -26,6 +27,13 @@
2627
import org.apache.lucene.index.Terms;
2728
import org.apache.lucene.store.Directory;
2829
import org.apache.lucene.store.IOContext;
30+
import org.apache.lucene.util.Version;
31+
import org.elasticsearch.core.UpdateForV10;
32+
import org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.BWCLucene70Codec;
33+
import org.elasticsearch.xpack.lucene.bwc.codecs.lucene80.BWCLucene80Codec;
34+
import org.elasticsearch.xpack.lucene.bwc.codecs.lucene84.BWCLucene84Codec;
35+
import org.elasticsearch.xpack.lucene.bwc.codecs.lucene86.BWCLucene86Codec;
36+
import org.elasticsearch.xpack.lucene.bwc.codecs.lucene87.BWCLucene87Codec;
2937

3038
import java.io.IOException;
3139
import java.util.ArrayList;
@@ -37,55 +45,122 @@
3745
*/
3846
public abstract class BWCCodec extends Codec {
3947

48+
private final FieldInfosFormat fieldInfosFormat;
49+
private final SegmentInfoFormat segmentInfosFormat;
50+
private final PostingsFormat postingsFormat;
51+
4052
protected BWCCodec(String name) {
4153
super(name);
42-
}
4354

44-
@Override
45-
public NormsFormat normsFormat() {
46-
throw new UnsupportedOperationException();
47-
}
55+
this.fieldInfosFormat = new FieldInfosFormat() {
56+
final FieldInfosFormat wrappedFormat = originalFieldInfosFormat();
4857

49-
@Override
50-
public TermVectorsFormat termVectorsFormat() {
51-
throw new UnsupportedOperationException();
52-
}
58+
@Override
59+
public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext iocontext)
60+
throws IOException {
61+
return filterFields(wrappedFormat.read(directory, segmentInfo, segmentSuffix, iocontext));
62+
}
5363

54-
@Override
55-
public KnnVectorsFormat knnVectorsFormat() {
56-
throw new UnsupportedOperationException();
57-
}
64+
@Override
65+
public void write(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos infos, IOContext context)
66+
throws IOException {
67+
wrappedFormat.write(directory, segmentInfo, segmentSuffix, infos, context);
68+
}
69+
};
70+
71+
this.segmentInfosFormat = new SegmentInfoFormat() {
72+
final SegmentInfoFormat wrappedFormat = originalSegmentInfoFormat();
5873

59-
protected static SegmentInfoFormat wrap(SegmentInfoFormat wrapped) {
60-
return new SegmentInfoFormat() {
6174
@Override
6275
public SegmentInfo read(Directory directory, String segmentName, byte[] segmentID, IOContext context) throws IOException {
63-
return wrap(wrapped.read(directory, segmentName, segmentID, context));
76+
return wrap(wrappedFormat.read(directory, segmentName, segmentID, context));
6477
}
6578

6679
@Override
6780
public void write(Directory dir, SegmentInfo info, IOContext ioContext) throws IOException {
68-
wrapped.write(dir, info, ioContext);
81+
wrappedFormat.write(dir, info, ioContext);
6982
}
7083
};
71-
}
7284

73-
protected static FieldInfosFormat wrap(FieldInfosFormat wrapped) {
74-
return new FieldInfosFormat() {
85+
this.postingsFormat = new PerFieldPostingsFormat() {
7586
@Override
76-
public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext iocontext)
77-
throws IOException {
78-
return filterFields(wrapped.read(directory, segmentInfo, segmentSuffix, iocontext));
79-
}
80-
81-
@Override
82-
public void write(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos infos, IOContext context)
83-
throws IOException {
84-
wrapped.write(directory, segmentInfo, segmentSuffix, infos, context);
87+
public PostingsFormat getPostingsFormatForField(String field) {
88+
throw new UnsupportedOperationException("Old codecs can't be used for writing");
8589
}
8690
};
8791
}
8892

93+
@Override
94+
public final FieldInfosFormat fieldInfosFormat() {
95+
return fieldInfosFormat;
96+
}
97+
98+
@Override
99+
public final SegmentInfoFormat segmentInfoFormat() {
100+
return segmentInfosFormat;
101+
}
102+
103+
@Override
104+
public PostingsFormat postingsFormat() {
105+
return postingsFormat;
106+
}
107+
108+
/**
109+
* This method is not supported for archive indices and older codecs and will always throw an {@link UnsupportedOperationException}.
110+
* This method is never called in practice, as we rewrite field infos to override the info about which features are present in
111+
* the index. Even if norms are present, field info lies about it.
112+
*
113+
* @return nothing, as this method always throws an exception
114+
* @throws UnsupportedOperationException always thrown to indicate that this method is not supported
115+
*/
116+
@Override
117+
public final NormsFormat normsFormat() {
118+
throw new UnsupportedOperationException();
119+
}
120+
121+
/**
122+
* This method is not supported for archive indices and older codecs and will always throw an {@link UnsupportedOperationException}.
123+
* This method is never called in practice, as we rewrite field infos to override the info about which features are present in
124+
* the index. Even if term vectors are present, field info lies about it.
125+
*
126+
* @return nothing, as this method always throws an exception
127+
* @throws UnsupportedOperationException always thrown to indicate that this method is not supported
128+
*/
129+
@Override
130+
public final TermVectorsFormat termVectorsFormat() {
131+
throw new UnsupportedOperationException();
132+
}
133+
134+
/**
135+
* This method is not supported for archive indices and older codecs and will always throw an {@link UnsupportedOperationException}.
136+
* KNN vectors can't be present because they are not supported in any of the Lucene versions that we support for archive indices.
137+
*
138+
* @return nothing, as this method always throws an exception
139+
* @throws UnsupportedOperationException always thrown to indicate that this method is not supported
140+
*/
141+
@Override
142+
public final KnnVectorsFormat knnVectorsFormat() {
143+
throw new UnsupportedOperationException();
144+
}
145+
146+
/**
147+
* Returns the original {@link SegmentInfoFormat} used by this codec.
148+
* This method should be implemented by subclasses to provide the specific
149+
* {@link SegmentInfoFormat} that this codec is intended to use.
150+
*
151+
* @return the original {@link SegmentInfoFormat} used by this codec
152+
*/
153+
protected abstract SegmentInfoFormat originalSegmentInfoFormat();
154+
155+
/**
156+
* Returns the original {@link FieldInfosFormat} used by this codec.
157+
* This method should be implemented by subclasses to provide the specific
158+
* {@link FieldInfosFormat} that this codec is intended to use.
159+
*
160+
* @return the original {@link FieldInfosFormat} used by this codec
161+
*/
162+
protected abstract FieldInfosFormat originalFieldInfosFormat();
163+
89164
// mark all fields as no term vectors, no norms, no payloads, and no vectors.
90165
private static FieldInfos filterFields(FieldInfos fieldInfos) {
91166
List<FieldInfo> fieldInfoCopy = new ArrayList<>(fieldInfos.size());
@@ -118,13 +193,14 @@ private static FieldInfos filterFields(FieldInfos fieldInfos) {
118193
}
119194

120195
public static SegmentInfo wrap(SegmentInfo segmentInfo) {
121-
final Codec codec = segmentInfo.getCodec();
196+
Codec codec = getBackwardCompatibleCodec(segmentInfo.getCodec());
197+
122198
final SegmentInfo segmentInfo1 = new SegmentInfo(
123199
segmentInfo.dir,
124200
// Use Version.LATEST instead of original version, otherwise SegmentCommitInfo will bark when processing (N-1 limitation)
125201
// TODO: perhaps store the original version information in attributes so that we can retrieve it later when needed?
126-
org.apache.lucene.util.Version.LATEST,
127-
org.apache.lucene.util.Version.LATEST,
202+
Version.LATEST,
203+
Version.LATEST,
128204
segmentInfo.name,
129205
segmentInfo.maxDoc(),
130206
segmentInfo.getUseCompoundFile(),
@@ -139,6 +215,29 @@ public static SegmentInfo wrap(SegmentInfo segmentInfo) {
139215
return segmentInfo1;
140216
}
141217

218+
/**
219+
* Returns a backward-compatible codec for the given codec. If the codec is one of the known Lucene 7.0 or 8.x codecs,
220+
* it returns a corresponding read-only backward-compatible codec. Otherwise, it returns the original codec.
221+
* Lucene 8.x codecs are still shipped with the current version of Lucene.
222+
* Earlier codecs, which we provide directly, are also read-only and backward-compatible, but they don't require the renaming.
223+
*
224+
* This switch is only for indices created in ES 6.x and later written to in ES 7.x (Lucene 8.x). Indices created
225+
* in ES 7.x can be read directly by ES if marked read-only, without going through archive indices.
226+
*/
227+
@UpdateForV10(owner = UpdateForV10.Owner.SEARCH_FOUNDATIONS)
228+
private static Codec getBackwardCompatibleCodec(Codec codec) {
229+
if (codec == null) return null;
230+
231+
return switch (codec.getClass().getSimpleName()) {
232+
case "Lucene70Codec" -> new BWCLucene70Codec();
233+
case "Lucene80Codec" -> new BWCLucene80Codec();
234+
case "Lucene84Codec" -> new BWCLucene84Codec();
235+
case "Lucene86Codec" -> new BWCLucene86Codec();
236+
case "Lucene87Codec" -> new BWCLucene87Codec();
237+
default -> codec;
238+
};
239+
}
240+
142241
/**
143242
* In-memory postings format that shows no postings available.
144243
*/

x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene60/Lucene60Codec.java

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,7 @@
4444
*/
4545
@Deprecated
4646
public class Lucene60Codec extends BWCCodec {
47-
private final FieldInfosFormat fieldInfosFormat = wrap(new Lucene60FieldInfosFormat());
48-
private final SegmentInfoFormat segmentInfosFormat = wrap(new Lucene50SegmentInfoFormat());
47+
4948
private final LiveDocsFormat liveDocsFormat = new Lucene50LiveDocsFormat();
5049
private final CompoundFormat compoundFormat = new Lucene50CompoundFormat();
5150
private final StoredFieldsFormat storedFieldsFormat;
@@ -68,18 +67,18 @@ public Lucene60Codec() {
6867
}
6968

7069
@Override
71-
public final StoredFieldsFormat storedFieldsFormat() {
72-
return storedFieldsFormat;
70+
protected FieldInfosFormat originalFieldInfosFormat() {
71+
return new Lucene60FieldInfosFormat();
7372
}
7473

7574
@Override
76-
public final FieldInfosFormat fieldInfosFormat() {
77-
return fieldInfosFormat;
75+
protected SegmentInfoFormat originalSegmentInfoFormat() {
76+
return new Lucene50SegmentInfoFormat();
7877
}
7978

8079
@Override
81-
public SegmentInfoFormat segmentInfoFormat() {
82-
return segmentInfosFormat;
80+
public final StoredFieldsFormat storedFieldsFormat() {
81+
return storedFieldsFormat;
8382
}
8483

8584
@Override

x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene60/Lucene60MetadataOnlyPointsFormat.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import java.io.IOException;
2929

3030
/**
31+
* This is a fork of {@link org.apache.lucene.backward_codecs.lucene60.Lucene60PointsFormat}
3132
* Allows reading metadata only from Lucene 6.0 point format
3233
**/
3334
public class Lucene60MetadataOnlyPointsFormat extends PointsFormat {

x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene60/Lucene60MetadataOnlyPointsReader.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,10 @@
3434
import java.util.HashMap;
3535
import java.util.Map;
3636

37-
/** Reads the metadata of point values previously written with Lucene60PointsWriter */
37+
/**
38+
* This is a fork of {@link org.apache.lucene.backward_codecs.lucene60.Lucene60PointsReader}
39+
* Reads the metadata of point values previously written with Lucene60PointsWriter
40+
*/
3841
public final class Lucene60MetadataOnlyPointsReader extends PointsReader {
3942
final IndexInput dataIn;
4043
final SegmentReadState readState;
@@ -105,7 +108,7 @@ public Lucene60MetadataOnlyPointsReader(SegmentReadState readState) throws IOExc
105108
int fieldNumber = ent.getKey();
106109
long fp = ent.getValue();
107110
dataIn.seek(fp);
108-
PointValues reader = new MetadataOnlyBKDReader(dataIn);
111+
PointValues reader = new MetadataOnlyBKDReader(dataIn, false);
109112
readers.put(fieldNumber, reader);
110113
}
111114

x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene60/MetadataOnlyBKDReader.java

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ public class MetadataOnlyBKDReader extends PointValues {
4747
final int docCount;
4848
final int version;
4949

50-
public MetadataOnlyBKDReader(IndexInput metaIn) throws IOException {
50+
public MetadataOnlyBKDReader(IndexInput metaIn, boolean isVersionPost86) throws IOException {
5151
version = CodecUtil.checkHeader(metaIn, "BKD", VERSION_START, VERSION_CURRENT);
5252
final int numDims = metaIn.readVInt();
5353
final int numIndexDims;
@@ -85,6 +85,15 @@ public MetadataOnlyBKDReader(IndexInput metaIn) throws IOException {
8585

8686
pointCount = metaIn.readVLong();
8787
docCount = metaIn.readVInt();
88+
89+
// This code has been introduced to process IndexInput created with Lucene86Codec+. This is not necessary
90+
// in the read-only version for older formats.
91+
if (isVersionPost86) {
92+
metaIn.readVInt();
93+
metaIn.readLong();
94+
// The following fields are not used in this class, but we need to read them to advance the pointer
95+
metaIn.readLong();
96+
}
8897
}
8998

9099
@Override

x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene62/Lucene62Codec.java

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,7 @@
4444
*/
4545
@Deprecated
4646
public class Lucene62Codec extends BWCCodec {
47-
private final FieldInfosFormat fieldInfosFormat = wrap(new Lucene60FieldInfosFormat());
48-
private final SegmentInfoFormat segmentInfosFormat = wrap(new Lucene62SegmentInfoFormat());
47+
4948
private final LiveDocsFormat liveDocsFormat = new Lucene50LiveDocsFormat();
5049
private final CompoundFormat compoundFormat = new Lucene50CompoundFormat();
5150
private final StoredFieldsFormat storedFieldsFormat;
@@ -68,18 +67,18 @@ public Lucene62Codec() {
6867
}
6968

7069
@Override
71-
public final StoredFieldsFormat storedFieldsFormat() {
72-
return storedFieldsFormat;
70+
protected FieldInfosFormat originalFieldInfosFormat() {
71+
return new Lucene60FieldInfosFormat();
7372
}
7473

7574
@Override
76-
public final FieldInfosFormat fieldInfosFormat() {
77-
return fieldInfosFormat;
75+
protected SegmentInfoFormat originalSegmentInfoFormat() {
76+
return new Lucene62SegmentInfoFormat();
7877
}
7978

8079
@Override
81-
public SegmentInfoFormat segmentInfoFormat() {
82-
return segmentInfosFormat;
80+
public final StoredFieldsFormat storedFieldsFormat() {
81+
return storedFieldsFormat;
8382
}
8483

8584
@Override

0 commit comments

Comments
 (0)