Skip to content

Commit 2cdd81f

Browse files
authored
[8.x] Docs and simplifications to support for Lucene ancient versions (#124053) (#124509)
* Docs and simplifications to support for Lucene ancient versions (#124053) The old lucene versions plugin allows users to read indices created by ancient Elasticsearch versions, starting from 5.0. Especially for 5.x, which relied on Lucene 6.x, some special logic is required around postings format support. That revolves around the reading of FSTs, but has the consequence of requiring quite a few forks of other Lucene classes due to their visibility. This commit attempts to add javadocs to clarify the intent of some of these classes. It also includes some simplifications, in that Lucene50PostingsReader can be reused as-is, and some other classes are only needed in tests and hence are moved to the test folder. * iter * iter
1 parent f77ba09 commit 2cdd81f

File tree

18 files changed

+51
-2247
lines changed

18 files changed

+51
-2247
lines changed

x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/LegacyAdaptingPerFieldPostingsFormat.java

Lines changed: 15 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -21,26 +21,21 @@
2121

2222
import org.apache.lucene.codecs.FieldsConsumer;
2323
import org.apache.lucene.codecs.FieldsProducer;
24-
import org.apache.lucene.codecs.NormsProducer;
2524
import org.apache.lucene.codecs.PostingsFormat;
2625
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
2726
import org.apache.lucene.index.FieldInfo;
28-
import org.apache.lucene.index.Fields;
2927
import org.apache.lucene.index.IndexOptions;
30-
import org.apache.lucene.index.MergeState;
3128
import org.apache.lucene.index.SegmentReadState;
3229
import org.apache.lucene.index.SegmentWriteState;
3330
import org.apache.lucene.index.Terms;
3431
import org.elasticsearch.core.IOUtils;
32+
import org.elasticsearch.xpack.lucene.bwc.codecs.lucene50.BWCLucene50PostingsFormat;
3533

36-
import java.io.Closeable;
3734
import java.io.IOException;
38-
import java.util.ArrayList;
3935
import java.util.Collections;
4036
import java.util.HashMap;
4137
import java.util.IdentityHashMap;
4238
import java.util.Iterator;
43-
import java.util.List;
4439
import java.util.Map;
4540
import java.util.TreeMap;
4641

@@ -52,12 +47,10 @@
5247
* latter only supports Lucene 7 and above (as it was shipped with backwards-codecs of Lucene 9 that
5348
* only has support for N-2).
5449
*
55-
* This class can probably be removed once we are on Lucene 10 and Lucene50PostingsFormat is no longer
50+
* This class can be removed once Elasticsearch gets upgraded to Lucene 11 and Lucene50PostingsFormat is no longer
5651
* shipped as part of bwc jars.
57-
*
58-
* Swapping out formats can be done via the {@link #getPostingsFormat(String) method}.
5952
*/
60-
public abstract class LegacyAdaptingPerFieldPostingsFormat extends PostingsFormat {
53+
public final class LegacyAdaptingPerFieldPostingsFormat extends PostingsFormat {
6154
/** Name of this {@link PostingsFormat}. */
6255
public static final String PER_FIELD_NAME = "PerField40";
6356

@@ -68,39 +61,19 @@ public abstract class LegacyAdaptingPerFieldPostingsFormat extends PostingsForma
6861
public static final String PER_FIELD_SUFFIX_KEY = PerFieldPostingsFormat.class.getSimpleName() + ".suffix";
6962

7063
/** Sole constructor. */
71-
protected LegacyAdaptingPerFieldPostingsFormat() {
64+
public LegacyAdaptingPerFieldPostingsFormat() {
7265
super(PER_FIELD_NAME);
7366
}
7467

7568
static String getSuffix(String formatName, String suffix) {
7669
return formatName + "_" + suffix;
7770
}
7871

79-
protected PostingsFormat getPostingsFormat(String formatName) {
80-
throw new IllegalArgumentException(formatName);
81-
}
82-
83-
private class FieldsWriter extends FieldsConsumer {
84-
final SegmentWriteState writeState;
85-
final List<Closeable> toClose = new ArrayList<Closeable>();
86-
87-
FieldsWriter(SegmentWriteState writeState) {
88-
this.writeState = writeState;
89-
}
90-
91-
@Override
92-
public void write(Fields fields, NormsProducer norms) throws IOException {
93-
throw new IllegalStateException("This codec should only be used for reading, not writing");
94-
}
95-
96-
@Override
97-
public void merge(MergeState mergeState, NormsProducer norms) throws IOException {
98-
throw new IllegalStateException("This codec should only be used for reading, not writing");
99-
}
100-
101-
@Override
102-
public void close() throws IOException {
103-
IOUtils.close(toClose);
72+
private static PostingsFormat getPostingsFormat(String formatName) {
73+
if (formatName.equals("Lucene50")) {
74+
return new BWCLucene50PostingsFormat();
75+
} else {
76+
return new BWCCodec.EmptyPostingsFormat();
10477
}
10578
}
10679

@@ -130,8 +103,7 @@ private static class FieldsReader extends FieldsProducer {
130103
segment = other.segment;
131104
}
132105

133-
FieldsReader(final SegmentReadState readState, LegacyAdaptingPerFieldPostingsFormat legacyAdaptingPerFieldPostingsFormat)
134-
throws IOException {
106+
FieldsReader(final SegmentReadState readState) throws IOException {
135107

136108
// Read _X.per and init each format:
137109
boolean success = false;
@@ -147,7 +119,7 @@ private static class FieldsReader extends FieldsProducer {
147119
if (suffix == null) {
148120
throw new IllegalStateException("missing attribute: " + PER_FIELD_SUFFIX_KEY + " for field: " + fieldName);
149121
}
150-
PostingsFormat format = legacyAdaptingPerFieldPostingsFormat.getPostingsFormat(formatName);
122+
PostingsFormat format = getPostingsFormat(formatName);
151123
String segmentSuffix = getSuffix(formatName, suffix);
152124
if (formats.containsKey(segmentSuffix) == false) {
153125
formats.put(segmentSuffix, format.fieldsProducer(new SegmentReadState(readState, segmentSuffix)));
@@ -206,12 +178,12 @@ public String toString() {
206178
}
207179

208180
@Override
209-
public final FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
210-
return new FieldsWriter(state);
181+
public FieldsConsumer fieldsConsumer(SegmentWriteState state) {
182+
throw new IllegalStateException("This codec should only be used for reading, not writing");
211183
}
212184

213185
@Override
214-
public final FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
215-
return new FieldsReader(state, this);
186+
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
187+
return new FieldsReader(state);
216188
}
217189
}

x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/CompressionAlgorithm.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,10 @@
2525

2626
import java.io.IOException;
2727

28-
/** Compression algorithm used for suffixes of a block of terms. */
28+
/**
29+
* This is a copy of the class with the same name shipped with Lucene, which is package-private and hence not accessible.
30+
* We need to copy it because we have our own fork of {@link FieldReader}.
31+
*/
2932
enum CompressionAlgorithm {
3033
NO_COMPRESSION(0x00) {
3134

x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/FieldReader.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,9 @@
3535

3636
/**
3737
* BlockTree's implementation of {@link Terms}.
38+
*
39+
* This is a fork of {@link org.apache.lucene.backward_codecs.lucene40.blocktree.FieldReader} that allows reading from ancient
40+
* Lucene versions. The key difference is the FST loading, which relies on {@link FST}.
3841
*/
3942
public final class FieldReader extends Terms {
4043

x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/IntersectTermsEnum.java

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
import org.apache.lucene.index.ImpactsEnum;
2424
import org.apache.lucene.index.PostingsEnum;
2525
import org.apache.lucene.index.TermState;
26-
import org.apache.lucene.index.Terms;
2726
import org.apache.lucene.store.IndexInput;
2827
import org.apache.lucene.util.ArrayUtil;
2928
import org.apache.lucene.util.BytesRef;
@@ -39,11 +38,8 @@
3938
import java.io.IOException;
4039

4140
/**
42-
* This is used to implement efficient {@link Terms#intersect} for block-tree. Note that it cannot
43-
* seek, except for the initial term on init. It just "nexts" through the intersection of the
44-
* automaton and the terms. It does not use the terms index at all: on init, it loads the root
45-
* block, and scans its way to the initial term. Likewise, in next it scans until it finds a term
46-
* that matches the current automaton transition.
41+
* This is a copy of the class with the same name shipped with Lucene, which is package-private and hence not accessible.
42+
* We need to copy it because we have our own fork of {@link FieldReader}.
4743
*/
4844
final class IntersectTermsEnum extends BaseTermsEnum {
4945

x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/IntersectTermsEnumFrame.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,10 @@
3131
import java.io.IOException;
3232
import java.util.Arrays;
3333

34-
// TODO: can we share this with the frame in STE?
34+
/**
35+
* This is a copy of the class with the same name shipped with Lucene, which is package-private and hence not accessible.
36+
* We need to copy it because we have our own fork of {@link FieldReader}.
37+
*/
3538
final class IntersectTermsEnumFrame {
3639
final int ord;
3740
long fp;

x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/Lucene40BlockTreeTermsReader.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,10 @@ public Lucene40BlockTreeTermsReader(PostingsReaderBase postingsReader, SegmentRe
252252
final long indexStartFP = indexMetaIn.readVLong();
253253
FieldReader previous = fieldMap.put(
254254
fieldInfo.name,
255+
/*
256+
The FieldReader used differs from the original Lucene variant, in that it is more flexible
257+
around the versions it can read from.
258+
*/
255259
new FieldReader(
256260
this,
257261
fieldInfo,

x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/SegmentTermsEnum.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,10 @@
3636
import java.io.IOException;
3737
import java.io.PrintStream;
3838

39-
/** Iterates through terms in this field. */
39+
/**
40+
* This is a copy of the class with the same name shipped with Lucene, which is package-private and hence not accessible.
41+
* We need to copy it because we have our own fork of {@link FieldReader}.
42+
*/
4043
final class SegmentTermsEnum extends BaseTermsEnum {
4144

4245
// Lazy init:

x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/SegmentTermsEnumFrame.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,10 @@
3131
import java.io.IOException;
3232
import java.util.Arrays;
3333

34+
/**
35+
* This is a copy of the class with the same name shipped with Lucene, which is package-private and hence not accessible.
36+
* We need to copy it because we have our own fork of {@link FieldReader}.
37+
*/
3438
final class SegmentTermsEnumFrame {
3539
// Our index in stack[]:
3640
final int ord;

x-pack/plugin/old-lucene-versions/src/main/java/org/elasticsearch/xpack/lucene/bwc/codecs/lucene40/blocktree/Stats.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@
3030
import java.util.Locale;
3131

3232
/**
33-
* BlockTree statistics for a single field returned by {@link FieldReader#getStats()}.
33+
* This is a copy of the {@code Stats} class shipped with Lucene, whose constructor and methods are however package-private.
34+
* We need to copy it because we have our own fork of {@link FieldReader}.
3435
*/
3536
public class Stats {
3637
/** Byte size of the index. */

0 commit comments

Comments
 (0)