Merged
Changes from all commits
25 commits
f468fc8
backport to 8.x
drempapis Mar 21, 2025
9479715
update after review
drempapis Mar 21, 2025
1d41a2c
[CI] Auto commit changes from spotless
Mar 21, 2025
dba071e
Merge branch '8.x' into fix_8/117042_Support_7x_segments_as_archive_i…
drempapis Mar 21, 2025
0036e9e
Merge branch '8.x' into fix_8/117042_Support_7x_segments_as_archive_i…
drempapis Mar 21, 2025
94d0360
update
drempapis Mar 21, 2025
deb4935
Update docs/changelog/125389.yaml
drempapis Mar 21, 2025
a733db7
Merge branch '8.x' into fix_8/117042_Support_7x_segments_as_archive_i…
drempapis Mar 21, 2025
432c523
update changelog
drempapis Mar 21, 2025
3b72963
update changelog
drempapis Mar 21, 2025
46786d5
Update docs/changelog/125389.yaml
drempapis Mar 21, 2025
28410d0
Merge branch '8.x' into fix_8/117042_Support_7x_segments_as_archive_i…
drempapis Mar 21, 2025
f005463
update after review
drempapis Mar 21, 2025
a01fe1f
Merge branch 'fix_8/117042_Support_7x_segments_as_archive_in_8x' of g…
drempapis Mar 21, 2025
0727c6a
[CI] Auto commit changes from spotless
Mar 21, 2025
c21178f
Merge branch '8.x' into fix_8/117042_Support_7x_segments_as_archive_i…
drempapis Mar 26, 2025
89e5b72
Merge branch '8.x' into fix_8/117042_Support_7x_segments_as_archive_i…
drempapis Mar 26, 2025
9c7132e
Merge branch '8.x' into fix_8/117042_Support_7x_segments_as_archive_i…
drempapis Mar 26, 2025
8059796
Merge branch '8.x' into fix_8/117042_Support_7x_segments_as_archive_i…
drempapis Mar 27, 2025
46b334b
Merge branch '8.x' into fix_8/117042_Support_7x_segments_as_archive_i…
drempapis Mar 27, 2025
0185f30
Merge branch '8.x' into fix_8/117042_Support_7x_segments_as_archive_i…
drempapis Mar 28, 2025
8c50f1f
revert code
drempapis Mar 28, 2025
9efe527
Update comment after review
drempapis Mar 28, 2025
717f83f
Merge branch '8.x' into fix_8/117042_Support_7x_segments_as_archive_i…
drempapis Mar 28, 2025
74dff86
Merge branch '8.x' into fix_8/117042_Support_7x_segments_as_archive_i…
drempapis Mar 31, 2025
6 changes: 6 additions & 0 deletions docs/changelog/125389.yaml
@@ -0,0 +1,6 @@
pr: 125389
summary: Support indices created in ES v6 and updated in ES v7 using different Lucene codecs
as archive in current version.
area: Search
type: bug
issues: []
BWCCodec.java
@@ -7,7 +7,6 @@

package org.elasticsearch.xpack.lucene.bwc.codecs;

import org.apache.lucene.backward_codecs.lucene70.Lucene70Codec;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.FieldsConsumer;
@@ -18,6 +17,7 @@
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
@@ -27,7 +27,12 @@
import org.apache.lucene.index.Terms;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.Version;
import org.elasticsearch.xpack.lucene.bwc.codecs.lucene70.BWCLucene70Codec;
import org.elasticsearch.xpack.lucene.bwc.codecs.lucene80.BWCLucene80Codec;
import org.elasticsearch.xpack.lucene.bwc.codecs.lucene84.BWCLucene84Codec;
import org.elasticsearch.xpack.lucene.bwc.codecs.lucene86.BWCLucene86Codec;
import org.elasticsearch.xpack.lucene.bwc.codecs.lucene87.BWCLucene87Codec;

import java.io.IOException;
import java.util.ArrayList;
@@ -39,55 +44,122 @@
*/
public abstract class BWCCodec extends Codec {

private final FieldInfosFormat fieldInfosFormat;
private final SegmentInfoFormat segmentInfosFormat;
private final PostingsFormat postingsFormat;

protected BWCCodec(String name) {
super(name);
}

@Override
public NormsFormat normsFormat() {
throw new UnsupportedOperationException();
}
this.fieldInfosFormat = new FieldInfosFormat() {
final FieldInfosFormat wrappedFormat = originalFieldInfosFormat();

@Override
public TermVectorsFormat termVectorsFormat() {
throw new UnsupportedOperationException();
}
@Override
public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext iocontext)
throws IOException {
return filterFields(wrappedFormat.read(directory, segmentInfo, segmentSuffix, iocontext));
}

@Override
public KnnVectorsFormat knnVectorsFormat() {
throw new UnsupportedOperationException();
}
@Override
public void write(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos infos, IOContext context)
throws IOException {
wrappedFormat.write(directory, segmentInfo, segmentSuffix, infos, context);
}
};

this.segmentInfosFormat = new SegmentInfoFormat() {
final SegmentInfoFormat wrappedFormat = originalSegmentInfoFormat();

protected static SegmentInfoFormat wrap(SegmentInfoFormat wrapped) {
return new SegmentInfoFormat() {
@Override
public SegmentInfo read(Directory directory, String segmentName, byte[] segmentID, IOContext context) throws IOException {
return wrap(wrapped.read(directory, segmentName, segmentID, context));
return wrap(wrappedFormat.read(directory, segmentName, segmentID, context));
}

@Override
public void write(Directory dir, SegmentInfo info, IOContext ioContext) throws IOException {
wrapped.write(dir, info, ioContext);
wrappedFormat.write(dir, info, ioContext);
}
};
}

protected static FieldInfosFormat wrap(FieldInfosFormat wrapped) {
return new FieldInfosFormat() {
this.postingsFormat = new PerFieldPostingsFormat() {
@Override
public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext iocontext)
throws IOException {
return filterFields(wrapped.read(directory, segmentInfo, segmentSuffix, iocontext));
}

@Override
public void write(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos infos, IOContext context)
throws IOException {
wrapped.write(directory, segmentInfo, segmentSuffix, infos, context);
public PostingsFormat getPostingsFormatForField(String field) {
throw new UnsupportedOperationException("Old codecs can't be used for writing");
}
};
}

@Override
public final FieldInfosFormat fieldInfosFormat() {
return fieldInfosFormat;
}

@Override
public final SegmentInfoFormat segmentInfoFormat() {
return segmentInfosFormat;
}

@Override
public PostingsFormat postingsFormat() {
return postingsFormat;
}

/**
* This method is not supported for archive indices and older codecs and will always throw an {@link UnsupportedOperationException}.
* This method is never called in practice, as we rewrite the field infos to override which features are present in
* the index. Even if norms are present, the rewritten field infos report them as absent.
*
* @return nothing, as this method always throws an exception
* @throws UnsupportedOperationException always thrown to indicate that this method is not supported
*/
@Override
public final NormsFormat normsFormat() {
throw new UnsupportedOperationException();
}

/**
* This method is not supported for archive indices and older codecs and will always throw an {@link UnsupportedOperationException}.
* This method is never called in practice, as we rewrite the field infos to override which features are present in
* the index. Even if term vectors are present, the rewritten field infos report them as absent.
*
* @return nothing, as this method always throws an exception
* @throws UnsupportedOperationException always thrown to indicate that this method is not supported
*/
@Override
public final TermVectorsFormat termVectorsFormat() {
throw new UnsupportedOperationException();
}

/**
* This method is not supported for archive indices and older codecs and will always throw an {@link UnsupportedOperationException}.
* KNN vectors cannot be present because they are not yet supported in any of the Lucene versions that we support for archive indices.
*
* @return nothing, as this method always throws an exception
* @throws UnsupportedOperationException always thrown to indicate that this method is not supported
*/
@Override
public final KnnVectorsFormat knnVectorsFormat() {
throw new UnsupportedOperationException();
}

/**
* Returns the original {@link SegmentInfoFormat} used by this codec.
* This method should be implemented by subclasses to provide the specific
* {@link SegmentInfoFormat} that this codec is intended to use.
*
* @return the original {@link SegmentInfoFormat} used by this codec
*/
protected abstract SegmentInfoFormat originalSegmentInfoFormat();

/**
* Returns the original {@link FieldInfosFormat} used by this codec.
* This method should be implemented by subclasses to provide the specific
* {@link FieldInfosFormat} that this codec is intended to use.
*
* @return the original {@link FieldInfosFormat} used by this codec
*/
protected abstract FieldInfosFormat originalFieldInfosFormat();

// mark all fields as no term vectors, no norms, no payloads, and no vectors.
private static FieldInfos filterFields(FieldInfos fieldInfos) {
List<FieldInfo> fieldInfoCopy = new ArrayList<>(fieldInfos.size());
@@ -119,15 +191,14 @@ private static FieldInfos filterFields(FieldInfos fieldInfos) {
}

public static SegmentInfo wrap(SegmentInfo segmentInfo) {
// special handling for Lucene70Codec (which is currently bundled with Lucene)
// Use BWCLucene70Codec instead as that one extends BWCCodec (similar to all other older codecs)
final Codec codec = segmentInfo.getCodec() instanceof Lucene70Codec ? new BWCLucene70Codec() : segmentInfo.getCodec();
Codec codec = getBackwardCompatibleCodec(segmentInfo.getCodec());

final SegmentInfo segmentInfo1 = new SegmentInfo(
segmentInfo.dir,
// Use Version.LATEST instead of original version, otherwise SegmentCommitInfo will bark when processing (N-1 limitation)
// TODO: perhaps store the original version information in attributes so that we can retrieve it later when needed?
org.apache.lucene.util.Version.LATEST,
org.apache.lucene.util.Version.LATEST,
Version.LATEST,
Version.LATEST,
segmentInfo.name,
segmentInfo.maxDoc(),
segmentInfo.getUseCompoundFile(),
@@ -142,6 +213,28 @@ public static SegmentInfo wrap(SegmentInfo segmentInfo) {
return segmentInfo1;
}

/**
* Returns a backward-compatible codec for the given codec. If the codec is one of the known Lucene 8.x codecs,
* it returns a corresponding read-only backward-compatible codec. Otherwise, it returns the original codec.
* Lucene 8.x codecs are still shipped with the current version of Lucene.
* Earlier codecs are provided by this plugin directly; they are also read-only backward-compatible, but they don't require the renaming.
*
* This switch is only for indices created in ES 6.x and later written to in ES 7.x (Lucene 8.x). Indices created
* in ES 7.x can be read directly by ES if marked read-only, without going through archive indices.
*/
private static Codec getBackwardCompatibleCodec(Codec codec) {
if (codec == null) return null;

return switch (codec.getClass().getSimpleName()) {
case "Lucene70Codec" -> new BWCLucene70Codec();
case "Lucene80Codec" -> new BWCLucene80Codec();
case "Lucene84Codec" -> new BWCLucene84Codec();
case "Lucene86Codec" -> new BWCLucene86Codec();
case "Lucene87Codec" -> new BWCLucene87Codec();
default -> codec;
};
}

/**
* In-memory postings format that shows no postings available.
*/
Lucene60Codec.java
@@ -44,8 +44,7 @@
*/
@Deprecated
public class Lucene60Codec extends BWCCodec {
private final FieldInfosFormat fieldInfosFormat = wrap(new Lucene60FieldInfosFormat());
private final SegmentInfoFormat segmentInfosFormat = wrap(new Lucene50SegmentInfoFormat());

private final LiveDocsFormat liveDocsFormat = new Lucene50LiveDocsFormat();
private final CompoundFormat compoundFormat = new Lucene50CompoundFormat();
private final StoredFieldsFormat storedFieldsFormat;
@@ -68,18 +67,18 @@ public Lucene60Codec() {
}

@Override
public final StoredFieldsFormat storedFieldsFormat() {
return storedFieldsFormat;
protected FieldInfosFormat originalFieldInfosFormat() {
return new Lucene60FieldInfosFormat();
}

@Override
public final FieldInfosFormat fieldInfosFormat() {
return fieldInfosFormat;
protected SegmentInfoFormat originalSegmentInfoFormat() {
return new Lucene50SegmentInfoFormat();
}

@Override
public SegmentInfoFormat segmentInfoFormat() {
return segmentInfosFormat;
public final StoredFieldsFormat storedFieldsFormat() {
return storedFieldsFormat;
}

@Override
Lucene60MetadataOnlyPointsFormat.java
@@ -28,6 +28,7 @@
import java.io.IOException;

/**
* This is a fork of {@link org.apache.lucene.backward_codecs.lucene60.Lucene60PointsFormat}
* Allows reading metadata only from Lucene 6.0 point format
**/
public class Lucene60MetadataOnlyPointsFormat extends PointsFormat {
Lucene60MetadataOnlyPointsReader.java
@@ -34,7 +34,10 @@
import java.util.HashMap;
import java.util.Map;

/** Reads the metadata of point values previously written with Lucene60PointsWriter */
/**
* This is a fork of {@link org.apache.lucene.backward_codecs.lucene60.Lucene60PointsReader}
* Reads the metadata of point values previously written with Lucene60PointsWriter
*/
public final class Lucene60MetadataOnlyPointsReader extends PointsReader {
final IndexInput dataIn;
final SegmentReadState readState;
@@ -105,7 +108,7 @@ public Lucene60MetadataOnlyPointsReader(SegmentReadState readState) throws IOExc
int fieldNumber = ent.getKey();
long fp = ent.getValue();
dataIn.seek(fp);
PointValues reader = new MetadataOnlyBKDReader(dataIn);
PointValues reader = new MetadataOnlyBKDReader(dataIn, false);
readers.put(fieldNumber, reader);
}

MetadataOnlyBKDReader.java
@@ -47,7 +47,7 @@ public class MetadataOnlyBKDReader extends PointValues {
final int docCount;
final int version;

public MetadataOnlyBKDReader(IndexInput metaIn) throws IOException {
public MetadataOnlyBKDReader(IndexInput metaIn, boolean isVersionPost86) throws IOException {
version = CodecUtil.checkHeader(metaIn, "BKD", VERSION_START, VERSION_CURRENT);
final int numDims = metaIn.readVInt();
final int numIndexDims;
@@ -85,6 +85,23 @@ public MetadataOnlyBKDReader(IndexInput metaIn) throws IOException {

pointCount = metaIn.readVLong();
docCount = metaIn.readVInt();

// The pre-8.6 code does not read the following fields that its standard Lucene counterpart does. After experimenting with the
// code, we concluded that these are the last fields being read, that they are not needed in the metadata-only reader, and
// that we can safely ignore them when loading the file. By coincidence, nothing breaks even if we do read a couple of VLongs,
// as long as some bytes are available to read.
//
// The extra reads were introduced to process an IndexInput created with Lucene86Codec or later, where a new BKD format was
// introduced. We have stricter checks around the header and footer starting from the 8.6 formats, hence we do need to
// consume all the data input there, but not in previous formats.
//
// For correctness, we added version checking here: the additional fields are read if and only if the version is 8.6 or higher.
if (isVersionPost86) {
metaIn.readVInt();
metaIn.readLong();
// The following fields are not used in this class, but we need to read them to advance the pointer
metaIn.readLong();
}
}

@Override
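For contrast with the pre-8.6 call above (where Lucene60MetadataOnlyPointsReader passes false), here is a minimal sketch of how a hypothetical post-8.6 metadata-only reader could use the same constructor so that the trailing metadata of the newer BKD format is consumed. The method name and surrounding setup are assumptions and are not part of this change:

// Illustrative only: a caller that knows the segment was written with Lucene86Codec or later
// passes true so that MetadataOnlyBKDReader also consumes the trailing VInt and two longs.
private PointValues readMetadataOnlyPoints(IndexInput dataIn, long fieldPointer, boolean post86Segment) throws IOException {
    dataIn.seek(fieldPointer); // jump to this field's BKD entry, as the Lucene60 reader does
    return new MetadataOnlyBKDReader(dataIn, post86Segment);
}

Keeping the flag on the constructor leaves the existing pre-8.6 readers unchanged while, presumably, letting a later 8.6+ metadata-only reader reuse the same class.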
Lucene62Codec.java
@@ -44,8 +44,7 @@
*/
@Deprecated
public class Lucene62Codec extends BWCCodec {
private final FieldInfosFormat fieldInfosFormat = wrap(new Lucene60FieldInfosFormat());
private final SegmentInfoFormat segmentInfosFormat = wrap(new Lucene62SegmentInfoFormat());

private final LiveDocsFormat liveDocsFormat = new Lucene50LiveDocsFormat();
private final CompoundFormat compoundFormat = new Lucene50CompoundFormat();
private final StoredFieldsFormat storedFieldsFormat;
@@ -68,18 +67,18 @@ public Lucene62Codec() {
}

@Override
public final StoredFieldsFormat storedFieldsFormat() {
return storedFieldsFormat;
protected FieldInfosFormat originalFieldInfosFormat() {
return new Lucene60FieldInfosFormat();
}

@Override
public final FieldInfosFormat fieldInfosFormat() {
return fieldInfosFormat;
protected SegmentInfoFormat originalSegmentInfoFormat() {
return new Lucene62SegmentInfoFormat();
}

@Override
public SegmentInfoFormat segmentInfoFormat() {
return segmentInfosFormat;
public final StoredFieldsFormat storedFieldsFormat() {
return storedFieldsFormat;
}

@Override