Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/137967.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 137967
summary: Single loop for `FieldInfo` processing
area: TSDB
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@
package org.elasticsearch.index.codec;

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.lucene103.Lucene103Codec;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.common.util.FeatureFlag;
Expand Down Expand Up @@ -71,16 +69,9 @@ public CodecService(@Nullable MapperService mapperService, BigArrays bigArrays)
assert useTsdbSyntheticId == false || mapperService.getIndexSettings().getMode() == IndexMode.TIME_SERIES;

this.codecs = codecs.entrySet().stream().collect(Collectors.toUnmodifiableMap(Map.Entry::getKey, e -> {
Codec codec;
if (e.getValue() instanceof DeduplicateFieldInfosCodec dedupCodec) {
codec = dedupCodec;
} else {
codec = new DeduplicateFieldInfosCodec(e.getValue().getName(), e.getValue());
}
if (useTsdbSyntheticId && codec instanceof TSDBSyntheticIdCodec == false) {
codec = new TSDBSyntheticIdCodec(codec.getName(), codec);
}
return codec;
String name = e.getValue().getName();
Codec codec = e.getValue();
return useTsdbSyntheticId ? new TSDBSyntheticIdCodec(codec) : new DeduplicateFieldInfosCodec(codec);
}));
}

Expand All @@ -100,24 +91,4 @@ public String[] availableCodecs() {
return codecs.keySet().toArray(new String[0]);
}

public static class DeduplicateFieldInfosCodec extends FilterCodec {

private final DeduplicatingFieldInfosFormat deduplicatingFieldInfosFormat;

@SuppressWarnings("this-escape")
protected DeduplicateFieldInfosCodec(String name, Codec delegate) {
super(name, delegate);
this.deduplicatingFieldInfosFormat = new DeduplicatingFieldInfosFormat(super.fieldInfosFormat());
}

@Override
public final FieldInfosFormat fieldInfosFormat() {
return deduplicatingFieldInfosFormat;
}

public final Codec delegate() {
return delegate;
}

}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.index.codec;

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.FilterCodec;
import org.elasticsearch.index.codec.tsdb.TSDBSyntheticIdCodec;

/**
 * A {@link FilterCodec} that keeps the wrapped codec's name and forwards everything to it, except for
 * {@link #fieldInfosFormat()}, which returns a {@link DeduplicatingFieldInfosFormat} layered over the
 * wrapped codec's own field infos format.
 */
public sealed class DeduplicateFieldInfosCodec extends FilterCodec permits TSDBSyntheticIdCodec {

    /** Format handed out by {@link #fieldInfosFormat()}; built once in the constructor. */
    private final DeduplicatingFieldInfosFormat dedupFieldInfosFormat;

    /**
     * Wraps {@code delegate}, reusing its name.
     *
     * @param delegate the codec to wrap
     */
    @SuppressWarnings("this-escape")
    protected DeduplicateFieldInfosCodec(Codec delegate) {
        super(delegate.getName(), delegate);
        // Overridable factory invoked during construction (hence the this-escape suppression):
        // an override runs before the subclass constructor body has executed.
        this.dedupFieldInfosFormat = this.createFieldInfosFormat(delegate.fieldInfosFormat());
    }

    /**
     * Factory for the format returned by {@link #fieldInfosFormat()}. Called once, from the constructor.
     *
     * @param delegate the field infos format of the wrapped codec
     * @return a deduplicating format wrapping {@code delegate}
     */
    protected DeduplicatingFieldInfosFormat createFieldInfosFormat(FieldInfosFormat delegate) {
        return new DeduplicatingFieldInfosFormat(delegate);
    }

    /**
     * @return the deduplicating format created at construction time
     */
    @Override
    public final FieldInfosFormat fieldInfosFormat() {
        return this.dedupFieldInfosFormat;
    }

    /**
     * @return the codec this instance wraps
     */
    public Codec delegate() {
        return this.delegate;
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import org.elasticsearch.common.util.Maps;
import org.elasticsearch.common.util.StringLiteralDeduplicator;
import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
import org.elasticsearch.index.codec.tsdb.TSDBSyntheticIdCodec;
import org.elasticsearch.index.mapper.FieldMapper;

import java.io.IOException;
Expand All @@ -28,7 +29,7 @@
* cases attribute maps on read. We use this to reduce the per-field overhead for Elasticsearch instances holding a large number of
* segments.
*/
public final class DeduplicatingFieldInfosFormat extends FieldInfosFormat {
public sealed class DeduplicatingFieldInfosFormat extends FieldInfosFormat permits TSDBSyntheticIdCodec.RewriteFieldInfosFormat {

private static final Map<Map<String, String>, Map<String, String>> attributeDeduplicator = ConcurrentCollections.newConcurrentMap();

Expand All @@ -43,33 +44,40 @@ public DeduplicatingFieldInfosFormat(FieldInfosFormat delegate) {
@Override
public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext iocontext) throws IOException {
final FieldInfos fieldInfos = delegate.read(directory, segmentInfo, segmentSuffix, iocontext);
validateFieldInfos(fieldInfos);
final FieldInfo[] deduplicated = new FieldInfo[fieldInfos.size()];
int i = 0;
for (FieldInfo fi : fieldInfos) {
deduplicated[i++] = new FieldInfo(
FieldMapper.internFieldName(fi.getName()),
fi.number,
fi.hasTermVectors(),
fi.omitsNorms(),
fi.hasPayloads(),
fi.getIndexOptions(),
fi.getDocValuesType(),
fi.docValuesSkipIndexType(),
fi.getDocValuesGen(),
internStringStringMap(fi.attributes()),
fi.getPointDimensionCount(),
fi.getPointIndexDimensionCount(),
fi.getPointNumBytes(),
fi.getVectorDimension(),
fi.getVectorEncoding(),
fi.getVectorSimilarityFunction(),
fi.isSoftDeletesField(),
fi.isParentField()
);
deduplicated[i++] = wrapFieldInfo(fi);
}
return new FieldInfosWithUsages(deduplicated);
}

/**
 * Hook called from {@code read} with the {@link FieldInfos} returned by the delegate format, before
 * any of the individual entries are wrapped. This implementation performs no checks.
 *
 * @param fieldInfos the field infos as read by the delegate
 */
protected void validateFieldInfos(FieldInfos fieldInfos) {
    // Intentionally empty: nothing is validated here.
}

/**
 * Creates a new {@link FieldInfo} carrying the same values as {@code fi}, with the field name passed
 * through {@link FieldMapper#internFieldName} and the attribute map passed through
 * {@code internStringStringMap}.
 *
 * @param fi the field info to copy
 * @return the copy, using the interned name and attribute map
 */
protected FieldInfo wrapFieldInfo(FieldInfo fi) {
    final String internedName = FieldMapper.internFieldName(fi.getName());
    final Map<String, String> internedAttributes = internStringStringMap(fi.attributes());
    // Everything other than the name and the attributes is taken over from fi unchanged.
    return new FieldInfo(
        internedName,
        fi.number,
        fi.hasTermVectors(),
        fi.omitsNorms(),
        fi.hasPayloads(),
        fi.getIndexOptions(),
        fi.getDocValuesType(),
        fi.docValuesSkipIndexType(),
        fi.getDocValuesGen(),
        internedAttributes,
        fi.getPointDimensionCount(),
        fi.getPointIndexDimensionCount(),
        fi.getPointNumBytes(),
        fi.getVectorDimension(),
        fi.getVectorEncoding(),
        fi.getVectorSimilarityFunction(),
        fi.isSoftDeletesField(),
        fi.isParentField()
    );
}

private static Map<String, String> internStringStringMap(Map<String, String> m) {
if (m.size() > 10) {
return m;
Expand All @@ -94,5 +102,4 @@ public void write(Directory directory, SegmentInfo segmentInfo, String segmentSu
throws IOException {
delegate.write(directory, segmentInfo, segmentSuffix, infos, context);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import org.apache.lucene.backward_codecs.lucene99.Lucene99Codec;
import org.apache.lucene.backward_codecs.lucene99.Lucene99PostingsFormat;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
Expand All @@ -26,7 +27,7 @@
* Elasticsearch codec as of 8.14. This extends the Lucene 9.9 codec to compressed stored fields with ZSTD instead of LZ4/DEFLATE. See
* {@link Zstd814StoredFieldsFormat}.
*/
public class Elasticsearch814Codec extends CodecService.DeduplicateFieldInfosCodec {
public class Elasticsearch814Codec extends FilterCodec {

private final StoredFieldsFormat storedFieldsFormat;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import org.apache.lucene.backward_codecs.lucene912.Lucene912Codec;
import org.apache.lucene.backward_codecs.lucene912.Lucene912PostingsFormat;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
Expand All @@ -26,7 +27,7 @@
* Elasticsearch codec as of 8.16. This extends the Lucene 9.12 codec to compressed stored fields with ZSTD instead of LZ4/DEFLATE. See
* {@link Zstd814StoredFieldsFormat}.
*/
public class Elasticsearch816Codec extends CodecService.DeduplicateFieldInfosCodec {
public class Elasticsearch816Codec extends FilterCodec {

private static final Lucene912Codec LUCENE_912_CODEC = new Lucene912Codec();
private static final PostingsFormat defaultPostingsFormat = new Lucene912PostingsFormat();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import org.apache.lucene.backward_codecs.lucene100.Lucene100Codec;
import org.apache.lucene.backward_codecs.lucene912.Lucene912PostingsFormat;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
Expand All @@ -26,7 +27,7 @@
* Elasticsearch codec as of 9.0-snapshot relying on Lucene 10.0. This extends the Lucene 10.0 codec to compressed stored fields
* with ZSTD instead of LZ4/DEFLATE. See {@link Zstd814StoredFieldsFormat}.
*/
public class Elasticsearch900Codec extends CodecService.DeduplicateFieldInfosCodec {
public class Elasticsearch900Codec extends FilterCodec {

private final StoredFieldsFormat storedFieldsFormat;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import org.apache.lucene.backward_codecs.lucene101.Lucene101Codec;
import org.apache.lucene.backward_codecs.lucene101.Lucene101PostingsFormat;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
Expand All @@ -26,7 +27,7 @@
* Elasticsearch codec as of 9.0 relying on Lucene 10.1. This extends the Lucene 10.1 codec to compressed
* stored fields with ZSTD instead of LZ4/DEFLATE. See {@link Zstd814StoredFieldsFormat}.
*/
public class Elasticsearch900Lucene101Codec extends CodecService.DeduplicateFieldInfosCodec {
public class Elasticsearch900Lucene101Codec extends FilterCodec {

static final PostingsFormat DEFAULT_POSTINGS_FORMAT = new Lucene101PostingsFormat();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
package org.elasticsearch.index.codec;

import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
Expand All @@ -26,7 +27,7 @@
* Elasticsearch codec as of 9.2 relying on Lucene 10.3. This extends the Lucene 10.3 codec to compressed
* stored fields with ZSTD instead of LZ4/DEFLATE. See {@link Zstd814StoredFieldsFormat}.
*/
public class Elasticsearch92Lucene103Codec extends CodecService.DeduplicateFieldInfosCodec {
public class Elasticsearch92Lucene103Codec extends FilterCodec {

static final PostingsFormat DEFAULT_POSTINGS_FORMAT = new Lucene103PostingsFormat();

Expand Down
Loading