Skip to content
Open
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/137967.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 137967
summary: Single loop for `FieldInfo` processing
area: TSDB
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@
package org.elasticsearch.index.codec;

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.lucene103.Lucene103Codec;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.common.util.FeatureFlag;
Expand Down Expand Up @@ -71,16 +69,9 @@ public CodecService(@Nullable MapperService mapperService, BigArrays bigArrays)
assert useTsdbSyntheticId == false || mapperService.getIndexSettings().getMode() == IndexMode.TIME_SERIES;

this.codecs = codecs.entrySet().stream().collect(Collectors.toUnmodifiableMap(Map.Entry::getKey, e -> {
Codec codec;
if (e.getValue() instanceof DeduplicateFieldInfosCodec dedupCodec) {
codec = dedupCodec;
} else {
codec = new DeduplicateFieldInfosCodec(e.getValue().getName(), e.getValue());
}
if (useTsdbSyntheticId && codec instanceof TSDBSyntheticIdCodec == false) {
codec = new TSDBSyntheticIdCodec(codec.getName(), codec);
}
return codec;
String name = e.getValue().getName();
Codec codec = e.getValue();
return useTsdbSyntheticId ? new TSDBSyntheticIdCodec(name, codec) : new DeduplicateFieldInfosCodec(name, codec);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit:

Suggested change
return useTsdbSyntheticId ? new TSDBSyntheticIdCodec(name, codec) : new DeduplicateFieldInfosCodec(name, codec);
return useTsdbSyntheticId ? new TSDBSyntheticIdCodec(codec) : new DeduplicateFieldInfosCodec(codec);

}));
}

Expand All @@ -100,24 +91,4 @@ public String[] availableCodecs() {
return codecs.keySet().toArray(new String[0]);
}

public static class DeduplicateFieldInfosCodec extends FilterCodec {

private final DeduplicatingFieldInfosFormat deduplicatingFieldInfosFormat;

@SuppressWarnings("this-escape")
protected DeduplicateFieldInfosCodec(String name, Codec delegate) {
super(name, delegate);
this.deduplicatingFieldInfosFormat = new DeduplicatingFieldInfosFormat(super.fieldInfosFormat());
}

@Override
public final FieldInfosFormat fieldInfosFormat() {
return deduplicatingFieldInfosFormat;
}

public final Codec delegate() {
return delegate;
}

}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.index.codec;

import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.FieldInfosFormat;
import org.apache.lucene.codecs.FilterCodec;
import org.elasticsearch.index.codec.tsdb.TSDBSyntheticIdCodec;

public sealed class DeduplicateFieldInfosCodec extends FilterCodec permits TSDBSyntheticIdCodec {

private final DeduplicatingFieldInfosFormat fieldInfosFormat;

/**
 * Creates a codec that wraps {@code delegate} under the given name and builds its field-infos format
 * eagerly from the delegate's own {@link FieldInfosFormat}.
 *
 * @param name     the codec name passed to the {@link FilterCodec} constructor
 * @param delegate the codec being wrapped; its {@code fieldInfosFormat()} is handed to
 *                 {@link #createFieldInfosFormat(FieldInfosFormat)}
 */
@SuppressWarnings("this-escape")
protected DeduplicateFieldInfosCodec(String name, Codec delegate) {
super(name, delegate);
// createFieldInfosFormat is overridable and is invoked here, during construction (hence the
// "this-escape" suppression): a subclass override runs before that subclass's constructor body.
this.fieldInfosFormat = createFieldInfosFormat(delegate.fieldInfosFormat());
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
protected DeduplicateFieldInfosCodec(String name, Codec delegate) {
super(name, delegate);
this.fieldInfosFormat = createFieldInfosFormat(delegate.fieldInfosFormat());
}
protected DeduplicateFieldInfosCodec(Codec delegate) {
super(delegate.getName(), delegate);
this.fieldInfosFormat = createFieldInfosFormat(delegate.fieldInfosFormat());
}


/**
 * Factory hook for the field-infos format used by this codec. The constructor calls it once and
 * stores the result.
 *
 * @param delegate the wrapped codec's field-infos format
 * @return a new {@link DeduplicatingFieldInfosFormat} wrapping {@code delegate}
 */
protected DeduplicatingFieldInfosFormat createFieldInfosFormat(FieldInfosFormat delegate) {
return new DeduplicatingFieldInfosFormat(delegate);
}

/**
 * Returns the field-infos format that was created in the constructor, rather than the wrapped
 * codec's own format. Declared {@code final}, so subclasses cannot replace it here.
 *
 * @return the format instance stored at construction time
 */
@Override
public final FieldInfosFormat fieldInfosFormat() {
return fieldInfosFormat;
}

/**
 * Exposes the wrapped codec.
 *
 * @return the {@code delegate} field; it is not declared in this class, so presumably it is the
 *         one inherited from {@link FilterCodec} (confirm against the Lucene version in use)
 */
public Codec delegate() {
return delegate;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import org.elasticsearch.common.util.Maps;
import org.elasticsearch.common.util.StringLiteralDeduplicator;
import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
import org.elasticsearch.index.codec.tsdb.TSDBSyntheticIdCodec;
import org.elasticsearch.index.mapper.FieldMapper;

import java.io.IOException;
Expand All @@ -28,7 +29,7 @@
* cases attribute maps on read. We use this to reduce the per-field overhead for Elasticsearch instances holding a large number of
* segments.
*/
public final class DeduplicatingFieldInfosFormat extends FieldInfosFormat {
public sealed class DeduplicatingFieldInfosFormat extends FieldInfosFormat permits TSDBSyntheticIdCodec.RewriteFieldInfosFormat {

private static final Map<Map<String, String>, Map<String, String>> attributeDeduplicator = ConcurrentCollections.newConcurrentMap();

Expand All @@ -43,33 +44,40 @@ public DeduplicatingFieldInfosFormat(FieldInfosFormat delegate) {
@Override
public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext iocontext) throws IOException {
final FieldInfos fieldInfos = delegate.read(directory, segmentInfo, segmentSuffix, iocontext);
validateFieldInfos(fieldInfos);
final FieldInfo[] deduplicated = new FieldInfo[fieldInfos.size()];
int i = 0;
for (FieldInfo fi : fieldInfos) {
deduplicated[i++] = new FieldInfo(
FieldMapper.internFieldName(fi.getName()),
fi.number,
fi.hasTermVectors(),
fi.omitsNorms(),
fi.hasPayloads(),
fi.getIndexOptions(),
fi.getDocValuesType(),
fi.docValuesSkipIndexType(),
fi.getDocValuesGen(),
internStringStringMap(fi.attributes()),
fi.getPointDimensionCount(),
fi.getPointIndexDimensionCount(),
fi.getPointNumBytes(),
fi.getVectorDimension(),
fi.getVectorEncoding(),
fi.getVectorSimilarityFunction(),
fi.isSoftDeletesField(),
fi.isParentField()
);
deduplicated[i++] = processFieldInfo(fi);
}
return new FieldInfosWithUsages(deduplicated);
}

/**
 * Extension hook invoked by {@code read} on the {@link FieldInfos} returned by the delegate, before
 * the per-field loop runs. The default implementation does nothing.
 *
 * @param fieldInfos the field infos as read by the delegate format
 */
protected void validateFieldInfos(FieldInfos fieldInfos) {}

protected FieldInfo processFieldInfo(FieldInfo fi) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit:

Suggested change
protected FieldInfo processFieldInfo(FieldInfo fi) {
protected FieldInfo wrapFieldInfo(FieldInfo fi) {

return new FieldInfo(
FieldMapper.internFieldName(fi.getName()),
fi.number,
fi.hasTermVectors(),
fi.omitsNorms(),
fi.hasPayloads(),
fi.getIndexOptions(),
fi.getDocValuesType(),
fi.docValuesSkipIndexType(),
fi.getDocValuesGen(),
internStringStringMap(fi.attributes()),
fi.getPointDimensionCount(),
fi.getPointIndexDimensionCount(),
fi.getPointNumBytes(),
fi.getVectorDimension(),
fi.getVectorEncoding(),
fi.getVectorSimilarityFunction(),
fi.isSoftDeletesField(),
fi.isParentField()
);
}

private static Map<String, String> internStringStringMap(Map<String, String> m) {
if (m.size() > 10) {
return m;
Expand All @@ -94,5 +102,4 @@ public void write(Directory directory, SegmentInfo segmentInfo, String segmentSu
throws IOException {
delegate.write(directory, segmentInfo, segmentSuffix, infos, context);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import org.apache.lucene.backward_codecs.lucene99.Lucene99Codec;
import org.apache.lucene.backward_codecs.lucene99.Lucene99PostingsFormat;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
Expand All @@ -26,7 +27,7 @@
* Elasticsearch codec as of 8.14. This extends the Lucene 9.9 codec to compressed stored fields with ZSTD instead of LZ4/DEFLATE. See
* {@link Zstd814StoredFieldsFormat}.
*/
public class Elasticsearch814Codec extends CodecService.DeduplicateFieldInfosCodec {
public class Elasticsearch814Codec extends FilterCodec {

private final StoredFieldsFormat storedFieldsFormat;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import org.apache.lucene.backward_codecs.lucene912.Lucene912Codec;
import org.apache.lucene.backward_codecs.lucene912.Lucene912PostingsFormat;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
Expand All @@ -26,7 +27,7 @@
* Elasticsearch codec as of 8.16. This extends the Lucene 9.12 codec to compressed stored fields with ZSTD instead of LZ4/DEFLATE. See
* {@link Zstd814StoredFieldsFormat}.
*/
public class Elasticsearch816Codec extends CodecService.DeduplicateFieldInfosCodec {
public class Elasticsearch816Codec extends FilterCodec {

private static final Lucene912Codec LUCENE_912_CODEC = new Lucene912Codec();
private static final PostingsFormat defaultPostingsFormat = new Lucene912PostingsFormat();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import org.apache.lucene.backward_codecs.lucene100.Lucene100Codec;
import org.apache.lucene.backward_codecs.lucene912.Lucene912PostingsFormat;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
Expand All @@ -26,7 +27,7 @@
* Elasticsearch codec as of 9.0-snapshot relying on Lucene 10.0. This extends the Lucene 10.0 codec to compressed stored fields
* with ZSTD instead of LZ4/DEFLATE. See {@link Zstd814StoredFieldsFormat}.
*/
public class Elasticsearch900Codec extends CodecService.DeduplicateFieldInfosCodec {
public class Elasticsearch900Codec extends FilterCodec {

private final StoredFieldsFormat storedFieldsFormat;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import org.apache.lucene.backward_codecs.lucene101.Lucene101Codec;
import org.apache.lucene.backward_codecs.lucene101.Lucene101PostingsFormat;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
Expand All @@ -26,7 +27,7 @@
* Elasticsearch codec as of 9.0 relying on Lucene 10.1. This extends the Lucene 10.1 codec to compressed
* stored fields with ZSTD instead of LZ4/DEFLATE. See {@link Zstd814StoredFieldsFormat}.
*/
public class Elasticsearch900Lucene101Codec extends CodecService.DeduplicateFieldInfosCodec {
public class Elasticsearch900Lucene101Codec extends FilterCodec {

static final PostingsFormat DEFAULT_POSTINGS_FORMAT = new Lucene101PostingsFormat();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
package org.elasticsearch.index.codec;

import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.StoredFieldsFormat;
Expand All @@ -26,7 +27,7 @@
* Elasticsearch codec as of 9.2 relying on Lucene 10.3. This extends the Lucene 10.3 codec to compressed
* stored fields with ZSTD instead of LZ4/DEFLATE. See {@link Zstd814StoredFieldsFormat}.
*/
public class Elasticsearch92Lucene103Codec extends CodecService.DeduplicateFieldInfosCodec {
public class Elasticsearch92Lucene103Codec extends FilterCodec {

static final PostingsFormat DEFAULT_POSTINGS_FORMAT = new Lucene103PostingsFormat();

Expand Down
Loading