diff --git a/docs/changelog/137967.yaml b/docs/changelog/137967.yaml new file mode 100644 index 0000000000000..11c45b7cc86da --- /dev/null +++ b/docs/changelog/137967.yaml @@ -0,0 +1,5 @@ +pr: 137967 +summary: Single loop for `FieldInfo` processing +area: TSDB +type: enhancement +issues: [] diff --git a/server/src/main/java/org/elasticsearch/index/codec/CodecService.java b/server/src/main/java/org/elasticsearch/index/codec/CodecService.java index 5d6e377d57db9..8f2ab54877360 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/CodecService.java +++ b/server/src/main/java/org/elasticsearch/index/codec/CodecService.java @@ -10,8 +10,6 @@ package org.elasticsearch.index.codec; import org.apache.lucene.codecs.Codec; -import org.apache.lucene.codecs.FieldInfosFormat; -import org.apache.lucene.codecs.FilterCodec; import org.apache.lucene.codecs.lucene103.Lucene103Codec; import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.common.util.FeatureFlag; @@ -71,16 +69,9 @@ public CodecService(@Nullable MapperService mapperService, BigArrays bigArrays) assert useTsdbSyntheticId == false || mapperService.getIndexSettings().getMode() == IndexMode.TIME_SERIES; this.codecs = codecs.entrySet().stream().collect(Collectors.toUnmodifiableMap(Map.Entry::getKey, e -> { - Codec codec; - if (e.getValue() instanceof DeduplicateFieldInfosCodec dedupCodec) { - codec = dedupCodec; - } else { - codec = new DeduplicateFieldInfosCodec(e.getValue().getName(), e.getValue()); - } - if (useTsdbSyntheticId && codec instanceof TSDBSyntheticIdCodec == false) { - codec = new TSDBSyntheticIdCodec(codec.getName(), codec); - } - return codec; + String name = e.getValue().getName(); + Codec codec = e.getValue(); + return useTsdbSyntheticId ? 
new TSDBSyntheticIdCodec(codec) : new DeduplicateFieldInfosCodec(codec); })); } @@ -100,24 +91,4 @@ public String[] availableCodecs() { return codecs.keySet().toArray(new String[0]); } - public static class DeduplicateFieldInfosCodec extends FilterCodec { - - private final DeduplicatingFieldInfosFormat deduplicatingFieldInfosFormat; - - @SuppressWarnings("this-escape") - protected DeduplicateFieldInfosCodec(String name, Codec delegate) { - super(name, delegate); - this.deduplicatingFieldInfosFormat = new DeduplicatingFieldInfosFormat(super.fieldInfosFormat()); - } - - @Override - public final FieldInfosFormat fieldInfosFormat() { - return deduplicatingFieldInfosFormat; - } - - public final Codec delegate() { - return delegate; - } - - } } diff --git a/server/src/main/java/org/elasticsearch/index/codec/DeduplicateFieldInfosCodec.java b/server/src/main/java/org/elasticsearch/index/codec/DeduplicateFieldInfosCodec.java new file mode 100644 index 0000000000000..83dadc5714e33 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/codec/DeduplicateFieldInfosCodec.java @@ -0,0 +1,40 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.index.codec; + +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.FieldInfosFormat; +import org.apache.lucene.codecs.FilterCodec; +import org.elasticsearch.index.codec.tsdb.TSDBSyntheticIdCodec; + +public sealed class DeduplicateFieldInfosCodec extends FilterCodec permits TSDBSyntheticIdCodec { + + private final DeduplicatingFieldInfosFormat fieldInfosFormat; + + @SuppressWarnings("this-escape") + protected DeduplicateFieldInfosCodec(Codec delegate) { + super(delegate.getName(), delegate); + this.fieldInfosFormat = createFieldInfosFormat(delegate.fieldInfosFormat()); + } + + protected DeduplicatingFieldInfosFormat createFieldInfosFormat(FieldInfosFormat delegate) { + return new DeduplicatingFieldInfosFormat(delegate); + } + + @Override + public final FieldInfosFormat fieldInfosFormat() { + return fieldInfosFormat; + } + + public Codec delegate() { + return delegate; + } + +} diff --git a/server/src/main/java/org/elasticsearch/index/codec/DeduplicatingFieldInfosFormat.java b/server/src/main/java/org/elasticsearch/index/codec/DeduplicatingFieldInfosFormat.java index 00614140e237a..8aee7ca24decb 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/DeduplicatingFieldInfosFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/DeduplicatingFieldInfosFormat.java @@ -18,6 +18,7 @@ import org.elasticsearch.common.util.Maps; import org.elasticsearch.common.util.StringLiteralDeduplicator; import org.elasticsearch.common.util.concurrent.ConcurrentCollections; +import org.elasticsearch.index.codec.tsdb.TSDBSyntheticIdCodec; import org.elasticsearch.index.mapper.FieldMapper; import java.io.IOException; @@ -28,7 +29,7 @@ * cases attribute maps on read. We use this to reduce the per-field overhead for Elasticsearch instances holding a large number of * segments. 
*/ -public final class DeduplicatingFieldInfosFormat extends FieldInfosFormat { +public sealed class DeduplicatingFieldInfosFormat extends FieldInfosFormat permits TSDBSyntheticIdCodec.RewriteFieldInfosFormat { private static final Map, Map> attributeDeduplicator = ConcurrentCollections.newConcurrentMap(); @@ -43,33 +44,40 @@ public DeduplicatingFieldInfosFormat(FieldInfosFormat delegate) { @Override public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext iocontext) throws IOException { final FieldInfos fieldInfos = delegate.read(directory, segmentInfo, segmentSuffix, iocontext); + validateFieldInfos(fieldInfos); final FieldInfo[] deduplicated = new FieldInfo[fieldInfos.size()]; int i = 0; for (FieldInfo fi : fieldInfos) { - deduplicated[i++] = new FieldInfo( - FieldMapper.internFieldName(fi.getName()), - fi.number, - fi.hasTermVectors(), - fi.omitsNorms(), - fi.hasPayloads(), - fi.getIndexOptions(), - fi.getDocValuesType(), - fi.docValuesSkipIndexType(), - fi.getDocValuesGen(), - internStringStringMap(fi.attributes()), - fi.getPointDimensionCount(), - fi.getPointIndexDimensionCount(), - fi.getPointNumBytes(), - fi.getVectorDimension(), - fi.getVectorEncoding(), - fi.getVectorSimilarityFunction(), - fi.isSoftDeletesField(), - fi.isParentField() - ); + deduplicated[i++] = wrapFieldInfo(fi); } return new FieldInfosWithUsages(deduplicated); } + protected void validateFieldInfos(FieldInfos fieldInfos) {} + + protected FieldInfo wrapFieldInfo(FieldInfo fi) { + return new FieldInfo( + FieldMapper.internFieldName(fi.getName()), + fi.number, + fi.hasTermVectors(), + fi.omitsNorms(), + fi.hasPayloads(), + fi.getIndexOptions(), + fi.getDocValuesType(), + fi.docValuesSkipIndexType(), + fi.getDocValuesGen(), + internStringStringMap(fi.attributes()), + fi.getPointDimensionCount(), + fi.getPointIndexDimensionCount(), + fi.getPointNumBytes(), + fi.getVectorDimension(), + fi.getVectorEncoding(), + fi.getVectorSimilarityFunction(), + 
fi.isSoftDeletesField(), + fi.isParentField() + ); + } + private static Map internStringStringMap(Map m) { if (m.size() > 10) { return m; @@ -94,5 +102,4 @@ public void write(Directory directory, SegmentInfo segmentInfo, String segmentSu throws IOException { delegate.write(directory, segmentInfo, segmentSuffix, infos, context); } - } diff --git a/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch814Codec.java b/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch814Codec.java index ae372ea8194bc..08e87f5fe771c 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch814Codec.java +++ b/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch814Codec.java @@ -12,6 +12,7 @@ import org.apache.lucene.backward_codecs.lucene99.Lucene99Codec; import org.apache.lucene.backward_codecs.lucene99.Lucene99PostingsFormat; import org.apache.lucene.codecs.DocValuesFormat; +import org.apache.lucene.codecs.FilterCodec; import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.StoredFieldsFormat; @@ -26,7 +27,7 @@ * Elasticsearch codec as of 8.14. This extends the Lucene 9.9 codec to compressed stored fields with ZSTD instead of LZ4/DEFLATE. See * {@link Zstd814StoredFieldsFormat}. 
*/ -public class Elasticsearch814Codec extends CodecService.DeduplicateFieldInfosCodec { +public class Elasticsearch814Codec extends FilterCodec { private final StoredFieldsFormat storedFieldsFormat; diff --git a/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch816Codec.java b/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch816Codec.java index d58c4e2cdc34a..6060351882042 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch816Codec.java +++ b/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch816Codec.java @@ -12,6 +12,7 @@ import org.apache.lucene.backward_codecs.lucene912.Lucene912Codec; import org.apache.lucene.backward_codecs.lucene912.Lucene912PostingsFormat; import org.apache.lucene.codecs.DocValuesFormat; +import org.apache.lucene.codecs.FilterCodec; import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.StoredFieldsFormat; @@ -26,7 +27,7 @@ * Elasticsearch codec as of 8.16. This extends the Lucene 9.12 codec to compressed stored fields with ZSTD instead of LZ4/DEFLATE. See * {@link Zstd814StoredFieldsFormat}. 
*/ -public class Elasticsearch816Codec extends CodecService.DeduplicateFieldInfosCodec { +public class Elasticsearch816Codec extends FilterCodec { private static final Lucene912Codec LUCENE_912_CODEC = new Lucene912Codec(); private static final PostingsFormat defaultPostingsFormat = new Lucene912PostingsFormat(); diff --git a/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch900Codec.java b/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch900Codec.java index 04428d5b37fba..b7590e010ae3d 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch900Codec.java +++ b/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch900Codec.java @@ -12,6 +12,7 @@ import org.apache.lucene.backward_codecs.lucene100.Lucene100Codec; import org.apache.lucene.backward_codecs.lucene912.Lucene912PostingsFormat; import org.apache.lucene.codecs.DocValuesFormat; +import org.apache.lucene.codecs.FilterCodec; import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.StoredFieldsFormat; @@ -26,7 +27,7 @@ * Elasticsearch codec as of 9.0-snapshot relying on Lucene 10.0. This extends the Lucene 10.0 codec to compressed stored fields * with ZSTD instead of LZ4/DEFLATE. See {@link Zstd814StoredFieldsFormat}. 
*/ -public class Elasticsearch900Codec extends CodecService.DeduplicateFieldInfosCodec { +public class Elasticsearch900Codec extends FilterCodec { private final StoredFieldsFormat storedFieldsFormat; diff --git a/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch900Lucene101Codec.java b/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch900Lucene101Codec.java index ad2c40950b6c9..b5eebc7049f42 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch900Lucene101Codec.java +++ b/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch900Lucene101Codec.java @@ -12,6 +12,7 @@ import org.apache.lucene.backward_codecs.lucene101.Lucene101Codec; import org.apache.lucene.backward_codecs.lucene101.Lucene101PostingsFormat; import org.apache.lucene.codecs.DocValuesFormat; +import org.apache.lucene.codecs.FilterCodec; import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.StoredFieldsFormat; @@ -26,7 +27,7 @@ * Elasticsearch codec as of 9.0 relying on Lucene 10.1. This extends the Lucene 10.1 codec to compressed * stored fields with ZSTD instead of LZ4/DEFLATE. See {@link Zstd814StoredFieldsFormat}. 
*/ -public class Elasticsearch900Lucene101Codec extends CodecService.DeduplicateFieldInfosCodec { +public class Elasticsearch900Lucene101Codec extends FilterCodec { static final PostingsFormat DEFAULT_POSTINGS_FORMAT = new Lucene101PostingsFormat(); diff --git a/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch92Lucene103Codec.java b/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch92Lucene103Codec.java index c26d485fc8c99..df84581c5c592 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch92Lucene103Codec.java +++ b/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch92Lucene103Codec.java @@ -10,6 +10,7 @@ package org.elasticsearch.index.codec; import org.apache.lucene.codecs.DocValuesFormat; +import org.apache.lucene.codecs.FilterCodec; import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.StoredFieldsFormat; @@ -26,7 +27,7 @@ * Elasticsearch codec as of 9.2 relying on Lucene 10.3. This extends the Lucene 10.3 codec to compressed * stored fields with ZSTD instead of LZ4/DEFLATE. See {@link Zstd814StoredFieldsFormat}. 
*/ -public class Elasticsearch92Lucene103Codec extends CodecService.DeduplicateFieldInfosCodec { +public class Elasticsearch92Lucene103Codec extends FilterCodec { static final PostingsFormat DEFAULT_POSTINGS_FORMAT = new Lucene103PostingsFormat(); diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdCodec.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdCodec.java index aa6936cb65df9..236832b50c897 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdCodec.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/TSDBSyntheticIdCodec.java @@ -13,7 +13,6 @@ import org.apache.lucene.codecs.FieldInfosFormat; import org.apache.lucene.codecs.FieldsConsumer; import org.apache.lucene.codecs.FieldsProducer; -import org.apache.lucene.codecs.FilterCodec; import org.apache.lucene.codecs.NormsProducer; import org.apache.lucene.codecs.PostingsFormat; import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat; @@ -26,6 +25,8 @@ import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; +import org.elasticsearch.index.codec.DeduplicateFieldInfosCodec; +import org.elasticsearch.index.codec.DeduplicatingFieldInfosFormat; import org.elasticsearch.index.mapper.SyntheticIdField; import java.io.IOException; @@ -53,20 +54,17 @@ * synthetic _id field. *

*/ -public class TSDBSyntheticIdCodec extends FilterCodec { - - private final RewriteFieldInfosFormat fieldInfosFormat; +public final class TSDBSyntheticIdCodec extends DeduplicateFieldInfosCodec { private final EnsureNoPostingsFormat postingsFormat; - public TSDBSyntheticIdCodec(String name, Codec delegate) { - super(name, delegate); - this.fieldInfosFormat = new RewriteFieldInfosFormat(delegate.fieldInfosFormat()); + public TSDBSyntheticIdCodec(Codec delegate) { + super(delegate); this.postingsFormat = new EnsureNoPostingsFormat(delegate.postingsFormat()); } @Override - public final FieldInfosFormat fieldInfosFormat() { - return fieldInfosFormat; + protected DeduplicatingFieldInfosFormat createFieldInfosFormat(FieldInfosFormat delegate) { + return new RewriteFieldInfosFormat(delegate); } @Override @@ -77,12 +75,10 @@ public PostingsFormat postingsFormat() { /** * {@link FieldInfosFormat} that overwrites the {@link FieldInfos}. */ - private static class RewriteFieldInfosFormat extends FieldInfosFormat { - - private final FieldInfosFormat delegate; + public static final class RewriteFieldInfosFormat extends DeduplicatingFieldInfosFormat { - private RewriteFieldInfosFormat(FieldInfosFormat delegate) { - this.delegate = delegate; + RewriteFieldInfosFormat(FieldInfosFormat delegate) { + super(delegate); } private void ensureSyntheticIdFields(FieldInfos fieldInfos) { @@ -126,7 +122,6 @@ private void ensureSyntheticIdFields(FieldInfos fieldInfos) { @Override public void write(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, FieldInfos fieldInfos, IOContext context) throws IOException { - // Change the _id field index options from IndexOptions.DOCS to IndexOptions.NONE final var infos = new FieldInfo[fieldInfos.size()]; int i = 0; @@ -170,56 +165,53 @@ public void write(Directory directory, SegmentInfo segmentInfo, String segmentSu fieldInfos = new FieldInfos(infos); ensureSyntheticIdFields(fieldInfos); - delegate.write(directory, segmentInfo, 
segmentSuffix, fieldInfos, context); + super.write(directory, segmentInfo, segmentSuffix, fieldInfos, context); } @Override - public FieldInfos read(Directory directory, SegmentInfo segmentInfo, String segmentSuffix, IOContext iocontext) throws IOException { - final var fieldInfos = delegate.read(directory, segmentInfo, segmentSuffix, iocontext); + protected void validateFieldInfos(FieldInfos fieldInfos) { ensureSyntheticIdFields(fieldInfos); + } + @Override + protected FieldInfo wrapFieldInfo(FieldInfo fi) { // Change the _id field index options from IndexOptions.NONE to IndexOptions.DOCS, so that terms and postings work when // applying doc values updates in Lucene. - final var infos = new FieldInfo[fieldInfos.size()]; - int i = 0; - for (FieldInfo fi : fieldInfos) { - if (SYNTHETIC_ID.equals(fi.getName())) { - final var attributes = new HashMap<>(fi.attributes()); + if (SYNTHETIC_ID.equals(fi.getName())) { + final var attributes = new HashMap<>(fi.attributes()); - // Assert that PerFieldPostingsFormat are not written to field infos on disk - assert attributes.containsKey(PerFieldPostingsFormat.PER_FIELD_FORMAT_KEY) == false; - assert attributes.containsKey(PerFieldPostingsFormat.PER_FIELD_SUFFIX_KEY) == false; + // Assert that PerFieldPostingsFormat are not written to field infos on disk + assert attributes.containsKey(PerFieldPostingsFormat.PER_FIELD_FORMAT_KEY) == false; + assert attributes.containsKey(PerFieldPostingsFormat.PER_FIELD_SUFFIX_KEY) == false; - // Inject attributes so that PerFieldPostingsFormat maps the synthetic _id field to the TSDBSyntheticIdPostingsFormat - // This would normally be handled transparently by PerFieldPostingsFormat, but such attributes are only added if terms - // are produced during indexing, which is not the case for the synthetic _id field. 
- attributes.put(PerFieldPostingsFormat.PER_FIELD_FORMAT_KEY, TSDBSyntheticIdPostingsFormat.FORMAT_NAME); - attributes.put(PerFieldPostingsFormat.PER_FIELD_SUFFIX_KEY, TSDBSyntheticIdPostingsFormat.SUFFIX); + // Inject attributes so that PerFieldPostingsFormat maps the synthetic _id field to the TSDBSyntheticIdPostingsFormat + // This would normally be handled transparently by PerFieldPostingsFormat, but such attributes are only added if terms + // are produced during indexing, which is not the case for the synthetic _id field. + attributes.put(PerFieldPostingsFormat.PER_FIELD_FORMAT_KEY, TSDBSyntheticIdPostingsFormat.FORMAT_NAME); + attributes.put(PerFieldPostingsFormat.PER_FIELD_SUFFIX_KEY, TSDBSyntheticIdPostingsFormat.SUFFIX); - fi = new FieldInfo( - fi.getName(), - fi.getFieldNumber(), - fi.hasTermVectors(), - true, - fi.hasPayloads(), - IndexOptions.DOCS, - fi.getDocValuesType(), - fi.docValuesSkipIndexType(), - fi.getDocValuesGen(), - attributes, - fi.getPointDimensionCount(), - fi.getPointIndexDimensionCount(), - fi.getPointNumBytes(), - fi.getVectorDimension(), - fi.getVectorEncoding(), - fi.getVectorSimilarityFunction(), - fi.isSoftDeletesField(), - fi.isParentField() - ); - } - infos[i++] = fi; + fi = new FieldInfo( + fi.getName(), + fi.getFieldNumber(), + fi.hasTermVectors(), + true, + fi.hasPayloads(), + IndexOptions.DOCS, + fi.getDocValuesType(), + fi.docValuesSkipIndexType(), + fi.getDocValuesGen(), + attributes, + fi.getPointDimensionCount(), + fi.getPointIndexDimensionCount(), + fi.getPointNumBytes(), + fi.getVectorDimension(), + fi.getVectorEncoding(), + fi.getVectorSimilarityFunction(), + fi.isSoftDeletesField(), + fi.isParentField() + ); } - return new FieldInfos(infos); + return super.wrapFieldInfo(fi); } } diff --git a/server/src/test/java/org/elasticsearch/index/codec/CodecTests.java b/server/src/test/java/org/elasticsearch/index/codec/CodecTests.java index 331d84d9e4f61..95ae90732aa1a 100644 --- 
a/server/src/test/java/org/elasticsearch/index/codec/CodecTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/CodecTests.java @@ -49,13 +49,13 @@ public class CodecTests extends ESTestCase { public void testResolveDefaultCodecs() throws Exception { assumeTrue("Only when zstd_stored_fields feature flag is enabled", CodecService.ZSTD_STORED_FIELDS_FEATURE_FLAG); CodecService codecService = createCodecService(); - assertThat(codecService.codec("default"), instanceOf(PerFieldMapperCodec.class)); - assertThat(codecService.codec("default"), instanceOf(Elasticsearch92Lucene103Codec.class)); + assertThat(unwrappedCodec(codecService, "default"), instanceOf(PerFieldMapperCodec.class)); + assertThat(unwrappedCodec(codecService, "default"), instanceOf(Elasticsearch92Lucene103Codec.class)); } public void testDefault() throws Exception { assumeTrue("Only when zstd_stored_fields feature flag is enabled", CodecService.ZSTD_STORED_FIELDS_FEATURE_FLAG); - Codec codec = createCodecService().codec("default"); + Codec codec = unwrappedCodec(createCodecService(), "default"); assertEquals( "Zstd814StoredFieldsFormat(compressionMode=ZSTD(level=1), chunkSize=14336, maxDocsPerChunk=128, blockShift=10)", codec.storedFieldsFormat().toString() @@ -63,7 +63,7 @@ public void testDefault() throws Exception { } public void testBestCompression() throws Exception { - Codec codec = createCodecService().codec("best_compression"); + Codec codec = unwrappedCodec(createCodecService(), "best_compression"); assertEquals( "Zstd814StoredFieldsFormat(compressionMode=ZSTD(level=3), chunkSize=245760, maxDocsPerChunk=2048, blockShift=10)", codec.storedFieldsFormat().toString() @@ -71,7 +71,7 @@ public void testBestCompression() throws Exception { } public void testLegacyDefault() throws Exception { - Codec codec = createCodecService().codec("legacy_default"); + Codec codec = unwrappedCodec(createCodecService(), "legacy_default"); assertThat(codec.storedFieldsFormat(), 
Matchers.instanceOf(Lucene90StoredFieldsFormat.class)); // Make sure the legacy codec is writable try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setCodec(codec))) { @@ -84,7 +84,7 @@ public void testLegacyDefault() throws Exception { } public void testLegacyBestCompression() throws Exception { - Codec codec = createCodecService().codec("legacy_best_compression"); + Codec codec = unwrappedCodec(createCodecService(), "legacy_best_compression"); assertThat(codec.storedFieldsFormat(), Matchers.instanceOf(Lucene90StoredFieldsFormat.class)); // Make sure the legacy codec is writable try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setCodec(codec))) { @@ -98,7 +98,10 @@ public void testLegacyBestCompression() throws Exception { public void testCodecRetrievalForUnknownCodec() throws Exception { CodecService codecService = createCodecService(); - IllegalArgumentException exception = assertThrows(IllegalArgumentException.class, () -> codecService.codec("unknown_codec")); + IllegalArgumentException exception = assertThrows( + IllegalArgumentException.class, + () -> unwrappedCodec(codecService, "unknown_codec") + ); assertEquals("failed to find codec [unknown_codec]", exception.getMessage()); } @@ -148,4 +151,11 @@ private CodecService createCodecService() throws IOException { return new CodecService(service, BigArrays.NON_RECYCLING_INSTANCE); } + private static Codec unwrappedCodec(CodecService codecService, String codecName) { + Codec codec = codecService.codec(codecName); + if (codec instanceof DeduplicateFieldInfosCodec deduplicatingCodec) { + return deduplicatingCodec.delegate(); + } + return codec; + } } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/CompletionFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/CompletionFieldMapperTests.java index 079521b26b666..b90d457b268f9 100644 --- 
a/server/src/test/java/org/elasticsearch/index/mapper/CompletionFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/CompletionFieldMapperTests.java @@ -40,6 +40,7 @@ import org.elasticsearch.index.analysis.IndexAnalyzers; import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.codec.CodecService; +import org.elasticsearch.index.codec.DeduplicateFieldInfosCodec; import org.elasticsearch.index.codec.LegacyPerFieldMapperCodec; import org.elasticsearch.index.codec.PerFieldMapperCodec; import org.elasticsearch.xcontent.ToXContent; @@ -153,13 +154,13 @@ public void testPostingsFormat() throws IOException { MapperService mapperService = createMapperService(fieldMapping(this::minimalMapping)); CodecService codecService = new CodecService(mapperService, BigArrays.NON_RECYCLING_INSTANCE); Codec codec = codecService.codec("default"); + if (codec instanceof DeduplicateFieldInfosCodec deduplicateFieldInfosCodec) { + codec = deduplicateFieldInfosCodec.delegate(); + } if (CodecService.ZSTD_STORED_FIELDS_FEATURE_FLAG) { assertThat(codec, instanceOf(PerFieldMapperCodec.class)); assertThat(((PerFieldMapperCodec) codec).getPostingsFormatForField("field"), instanceOf(latestLuceneCPClass)); } else { - if (codec instanceof CodecService.DeduplicateFieldInfosCodec deduplicateFieldInfosCodec) { - codec = deduplicateFieldInfosCodec.delegate(); - } assertThat(codec, instanceOf(LegacyPerFieldMapperCodec.class)); assertThat(((LegacyPerFieldMapperCodec) codec).getPostingsFormatForField("field"), instanceOf(latestLuceneCPClass)); } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java index aaf01aef48217..58a07f0043ed4 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java +++ 
b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java @@ -31,6 +31,7 @@ import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.IndexVersions; import org.elasticsearch.index.codec.CodecService; +import org.elasticsearch.index.codec.DeduplicateFieldInfosCodec; import org.elasticsearch.index.codec.LegacyPerFieldMapperCodec; import org.elasticsearch.index.codec.PerFieldMapperCodec; import org.elasticsearch.index.codec.vectors.BFloat16; @@ -1918,15 +1919,12 @@ public void testKnnVectorsFormat() throws IOException { b.endObject(); })); CodecService codecService = new CodecService(mapperService, BigArrays.NON_RECYCLING_INSTANCE); - Codec codec = codecService.codec("default"); + Codec codec = getUnwrappedCodec(codecService); KnnVectorsFormat knnVectorsFormat; if (CodecService.ZSTD_STORED_FIELDS_FEATURE_FLAG) { assertThat(codec, instanceOf(PerFieldMapperCodec.class)); knnVectorsFormat = ((PerFieldMapperCodec) codec).getKnnVectorsFormatForField("field"); } else { - if (codec instanceof CodecService.DeduplicateFieldInfosCodec deduplicateFieldInfosCodec) { - codec = deduplicateFieldInfosCodec.delegate(); - } assertThat(codec, instanceOf(LegacyPerFieldMapperCodec.class)); knnVectorsFormat = ((LegacyPerFieldMapperCodec) codec).getKnnVectorsFormatForField("field"); } @@ -1963,15 +1961,12 @@ public void testKnnQuantizedFlatVectorsFormat() throws IOException { b.endObject(); })); CodecService codecService = new CodecService(mapperService, BigArrays.NON_RECYCLING_INSTANCE); - Codec codec = codecService.codec("default"); + Codec codec = getUnwrappedCodec(codecService); KnnVectorsFormat knnVectorsFormat; if (CodecService.ZSTD_STORED_FIELDS_FEATURE_FLAG) { assertThat(codec, instanceOf(PerFieldMapperCodec.class)); knnVectorsFormat = ((PerFieldMapperCodec) codec).getKnnVectorsFormatForField("field"); } else { - if (codec instanceof CodecService.DeduplicateFieldInfosCodec deduplicateFieldInfosCodec) { - codec = 
deduplicateFieldInfosCodec.delegate(); - } assertThat(codec, instanceOf(LegacyPerFieldMapperCodec.class)); knnVectorsFormat = ((LegacyPerFieldMapperCodec) codec).getKnnVectorsFormatForField("field"); } @@ -2014,15 +2009,12 @@ public void testKnnQuantizedHNSWVectorsFormat() throws IOException { b.endObject(); })); CodecService codecService = new CodecService(mapperService, BigArrays.NON_RECYCLING_INSTANCE); - Codec codec = codecService.codec("default"); + Codec codec = getUnwrappedCodec(codecService); KnnVectorsFormat knnVectorsFormat; if (CodecService.ZSTD_STORED_FIELDS_FEATURE_FLAG) { assertThat(codec, instanceOf(PerFieldMapperCodec.class)); knnVectorsFormat = ((PerFieldMapperCodec) codec).getKnnVectorsFormatForField("field"); } else { - if (codec instanceof CodecService.DeduplicateFieldInfosCodec deduplicateFieldInfosCodec) { - codec = deduplicateFieldInfosCodec.delegate(); - } assertThat(codec, instanceOf(LegacyPerFieldMapperCodec.class)); knnVectorsFormat = ((LegacyPerFieldMapperCodec) codec).getKnnVectorsFormatForField("field"); } @@ -2060,15 +2052,12 @@ public void testKnnBBQHNSWVectorsFormat() throws IOException { b.endObject(); })); CodecService codecService = new CodecService(mapperService, BigArrays.NON_RECYCLING_INSTANCE); - Codec codec = codecService.codec("default"); + Codec codec = getUnwrappedCodec(codecService); KnnVectorsFormat knnVectorsFormat; if (CodecService.ZSTD_STORED_FIELDS_FEATURE_FLAG) { assertThat(codec, instanceOf(PerFieldMapperCodec.class)); knnVectorsFormat = ((PerFieldMapperCodec) codec).getKnnVectorsFormatForField("field"); } else { - if (codec instanceof CodecService.DeduplicateFieldInfosCodec deduplicateFieldInfosCodec) { - codec = deduplicateFieldInfosCodec.delegate(); - } assertThat(codec, instanceOf(LegacyPerFieldMapperCodec.class)); knnVectorsFormat = ((LegacyPerFieldMapperCodec) codec).getKnnVectorsFormatForField("field"); } @@ -2103,15 +2092,12 @@ public void testKnnBBQIVFVectorsFormat() throws IOException { b.endObject(); 
})); CodecService codecService = new CodecService(mapperService, BigArrays.NON_RECYCLING_INSTANCE); - Codec codec = codecService.codec("default"); + Codec codec = getUnwrappedCodec(codecService); KnnVectorsFormat knnVectorsFormat; if (CodecService.ZSTD_STORED_FIELDS_FEATURE_FLAG) { assertThat(codec, instanceOf(PerFieldMapperCodec.class)); knnVectorsFormat = ((PerFieldMapperCodec) codec).getKnnVectorsFormatForField("field"); } else { - if (codec instanceof CodecService.DeduplicateFieldInfosCodec deduplicateFieldInfosCodec) { - codec = deduplicateFieldInfosCodec.delegate(); - } assertThat(codec, instanceOf(LegacyPerFieldMapperCodec.class)); knnVectorsFormat = ((LegacyPerFieldMapperCodec) codec).getKnnVectorsFormatForField("field"); } @@ -2157,15 +2143,12 @@ public void testKnnHalfByteQuantizedHNSWVectorsFormat() throws IOException { b.endObject(); })); CodecService codecService = new CodecService(mapperService, BigArrays.NON_RECYCLING_INSTANCE); - Codec codec = codecService.codec("default"); + Codec codec = getUnwrappedCodec(codecService); KnnVectorsFormat knnVectorsFormat; if (CodecService.ZSTD_STORED_FIELDS_FEATURE_FLAG) { assertThat(codec, instanceOf(PerFieldMapperCodec.class)); knnVectorsFormat = ((PerFieldMapperCodec) codec).getKnnVectorsFormatForField("field"); } else { - if (codec instanceof CodecService.DeduplicateFieldInfosCodec deduplicateFieldInfosCodec) { - codec = deduplicateFieldInfosCodec.delegate(); - } assertThat(codec, instanceOf(LegacyPerFieldMapperCodec.class)); knnVectorsFormat = ((LegacyPerFieldMapperCodec) codec).getKnnVectorsFormatForField("field"); } @@ -2203,6 +2186,14 @@ public void testInvalidVectorDimensions() { } } + private static Codec getUnwrappedCodec(CodecService codecService) { + Codec codec = codecService.codec("default"); + if (codec instanceof DeduplicateFieldInfosCodec deduplicateFieldInfosCodec) { + codec = deduplicateFieldInfosCodec.delegate(); + } + return codec; + } + @Override protected IngestScriptSupport 
ingestScriptSupport() { throw new AssumptionViolatedException("not supported"); diff --git a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java index 3f235ac37458f..91b0f7fda64e2 100644 --- a/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java +++ b/x-pack/plugin/gpu/src/test/java/org/elasticsearch/xpack/gpu/codec/GPUDenseVectorFieldMapperTests.java @@ -11,6 +11,7 @@ import org.apache.lucene.codecs.KnnVectorsFormat; import org.elasticsearch.common.util.BigArrays; import org.elasticsearch.index.codec.CodecService; +import org.elasticsearch.index.codec.DeduplicateFieldInfosCodec; import org.elasticsearch.index.codec.LegacyPerFieldMapperCodec; import org.elasticsearch.index.codec.PerFieldMapperCodec; import org.elasticsearch.index.mapper.MapperService; @@ -70,13 +71,13 @@ private KnnVectorsFormat getKnnVectorsFormat(String indexOptionsType) throws IOE })); CodecService codecService = new CodecService(mapperService, BigArrays.NON_RECYCLING_INSTANCE); Codec codec = codecService.codec("default"); + if (codec instanceof DeduplicateFieldInfosCodec deduplicateFieldInfosCodec) { + codec = deduplicateFieldInfosCodec.delegate(); + } if (CodecService.ZSTD_STORED_FIELDS_FEATURE_FLAG) { assertThat(codec, instanceOf(PerFieldMapperCodec.class)); return ((PerFieldMapperCodec) codec).getKnnVectorsFormatForField("field"); } else { - if (codec instanceof CodecService.DeduplicateFieldInfosCodec deduplicateFieldInfosCodec) { - codec = deduplicateFieldInfosCodec.delegate(); - } assertThat(codec, instanceOf(LegacyPerFieldMapperCodec.class)); return ((LegacyPerFieldMapperCodec) codec).getKnnVectorsFormatForField("field"); }