diff --git a/docs/changelog/134582.yaml b/docs/changelog/134582.yaml new file mode 100644 index 0000000000000..269b267a4bedb --- /dev/null +++ b/docs/changelog/134582.yaml @@ -0,0 +1,6 @@ +pr: 134582 +summary: Fixed match only text block loader not working when a keyword multi field + is present +area: Mapping +type: bug +issues: [] diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java index 645c0cffe8065..bc7fec81c69d6 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java @@ -133,27 +133,28 @@ protected Parameter[] getParameters() { return new Parameter[] { meta }; } - private MatchOnlyTextFieldType buildFieldType(MapperBuilderContext context) { + private MatchOnlyTextFieldType buildFieldType(MapperBuilderContext context, MultiFields multiFields) { NamedAnalyzer searchAnalyzer = analyzers.getSearchAnalyzer(); NamedAnalyzer searchQuoteAnalyzer = analyzers.getSearchQuoteAnalyzer(); NamedAnalyzer indexAnalyzer = analyzers.getIndexAnalyzer(); TextSearchInfo tsi = new TextSearchInfo(Defaults.FIELD_TYPE, null, searchAnalyzer, searchQuoteAnalyzer); - MatchOnlyTextFieldType ft = new MatchOnlyTextFieldType( + return new MatchOnlyTextFieldType( context.buildFullName(leafName()), tsi, indexAnalyzer, context.isSourceSynthetic(), meta.getValue(), withinMultiField, - multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField(), - storedFieldInBinaryFormat + storedFieldInBinaryFormat, + // match only text fields are not stored by definition + TextFieldMapper.SyntheticSourceHelper.syntheticSourceDelegate(false, multiFields) ); - return ft; } @Override public MatchOnlyTextFieldMapper build(MapperBuilderContext context) { - MatchOnlyTextFieldType tft = buildFieldType(context); + BuilderParams builderParams = builderParams(this, context); + MatchOnlyTextFieldType tft = buildFieldType(context, builderParams.multiFields()); final boolean storeSource; if (multiFieldsNotStoredByDefaultIndexVersionCheck(indexCreatedVersion)) { storeSource = context.isSourceSynthetic() @@ -164,6 +165,7 @@ public MatchOnlyTextFieldMapper build(MapperBuilderContext context) { } return new MatchOnlyTextFieldMapper(leafName(), Defaults.FIELD_TYPE, tft, builderParams(this, context), storeSource, this); } + } private static boolean isSyntheticSourceStoredFieldInBinaryFormat(IndexVersion indexCreatedVersion) { @@ -191,7 +193,6 @@ public static class MatchOnlyTextFieldType extends StringFieldType { private final String originalName; private final boolean withinMultiField; - private final boolean hasCompatibleMultiFields; private final boolean storedFieldInBinaryFormat; public MatchOnlyTextFieldType( @@ -201,15 +202,14 @@ public MatchOnlyTextFieldType( boolean isSyntheticSource, Map meta, boolean withinMultiField, - boolean hasCompatibleMultiFields, - boolean storedFieldInBinaryFormat + boolean storedFieldInBinaryFormat, + KeywordFieldMapper.KeywordFieldType syntheticSourceDelegate ) { super(name, true, false, false, tsi, meta); this.indexAnalyzer = Objects.requireNonNull(indexAnalyzer); - this.textFieldType = new TextFieldType(name, isSyntheticSource); + this.textFieldType = new TextFieldType(name, isSyntheticSource, syntheticSourceDelegate); this.originalName = isSyntheticSource ? name + "._original" : null; this.withinMultiField = withinMultiField; - this.hasCompatibleMultiFields = hasCompatibleMultiFields; this.storedFieldInBinaryFormat = storedFieldInBinaryFormat; } @@ -222,7 +222,7 @@ public MatchOnlyTextFieldType(String name) { Collections.emptyMap(), false, false, - false + null ); } @@ -270,26 +270,23 @@ private IOFunction, IOExcepti } else { assert false : "parent field should either be stored or have doc values"; } - } else if (searchExecutionContext.isSourceSynthetic() && hasCompatibleMultiFields) { - var mapper = (MatchOnlyTextFieldMapper) searchExecutionContext.getMappingLookup().getMapper(name()); - var kwd = TextFieldMapper.SyntheticSourceHelper.getKeywordFieldMapperForSyntheticSource(mapper); + } else if (searchExecutionContext.isSourceSynthetic() && textFieldType.syntheticSourceDelegate() != null) { + var kwd = textFieldType.syntheticSourceDelegate(); if (kwd != null) { - var fieldType = kwd.fieldType(); - - if (fieldType.ignoreAbove().isSet()) { - if (fieldType.isStored()) { - return storedFieldFetcher(fieldType.name(), fieldType.originalName()); - } else if (fieldType.hasDocValues()) { - var ifd = searchExecutionContext.getForField(fieldType, MappedFieldType.FielddataOperation.SEARCH); - return combineFieldFetchers(docValuesFieldFetcher(ifd), storedFieldFetcher(fieldType.originalName())); + if (kwd.ignoreAbove().isSet()) { + if (kwd.isStored()) { + return storedFieldFetcher(kwd.name(), kwd.originalName()); + } else if (kwd.hasDocValues()) { + var ifd = searchExecutionContext.getForField(kwd, MappedFieldType.FielddataOperation.SEARCH); + return combineFieldFetchers(docValuesFieldFetcher(ifd), storedFieldFetcher(kwd.originalName())); } } - if (fieldType.isStored()) { - return storedFieldFetcher(fieldType.name()); - } else if (fieldType.hasDocValues()) { - var ifd = searchExecutionContext.getForField(fieldType, MappedFieldType.FielddataOperation.SEARCH); + if (kwd.isStored()) { + return storedFieldFetcher(kwd.name()); + } else if (kwd.hasDocValues()) { + var ifd = searchExecutionContext.getForField(kwd, MappedFieldType.FielddataOperation.SEARCH); return docValuesFieldFetcher(ifd); } else { assert false : "multi field should either be stored or have doc values"; @@ -512,7 +509,7 @@ public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, return toQuery(query, queryShardContext); } - private static class BytesFromMixedStringsBytesRefBlockLoader extends BlockStoredFieldsReader.StoredFieldsBlockLoader { + static class BytesFromMixedStringsBytesRefBlockLoader extends BlockStoredFieldsReader.StoredFieldsBlockLoader { BytesFromMixedStringsBytesRefBlockLoader(String field) { super(field); } @@ -543,12 +540,27 @@ protected BytesRef toBytesRef(Object v) { @Override public BlockLoader blockLoader(BlockLoaderContext blContext) { if (textFieldType.isSyntheticSource()) { - if (storedFieldInBinaryFormat) { - return new BlockStoredFieldsReader.BytesFromBytesRefsBlockLoader(storedFieldNameForSyntheticSource()); - } else { - return new BytesFromMixedStringsBytesRefBlockLoader(storedFieldNameForSyntheticSource()); + // if there is no synthetic source delegate, then this match only text field would've created StoredFields for us to use + if (textFieldType.syntheticSourceDelegate() == null) { + if (storedFieldInBinaryFormat) { + return new BlockStoredFieldsReader.BytesFromBytesRefsBlockLoader(storedFieldNameForSyntheticSource()); + } else { + return new BytesFromMixedStringsBytesRefBlockLoader(storedFieldNameForSyntheticSource()); + } + } + + // otherwise, delegate block loading to the synthetic source delegate if possible + if (textFieldType.canUseSyntheticSourceDelegateForLoading()) { + return new BlockLoader.Delegating(textFieldType.syntheticSourceDelegate().blockLoader(blContext)) { + @Override + protected String delegatingTo() { + return textFieldType.syntheticSourceDelegate().name(); + } + }; } } + + // fallback to _source (synthetic or not) SourceValueFetcher fetcher = SourceValueFetcher.toString(blContext.sourcePaths(name())); // MatchOnlyText never has norms, so we have to use the field names field BlockSourceReader.LeafIteratorLookup lookup = BlockSourceReader.lookupFromFieldNames(blContext.fieldNames(), name()); diff --git a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldTypeTests.java b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldTypeTests.java index 6970dd6739ecf..342b093f7f2e9 100644 --- a/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldTypeTests.java +++ b/modules/mapper-extras/src/test/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldTypeTests.java @@ -9,6 +9,7 @@ package org.elasticsearch.index.mapper.extras; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.document.FieldType; import org.apache.lucene.index.Term; import org.apache.lucene.queries.intervals.Intervals; import org.apache.lucene.queries.intervals.IntervalsSource; @@ -27,20 +28,38 @@ import org.apache.lucene.tests.analysis.Token; import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.common.lucene.BytesRefs; +import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.lucene.search.AutomatonQueries; import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.Fuzziness; +import org.elasticsearch.index.IndexMode; +import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.analysis.NamedAnalyzer; +import org.elasticsearch.index.mapper.BlockLoader; +import org.elasticsearch.index.mapper.FieldNamesFieldMapper; import org.elasticsearch.index.mapper.FieldTypeTestCase; +import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.mapper.MappingParserContext; +import org.elasticsearch.index.mapper.TextFieldMapper; +import org.elasticsearch.index.mapper.TextSearchInfo; import org.elasticsearch.index.mapper.extras.MatchOnlyTextFieldMapper.MatchOnlyTextFieldType; +import org.elasticsearch.script.ScriptCompiler; import org.hamcrest.Matchers; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.List; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.mock; + public class MatchOnlyTextFieldTypeTests extends FieldTypeTestCase { public void testTermQuery() { @@ -205,4 +224,149 @@ public void testRangeIntervals() { ((SourceIntervalsSource) rangeIntervals).getIntervalsSource() ); } + + public void test_block_loader_uses_stored_fields_for_loading_when_synthetic_source_delegate_is_absent() { + // given + MatchOnlyTextFieldMapper.MatchOnlyTextFieldType ft = new MatchOnlyTextFieldMapper.MatchOnlyTextFieldType( + "parent", + new TextSearchInfo(TextFieldMapper.Defaults.FIELD_TYPE, null, Lucene.STANDARD_ANALYZER, Lucene.STANDARD_ANALYZER), + mock(NamedAnalyzer.class), + true, + Collections.emptyMap(), + false, + false, + null + ); + + // when + BlockLoader blockLoader = ft.blockLoader(mock(MappedFieldType.BlockLoaderContext.class)); + + // then + // verify that we delegate block loading to the synthetic source delegate + assertThat(blockLoader, Matchers.instanceOf(MatchOnlyTextFieldType.BytesFromMixedStringsBytesRefBlockLoader.class)); + } + + public void test_block_loader_uses_synthetic_source_delegate_when_ignore_above_is_not_set() { + // given + KeywordFieldMapper.KeywordFieldType syntheticSourceDelegate = new KeywordFieldMapper.KeywordFieldType( + "child", + true, + true, + Collections.emptyMap() + ); + + MatchOnlyTextFieldMapper.MatchOnlyTextFieldType ft = new MatchOnlyTextFieldMapper.MatchOnlyTextFieldType( + "parent", + new TextSearchInfo(TextFieldMapper.Defaults.FIELD_TYPE, null, Lucene.STANDARD_ANALYZER, Lucene.STANDARD_ANALYZER), + mock(NamedAnalyzer.class), + true, + Collections.emptyMap(), + false, + false, + syntheticSourceDelegate + ); + + // when + BlockLoader blockLoader = ft.blockLoader(mock(MappedFieldType.BlockLoaderContext.class)); + + // then + // verify that we delegate block loading to the synthetic source delegate + assertThat(blockLoader, Matchers.instanceOf(BlockLoader.Delegating.class)); + } + + public void test_block_loader_does_not_use_synthetic_source_delegate_when_ignore_above_is_set() { + // given + Settings settings = Settings.builder() + .put(IndexMetadata.SETTING_VERSION_CREATED, IndexVersion.current()) + .put(IndexSettings.MODE.getKey(), IndexMode.STANDARD) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) + .build(); + IndexSettings indexSettings = new IndexSettings(IndexMetadata.builder("index").settings(settings).build(), settings); + MappingParserContext mappingParserContext = mock(MappingParserContext.class); + doReturn(settings).when(mappingParserContext).getSettings(); + doReturn(indexSettings).when(mappingParserContext).getIndexSettings(); + doReturn(mock(ScriptCompiler.class)).when(mappingParserContext).scriptCompiler(); + + KeywordFieldMapper.Builder builder = new KeywordFieldMapper.Builder("child", mappingParserContext); + builder.ignoreAbove(123); + + KeywordFieldMapper.KeywordFieldType syntheticSourceDelegate = new KeywordFieldMapper.KeywordFieldType( + "child", + mock(FieldType.class), + mock(NamedAnalyzer.class), + mock(NamedAnalyzer.class), + mock(NamedAnalyzer.class), + builder, + true + ); + + MatchOnlyTextFieldMapper.MatchOnlyTextFieldType ft = new MatchOnlyTextFieldMapper.MatchOnlyTextFieldType( + "parent", + new TextSearchInfo(TextFieldMapper.Defaults.FIELD_TYPE, null, Lucene.STANDARD_ANALYZER, Lucene.STANDARD_ANALYZER), + mock(NamedAnalyzer.class), + true, + Collections.emptyMap(), + false, + false, + syntheticSourceDelegate + ); + + // when + MappedFieldType.BlockLoaderContext blContext = mock(MappedFieldType.BlockLoaderContext.class); + doReturn(FieldNamesFieldMapper.FieldNamesFieldType.get(false)).when(blContext).fieldNames(); + BlockLoader blockLoader = ft.blockLoader(blContext); + + // then + // verify that we don't delegate anything + assertThat(blockLoader, Matchers.not(Matchers.instanceOf(BlockLoader.Delegating.class))); + } + + public void test_block_loader_does_not_use_synthetic_source_delegate_when_ignore_above_is_set_at_index_level() { + // given + Settings settings = Settings.builder() + .put(IndexMetadata.SETTING_VERSION_CREATED, IndexVersion.current()) + .put(IndexSettings.MODE.getKey(), IndexMode.STANDARD) + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1) + .put(IndexSettings.IGNORE_ABOVE_SETTING.getKey(), 123) + .build(); + IndexSettings indexSettings = new IndexSettings(IndexMetadata.builder("index").settings(settings).build(), settings); + MappingParserContext mappingParserContext = mock(MappingParserContext.class); + doReturn(settings).when(mappingParserContext).getSettings(); + doReturn(indexSettings).when(mappingParserContext).getIndexSettings(); + doReturn(mock(ScriptCompiler.class)).when(mappingParserContext).scriptCompiler(); + + KeywordFieldMapper.Builder builder = new KeywordFieldMapper.Builder("child", mappingParserContext); + + KeywordFieldMapper.KeywordFieldType syntheticSourceDelegate = new KeywordFieldMapper.KeywordFieldType( + "child", + mock(FieldType.class), + mock(NamedAnalyzer.class), + mock(NamedAnalyzer.class), + mock(NamedAnalyzer.class), + builder, + true + ); + + MatchOnlyTextFieldMapper.MatchOnlyTextFieldType ft = new MatchOnlyTextFieldMapper.MatchOnlyTextFieldType( + "parent", + new TextSearchInfo(TextFieldMapper.Defaults.FIELD_TYPE, null, Lucene.STANDARD_ANALYZER, Lucene.STANDARD_ANALYZER), + mock(NamedAnalyzer.class), + true, + Collections.emptyMap(), + false, + false, + syntheticSourceDelegate + ); + + // when + MappedFieldType.BlockLoaderContext blContext = mock(MappedFieldType.BlockLoaderContext.class); + doReturn(FieldNamesFieldMapper.FieldNamesFieldType.get(false)).when(blContext).fieldNames(); + BlockLoader blockLoader = ft.blockLoader(blContext); + + // then + // verify that we don't delegate anything + assertThat(blockLoader, Matchers.not(Matchers.instanceOf(BlockLoader.Delegating.class))); + } } diff --git a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java index 8ee639ffc8431..ef2aa9af4ca24 100644 --- a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java +++ b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java @@ -135,7 +135,7 @@ private AnnotatedTextFieldType buildFieldType(FieldType fieldType, MapperBuilder store.getValue(), tsi, context.isSourceSynthetic(), - TextFieldMapper.SyntheticSourceHelper.syntheticSourceDelegate(fieldType, multiFields), + TextFieldMapper.SyntheticSourceHelper.syntheticSourceDelegate(fieldType.stored(), multiFields), meta.getValue() ); } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java index 2912bf4e86921..ca344c134783b 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java @@ -38,6 +38,8 @@ public class MapperFeatures implements FeatureSpecification { public static final NodeFeature SORT_FIELDS_CHECK_FOR_NESTED_OBJECT_FIX = new NodeFeature("mapper.nested.sorting_fields_check_fix"); public static final NodeFeature DYNAMIC_HANDLING_IN_COPY_TO = new NodeFeature("mapper.copy_to.dynamic_handling"); public static final NodeFeature DOC_VALUES_SKIPPER = new NodeFeature("mapper.doc_values_skipper"); + public static final NodeFeature MATCH_ONLY_TEXT_BLOCK_LOADER_FIX = new NodeFeature("mapper.match_only_text_block_loader_fix"); + static final NodeFeature UKNOWN_FIELD_MAPPING_UPDATE_ERROR_MESSAGE = new NodeFeature( "mapper.unknown_field_mapping_update_error_message" ); @@ -80,7 +82,8 @@ public Set getTestFeatures() { SEARCH_LOAD_PER_SHARD, SPARSE_VECTOR_INDEX_OPTIONS_FEATURE, PATTERNED_TEXT, - MULTI_FIELD_UNICODE_OPTIMISATION_FIX + MULTI_FIELD_UNICODE_OPTIMISATION_FIX, + MATCH_ONLY_TEXT_BLOCK_LOADER_FIX ); } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index 96bb20a1bde4f..70bf14c553be7 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -410,7 +410,7 @@ private TextFieldType buildFieldType( store.getValue(), tsi, context.isSourceSynthetic(), - SyntheticSourceHelper.syntheticSourceDelegate(fieldType, multiFields), + SyntheticSourceHelper.syntheticSourceDelegate(fieldType.stored(), multiFields), meta.getValue(), eagerGlobalOrdinals.getValue(), indexPhrases.getValue() @@ -744,6 +744,20 @@ public TextFieldType(String name, boolean isSyntheticSource) { ); } + public TextFieldType(String name, boolean isSyntheticSource, KeywordFieldMapper.KeywordFieldType syntheticSourceDelegate) { + this( + name, + true, + false, + new TextSearchInfo(Defaults.FIELD_TYPE, null, Lucene.STANDARD_ANALYZER, Lucene.STANDARD_ANALYZER), + isSyntheticSource, + syntheticSourceDelegate, + Collections.emptyMap(), + false, + false + ); + } + public boolean fielddata() { return fielddata; } @@ -1598,8 +1612,9 @@ protected void write(XContentBuilder b, Object value) throws IOException { } public static class SyntheticSourceHelper { - public static KeywordFieldMapper.KeywordFieldType syntheticSourceDelegate(FieldType fieldType, MultiFields multiFields) { - if (fieldType.stored()) { + public static KeywordFieldMapper.KeywordFieldType syntheticSourceDelegate(boolean isParentFieldStored, MultiFields multiFields) { + // if the parent field is stored, there is no need to delegate anything as we can get source directly from the stored field + if (isParentFieldStored) { return null; } var kwd = getKeywordFieldMapperForSyntheticSource(multiFields); diff --git a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldTypeTests.java index 50074c50d51c4..fd17c678a4e51 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldTypeTests.java @@ -323,7 +323,6 @@ public void test_block_loader_uses_synthetic_source_delegate_when_ignore_above_i ); // when - ft.blockLoader(mock(MappedFieldType.BlockLoaderContext.class)); BlockLoader blockLoader = ft.blockLoader(mock(MappedFieldType.BlockLoaderContext.class)); // then @@ -372,7 +371,6 @@ public void test_block_loader_does_not_use_synthetic_source_delegate_when_ignore ); // when - ft.blockLoader(mock(MappedFieldType.BlockLoaderContext.class)); BlockLoader blockLoader = ft.blockLoader(mock(MappedFieldType.BlockLoaderContext.class)); // then @@ -420,7 +418,6 @@ public void test_block_loader_does_not_use_synthetic_source_delegate_when_ignore ); // when - ft.blockLoader(mock(MappedFieldType.BlockLoaderContext.class)); BlockLoader blockLoader = ft.blockLoader(mock(MappedFieldType.BlockLoaderContext.class)); // then diff --git a/x-pack/plugin/logsdb/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/upgrades/MatchOnlyTextRollingUpgradeIT.java b/x-pack/plugin/logsdb/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/upgrades/MatchOnlyTextRollingUpgradeIT.java index df4ef4db1b29d..fcb882f6a63a5 100644 --- a/x-pack/plugin/logsdb/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/upgrades/MatchOnlyTextRollingUpgradeIT.java +++ b/x-pack/plugin/logsdb/qa/rolling-upgrade/src/javaRestTest/java/org/elasticsearch/upgrades/MatchOnlyTextRollingUpgradeIT.java @@ -18,6 +18,7 @@ import org.elasticsearch.common.time.DateFormatter; import org.elasticsearch.common.time.FormatNames; import org.elasticsearch.common.xcontent.XContentHelper; +import org.elasticsearch.index.mapper.MapperFeatures; import org.elasticsearch.test.rest.ObjectPath; import org.elasticsearch.xcontent.XContentType; @@ -30,6 +31,7 @@ import static org.elasticsearch.upgrades.StandardToLogsDbIndexModeRollingUpgradeIT.enableLogsdbByDefault; import static org.elasticsearch.upgrades.StandardToLogsDbIndexModeRollingUpgradeIT.getWriteBackingIndex; +import static org.elasticsearch.upgrades.TextRollingUpgradeIT.randomAlphasDelimitedBySpace; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThanOrEqualTo; @@ -37,8 +39,15 @@ public class MatchOnlyTextRollingUpgradeIT extends AbstractRollingUpgradeWithSecurityTestCase { + private static final String DATA_STREAM = "logs-bwc-test"; + + private static final int IGNORE_ABOVE_MAX = 256; + private static final int NUM_REQUESTS = 4; + private static final int NUM_DOCS_PER_REQUEST = 1024; + static String BULK_ITEM_TEMPLATE = """ + { "create": {} } {"@timestamp": "$now", "host.name": "$host", "method": "$method", "ip": "$ip", "message": "$message", "length": $length, "factor": $factor} """; @@ -53,7 +62,13 @@ public class MatchOnlyTextRollingUpgradeIT extends AbstractRollingUpgradeWithSec "type": "keyword" }, "message": { - "type": "match_only_text" + "type": "match_only_text", + "fields": { + "keyword": { + "ignore_above": $IGNORE_ABOVE, + "type": "keyword" + } + } }, "ip": { "type": "ip" @@ -68,55 +83,82 @@ public class MatchOnlyTextRollingUpgradeIT extends AbstractRollingUpgradeWithSec } }"""; + // when sorted, this message will appear at the top and hence can be used to validate query results + private static String smallestMessage; + public MatchOnlyTextRollingUpgradeIT(@Name("upgradedNodes") int upgradedNodes) { super(upgradedNodes); } public void testIndexing() throws Exception { - String dataStreamName = "logs-bwc-test"; + assumeTrue( + "Match only text block loader fix is not present in this cluster", + oldClusterHasFeature(MapperFeatures.MATCH_ONLY_TEXT_BLOCK_LOADER_FIX) + ); + if (isOldCluster()) { + // given - enable logsdb and create a template startTrial(); enableLogsdbByDefault(); - createTemplate(dataStreamName, getClass().getSimpleName().toLowerCase(Locale.ROOT), TEMPLATE); + String templateId = getClass().getSimpleName().toLowerCase(Locale.ROOT); + createTemplate(DATA_STREAM, templateId, prepareTemplate()); - Instant startTime = Instant.now().minusSeconds(60 * 60); - bulkIndex(dataStreamName, 4, 1024, startTime); + // when - index some documents + bulkIndex(NUM_REQUESTS, NUM_DOCS_PER_REQUEST); - String firstBackingIndex = getWriteBackingIndex(client(), dataStreamName, 0); + // then - verify that logsdb and synthetic source are both enabled + String firstBackingIndex = getWriteBackingIndex(client(), DATA_STREAM, 0); var settings = (Map) getIndexSettingsWithDefaults(firstBackingIndex).get(firstBackingIndex); assertThat(((Map) settings.get("settings")).get("index.mode"), equalTo("logsdb")); assertThat(((Map) settings.get("defaults")).get("index.mapping.source.mode"), equalTo("SYNTHETIC")); - ensureGreen(dataStreamName); - search(dataStreamName); - query(dataStreamName); + // when/then - run some queries and verify results + ensureGreen(DATA_STREAM); + search(DATA_STREAM); + query(DATA_STREAM); + } else if (isMixedCluster()) { - Instant startTime = Instant.now().minusSeconds(60 * 30); - bulkIndex(dataStreamName, 4, 1024, startTime); + // when + bulkIndex(NUM_REQUESTS, NUM_DOCS_PER_REQUEST); + + // when/then + ensureGreen(DATA_STREAM); + search(DATA_STREAM); + query(DATA_STREAM); - ensureGreen(dataStreamName); - search(dataStreamName); - query(dataStreamName); } else if (isUpgradedCluster()) { - ensureGreen(dataStreamName); - Instant startTime = Instant.now(); - bulkIndex(dataStreamName, 4, 1024, startTime); - search(dataStreamName); - query(dataStreamName); + // when/then + ensureGreen(DATA_STREAM); + bulkIndex(NUM_REQUESTS, NUM_DOCS_PER_REQUEST); + search(DATA_STREAM); + query(DATA_STREAM); - var forceMergeRequest = new Request("POST", "/" + dataStreamName + "/_forcemerge"); + // when/then continued - force merge all shard segments into one + var forceMergeRequest = new Request("POST", "/" + DATA_STREAM + "/_forcemerge"); forceMergeRequest.addParameter("max_num_segments", "1"); assertOK(client().performRequest(forceMergeRequest)); - ensureGreen(dataStreamName); - search(dataStreamName); - query(dataStreamName); + // then continued + ensureGreen(DATA_STREAM); + search(DATA_STREAM); + query(DATA_STREAM); + } + } + + private String prepareTemplate() { + boolean shouldSetIgnoreAbove = randomBoolean(); + if (shouldSetIgnoreAbove) { + return TEMPLATE.replace("$IGNORE_ABOVE", String.valueOf(randomInt(IGNORE_ABOVE_MAX))); } + + // removes the entire line that defines ignore_above + return TEMPLATE.replaceAll("(?m)^\\s*\"ignore_above\":\\s*\\$IGNORE_ABOVE\\s*,?\\s*\\n?", ""); } static void createTemplate(String dataStreamName, String id, String template) throws IOException { final String INDEX_TEMPLATE = """ { + "priority": 500, "index_patterns": ["$DATASTREAM"], "template": $TEMPLATE, "data_stream": { @@ -127,46 +169,59 @@ static void createTemplate(String dataStreamName, String id, String template) th assertOK(client().performRequest(putIndexTemplateRequest)); } - static String bulkIndex(String dataStreamName, int numRequest, int numDocs, Instant startTime) throws Exception { + private void bulkIndex(int numRequest, int numDocs) throws Exception { String firstIndex = null; + Instant startTime = Instant.now().minusSeconds(60 * 60); + for (int i = 0; i < numRequest; i++) { - var bulkRequest = new Request("POST", "/" + dataStreamName + "/_bulk"); - StringBuilder requestBody = new StringBuilder(); - for (int j = 0; j < numDocs; j++) { - String hostName = "host" + j % 50; // Not realistic, but makes asserting search / query response easier. - String methodName = "method" + j % 5; - String ip = NetworkAddress.format(randomIp(true)); - String param = "chicken" + randomInt(5); - String message = "the quick brown fox jumps over the " + param; - long length = randomLong(); - double factor = randomDouble(); - - requestBody.append("{\"create\": {}}"); - requestBody.append('\n'); - requestBody.append( - BULK_ITEM_TEMPLATE.replace("$now", formatInstant(startTime)) - .replace("$host", hostName) - .replace("$method", methodName) - .replace("$ip", ip) - .replace("$message", message) - .replace("$length", Long.toString(length)) - .replace("$factor", Double.toString(factor)) - ); - requestBody.append('\n'); - - startTime = startTime.plusMillis(1); - } - bulkRequest.setJsonEntity(requestBody.toString()); + var bulkRequest = new Request("POST", "/" + DATA_STREAM + "/_bulk"); + bulkRequest.setJsonEntity(bulkIndexRequestBody(numDocs, startTime)); bulkRequest.addParameter("refresh", "true"); + var response = client().performRequest(bulkRequest); - assertOK(response); var responseBody = entityAsMap(response); + + assertOK(response); assertThat("errors in response:\n " + responseBody, responseBody.get("errors"), equalTo(false)); if (firstIndex == null) { firstIndex = (String) ((Map) ((Map) ((List) responseBody.get("items")).get(0)).get("create")).get("_index"); } } - return firstIndex; + } + + private String bulkIndexRequestBody(int numDocs, Instant startTime) { + StringBuilder requestBody = new StringBuilder(); + + for (int j = 0; j < numDocs; j++) { + String hostName = "host" + j % 50; // Not realistic, but makes asserting search / query response easier. + String methodName = "method" + j % 5; + String ip = NetworkAddress.format(randomIp(true)); + String message = randomAlphasDelimitedBySpace(10, 1, 15); + recordSmallestMessage(message); + long length = randomLong(); + double factor = randomDouble(); + + requestBody.append( + BULK_ITEM_TEMPLATE.replace("$now", formatInstant(startTime)) + .replace("$host", hostName) + .replace("$method", methodName) + .replace("$ip", ip) + .replace("$message", message) + .replace("$length", Long.toString(length)) + .replace("$factor", Double.toString(factor)) + ); + requestBody.append('\n'); + + startTime = startTime.plusMillis(1); + } + + return requestBody.toString(); + } + + private void recordSmallestMessage(final String message) { + if (smallestMessage == null || message.compareTo(smallestMessage) < 0) { + smallestMessage = message; + } } void search(String dataStreamName) throws Exception { @@ -174,24 +229,19 @@ void search(String dataStreamName) throws Exception { searchRequest.addParameter("pretty", "true"); searchRequest.setJsonEntity(""" { - "size": 500, - "query": { - "match_phrase": { - "message": "chicken" - } - } + "size": 500 } - """.replace("chicken", "chicken" + randomInt(5))); + """); var response = client().performRequest(searchRequest); assertOK(response); var responseBody = entityAsMap(response); logger.info("{}", responseBody); Integer totalCount = ObjectPath.evaluate(responseBody, "hits.total.value"); - assertThat(totalCount, greaterThanOrEqualTo(512)); + assertThat(totalCount, greaterThanOrEqualTo(NUM_REQUESTS * NUM_DOCS_PER_REQUEST)); } - void query(String dataStreamName) throws Exception { + private void query(String dataStreamName) throws Exception { var queryRequest = new Request("POST", "/_query"); queryRequest.addParameter("pretty", "true"); queryRequest.setJsonEntity(""" @@ -205,18 +255,18 @@ void query(String dataStreamName) throws Exception { logger.info("{}", responseBody); String column1 = ObjectPath.evaluate(responseBody, "columns.0.name"); - String column2 = ObjectPath.evaluate(responseBody, "columns.1.name"); - String column3 = ObjectPath.evaluate(responseBody, "columns.2.name"); assertThat(column1, equalTo("max(length)")); + String column2 = ObjectPath.evaluate(responseBody, "columns.1.name"); assertThat(column2, equalTo("max(factor)")); + String column3 = ObjectPath.evaluate(responseBody, "columns.2.name"); assertThat(column3, equalTo("message")); - String key = ObjectPath.evaluate(responseBody, "values.0.2"); - assertThat(key, equalTo("the quick brown fox jumps over the chicken0")); Long maxRx = ObjectPath.evaluate(responseBody, "values.0.0"); assertThat(maxRx, notNullValue()); Double maxTx = ObjectPath.evaluate(responseBody, "values.0.1"); assertThat(maxTx, notNullValue()); + String key = ObjectPath.evaluate(responseBody, "values.0.2"); + assertThat(key, equalTo(smallestMessage)); } protected static void startTrial() throws IOException {