6 changes: 6 additions & 0 deletions docs/changelog/134582.yaml
@@ -0,0 +1,6 @@
pr: 134582
summary: Fixed match only text block loader not working when a keyword multi field is present
area: Mapping
type: bug
issues: []
MatchOnlyTextFieldMapper.java
@@ -133,27 +133,27 @@ protected Parameter<?>[] getParameters() {
return new Parameter<?>[] { meta };
}

private MatchOnlyTextFieldType buildFieldType(MapperBuilderContext context) {
private MatchOnlyTextFieldType buildFieldType(MapperBuilderContext context, MultiFields multiFields) {
NamedAnalyzer searchAnalyzer = analyzers.getSearchAnalyzer();
NamedAnalyzer searchQuoteAnalyzer = analyzers.getSearchQuoteAnalyzer();
NamedAnalyzer indexAnalyzer = analyzers.getIndexAnalyzer();
TextSearchInfo tsi = new TextSearchInfo(Defaults.FIELD_TYPE, null, searchAnalyzer, searchQuoteAnalyzer);
MatchOnlyTextFieldType ft = new MatchOnlyTextFieldType(
return new MatchOnlyTextFieldType(
context.buildFullName(leafName()),
tsi,
indexAnalyzer,
context.isSourceSynthetic(),
meta.getValue(),
withinMultiField,
multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField(),
storedFieldInBinaryFormat
storedFieldInBinaryFormat,
TextFieldMapper.SyntheticSourceHelper.syntheticSourceDelegate(getFieldType(), multiFields)
);
return ft;
}

@Override
public MatchOnlyTextFieldMapper build(MapperBuilderContext context) {
MatchOnlyTextFieldType tft = buildFieldType(context);
BuilderParams builderParams = builderParams(this, context);
MatchOnlyTextFieldType tft = buildFieldType(context, builderParams.multiFields());
final boolean storeSource;
if (multiFieldsNotStoredByDefaultIndexVersionCheck(indexCreatedVersion)) {
storeSource = context.isSourceSynthetic()
@@ -164,6 +164,16 @@ public MatchOnlyTextFieldMapper build(MapperBuilderContext context) {
}
return new MatchOnlyTextFieldMapper(leafName(), Defaults.FIELD_TYPE, tft, builderParams(this, context), storeSource, this);
}

/**
* This is a helper function that's useful in TextFieldMapper.SyntheticSourceHelper.syntheticSourceDelegate()
*/
private FieldType getFieldType() {
FieldType fieldType = new FieldType();
// by definition, match_only_text fields are not stored
fieldType.setStored(false);
return fieldType;
}
}

private static boolean isSyntheticSourceStoredFieldInBinaryFormat(IndexVersion indexCreatedVersion) {
@@ -191,7 +201,6 @@ public static class MatchOnlyTextFieldType extends StringFieldType {
private final String originalName;

private final boolean withinMultiField;
private final boolean hasCompatibleMultiFields;
private final boolean storedFieldInBinaryFormat;

public MatchOnlyTextFieldType(
@@ -201,15 +210,14 @@ public MatchOnlyTextFieldType(
boolean isSyntheticSource,
Map<String, String> meta,
boolean withinMultiField,
boolean hasCompatibleMultiFields,
boolean storedFieldInBinaryFormat
boolean storedFieldInBinaryFormat,
KeywordFieldMapper.KeywordFieldType syntheticSourceDelegate
) {
super(name, true, false, false, tsi, meta);
this.indexAnalyzer = Objects.requireNonNull(indexAnalyzer);
this.textFieldType = new TextFieldType(name, isSyntheticSource);
this.textFieldType = new TextFieldType(name, isSyntheticSource, syntheticSourceDelegate);
this.originalName = isSyntheticSource ? name + "._original" : null;
this.withinMultiField = withinMultiField;
this.hasCompatibleMultiFields = hasCompatibleMultiFields;
this.storedFieldInBinaryFormat = storedFieldInBinaryFormat;
}

@@ -222,7 +230,7 @@ public MatchOnlyTextFieldType(String name) {
Collections.emptyMap(),
false,
false,
false
null
);
}

@@ -270,26 +278,23 @@ private IOFunction<LeafReaderContext, CheckedIntFunction<List<Object>, IOExcepti
} else {
assert false : "parent field should either be stored or have doc values";
}
} else if (searchExecutionContext.isSourceSynthetic() && hasCompatibleMultiFields) {
var mapper = (MatchOnlyTextFieldMapper) searchExecutionContext.getMappingLookup().getMapper(name());
var kwd = TextFieldMapper.SyntheticSourceHelper.getKeywordFieldMapperForSyntheticSource(mapper);
} else if (searchExecutionContext.isSourceSynthetic() && textFieldType.syntheticSourceDelegate() != null) {
var kwd = textFieldType.syntheticSourceDelegate();

if (kwd != null) {
var fieldType = kwd.fieldType();

if (fieldType.ignoreAbove().isSet()) {
if (fieldType.isStored()) {
return storedFieldFetcher(fieldType.name(), fieldType.originalName());
} else if (fieldType.hasDocValues()) {
var ifd = searchExecutionContext.getForField(fieldType, MappedFieldType.FielddataOperation.SEARCH);
return combineFieldFetchers(docValuesFieldFetcher(ifd), storedFieldFetcher(fieldType.originalName()));
if (kwd.ignoreAbove().isSet()) {
if (kwd.isStored()) {
return storedFieldFetcher(kwd.name(), kwd.originalName());
} else if (kwd.hasDocValues()) {
var ifd = searchExecutionContext.getForField(kwd, MappedFieldType.FielddataOperation.SEARCH);
return combineFieldFetchers(docValuesFieldFetcher(ifd), storedFieldFetcher(kwd.originalName()));
}
}

if (fieldType.isStored()) {
return storedFieldFetcher(fieldType.name());
} else if (fieldType.hasDocValues()) {
var ifd = searchExecutionContext.getForField(fieldType, MappedFieldType.FielddataOperation.SEARCH);
if (kwd.isStored()) {
return storedFieldFetcher(kwd.name());
} else if (kwd.hasDocValues()) {
var ifd = searchExecutionContext.getForField(kwd, MappedFieldType.FielddataOperation.SEARCH);
return docValuesFieldFetcher(ifd);
} else {
assert false : "multi field should either be stored or have doc values";
@@ -512,7 +517,7 @@ public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions,
return toQuery(query, queryShardContext);
}

private static class BytesFromMixedStringsBytesRefBlockLoader extends BlockStoredFieldsReader.StoredFieldsBlockLoader {
static class BytesFromMixedStringsBytesRefBlockLoader extends BlockStoredFieldsReader.StoredFieldsBlockLoader {
BytesFromMixedStringsBytesRefBlockLoader(String field) {
super(field);
}
@@ -543,12 +548,27 @@ protected BytesRef toBytesRef(Object v) {
@Override
public BlockLoader blockLoader(BlockLoaderContext blContext) {
if (textFieldType.isSyntheticSource()) {
if (storedFieldInBinaryFormat) {
return new BlockStoredFieldsReader.BytesFromBytesRefsBlockLoader(storedFieldNameForSyntheticSource());
} else {
return new BytesFromMixedStringsBytesRefBlockLoader(storedFieldNameForSyntheticSource());
// if there is no synthetic source delegate, then this match only text field would've created StoredFields for us to use
if (textFieldType.syntheticSourceDelegate() == null) {
if (storedFieldInBinaryFormat) {
return new BlockStoredFieldsReader.BytesFromBytesRefsBlockLoader(storedFieldNameForSyntheticSource());
} else {
return new BytesFromMixedStringsBytesRefBlockLoader(storedFieldNameForSyntheticSource());
}
}

// otherwise, delegate block loading to the synthetic source delegate if possible
if (textFieldType.canUseSyntheticSourceDelegateForLoading()) {
return new BlockLoader.Delegating(textFieldType.syntheticSourceDelegate().blockLoader(blContext)) {
@Override
protected String delegatingTo() {
return textFieldType.syntheticSourceDelegate().name();
}
};
}
}

// fallback to _source (synthetic or not)
SourceValueFetcher fetcher = SourceValueFetcher.toString(blContext.sourcePaths(name()));
// MatchOnlyText never has norms, so we have to use the field names field
BlockSourceReader.LeafIteratorLookup lookup = BlockSourceReader.lookupFromFieldNames(blContext.fieldNames(), name());
MatchOnlyTextFieldTypeTests.java
@@ -9,6 +9,7 @@
package org.elasticsearch.index.mapper.extras;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.intervals.Intervals;
import org.apache.lucene.queries.intervals.IntervalsSource;
@@ -27,20 +28,38 @@
import org.apache.lucene.tests.analysis.Token;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.common.lucene.BytesRefs;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.lucene.search.AutomatonQueries;
import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.index.IndexMode;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.mapper.BlockLoader;
import org.elasticsearch.index.mapper.FieldNamesFieldMapper;
import org.elasticsearch.index.mapper.FieldTypeTestCase;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.MappingParserContext;
import org.elasticsearch.index.mapper.TextFieldMapper;
import org.elasticsearch.index.mapper.TextSearchInfo;
import org.elasticsearch.index.mapper.extras.MatchOnlyTextFieldMapper.MatchOnlyTextFieldType;
import org.elasticsearch.script.ScriptCompiler;
import org.hamcrest.Matchers;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.mock;

public class MatchOnlyTextFieldTypeTests extends FieldTypeTestCase {

public void testTermQuery() {
@@ -205,4 +224,149 @@ public void testRangeIntervals() {
((SourceIntervalsSource) rangeIntervals).getIntervalsSource()
);
}

public void test_block_loader_uses_stored_fields_for_loading_when_synthetic_source_delegate_is_absent() {
// given
MatchOnlyTextFieldMapper.MatchOnlyTextFieldType ft = new MatchOnlyTextFieldMapper.MatchOnlyTextFieldType(
"parent",
new TextSearchInfo(TextFieldMapper.Defaults.FIELD_TYPE, null, Lucene.STANDARD_ANALYZER, Lucene.STANDARD_ANALYZER),
mock(NamedAnalyzer.class),
true,
Collections.emptyMap(),
false,
false,
null
);

// when
BlockLoader blockLoader = ft.blockLoader(mock(MappedFieldType.BlockLoaderContext.class));

// then
// verify that we read from the stored field, since no synthetic source delegate is available
assertTrue(blockLoader instanceof MatchOnlyTextFieldType.BytesFromMixedStringsBytesRefBlockLoader);
}

public void test_block_loader_uses_synthetic_source_delegate_when_ignore_above_is_not_set() {
// given
KeywordFieldMapper.KeywordFieldType syntheticSourceDelegate = new KeywordFieldMapper.KeywordFieldType(
"child",
true,
true,
Collections.emptyMap()
);

MatchOnlyTextFieldMapper.MatchOnlyTextFieldType ft = new MatchOnlyTextFieldMapper.MatchOnlyTextFieldType(
"parent",
new TextSearchInfo(TextFieldMapper.Defaults.FIELD_TYPE, null, Lucene.STANDARD_ANALYZER, Lucene.STANDARD_ANALYZER),
mock(NamedAnalyzer.class),
true,
Collections.emptyMap(),
false,
false,
syntheticSourceDelegate
);

// when
BlockLoader blockLoader = ft.blockLoader(mock(MappedFieldType.BlockLoaderContext.class));

// then
// verify that we delegate block loading to the synthetic source delegate
assertTrue(blockLoader instanceof BlockLoader.Delegating);
}

public void test_block_loader_does_not_use_synthetic_source_delegate_when_ignore_above_is_set() {
// given
Settings settings = Settings.builder()
.put(IndexMetadata.SETTING_VERSION_CREATED, IndexVersion.current())
.put(IndexSettings.MODE.getKey(), IndexMode.STANDARD)
.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1)
.build();
IndexSettings indexSettings = new IndexSettings(IndexMetadata.builder("index").settings(settings).build(), settings);
MappingParserContext mappingParserContext = mock(MappingParserContext.class);
doReturn(settings).when(mappingParserContext).getSettings();
doReturn(indexSettings).when(mappingParserContext).getIndexSettings();
doReturn(mock(ScriptCompiler.class)).when(mappingParserContext).scriptCompiler();

KeywordFieldMapper.Builder builder = new KeywordFieldMapper.Builder("child", mappingParserContext);
builder.ignoreAbove(123);

KeywordFieldMapper.KeywordFieldType syntheticSourceDelegate = new KeywordFieldMapper.KeywordFieldType(
"child",
mock(FieldType.class),
mock(NamedAnalyzer.class),
mock(NamedAnalyzer.class),
mock(NamedAnalyzer.class),
builder,
true
);

MatchOnlyTextFieldMapper.MatchOnlyTextFieldType ft = new MatchOnlyTextFieldMapper.MatchOnlyTextFieldType(
"parent",
new TextSearchInfo(TextFieldMapper.Defaults.FIELD_TYPE, null, Lucene.STANDARD_ANALYZER, Lucene.STANDARD_ANALYZER),
mock(NamedAnalyzer.class),
true,
Collections.emptyMap(),
false,
false,
syntheticSourceDelegate
);

// when
MappedFieldType.BlockLoaderContext blContext = mock(MappedFieldType.BlockLoaderContext.class);
doReturn(FieldNamesFieldMapper.FieldNamesFieldType.get(false)).when(blContext).fieldNames();
BlockLoader blockLoader = ft.blockLoader(blContext);

// then
// verify that we don't delegate anything
assertFalse(blockLoader instanceof BlockLoader.Delegating);
}

public void test_block_loader_does_not_use_synthetic_source_delegate_when_ignore_above_is_set_at_index_level() {
// given
Settings settings = Settings.builder()
.put(IndexMetadata.SETTING_VERSION_CREATED, IndexVersion.current())
.put(IndexSettings.MODE.getKey(), IndexMode.STANDARD)
.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1)
.put(IndexSettings.IGNORE_ABOVE_SETTING.getKey(), 123)
.build();
IndexSettings indexSettings = new IndexSettings(IndexMetadata.builder("index").settings(settings).build(), settings);
MappingParserContext mappingParserContext = mock(MappingParserContext.class);
doReturn(settings).when(mappingParserContext).getSettings();
doReturn(indexSettings).when(mappingParserContext).getIndexSettings();
doReturn(mock(ScriptCompiler.class)).when(mappingParserContext).scriptCompiler();

KeywordFieldMapper.Builder builder = new KeywordFieldMapper.Builder("child", mappingParserContext);

KeywordFieldMapper.KeywordFieldType syntheticSourceDelegate = new KeywordFieldMapper.KeywordFieldType(
"child",
mock(FieldType.class),
mock(NamedAnalyzer.class),
mock(NamedAnalyzer.class),
mock(NamedAnalyzer.class),
builder,
true
);

MatchOnlyTextFieldMapper.MatchOnlyTextFieldType ft = new MatchOnlyTextFieldMapper.MatchOnlyTextFieldType(
"parent",
new TextSearchInfo(TextFieldMapper.Defaults.FIELD_TYPE, null, Lucene.STANDARD_ANALYZER, Lucene.STANDARD_ANALYZER),
mock(NamedAnalyzer.class),
true,
Collections.emptyMap(),
false,
false,
syntheticSourceDelegate
);

// when
MappedFieldType.BlockLoaderContext blContext = mock(MappedFieldType.BlockLoaderContext.class);
doReturn(FieldNamesFieldMapper.FieldNamesFieldType.get(false)).when(blContext).fieldNames();
BlockLoader blockLoader = ft.blockLoader(blContext);

// then
// verify that we don't delegate anything
assertFalse(blockLoader instanceof BlockLoader.Delegating);
}
}
2 changes: 2 additions & 0 deletions muted-tests.yml
@@ -528,6 +528,8 @@ tests:
- class: org.elasticsearch.xpack.esql.ccq.MultiClusterSpecIT
method: test {csv-spec:fork.FiveFork}
issue: https://github.com/elastic/elasticsearch/issues/134560
- class: org.elasticsearch.upgrades.MatchOnlyTextRollingUpgradeIT
issue: https://github.com/elastic/elasticsearch/issues/134097
@Kubik42 (Contributor, Author) · Sep 13, 2025:
Preemptively muting this test as it'll fail until the backport is completed. I've run this test locally and it passes.

Contributor:
Normally we don't manually mute tests.

I think the way this is normally addressed is to add a cluster feature representing this change (or use the built-in cluster features representing release versions), and then add an assumeTrue gating the test on the presence of that feature.

assumeTrue("requires storing leaf array offsets", oldClusterHasFeature("gte_v9.1.0"));

@Kubik42 (Contributor, Author):
even if this is temporary? My idea was to mute this test now, backport the fix, and then unmute the test. Without that, the test will fail against previous versions.

Member:
The convention is to use a node feature for this. Tests are muted for genuine test failures.

So let's add a node feature for this (in MapperFeatures#getTestFeatures(...)). The upside is that the test can be backported without having to think about older versions not having this change (e.g. when the changes to MatchOnlyTextRollingUpgradeIT in this PR land in the 8.19 branch).
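
For illustration, a rough sketch of that suggestion — the feature constant, its id string, and the exact placement below are hypothetical placeholders chosen for this example, not code from this PR:

// Hypothetical sketch. In MapperFeatures, a test feature could advertise the fix:
static final NodeFeature MATCH_ONLY_TEXT_BLOCK_LOADER_FIX =
    new NodeFeature("mapper.match_only_text.block_loader_keyword_multi_field_fix");

@Override
public Set<NodeFeature> getTestFeatures() {
    // added alongside the test features already returned here
    return Set.of(MATCH_ONLY_TEXT_BLOCK_LOADER_FIX);
}

// And in MatchOnlyTextRollingUpgradeIT, gate the affected assertions instead of muting the test:
assumeTrue(
    "old cluster must have the match_only_text block loader fix",
    oldClusterHasFeature("mapper.match_only_text.block_loader_keyword_multi_field_fix")
);

With this, the test keeps running against old clusters that already carry the fix and is simply skipped against those that don't, so nothing needs to be muted and later unmuted around the backport.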


# Examples:
#