Skip to content
6 changes: 6 additions & 0 deletions docs/changelog/129126.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 129126
summary: "Synthetic source: avoid storing multi fields of type text and `match_only_text`\
\ by default"
area: Mapping
type: bug
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -140,14 +140,10 @@ private MatchOnlyTextFieldType buildFieldType(MapperBuilderContext context) {
/**
 * Builds the {@link MatchOnlyTextFieldMapper} for this builder.
 *
 * @param context the mapper builder context; consulted for whether the index uses synthetic source
 * @return the mapper, with its store-source flag derived from the context and this builder's multi-field state
 */
@Override
public MatchOnlyTextFieldMapper build(MapperBuilderContext context) {
    MatchOnlyTextFieldType tft = buildFieldType(context);
    // The store-source flag is only set under synthetic source, and only when this field is neither a
    // multi-field itself nor the owner of a keyword multi-field that is compatible with synthetic source.
    boolean storeSource = context.isSourceSynthetic()
        && currentFieldIsAMultiField == false
        && multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false;
    return new MatchOnlyTextFieldMapper(leafName(), Defaults.FIELD_TYPE, tft, builderParams(this, context), storeSource, this);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.elasticsearch.common.Strings;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.LuceneDocument;
Expand All @@ -46,8 +47,10 @@
import java.util.stream.Collectors;

import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.empty;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.core.Is.is;

public class MatchOnlyTextFieldMapperTests extends MapperTestCase {

Expand Down Expand Up @@ -255,4 +258,91 @@ public void testDocValuesLoadedFromSynthetic() throws IOException {
protected IngestScriptSupport ingestScriptSupport() {
// No IngestScriptSupport is returned here: the hook raises an assumption violation with the message
// "not supported". NOTE(review): presumably the test framework treats this as a skip — confirm.
throw new AssumptionViolatedException("not supported");
}

/**
 * Maps a plain {@code match_only_text} field with the source mode setting set to "synthetic", parses one
 * document and checks that the first "name" field is not stored while the first "name._original" field is.
 */
public void testStoreParameterDefaultsSyntheticSource() throws IOException {
    var settings = getIndexSettingsBuilder().put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic").build();
    var mapping = mapping(b -> {
        b.startObject("name").field("type", "match_only_text").endObject();
    });
    DocumentMapper documentMapper = createMapperService(settings, mapping).documentMapper();

    ParsedDocument parsed = documentMapper.parse(source(b -> b.field("name", "quick brown fox")));

    // The main field itself is expected not to be stored.
    List<IndexableField> nameFields = parsed.rootDoc().getFields("name");
    IndexableFieldType nameFieldType = nameFields.get(0).fieldType();
    assertThat(nameFieldType.stored(), is(false));

    // The companion "_original" field is expected to be stored.
    List<IndexableField> originalFields = parsed.rootDoc().getFields("name._original");
    IndexableFieldType originalFieldType = originalFields.get(0).fieldType();
    assertThat(originalFieldType.stored(), is(true));
}

/**
 * Maps a {@code match_only_text} field that has a {@code keyword} multi-field, with the source mode setting
 * set to "synthetic", and checks that the first "name" field is not stored and that no "name._original"
 * field is produced.
 */
public void testStoreParameterDefaultsSyntheticSourceWithKeywordMultiField() throws IOException {
    var settings = getIndexSettingsBuilder().put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic").build();
    var mapping = mapping(b -> {
        b.startObject("name").field("type", "match_only_text");
        b.startObject("fields").startObject("keyword").field("type", "keyword").endObject().endObject();
        b.endObject();
    });
    DocumentMapper documentMapper = createMapperService(settings, mapping).documentMapper();

    ParsedDocument parsed = documentMapper.parse(source(b -> b.field("name", "quick brown fox")));

    // The main field itself is expected not to be stored.
    List<IndexableField> nameFields = parsed.rootDoc().getFields("name");
    IndexableFieldType nameFieldType = nameFields.get(0).fieldType();
    assertThat(nameFieldType.stored(), is(false));

    // No "_original" companion field is expected at all.
    List<IndexableField> originalFields = parsed.rootDoc().getFields("name._original");
    assertThat(originalFields, empty());
}

/**
 * Maps a {@code keyword} field whose "text" multi-field is of type {@code match_only_text}, with the source
 * mode setting set to "synthetic", and checks that the first "name.text" field is not stored and that no
 * "name.text._original" field is produced.
 */
public void testStoreParameterDefaultsSyntheticSourceTextFieldIsMultiField() throws IOException {
    var settings = getIndexSettingsBuilder().put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic").build();
    var mapping = mapping(b -> {
        b.startObject("name").field("type", "keyword");
        b.startObject("fields").startObject("text").field("type", "match_only_text").endObject().endObject();
        b.endObject();
    });
    DocumentMapper documentMapper = createMapperService(settings, mapping).documentMapper();

    ParsedDocument parsed = documentMapper.parse(source(b -> b.field("name", "quick brown fox")));

    // The multi-field itself is expected not to be stored.
    List<IndexableField> textFields = parsed.rootDoc().getFields("name.text");
    IndexableFieldType textFieldType = textFields.get(0).fieldType();
    assertThat(textFieldType.stored(), is(false));

    // No "_original" companion field is expected for the multi-field.
    List<IndexableField> originalFields = parsed.rootDoc().getFields("name.text._original");
    assertThat(originalFields, empty());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -88,19 +88,28 @@ public abstract class FieldMapper extends Mapper {
* @param sourceKeepMode mode for storing the field source in synthetic source mode
* @param hasScript whether a script is defined for the field
* @param onScriptError the behaviour for when the defined script fails at runtime
* @param currentFieldIsAMultiField whether current field is part of a multi-field definition
*/
protected record BuilderParams(
MultiFields multiFields,
CopyTo copyTo,
Optional<SourceKeepMode> sourceKeepMode,
boolean hasScript,
OnScriptError onScriptError
OnScriptError onScriptError,
boolean currentFieldIsAMultiField
) {
public static BuilderParams empty() {
return empty;
}

private static final BuilderParams empty = new BuilderParams(MultiFields.empty(), CopyTo.empty(), Optional.empty(), false, null);
private static final BuilderParams empty = new BuilderParams(
MultiFields.empty(),
CopyTo.empty(),
Optional.empty(),
false,
null,
false
);
}

protected final MappedFieldType mappedFieldType;
Expand Down Expand Up @@ -602,6 +611,7 @@ public static class Builder {
private boolean hasSyntheticSourceCompatibleKeywordField;

public Builder add(FieldMapper.Builder builder) {
builder.currentFieldIsAMultiField = true;
mapperBuilders.put(builder.leafName(), builder::build);

if (builder instanceof KeywordFieldMapper.Builder kwd) {
Expand Down Expand Up @@ -1384,6 +1394,7 @@ public abstract static class Builder extends Mapper.Builder implements ToXConten
protected Optional<SourceKeepMode> sourceKeepMode = Optional.empty();
protected boolean hasScript = false;
protected OnScriptError onScriptError = null;
protected boolean currentFieldIsAMultiField = false;

/**
* Creates a new Builder with a field name
Expand All @@ -1396,6 +1407,7 @@ protected Builder(String name) {
* Initialises all parameters from an existing mapper
*/
public Builder init(FieldMapper initializer) {
this.currentFieldIsAMultiField = initializer.builderParams.currentFieldIsAMultiField;
for (Parameter<?> param : getParameters()) {
param.init(initializer);
}
Expand All @@ -1410,8 +1422,15 @@ public Builder addMultiField(FieldMapper.Builder builder) {
return this;
}

protected BuilderParams builderParams(Mapper.Builder mainFieldBuilder, MapperBuilderContext context) {
return new BuilderParams(multiFieldsBuilder.build(mainFieldBuilder, context), copyTo, sourceKeepMode, hasScript, onScriptError);
protected BuilderParams builderParams(FieldMapper.Builder mainFieldBuilder, MapperBuilderContext context) {
return new BuilderParams(
multiFieldsBuilder.build(mainFieldBuilder, context),
copyTo,
sourceKeepMode,
hasScript,
onScriptError,
mainFieldBuilder.currentFieldIsAMultiField
);
}

protected void merge(FieldMapper in, Conflicts conflicts, MapperMergeContext mapperMergeContext) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -300,9 +300,12 @@ public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers ind
// storing the field without requiring users to explicitly set 'store'.
//
// If 'store' parameter was explicitly provided we'll reject the request.
// Note that if the current builder is a multi-field, we don't need to store it, since that responsibility lies with the parent field.
this.store = Parameter.storeParam(
m -> ((TextFieldMapper) m).store,
() -> isSyntheticSourceEnabled && multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false
() -> isSyntheticSourceEnabled
&& currentFieldIsAMultiField == false
&& multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false
);
this.indexCreatedVersion = indexCreatedVersion;
this.analyzers = new TextParams.Analyzers(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,73 @@ public void testStoreParameterDefaults() throws IOException {
}
}

/**
 * Maps a plain {@code text} field with the source mode setting set to "synthetic", parses one document and
 * checks that the first "name" field is stored.
 */
public void testStoreParameterDefaultsSyntheticSource() throws IOException {
    var settings = getIndexSettingsBuilder().put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic").build();
    var mapping = mapping(b -> {
        b.startObject("name").field("type", "text").endObject();
    });
    DocumentMapper documentMapper = createMapperService(settings, mapping).documentMapper();

    ParsedDocument parsed = documentMapper.parse(source(b -> b.field("name", "quick brown fox")));

    List<IndexableField> nameFields = parsed.rootDoc().getFields("name");
    IndexableFieldType nameFieldType = nameFields.get(0).fieldType();
    assertThat(nameFieldType.stored(), is(true));
}

/**
 * Maps a {@code text} field that has a {@code keyword} multi-field, with the source mode setting set to
 * "synthetic", and checks that the first "name" field is not stored.
 */
public void testStoreParameterDefaultsSyntheticSourceWithKeywordMultiField() throws IOException {
    var settings = getIndexSettingsBuilder().put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic").build();
    var mapping = mapping(b -> {
        b.startObject("name").field("type", "text");
        b.startObject("fields").startObject("keyword").field("type", "keyword").endObject().endObject();
        b.endObject();
    });
    DocumentMapper documentMapper = createMapperService(settings, mapping).documentMapper();

    ParsedDocument parsed = documentMapper.parse(source(b -> b.field("name", "quick brown fox")));

    List<IndexableField> nameFields = parsed.rootDoc().getFields("name");
    IndexableFieldType nameFieldType = nameFields.get(0).fieldType();
    assertThat(nameFieldType.stored(), is(false));
}

/**
 * Maps a {@code keyword} field whose "text" multi-field is of type {@code text}, with the source mode setting
 * set to "synthetic", and checks that the first "name.text" field is not stored.
 */
public void testStoreParameterDefaultsSyntheticSourceTextFieldIsMultiField() throws IOException {
    var settings = getIndexSettingsBuilder().put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic").build();
    var mapping = mapping(b -> {
        b.startObject("name").field("type", "keyword");
        b.startObject("fields").startObject("text").field("type", "text").endObject().endObject();
        b.endObject();
    });
    DocumentMapper documentMapper = createMapperService(settings, mapping).documentMapper();

    ParsedDocument parsed = documentMapper.parse(source(b -> b.field("name", "quick brown fox")));

    List<IndexableField> textFields = parsed.rootDoc().getFields("name.text");
    IndexableFieldType textFieldType = textFields.get(0).fieldType();
    assertThat(textFieldType.stored(), is(false));
}

public void testBWCSerialization() throws IOException {
MapperService mapperService = createMapperService(fieldMapping(b -> {
b.field("type", "text");
Expand Down
Loading