Skip to content
6 changes: 6 additions & 0 deletions docs/changelog/129126.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 129126
summary: "Synthetic source: avoid storing multi fields of type text and `match_only_text`\
\ by default"
area: Mapping
type: bug
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -102,12 +102,8 @@ public static class Builder extends FieldMapper.Builder {

private final TextParams.Analyzers analyzers;

public Builder(String name, IndexAnalyzers indexAnalyzers) {
this(name, IndexVersion.current(), indexAnalyzers);
}

public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers) {
super(name);
public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers, boolean withinMultiField) {
super(name, withinMultiField);
this.indexCreatedVersion = indexCreatedVersion;
this.analyzers = new TextParams.Analyzers(
indexAnalyzers,
Expand Down Expand Up @@ -140,18 +136,16 @@ private MatchOnlyTextFieldType buildFieldType(MapperBuilderContext context) {
@Override
public MatchOnlyTextFieldMapper build(MapperBuilderContext context) {
MatchOnlyTextFieldType tft = buildFieldType(context);
return new MatchOnlyTextFieldMapper(
leafName(),
Defaults.FIELD_TYPE,
tft,
builderParams(this, context),
context.isSourceSynthetic(),
this
);
boolean storeSource = context.isSourceSynthetic()
&& withinMultiField == false
&& multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false;
return new MatchOnlyTextFieldMapper(leafName(), Defaults.FIELD_TYPE, tft, builderParams(this, context), storeSource, this);
}
}

public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers()));
public static final TypeParser PARSER = new TypeParser(
(n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers(), c.isWithinMultiField())
);

public static class MatchOnlyTextFieldType extends StringFieldType {

Expand Down Expand Up @@ -406,6 +400,7 @@ private String storedFieldNameForSyntheticSource() {
private final int positionIncrementGap;
private final boolean storeSource;
private final FieldType fieldType;
private final boolean withinMultiField;

private MatchOnlyTextFieldMapper(
String simpleName,
Expand All @@ -424,6 +419,7 @@ private MatchOnlyTextFieldMapper(
this.indexAnalyzer = builder.analyzers.getIndexAnalyzer();
this.positionIncrementGap = builder.analyzers.positionIncrementGap.getValue();
this.storeSource = storeSource;
this.withinMultiField = builder.isWithinMultiField();
}

@Override
Expand All @@ -433,7 +429,7 @@ public Map<String, NamedAnalyzer> indexAnalyzers() {

@Override
public FieldMapper.Builder getMergeBuilder() {
return new Builder(leafName(), indexCreatedVersion, indexAnalyzers).init(this);
return new Builder(leafName(), indexCreatedVersion, indexAnalyzers, withinMultiField).init(this);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.elasticsearch.common.Strings;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.LuceneDocument;
Expand All @@ -46,8 +47,10 @@
import java.util.stream.Collectors;

import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.empty;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.core.Is.is;

public class MatchOnlyTextFieldMapperTests extends MapperTestCase {

Expand Down Expand Up @@ -255,4 +258,91 @@ public void testDocValuesLoadedFromSynthetic() throws IOException {
protected IngestScriptSupport ingestScriptSupport() {
throw new AssumptionViolatedException("not supported");
}

public void testStoreParameterDefaultsSyntheticSource() throws IOException {
var indexSettingsBuilder = getIndexSettingsBuilder();
indexSettingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic");
var indexSettings = indexSettingsBuilder.build();

var mapping = mapping(b -> {
b.startObject("name");
b.field("type", "match_only_text");
b.endObject();
});
DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();

var source = source(b -> b.field("name", "quick brown fox"));
ParsedDocument doc = mapper.parse(source);

{
List<IndexableField> fields = doc.rootDoc().getFields("name");
IndexableFieldType fieldType = fields.get(0).fieldType();
assertThat(fieldType.stored(), is(false));
}
{
List<IndexableField> fields = doc.rootDoc().getFields("name._original");
IndexableFieldType fieldType = fields.get(0).fieldType();
assertThat(fieldType.stored(), is(true));
}
}

public void testStoreParameterDefaultsSyntheticSourceWithKeywordMultiField() throws IOException {
var indexSettingsBuilder = getIndexSettingsBuilder();
indexSettingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic");
var indexSettings = indexSettingsBuilder.build();

var mapping = mapping(b -> {
b.startObject("name");
b.field("type", "match_only_text");
b.startObject("fields");
b.startObject("keyword");
b.field("type", "keyword");
b.endObject();
b.endObject();
b.endObject();
});
DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();

var source = source(b -> b.field("name", "quick brown fox"));
ParsedDocument doc = mapper.parse(source);
{
List<IndexableField> fields = doc.rootDoc().getFields("name");
IndexableFieldType fieldType = fields.get(0).fieldType();
assertThat(fieldType.stored(), is(false));
}
{
List<IndexableField> fields = doc.rootDoc().getFields("name._original");
assertThat(fields, empty());
}
}

public void testStoreParameterDefaultsSyntheticSourceTextFieldIsMultiField() throws IOException {
var indexSettingsBuilder = getIndexSettingsBuilder();
indexSettingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic");
var indexSettings = indexSettingsBuilder.build();

var mapping = mapping(b -> {
b.startObject("name");
b.field("type", "keyword");
b.startObject("fields");
b.startObject("text");
b.field("type", "match_only_text");
b.endObject();
b.endObject();
b.endObject();
});
DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();

var source = source(b -> b.field("name", "quick brown fox"));
ParsedDocument doc = mapper.parse(source);
{
List<IndexableField> fields = doc.rootDoc().getFields("name.text");
IndexableFieldType fieldType = fields.get(0).fieldType();
assertThat(fieldType.stored(), is(false));
}
{
List<IndexableField> fields = doc.rootDoc().getFields("name.text._original");
assertThat(fields, empty());
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,11 @@ public abstract class FieldMapper extends Mapper {
static final Parameter<?>[] EMPTY_PARAMETERS = new Parameter[0];

/**
* @param multiFields sub fields of this mapper
* @param copyTo copyTo fields of this mapper
* @param sourceKeepMode mode for storing the field source in synthetic source mode
* @param hasScript whether a script is defined for the field
* @param onScriptError the behaviour for when the defined script fails at runtime
* @param multiFields sub fields of this mapper
* @param copyTo copyTo fields of this mapper
* @param sourceKeepMode mode for storing the field source in synthetic source mode
* @param hasScript whether a script is defined for the field
* @param onScriptError the behaviour for when the defined script fails at runtime
*/
protected record BuilderParams(
MultiFields multiFields,
Expand Down Expand Up @@ -1384,12 +1384,18 @@ public abstract static class Builder extends Mapper.Builder implements ToXConten
protected Optional<SourceKeepMode> sourceKeepMode = Optional.empty();
protected boolean hasScript = false;
protected OnScriptError onScriptError = null;
protected final boolean withinMultiField;

/**
* Creates a new Builder with a field name
*/
protected Builder(String name) {
this(name, false);
}

protected Builder(String name, boolean withinMultiField) {
super(name);
this.withinMultiField = withinMultiField;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we keep this in the TextFieldMapper and MatchOnlyTextFieldMapper? That duplicates the code but exposing FieldMapper#isWithinMultiField is dangerous IMO since none of the other builders are setting this correctly.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see, it could open unintended misuse. Pushed: a98f040

}

/**
Expand All @@ -1410,7 +1416,7 @@ public Builder addMultiField(FieldMapper.Builder builder) {
return this;
}

protected BuilderParams builderParams(Mapper.Builder mainFieldBuilder, MapperBuilderContext context) {
protected BuilderParams builderParams(FieldMapper.Builder mainFieldBuilder, MapperBuilderContext context) {
return new BuilderParams(multiFieldsBuilder.build(mainFieldBuilder, context), copyTo, sourceKeepMode, hasScript, onScriptError);
}

Expand All @@ -1433,6 +1439,10 @@ protected final void validate() {
}
}

public boolean isWithinMultiField() {
return withinMultiField;
}

/**
* @return the list of parameters defined for this mapper
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -288,21 +288,30 @@ public static class Builder extends FieldMapper.Builder {
final TextParams.Analyzers analyzers;

public Builder(String name, IndexAnalyzers indexAnalyzers, boolean isSyntheticSourceEnabled) {
this(name, IndexVersion.current(), indexAnalyzers, isSyntheticSourceEnabled);
this(name, IndexVersion.current(), indexAnalyzers, isSyntheticSourceEnabled, false);
}

public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers, boolean isSyntheticSourceEnabled) {
super(name);
public Builder(
String name,
IndexVersion indexCreatedVersion,
IndexAnalyzers indexAnalyzers,
boolean isSyntheticSourceEnabled,
boolean withinMultiField
) {
super(name, withinMultiField);

// If synthetic source is used we need to either store this field
// to recreate the source or use keyword multi-fields for that.
// So if there are no suitable multi-fields we will default to
// storing the field without requiring users to explicitly set 'store'.
//
// If 'store' parameter was explicitly provided we'll reject the request.
// Note that if current builder is a multi field, then we don't need to store, given that responsibility lies with parent field
this.store = Parameter.storeParam(
m -> ((TextFieldMapper) m).store,
() -> isSyntheticSourceEnabled && multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false
() -> isSyntheticSourceEnabled
&& this.withinMultiField == false
&& multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false
);
this.indexCreatedVersion = indexCreatedVersion;
this.analyzers = new TextParams.Analyzers(
Expand Down Expand Up @@ -482,7 +491,13 @@ public TextFieldMapper build(MapperBuilderContext context) {
}

public static final TypeParser PARSER = createTypeParserWithLegacySupport(
(n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers(), SourceFieldMapper.isSynthetic(c.getIndexSettings()))
(n, c) -> new Builder(
n,
c.indexVersionCreated(),
c.getIndexAnalyzers(),
SourceFieldMapper.isSynthetic(c.getIndexSettings()),
c.isWithinMultiField()
)
);

private static class PhraseWrappedAnalyzer extends AnalyzerWrapper {
Expand Down Expand Up @@ -1304,6 +1319,7 @@ public Query existsQuery(SearchExecutionContext context) {
private final SubFieldInfo phraseFieldInfo;

private final boolean isSyntheticSourceEnabled;
private final boolean isWithinMultiField;

private TextFieldMapper(
String simpleName,
Expand Down Expand Up @@ -1337,6 +1353,7 @@ private TextFieldMapper(
this.freqFilter = builder.freqFilter.getValue();
this.fieldData = builder.fieldData.get();
this.isSyntheticSourceEnabled = builder.isSyntheticSourceEnabled;
this.isWithinMultiField = builder.withinMultiField;
}

@Override
Expand All @@ -1360,7 +1377,7 @@ public Map<String, NamedAnalyzer> indexAnalyzers() {

@Override
public FieldMapper.Builder getMergeBuilder() {
return new Builder(leafName(), indexCreatedVersion, indexAnalyzers, isSyntheticSourceEnabled).init(this);
return new Builder(leafName(), indexCreatedVersion, indexAnalyzers, isSyntheticSourceEnabled, isWithinMultiField).init(this);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,73 @@ public void testStoreParameterDefaults() throws IOException {
}
}

public void testStoreParameterDefaultsSyntheticSource() throws IOException {
var indexSettingsBuilder = getIndexSettingsBuilder();
indexSettingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic");
var indexSettings = indexSettingsBuilder.build();

var mapping = mapping(b -> {
b.startObject("name");
b.field("type", "text");
b.endObject();
});
DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();

var source = source(b -> b.field("name", "quick brown fox"));
ParsedDocument doc = mapper.parse(source);
List<IndexableField> fields = doc.rootDoc().getFields("name");
IndexableFieldType fieldType = fields.get(0).fieldType();
assertThat(fieldType.stored(), is(true));
}

public void testStoreParameterDefaultsSyntheticSourceWithKeywordMultiField() throws IOException {
var indexSettingsBuilder = getIndexSettingsBuilder();
indexSettingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic");
var indexSettings = indexSettingsBuilder.build();

var mapping = mapping(b -> {
b.startObject("name");
b.field("type", "text");
b.startObject("fields");
b.startObject("keyword");
b.field("type", "keyword");
b.endObject();
b.endObject();
b.endObject();
});
DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();

var source = source(b -> b.field("name", "quick brown fox"));
ParsedDocument doc = mapper.parse(source);
List<IndexableField> fields = doc.rootDoc().getFields("name");
IndexableFieldType fieldType = fields.get(0).fieldType();
assertThat(fieldType.stored(), is(false));
}

public void testStoreParameterDefaultsSyntheticSourceTextFieldIsMultiField() throws IOException {
var indexSettingsBuilder = getIndexSettingsBuilder();
indexSettingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic");
var indexSettings = indexSettingsBuilder.build();

var mapping = mapping(b -> {
b.startObject("name");
b.field("type", "keyword");
b.startObject("fields");
b.startObject("text");
b.field("type", "text");
b.endObject();
b.endObject();
b.endObject();
});
DocumentMapper mapper = createMapperService(indexSettings, mapping).documentMapper();

var source = source(b -> b.field("name", "quick brown fox"));
ParsedDocument doc = mapper.parse(source);
List<IndexableField> fields = doc.rootDoc().getFields("name.text");
IndexableFieldType fieldType = fields.get(0).fieldType();
assertThat(fieldType.stored(), is(false));
}

public void testBWCSerialization() throws IOException {
MapperService mapperService = createMapperService(fieldMapping(b -> {
b.field("type", "text");
Expand Down
Loading