Skip to content
6 changes: 6 additions & 0 deletions docs/changelog/129126.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 129126
summary: "Synthetic source: avoid storing multi fields of type text and `match_only_text`\
\ by default"
area: Mapping
type: bug
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.index.IndexVersions;
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.fielddata.FieldDataContext;
Expand Down Expand Up @@ -101,12 +102,9 @@ public static class Builder extends FieldMapper.Builder {
private final Parameter<Map<String, String>> meta = Parameter.metaParam();

private final TextParams.Analyzers analyzers;
private final boolean withinMultiField;

public Builder(String name, IndexAnalyzers indexAnalyzers) {
this(name, IndexVersion.current(), indexAnalyzers);
}

public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers) {
public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers, boolean withinMultiField) {
super(name);
this.indexCreatedVersion = indexCreatedVersion;
this.analyzers = new TextParams.Analyzers(
Expand All @@ -115,6 +113,7 @@ public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers ind
m -> ((MatchOnlyTextFieldMapper) m).positionIncrementGap,
indexCreatedVersion
);
this.withinMultiField = withinMultiField;
}

@Override
Expand All @@ -140,18 +139,21 @@ private MatchOnlyTextFieldType buildFieldType(MapperBuilderContext context) {
/**
 * Builds the {@link MatchOnlyTextFieldMapper} for this builder.
 *
 * <p>Decides whether the mapper should keep its own stored copy of the value when synthetic source is in use:
 * <ul>
 *   <li>For indices created on or after
 *       {@code IndexVersions.MAPPER_TEXT_MATCH_ONLY_MULTI_FIELDS_DEFAULT_NOT_STORED}, the value is stored only if
 *       synthetic source is enabled, this field is not itself declared within a multi field, and it has no
 *       synthetic-source-compatible keyword sub field.</li>
 *   <li>For older indices the previous behaviour is kept: store whenever synthetic source is enabled.</li>
 * </ul>
 *
 * @param context the builder context; consulted for {@code isSourceSynthetic()}
 * @return the configured mapper
 */
@Override
public MatchOnlyTextFieldMapper build(MapperBuilderContext context) {
    MatchOnlyTextFieldType tft = buildFieldType(context);
    final boolean storeSource;
    if (indexCreatedVersion.onOrAfter(IndexVersions.MAPPER_TEXT_MATCH_ONLY_MULTI_FIELDS_DEFAULT_NOT_STORED)) {
        // Skip the extra stored field when this is a multi field or when a compatible keyword sub field exists.
        storeSource = context.isSourceSynthetic()
            && withinMultiField == false
            && multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false;
    } else {
        // Indices created before the version gate keep storing whenever synthetic source is on.
        storeSource = context.isSourceSynthetic();
    }
    return new MatchOnlyTextFieldMapper(leafName(), Defaults.FIELD_TYPE, tft, builderParams(this, context), storeSource, this);
}
}

/**
 * Type parser for this field type. Each builder is created with the index creation version, the index
 * analyzers and whether the field is being declared within a multi field, as reported by the parser context.
 */
public static final TypeParser PARSER = new TypeParser(
    (n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers(), c.isWithinMultiField())
);

public static class MatchOnlyTextFieldType extends StringFieldType {

Expand Down Expand Up @@ -406,6 +408,7 @@ private String storedFieldNameForSyntheticSource() {
private final int positionIncrementGap;
private final boolean storeSource;
private final FieldType fieldType;
private final boolean withinMultiField;

private MatchOnlyTextFieldMapper(
String simpleName,
Expand All @@ -424,6 +427,7 @@ private MatchOnlyTextFieldMapper(
this.indexAnalyzer = builder.analyzers.getIndexAnalyzer();
this.positionIncrementGap = builder.analyzers.positionIncrementGap.getValue();
this.storeSource = storeSource;
this.withinMultiField = builder.withinMultiField;
}

@Override
Expand All @@ -433,7 +437,7 @@ public Map<String, NamedAnalyzer> indexAnalyzers() {

/**
 * Returns a builder initialised from this mapper, for use when merging mappings.
 * The {@code withinMultiField} flag captured at construction time is passed on to the new builder.
 */
@Override
public FieldMapper.Builder getMergeBuilder() {
    return new Builder(leafName(), indexCreatedVersion, indexAnalyzers, withinMultiField).init(this);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.elasticsearch.common.Strings;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.LuceneDocument;
Expand All @@ -46,8 +47,10 @@
import java.util.stream.Collectors;

import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.empty;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.core.Is.is;

public class MatchOnlyTextFieldMapperTests extends MapperTestCase {

Expand Down Expand Up @@ -255,4 +258,91 @@ public void testDocValuesLoadedFromSynthetic() throws IOException {
protected IngestScriptSupport ingestScriptSupport() {
throw new AssumptionViolatedException("not supported");
}

/**
 * Maps a plain {@code match_only_text} field in an index whose source mode is {@code synthetic} and asserts
 * that the {@code name} field is not stored while the {@code name._original} field is stored.
 */
public void testStoreParameterDefaultsSyntheticSource() throws IOException {
    var settingsBuilder = getIndexSettingsBuilder();
    settingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic");
    var syntheticSettings = settingsBuilder.build();

    var nameMapping = mapping(b -> b.startObject("name").field("type", "match_only_text").endObject());
    DocumentMapper documentMapper = createMapperService(syntheticSettings, nameMapping).documentMapper();

    ParsedDocument parsed = documentMapper.parse(source(b -> b.field("name", "quick brown fox")));

    // The field under its own name must not be stored ...
    IndexableFieldType indexedType = parsed.rootDoc().getFields("name").get(0).fieldType();
    assertThat(indexedType.stored(), is(false));

    // ... whereas the "_original" field must be.
    IndexableFieldType originalType = parsed.rootDoc().getFields("name._original").get(0).fieldType();
    assertThat(originalType.stored(), is(true));
}

/**
 * Maps a {@code match_only_text} field with a {@code keyword} sub field in an index whose source mode is
 * {@code synthetic} and asserts that the {@code name} field is not stored and that no
 * {@code name._original} field is added to the document.
 */
public void testStoreParameterDefaultsSyntheticSourceWithKeywordMultiField() throws IOException {
    var settingsBuilder = getIndexSettingsBuilder();
    settingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic");
    var syntheticSettings = settingsBuilder.build();

    var nameMapping = mapping(b -> {
        b.startObject("name").field("type", "match_only_text");
        b.startObject("fields");
        b.startObject("keyword").field("type", "keyword").endObject();
        b.endObject();
        b.endObject();
    });
    DocumentMapper documentMapper = createMapperService(syntheticSettings, nameMapping).documentMapper();

    ParsedDocument parsed = documentMapper.parse(source(b -> b.field("name", "quick brown fox")));

    // The field under its own name must not be stored.
    IndexableFieldType indexedType = parsed.rootDoc().getFields("name").get(0).fieldType();
    assertThat(indexedType.stored(), is(false));

    // No "_original" field is expected when a keyword sub field is present.
    List<IndexableField> originalFields = parsed.rootDoc().getFields("name._original");
    assertThat(originalFields, empty());
}

/**
 * Maps a {@code keyword} field with a {@code match_only_text} sub field in an index whose source mode is
 * {@code synthetic} and asserts that the {@code name.text} sub field is not stored and that no
 * {@code name.text._original} field is added to the document.
 */
public void testStoreParameterDefaultsSyntheticSourceTextFieldIsMultiField() throws IOException {
    var settingsBuilder = getIndexSettingsBuilder();
    settingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic");
    var syntheticSettings = settingsBuilder.build();

    var nameMapping = mapping(b -> {
        b.startObject("name").field("type", "keyword");
        b.startObject("fields");
        b.startObject("text").field("type", "match_only_text").endObject();
        b.endObject();
        b.endObject();
    });
    DocumentMapper documentMapper = createMapperService(syntheticSettings, nameMapping).documentMapper();

    ParsedDocument parsed = documentMapper.parse(source(b -> b.field("name", "quick brown fox")));

    // The sub field must not be stored.
    IndexableFieldType subFieldType = parsed.rootDoc().getFields("name.text").get(0).fieldType();
    assertThat(subFieldType.stored(), is(false));

    // No "_original" field is expected for the sub field.
    List<IndexableField> originalFields = parsed.rootDoc().getFields("name.text._original");
    assertThat(originalFields, empty());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ private static Version parseUnchecked(String version) {
public static final IndexVersion DEFAULT_TO_ACORN_HNSW_FILTER_HEURISTIC = def(9_026_0_00, Version.LUCENE_10_2_1);
public static final IndexVersion SEQ_NO_WITHOUT_POINTS = def(9_027_0_00, Version.LUCENE_10_2_1);
public static final IndexVersion INDEX_INT_SORT_INT_TYPE = def(9_028_0_00, Version.LUCENE_10_2_1);
public static final IndexVersion MAPPER_TEXT_MATCH_ONLY_MULTI_FIELDS_DEFAULT_NOT_STORED = def(9_029_0_00, Version.LUCENE_10_2_1);

/*
* STOP! READ THIS FIRST! No, really,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -287,11 +287,19 @@ public static class Builder extends FieldMapper.Builder {

final TextParams.Analyzers analyzers;

private final boolean withinMultiField;

/**
 * Convenience constructor that uses {@code IndexVersion.current()} as the index creation version and
 * treats the field as not being declared within a multi field.
 *
 * @param name                     the field name
 * @param indexAnalyzers           the analyzers available to the index
 * @param isSyntheticSourceEnabled whether synthetic source is enabled for the index
 */
public Builder(String name, IndexAnalyzers indexAnalyzers, boolean isSyntheticSourceEnabled) {
    this(name, IndexVersion.current(), indexAnalyzers, isSyntheticSourceEnabled, false);
}

public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers, boolean isSyntheticSourceEnabled) {
public Builder(
String name,
IndexVersion indexCreatedVersion,
IndexAnalyzers indexAnalyzers,
boolean isSyntheticSourceEnabled,
boolean withinMultiField
) {
super(name);

// If synthetic source is used we need to either store this field
Expand All @@ -300,10 +308,17 @@ public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers ind
// storing the field without requiring users to explicitly set 'store'.
//
// If 'store' parameter was explicitly provided we'll reject the request.
this.store = Parameter.storeParam(
m -> ((TextFieldMapper) m).store,
() -> isSyntheticSourceEnabled && multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false
);
// Note that if current builder is a multi field, then we don't need to store, given that responsibility lies with parent field
this.withinMultiField = withinMultiField;
this.store = Parameter.storeParam(m -> ((TextFieldMapper) m).store, () -> {
if (indexCreatedVersion.onOrAfter(IndexVersions.MAPPER_TEXT_MATCH_ONLY_MULTI_FIELDS_DEFAULT_NOT_STORED)) {
return isSyntheticSourceEnabled
&& this.withinMultiField == false
&& multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false;
} else {
return isSyntheticSourceEnabled;
}
});
this.indexCreatedVersion = indexCreatedVersion;
this.analyzers = new TextParams.Analyzers(
indexAnalyzers,
Expand Down Expand Up @@ -482,7 +497,13 @@ public TextFieldMapper build(MapperBuilderContext context) {
}

public static final TypeParser PARSER = createTypeParserWithLegacySupport(
(n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers(), SourceFieldMapper.isSynthetic(c.getIndexSettings()))
(n, c) -> new Builder(
n,
c.indexVersionCreated(),
c.getIndexAnalyzers(),
SourceFieldMapper.isSynthetic(c.getIndexSettings()),
c.isWithinMultiField()
)
);

private static class PhraseWrappedAnalyzer extends AnalyzerWrapper {
Expand Down Expand Up @@ -1304,6 +1325,7 @@ public Query existsQuery(SearchExecutionContext context) {
private final SubFieldInfo phraseFieldInfo;

private final boolean isSyntheticSourceEnabled;
private final boolean isWithinMultiField;

private TextFieldMapper(
String simpleName,
Expand Down Expand Up @@ -1337,6 +1359,7 @@ private TextFieldMapper(
this.freqFilter = builder.freqFilter.getValue();
this.fieldData = builder.fieldData.get();
this.isSyntheticSourceEnabled = builder.isSyntheticSourceEnabled;
this.isWithinMultiField = builder.withinMultiField;
}

@Override
Expand All @@ -1360,7 +1383,7 @@ public Map<String, NamedAnalyzer> indexAnalyzers() {

/**
 * Returns a builder initialised from this mapper, for use when merging mappings.
 * Both the synthetic-source flag and the {@code isWithinMultiField} flag captured at construction time are
 * passed on to the new builder.
 */
@Override
public FieldMapper.Builder getMergeBuilder() {
    return new Builder(leafName(), indexCreatedVersion, indexAnalyzers, isSyntheticSourceEnabled, isWithinMultiField).init(this);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,73 @@ public void testStoreParameterDefaults() throws IOException {
}
}

/**
 * Maps a plain {@code text} field in an index whose source mode is {@code synthetic} and asserts that the
 * {@code name} field is stored.
 */
public void testStoreParameterDefaultsSyntheticSource() throws IOException {
    var settingsBuilder = getIndexSettingsBuilder();
    settingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic");
    var syntheticSettings = settingsBuilder.build();

    var nameMapping = mapping(b -> b.startObject("name").field("type", "text").endObject());
    DocumentMapper documentMapper = createMapperService(syntheticSettings, nameMapping).documentMapper();

    ParsedDocument parsed = documentMapper.parse(source(b -> b.field("name", "quick brown fox")));

    // With no sub fields, the text field itself is expected to be stored.
    IndexableFieldType nameFieldType = parsed.rootDoc().getFields("name").get(0).fieldType();
    assertThat(nameFieldType.stored(), is(true));
}

/**
 * Maps a {@code text} field with a {@code keyword} sub field in an index whose source mode is
 * {@code synthetic} and asserts that the {@code name} field is not stored.
 */
public void testStoreParameterDefaultsSyntheticSourceWithKeywordMultiField() throws IOException {
    var settingsBuilder = getIndexSettingsBuilder();
    settingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic");
    var syntheticSettings = settingsBuilder.build();

    var nameMapping = mapping(b -> {
        b.startObject("name").field("type", "text");
        b.startObject("fields");
        b.startObject("keyword").field("type", "keyword").endObject();
        b.endObject();
        b.endObject();
    });
    DocumentMapper documentMapper = createMapperService(syntheticSettings, nameMapping).documentMapper();

    ParsedDocument parsed = documentMapper.parse(source(b -> b.field("name", "quick brown fox")));

    // With a keyword sub field present, the text field is expected not to be stored.
    IndexableFieldType nameFieldType = parsed.rootDoc().getFields("name").get(0).fieldType();
    assertThat(nameFieldType.stored(), is(false));
}

/**
 * Maps a {@code keyword} field with a {@code text} sub field in an index whose source mode is
 * {@code synthetic} and asserts that the {@code name.text} sub field is not stored.
 */
public void testStoreParameterDefaultsSyntheticSourceTextFieldIsMultiField() throws IOException {
    var settingsBuilder = getIndexSettingsBuilder();
    settingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic");
    var syntheticSettings = settingsBuilder.build();

    var nameMapping = mapping(b -> {
        b.startObject("name").field("type", "keyword");
        b.startObject("fields");
        b.startObject("text").field("type", "text").endObject();
        b.endObject();
        b.endObject();
    });
    DocumentMapper documentMapper = createMapperService(syntheticSettings, nameMapping).documentMapper();

    ParsedDocument parsed = documentMapper.parse(source(b -> b.field("name", "quick brown fox")));

    // A text field declared as a sub field is expected not to be stored.
    IndexableFieldType subFieldType = parsed.rootDoc().getFields("name.text").get(0).fieldType();
    assertThat(subFieldType.stored(), is(false));
}

public void testBWCSerialization() throws IOException {
MapperService mapperService = createMapperService(fieldMapping(b -> {
b.field("type", "text");
Expand Down
Loading