Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -229,3 +229,87 @@ synthetic_source text with ignored multi-field and multiple values in the same d
- match: { hits.total.value: 1 }
- match: { hits.hits.0._source.foo.0: "This value is short" }
- match: { hits.hits.0._source.foo.1: "This value is too long and will be ignored" }

---
# Scenario: a text field whose keyword multi-field has a normalizer and
# normalizer_skip_store_original_value set to false. The returned _source is
# expected to keep the original casing of the indexed value.
synthetic_source text with normalized keyword with store original value:
- requires:
cluster_features: [ "mapper.source.mode_from_index_setting" ]
reason: "Source mode configured through index setting"

# Create an index in synthetic source mode with text field "foo" and keyword sub-field "raw".
- do:
indices.create:
index: synthetic_source_test
body:
settings:
index:
mapping.source.mode: synthetic
mappings:
properties:
foo:
type: text
fields:
raw:
type: keyword
normalizer: lowercase
normalizer_skip_store_original_value: false

# Index one mixed-case document and refresh so it is searchable.
- do:
index:
index: synthetic_source_test
id: "1"
refresh: true
body:
foo: "Apache LUCENE powers ELASTIC"

# Search the text field for a term in the document.
- do:
search:
index: synthetic_source_test
body:
query:
match_phrase:
foo: lucene

# Exactly one hit, and _source.foo equals the value exactly as it was indexed.
- match: { hits.total.value: 1 }
- match: { hits.hits.0._source.foo: "Apache LUCENE powers ELASTIC" }

---
synthetic_source text with normalized keyword with skip store original value:
- requires:
cluster_features: [ "mapper.source.mode_from_index_setting" ]
reason: "Source mode configured through index setting"

- do:
indices.create:
index: synthetic_source_test
body:
settings:
index:
mapping.source.mode: synthetic
mappings:
properties:
foo:
type: text
fields:
raw:
type: keyword
normalizer: lowercase
normalizer_skip_store_original_value: true
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe also add a YAML test where we don't set this mapping attribute? It should have the same outcome as this YAML test.


- do:
index:
index: synthetic_source_test
id: "1"
refresh: true
body:
foo: "Apache LUCENE powers ELASTIC"

- do:
search:
index: synthetic_source_test
body:
query:
match_phrase:
foo: lucene

- match: { hits.total.value: 1 }
- match: { hits.hits.0._source.foo: "apache lucene powers elastic" }
Original file line number Diff line number Diff line change
Expand Up @@ -616,7 +616,8 @@ public Builder add(FieldMapper.Builder builder) {
mapperBuilders.put(builder.leafName(), builder::build);

if (builder instanceof KeywordFieldMapper.Builder kwd) {
if (kwd.hasNormalizer() == false && (kwd.hasDocValues() || kwd.isStored())) {
if ((kwd.hasNormalizer() == false || kwd.isNormalizerSkipStoreOriginalValue())
&& (kwd.hasDocValues() || kwd.isStored())) {
hasSyntheticSourceCompatibleKeywordField = true;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,10 @@ public boolean hasNormalizer() {
return this.normalizer.get() != null;
}

/**
 * Reports the current value of the {@code normalizerSkipStoreOriginalValue} parameter.
 *
 * @return the value held by {@code normalizerSkipStoreOriginalValue}
 */
public boolean isNormalizerSkipStoreOriginalValue() {
    final boolean skipStoreOriginalValue = normalizerSkipStoreOriginalValue.getValue();
    return skipStoreOriginalValue;
}

Builder nullValue(String nullValue) {
this.nullValue.setValue(nullValue);
return this;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1748,7 +1748,8 @@ public static KeywordFieldMapper getKeywordFieldMapperForSyntheticSource(Iterabl
*/
private static boolean keywordFieldSupportsSyntheticSource(final KeywordFieldMapper keyword) {
    // A keyword with a normalizer only qualifies when isNormalizerSkipStoreOriginalValue() is true.
    final boolean normalizerAllowsDelegation = keyword.hasNormalizer() == false || keyword.isNormalizerSkipStoreOriginalValue();
    // the field must be stored in some way, whether that be via store or doc values
    final boolean valueIsRetrievable = keyword.fieldType().hasDocValues() || keyword.fieldType().isStored();
    // Both conditions are required. The grouping is explicit so that a stored field with a
    // non-qualifying normalizer is rejected (an unparenthesised "a && b || c" would accept it).
    return normalizerAllowsDelegation && valueIsRetrievable;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -88,10 +88,14 @@
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.in;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.Matchers.not;
import static org.hamcrest.Matchers.nullValue;
import static org.hamcrest.core.Is.is;

public class TextFieldMapperTests extends MapperTestCase {
Expand Down Expand Up @@ -451,6 +455,114 @@ public void testStoreParameterDefaultsSyntheticSourceTextFieldIsMultiFieldBwc()
assertThat(fieldType.stored(), is(true));
}

/**
 * With the index source mode set to {@code synthetic}, maps a text field {@code name} ({@code store=false})
 * that has a plain keyword multi-field, parses one document and asserts that:
 * the root document has no {@code name._original} field, {@code name.keyword} has a doc-values type other
 * than {@code NONE}, {@code name} is not among the decoded ignored-source entry names, and the synthetic
 * source renders the value exactly as supplied.
 */
public void testDelegatesSyntheticSourceToKeywordMultiField() throws IOException {
    var settingsBuilder = getIndexSettingsBuilder();
    settingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic");
    var syntheticSettings = settingsBuilder.build();

    // text field "name" with a single keyword sub-field named "keyword"
    var textWithKeywordMapping = mapping(
        b -> b.startObject("name")
            .field("type", "text")
            .field("store", false)
            .startObject("fields")
            .startObject("keyword")
            .field("type", "keyword")
            .endObject()
            .endObject()
            .endObject()
    );
    DocumentMapper documentMapper = createMapperService(syntheticSettings, textWithKeywordMapping).documentMapper();

    ParsedDocument parsed = documentMapper.parse(source(b -> b.field("name", "QUICK Brown fox")));
    var rootDocument = parsed.rootDoc();

    IndexableField originalValueField = rootDocument.getField("name._original");
    assertThat(originalValueField, nullValue());

    IndexableFieldType keywordType = rootDocument.getField("name.keyword").fieldType();
    assertThat(keywordType.docValuesType(), not(is(DocValuesType.NONE)));

    // Collect the names of every entry recorded in the ignored-source field(s).
    Set<String> ignoredNames = rootDocument.getFields(IgnoredSourceFieldMapper.NAME)
        .stream()
        .map(IndexableField::binaryValue)
        .flatMap(encoded -> IgnoredSourceFieldMapper.CoalescedIgnoredSourceEncoding.decode(encoded).stream())
        .map(IgnoredSourceFieldMapper.NameValue::name)
        .collect(Collectors.toSet());
    assertThat("name", not(in(ignoredNames)));

    String renderedSource = syntheticSource(documentMapper, b -> b.field("name", "QUICK Brown fox"));
    assertThat(renderedSource, equalTo("{\"name\":\"QUICK Brown fox\"}"));
}

/**
 * With the index source mode set to {@code synthetic}, maps a text field {@code name} ({@code store=false})
 * whose keyword multi-field uses the {@code lowercase} normalizer with
 * {@code normalizer_skip_store_original_value=false}. After parsing one document it asserts that:
 * the root document has no {@code name._original} field, {@code name} IS among the decoded ignored-source
 * entry names, and the synthetic source renders the value with its original casing.
 */
public void testDoesNotDelegateSyntheticSourceForNormalizedKeywordMultiFieldWhenStoreOriginalValue() throws IOException {
    var settingsBuilder = getIndexSettingsBuilder();
    settingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic");
    var syntheticSettings = settingsBuilder.build();

    // keyword sub-field is normalized and explicitly does not skip storing the original value
    var normalizedKeywordMapping = mapping(
        b -> b.startObject("name")
            .field("type", "text")
            .field("store", false)
            .startObject("fields")
            .startObject("keyword")
            .field("type", "keyword")
            .field("normalizer", "lowercase")
            .field("normalizer_skip_store_original_value", false)
            .endObject()
            .endObject()
            .endObject()
    );
    DocumentMapper documentMapper = createMapperService(syntheticSettings, normalizedKeywordMapping).documentMapper();

    ParsedDocument parsed = documentMapper.parse(source(b -> b.field("name", "QUICK Brown fox")));
    var rootDocument = parsed.rootDoc();

    IndexableField originalValueField = rootDocument.getField("name._original");
    assertThat(originalValueField, nullValue());

    // Collect the names of every entry recorded in the ignored-source field(s).
    Set<String> ignoredNames = rootDocument.getFields(IgnoredSourceFieldMapper.NAME)
        .stream()
        .map(IndexableField::binaryValue)
        .flatMap(encoded -> IgnoredSourceFieldMapper.CoalescedIgnoredSourceEncoding.decode(encoded).stream())
        .map(IgnoredSourceFieldMapper.NameValue::name)
        .collect(Collectors.toSet());
    assertThat("name", in(ignoredNames));

    String renderedSource = syntheticSource(documentMapper, b -> b.field("name", "QUICK Brown fox"));
    assertThat(renderedSource, equalTo("{\"name\":\"QUICK Brown fox\"}"));
}

/**
 * With the index source mode set to {@code synthetic}, maps a text field {@code name} ({@code store=false})
 * whose keyword multi-field uses the {@code lowercase} normalizer with
 * {@code normalizer_skip_store_original_value=true}. After parsing one document it asserts that:
 * the root document has no {@code name._original} field, {@code name} is NOT among the decoded
 * ignored-source entry names, and the synthetic source renders the lowercased value.
 */
public void testDelegatesSyntheticSourceForNormalizedKeywordMultiFieldWhenSkipStoreOriginalValue() throws IOException {
    var settingsBuilder = getIndexSettingsBuilder();
    settingsBuilder.put(IndexSettings.INDEX_MAPPER_SOURCE_MODE_SETTING.getKey(), "synthetic");
    var syntheticSettings = settingsBuilder.build();

    // keyword sub-field is normalized and opts out of storing the original value
    var normalizedKeywordMapping = mapping(
        b -> b.startObject("name")
            .field("type", "text")
            .field("store", false)
            .startObject("fields")
            .startObject("keyword")
            .field("type", "keyword")
            .field("normalizer", "lowercase")
            .field("normalizer_skip_store_original_value", true)
            .endObject()
            .endObject()
            .endObject()
    );
    DocumentMapper documentMapper = createMapperService(syntheticSettings, normalizedKeywordMapping).documentMapper();

    ParsedDocument parsed = documentMapper.parse(source(b -> b.field("name", "QUICK Brown fox")));
    var rootDocument = parsed.rootDoc();

    IndexableField originalValueField = rootDocument.getField("name._original");
    assertThat(originalValueField, nullValue());

    // Collect the names of every entry recorded in the ignored-source field(s).
    Set<String> ignoredNames = rootDocument.getFields(IgnoredSourceFieldMapper.NAME)
        .stream()
        .map(IndexableField::binaryValue)
        .flatMap(encoded -> IgnoredSourceFieldMapper.CoalescedIgnoredSourceEncoding.decode(encoded).stream())
        .map(IgnoredSourceFieldMapper.NameValue::name)
        .collect(Collectors.toSet());
    assertThat("name", not(in(ignoredNames)));

    String renderedSource = syntheticSource(documentMapper, b -> b.field("name", "QUICK Brown fox"));
    assertThat(renderedSource, equalTo("{\"name\":\"quick brown fox\"}"));
}

public void testBWCSerialization() throws IOException {
MapperService mapperService = createMapperService(fieldMapping(b -> {
b.field("type", "text");
Expand Down