Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/112151.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 112151
summary: Store original source for keywords using a normalizer
area: Logs
type: enhancement
issues: []
3 changes: 1 addition & 2 deletions docs/reference/mapping/types/date_nanos.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,7 @@ of official GA features.

`date_nanos` fields support <<synthetic-source,synthetic `_source`>> in their
default configuration. Synthetic `_source` cannot be used together with
<<copy-to,`copy_to`>>, <<ignore-malformed,`ignore_malformed`>> set to true
or with <<doc-values,`doc_values`>> disabled.
<<copy-to,`copy_to`>> or with <<doc-values,`doc_values`>> disabled.

Synthetic source always sorts `date_nanos` fields. For example:
[source,console,id=synthetic-source-date-nanos-example]
Expand Down
2 changes: 1 addition & 1 deletion docs/reference/mapping/types/keyword.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ of official GA features.

`keyword` fields support <<synthetic-source,synthetic `_source`>> in their
default configuration. Synthetic `_source` cannot be used together with
a <<normalizer,`normalizer`>> or <<copy-to,`copy_to`>>.
<<copy-to,`copy_to`>>.

By default, synthetic source sorts `keyword` fields and removes duplicates.
For example:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,94 @@ keyword:
docs.1._source:
kwd: bar

---
# Verifies that synthetic _source returns keyword values exactly as they were
# indexed when the field declares a normalizer: the expected values below keep
# their original casing even though every field uses the `lowercase` normalizer.
keyword with normalizer:
  - requires:
      cluster_features: [ "mapper.keyword_normalizer_synthetic_source" ]
      reason: support for normalizer on keyword fields
  - do:
      indices.create:
        index: test-keyword-with-normalizer
        body:
          settings:
            analysis:
              normalizer:
                lowercase:
                  type: custom
                  filter:
                    - lowercase
          mappings:
            _source:
              mode: synthetic
            properties:
              # Three variants: default, ignore_above shorter than every test
              # value, and doc_values disabled.
              keyword:
                type: keyword
                normalizer: lowercase
              keyword_with_ignore_above:
                type: keyword
                normalizer: lowercase
                ignore_above: 10
              keyword_without_doc_values:
                type: keyword
                normalizer: lowercase
                doc_values: false

  - do:
      index:
        index: test-keyword-with-normalizer
        id: 1
        body:
          keyword: "the Quick Brown Fox jumps over the lazy Dog"
          keyword_with_ignore_above: "the Quick Brown Fox jumps over the lazy Dog"
          keyword_without_doc_values: "the Quick Brown Fox jumps over the lazy Dog"

  - do:
      index:
        index: test-keyword-with-normalizer
        id: 2
        body:
          keyword: "The five BOXING wizards jump Quickly"
          keyword_with_ignore_above: "The five BOXING wizards jump Quickly"
          keyword_without_doc_values: "The five BOXING wizards jump Quickly"

  - do:
      index:
        index: test-keyword-with-normalizer
        id: 3
        body:
          keyword: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ]
          keyword_with_ignore_above: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ]
          keyword_without_doc_values: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ]

  - do:
      mget:
        index: test-keyword-with-normalizer
        body:
          ids: [ 1, 2, 3 ]
  - match: { docs.0._index: "test-keyword-with-normalizer" }
  - match: { docs.0._id: "1" }
  - match:
      docs.0._source:
        keyword: "the Quick Brown Fox jumps over the lazy Dog"
        keyword_with_ignore_above: "the Quick Brown Fox jumps over the lazy Dog"
        keyword_without_doc_values: "the Quick Brown Fox jumps over the lazy Dog"

  - match: { docs.1._index: "test-keyword-with-normalizer" }
  - match: { docs.1._id: "2" }
  - match:
      docs.1._source:
        keyword: "The five BOXING wizards jump Quickly"
        keyword_with_ignore_above: "The five BOXING wizards jump Quickly"
        keyword_without_doc_values: "The five BOXING wizards jump Quickly"

  - match: { docs.2._index: "test-keyword-with-normalizer" }
  - match: { docs.2._id: "3" }
  # Multi-valued fields are expected back in their original order.
  - match:
      docs.2._source:
        keyword: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ]
        keyword_with_ignore_above: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ]
        keyword_without_doc_values: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ]

---
stored text:
- requires:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ public final class KeywordFieldMapper extends FieldMapper {
public static final String CONTENT_TYPE = "keyword";

static final NodeFeature KEYWORD_DIMENSION_IGNORE_ABOVE = new NodeFeature("mapper.keyword_dimension_ignore_above");
static final NodeFeature KEYWORD_NORMALIZER_SYNTHETIC_SOURCE = new NodeFeature("mapper.keyword_normalizer_synthetic_source");

public static class Defaults {
public static final FieldType FIELD_TYPE;
Expand Down Expand Up @@ -856,7 +857,7 @@ public boolean hasNormalizer() {
private final Script script;
private final ScriptCompiler scriptCompiler;
private final IndexVersion indexCreatedVersion;
private final boolean storeIgnored;
private final boolean isSyntheticSource;

private final IndexAnalyzers indexAnalyzers;

Expand All @@ -866,7 +867,7 @@ private KeywordFieldMapper(
KeywordFieldType mappedFieldType,
MultiFields multiFields,
CopyTo copyTo,
boolean storeIgnored,
boolean isSyntheticSource,
Builder builder
) {
super(simpleName, mappedFieldType, multiFields, copyTo, builder.script.get() != null, builder.onScriptError.getValue());
Expand All @@ -881,7 +882,7 @@ private KeywordFieldMapper(
this.indexAnalyzers = builder.indexAnalyzers;
this.scriptCompiler = builder.scriptCompiler;
this.indexCreatedVersion = builder.indexCreatedVersion;
this.storeIgnored = storeIgnored;
this.isSyntheticSource = isSyntheticSource;
}

@Override
Expand Down Expand Up @@ -916,7 +917,7 @@ private void indexValue(DocumentParserContext context, String value) {

if (value.length() > fieldType().ignoreAbove()) {
context.addIgnoredField(fullPath());
if (storeIgnored) {
if (isSyntheticSource) {
// Save a copy of the field so synthetic source can load it
context.doc().add(new StoredField(originalName(), new BytesRef(value)));
}
Expand Down Expand Up @@ -1026,6 +1027,11 @@ private String originalName() {

@Override
protected SyntheticSourceMode syntheticSourceMode() {
if (hasNormalizer()) {
// NOTE: regardless of whether doc values are enabled, a stored field is used to reconstruct the original value,
// because the normalizer alters the value that is written to doc values
return SyntheticSourceMode.FALLBACK;
}
if (fieldType.stored() || hasDocValues) {
return SyntheticSourceMode.NATIVE;
}
Expand All @@ -1047,11 +1053,6 @@ public SourceLoader.SyntheticFieldLoader syntheticFieldLoader(String simpleName)
"field [" + fullPath() + "] of type [" + typeName() + "] doesn't support synthetic source because it declares copy_to"
);
}
if (hasNormalizer()) {
throw new IllegalArgumentException(
"field [" + fullPath() + "] of type [" + typeName() + "] doesn't support synthetic source because it declares a normalizer"
);
}

if (syntheticSourceMode() != SyntheticSourceMode.NATIVE) {
return super.syntheticFieldLoader();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ public Set<NodeFeature> getFeatures() {
NodeMappingStats.SEGMENT_LEVEL_FIELDS_STATS,
BooleanFieldMapper.BOOLEAN_DIMENSION,
ObjectMapper.SUBOBJECTS_AUTO,
KeywordFieldMapper.KEYWORD_NORMALIZER_SYNTHETIC_SOURCE,
SourceFieldMapper.SYNTHETIC_SOURCE_STORED_FIELDS_ADVANCE_FIX
);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static org.hamcrest.Matchers.equalTo;

public class KeywordFieldSyntheticSourceSupport implements MapperTestCase.SyntheticSourceSupport {
private final Integer ignoreAbove;
private final boolean allIgnored;
Expand Down Expand Up @@ -128,11 +126,6 @@ private void mapping(XContentBuilder b) throws IOException {

/**
 * Returns the mapping examples that synthetic source is expected to reject for keyword fields.
 *
 * <p>The list is empty: a keyword field that declares a normalizer is no longer an invalid
 * configuration, so there is no rejection message left to assert on here.
 *
 * @return an empty, immutable list
 * @throws IOException declared by the overridden method; this implementation does not throw it
 */
@Override
public List<MapperTestCase.SyntheticSourceInvalidExample> invalidExample() throws IOException {
    return List.of();
}
}
Loading