Skip to content

Commit c1fc7cb

Browse files
salvatore-campagna, dakrone
authored and committed
Store original source for keywords using a normalizer (elastic#112151)
Using a normalizer on a keyword field can make it impossible to reconstruct the original source when synthetic source is enabled, because the doc values hold the normalized form rather than the original value. With this change, if synthetic source is enabled and a normalizer is configured, we store the original value in a stored field, which is later used at document reconstruction time to rebuild the field value exactly as it appeared in the original document. This is the same fallback solution we already use in other places, such as `ignore_malformed`.
1 parent edcc5ca commit c1fc7cb

File tree

5 files changed

+105
-17
lines changed

5 files changed

+105
-17
lines changed

docs/changelog/112151.yaml

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,5 @@
1+
pr: 112151
2+
summary: Store original source for keywords using a normalizer
3+
area: Logs
4+
type: enhancement
5+
issues: []

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/mget/90_synthetic_source.yml

Lines changed: 88 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,94 @@ keyword:
4646
docs.1._source:
4747
kwd: bar
4848

49+
---
50+
keyword with normalizer:
51+
- requires:
52+
cluster_features: [ "mapper.keyword_normalizer_synthetic_source" ]
53+
reason: support for normalizer on keyword fields
54+
- do:
55+
indices.create:
56+
index: test-keyword-with-normalizer
57+
body:
58+
settings:
59+
analysis:
60+
normalizer:
61+
lowercase:
62+
type: custom
63+
filter:
64+
- lowercase
65+
mappings:
66+
_source:
67+
mode: synthetic
68+
properties:
69+
keyword:
70+
type: keyword
71+
normalizer: lowercase
72+
keyword_with_ignore_above:
73+
type: keyword
74+
normalizer: lowercase
75+
ignore_above: 10
76+
keyword_without_doc_values:
77+
type: keyword
78+
normalizer: lowercase
79+
doc_values: false
80+
81+
- do:
82+
index:
83+
index: test-keyword-with-normalizer
84+
id: 1
85+
body:
86+
keyword: "the Quick Brown Fox jumps over the lazy Dog"
87+
keyword_with_ignore_above: "the Quick Brown Fox jumps over the lazy Dog"
88+
keyword_without_doc_values: "the Quick Brown Fox jumps over the lazy Dog"
89+
90+
- do:
91+
index:
92+
index: test-keyword-with-normalizer
93+
id: 2
94+
body:
95+
keyword: "The five BOXING wizards jump Quickly"
96+
keyword_with_ignore_above: "The five BOXING wizards jump Quickly"
97+
keyword_without_doc_values: "The five BOXING wizards jump Quickly"
98+
99+
- do:
100+
index:
101+
index: test-keyword-with-normalizer
102+
id: 3
103+
body:
104+
keyword: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ]
105+
keyword_with_ignore_above: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ]
106+
keyword_without_doc_values: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ]
107+
108+
- do:
109+
mget:
110+
index: test-keyword-with-normalizer
111+
body:
112+
ids: [ 1, 2, 3 ]
113+
- match: { docs.0._index: "test-keyword-with-normalizer" }
114+
- match: { docs.0._id: "1" }
115+
- match:
116+
docs.0._source:
117+
keyword: "the Quick Brown Fox jumps over the lazy Dog"
118+
keyword_with_ignore_above: "the Quick Brown Fox jumps over the lazy Dog"
119+
keyword_without_doc_values: "the Quick Brown Fox jumps over the lazy Dog"
120+
121+
- match: { docs.1._index: "test-keyword-with-normalizer" }
122+
- match: { docs.1._id: "2" }
123+
- match:
124+
docs.1._source:
125+
keyword: "The five BOXING wizards jump Quickly"
126+
keyword_with_ignore_above: "The five BOXING wizards jump Quickly"
127+
keyword_without_doc_values: "The five BOXING wizards jump Quickly"
128+
129+
- match: { docs.2._index: "test-keyword-with-normalizer" }
130+
- match: { docs.2._id: "3" }
131+
- match:
132+
docs.2._source:
133+
keyword: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ]
134+
keyword_with_ignore_above: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ]
135+
keyword_without_doc_values: [ "May the FORCE be with You!", "Do or Do Not, There is no Try" ]
136+
49137
---
50138
stored text:
51139
- requires:

server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java

Lines changed: 10 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -89,6 +89,7 @@ public final class KeywordFieldMapper extends FieldMapper {
8989
public static final String CONTENT_TYPE = "keyword";
9090

9191
static final NodeFeature KEYWORD_DIMENSION_IGNORE_ABOVE = new NodeFeature("mapper.keyword_dimension_ignore_above");
92+
static final NodeFeature KEYWORD_NORMALIZER_SYNTHETIC_SOURCE = new NodeFeature("mapper.keyword_normalizer_synthetic_source");
9293

9394
public static class Defaults {
9495
public static final FieldType FIELD_TYPE;
@@ -856,7 +857,7 @@ public boolean hasNormalizer() {
856857
private final Script script;
857858
private final ScriptCompiler scriptCompiler;
858859
private final IndexVersion indexCreatedVersion;
859-
private final boolean storeIgnored;
860+
private final boolean isSyntheticSource;
860861

861862
private final IndexAnalyzers indexAnalyzers;
862863

@@ -866,7 +867,7 @@ private KeywordFieldMapper(
866867
KeywordFieldType mappedFieldType,
867868
MultiFields multiFields,
868869
CopyTo copyTo,
869-
boolean storeIgnored,
870+
boolean isSyntheticSource,
870871
Builder builder
871872
) {
872873
super(simpleName, mappedFieldType, multiFields, copyTo, builder.script.get() != null, builder.onScriptError.getValue());
@@ -881,7 +882,7 @@ private KeywordFieldMapper(
881882
this.indexAnalyzers = builder.indexAnalyzers;
882883
this.scriptCompiler = builder.scriptCompiler;
883884
this.indexCreatedVersion = builder.indexCreatedVersion;
884-
this.storeIgnored = storeIgnored;
885+
this.isSyntheticSource = isSyntheticSource;
885886
}
886887

887888
@Override
@@ -916,7 +917,7 @@ private void indexValue(DocumentParserContext context, String value) {
916917

917918
if (value.length() > fieldType().ignoreAbove()) {
918919
context.addIgnoredField(fullPath());
919-
if (storeIgnored) {
920+
if (isSyntheticSource) {
920921
// Save a copy of the field so synthetic source can load it
921922
context.doc().add(new StoredField(originalName(), new BytesRef(value)));
922923
}
@@ -1026,6 +1027,11 @@ private String originalName() {
10261027

10271028
@Override
10281029
protected SyntheticSourceMode syntheticSourceMode() {
1030+
if (hasNormalizer()) {
1031+
// NOTE: no matter if we have doc values or not we use a stored field to reconstruct the original value
1032+
// whose doc values would be altered by the normalizer
1033+
return SyntheticSourceMode.FALLBACK;
1034+
}
10291035
if (fieldType.stored() || hasDocValues) {
10301036
return SyntheticSourceMode.NATIVE;
10311037
}
@@ -1047,11 +1053,6 @@ public SourceLoader.SyntheticFieldLoader syntheticFieldLoader(String simpleName)
10471053
"field [" + fullPath() + "] of type [" + typeName() + "] doesn't support synthetic source because it declares copy_to"
10481054
);
10491055
}
1050-
if (hasNormalizer()) {
1051-
throw new IllegalArgumentException(
1052-
"field [" + fullPath() + "] of type [" + typeName() + "] doesn't support synthetic source because it declares a normalizer"
1053-
);
1054-
}
10551056

10561057
if (syntheticSourceMode() != SyntheticSourceMode.NATIVE) {
10571058
return super.syntheticFieldLoader();

server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,7 @@ public Set<NodeFeature> getFeatures() {
3333
NodeMappingStats.SEGMENT_LEVEL_FIELDS_STATS,
3434
BooleanFieldMapper.BOOLEAN_DIMENSION,
3535
ObjectMapper.SUBOBJECTS_AUTO,
36+
KeywordFieldMapper.KEYWORD_NORMALIZER_SYNTHETIC_SOURCE,
3637
SourceFieldMapper.SYNTHETIC_SOURCE_STORED_FIELDS_ADVANCE_FIX
3738
);
3839
}

test/framework/src/main/java/org/elasticsearch/index/mapper/KeywordFieldSyntheticSourceSupport.java

Lines changed: 1 addition & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -21,8 +21,6 @@
2121
import java.util.stream.Collectors;
2222
import java.util.stream.Stream;
2323

24-
import static org.hamcrest.Matchers.equalTo;
25-
2624
public class KeywordFieldSyntheticSourceSupport implements MapperTestCase.SyntheticSourceSupport {
2725
private final Integer ignoreAbove;
2826
private final boolean allIgnored;
@@ -128,11 +126,6 @@ private void mapping(XContentBuilder b) throws IOException {
128126

129127
@Override
130128
public List<MapperTestCase.SyntheticSourceInvalidExample> invalidExample() throws IOException {
131-
return List.of(
132-
new MapperTestCase.SyntheticSourceInvalidExample(
133-
equalTo("field [field] of type [keyword] doesn't support synthetic source because it declares a normalizer"),
134-
b -> b.field("type", "keyword").field("normalizer", "lowercase")
135-
)
136-
);
129+
return List.of();
137130
}
138131
}

0 commit comments

Comments
 (0)