-
Notifications
You must be signed in to change notification settings - Fork 25.6k
Enable synthetic source on normalized keyword mappings #126623
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 34 commits
2a878cf
0720fe7
625938f
e26e543
c8a0025
f8194ae
6b20735
e829823
d2efb0b
c2a11e3
417b084
949f42f
0ed6a2b
8d29695
4478a76
e1f5958
70dd8e7
0984db0
63338d7
8c91da5
9110abe
c747219
35153c1
aed9477
5c9f989
270f066
5039f3c
2fda1a4
a739e87
030bb9c
10cf2d1
d28b168
4ed5e3e
f158c00
fbe4adb
0ca54e9
86e5b22
63bf7d1
43b1075
2b9d384
3b33b15
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,16 @@ | ||
| pr: 126623 | ||
| summary: Enable synthetic source on normalized keyword mappings | ||
| area: Mapping | ||
| type: "breaking" | ||
| issues: | ||
| - 124369 | ||
| - 121358 | ||
| breaking: | ||
| title: Enable synthetic source on normalized keyword mappings | ||
| area: Mapping | ||
| details: |- | ||
| This changes the default behavior for Synthetic Source on keyword fields using normalizers. Prior to this change, normalized keywords were always stored to allow returning the non-normalized values. Under this change, such fields will NOT be stored (i.e., they will be synthesized from the index when returning source, like all other synthetic source fields). This should result in considerable space savings for this use case. | ||
| Users can opt out of this behavior on a per-field basis by setting `synthetic_source_keep` to `all` on the field. | ||
| impact: "By default, normalized keyword fields in synthetic source indices will\ | ||
| \ no longer return the non-normalized value in the source." | ||
| notable: false |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1008,17 +1008,21 @@ protected String delegatingTo() { | |
| } | ||
| }; | ||
| } | ||
| if (isStored()) { | ||
|
||
| return new BlockStoredFieldsReader.BytesFromStringsBlockLoader(name()); | ||
| } | ||
| /* | ||
| * If this is a sub-text field try and return the parent's loader. Text | ||
| * fields will always be slow to load and if the parent is exact then we | ||
| * should use that instead. | ||
| */ | ||
| // TODO: should this be removed? I think SyntheticSourceHelper already does this: | ||
|
||
| String parentField = blContext.parentField(name()); | ||
| if (parentField != null) { | ||
| MappedFieldType parent = blContext.lookup().fieldType(parentField); | ||
| if (parent.typeName().equals(KeywordFieldMapper.CONTENT_TYPE)) { | ||
| KeywordFieldMapper.KeywordFieldType kwd = (KeywordFieldMapper.KeywordFieldType) parent; | ||
| if (kwd.hasNormalizer() == false && (kwd.hasDocValues() || kwd.isStored())) { | ||
| if (kwd.hasDocValues() || kwd.isStored()) { | ||
| return new BlockLoader.Delegating(kwd.blockLoader(blContext)) { | ||
| @Override | ||
| protected String delegatingTo() { | ||
|
|
@@ -1028,9 +1032,6 @@ protected String delegatingTo() { | |
| } | ||
| } | ||
| } | ||
| if (isStored()) { | ||
| return new BlockStoredFieldsReader.BytesFromStringsBlockLoader(name()); | ||
| } | ||
|
|
||
| // _ignored_source field will contain entries for this field if it is not stored | ||
| // and there is no syntheticSourceDelegate. | ||
|
|
@@ -1579,7 +1580,7 @@ public static KeywordFieldMapper getKeywordFieldMapperForSyntheticSource(Iterabl | |
| for (Mapper sub : multiFields) { | ||
| if (sub.typeName().equals(KeywordFieldMapper.CONTENT_TYPE)) { | ||
| KeywordFieldMapper kwd = (KeywordFieldMapper) sub; | ||
| if (kwd.hasNormalizer() == false && (kwd.fieldType().hasDocValues() || kwd.fieldType().isStored())) { | ||
| if (kwd.fieldType().hasDocValues() || kwd.fieldType().isStored()) { | ||
| return kwd; | ||
| } | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we mention why?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done.