-
Notifications
You must be signed in to change notification settings - Fork 25.6k
[DRAFT] Dont track source for keyworld fields that are multi-field, unless specifically designated so #132231
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -612,12 +612,46 @@ public static class Builder { | |
|
|
||
| private boolean hasSyntheticSourceCompatibleKeywordField; | ||
|
|
||
| /** | ||
| * Returns whether the given builder should be used to track source for synthetic source purposes. | ||
| */ | ||
| private boolean shouldTrackSource(KeywordFieldMapper.Builder keywordBuilder) { | ||
| // we only need one compatible keyword multi-field, so if one was already found, ignore the rest | ||
| return hasSyntheticSourceCompatibleKeywordField == false && isSyntheticSourceCompatible(keywordBuilder); | ||
| } | ||
|
|
||
| /** | ||
| * Returns whether the given builder is synthetic source compatible. To be compatible, the builder must | ||
| * track source in some way, whether that be via the "store" param or using doc values. | ||
| */ | ||
| private boolean isSyntheticSourceCompatible(KeywordFieldMapper.Builder keywordBuilder) { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you think we can make There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree that its growing too much. I was also thinking about moving this logic somewhere closer to where it belongs. I'll see what I can do. |
||
| return keywordBuilder.hasNormalizer() == false && (keywordBuilder.hasDocValues() || keywordBuilder.isStored()); | ||
| } | ||
|
|
||
| /** | ||
| * Same as {@link #shouldTrackSource(KeywordFieldMapper.Builder)} but with a | ||
| * {@link KeywordFieldMapper} instead. | ||
| */ | ||
| private boolean shouldTrackSource(KeywordFieldMapper keywordMapper) { | ||
| return hasSyntheticSourceCompatibleKeywordField == false && isSyntheticSourceCompatible(keywordMapper); | ||
| } | ||
|
|
||
| /** | ||
| * Same as {@link #isSyntheticSourceCompatible(KeywordFieldMapper.Builder)} but with a | ||
| * {@link KeywordFieldMapper} instead. | ||
| */ | ||
| private boolean isSyntheticSourceCompatible(KeywordFieldMapper keyworddMapper) { | ||
| return keyworddMapper.hasNormalizer() == false | ||
| && (keyworddMapper.fieldType().hasDocValues() || keyworddMapper.fieldType().isStored()); | ||
| } | ||
|
|
||
| public Builder add(FieldMapper.Builder builder) { | ||
| mapperBuilders.put(builder.leafName(), builder::build); | ||
|
|
||
| if (builder instanceof KeywordFieldMapper.Builder kwd) { | ||
| if (kwd.hasNormalizer() == false && (kwd.hasDocValues() || kwd.isStored())) { | ||
| if (shouldTrackSource(kwd)) { | ||
| hasSyntheticSourceCompatibleKeywordField = true; | ||
| kwd.shouldTrackSourceForSyntheticSource(true); | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -628,8 +662,9 @@ private void add(FieldMapper mapper) { | |
| mapperBuilders.put(mapper.leafName(), context -> mapper); | ||
|
|
||
| if (mapper instanceof KeywordFieldMapper kwd) { | ||
| if (kwd.hasNormalizer() == false && (kwd.fieldType().hasDocValues() || kwd.fieldType().isStored())) { | ||
| if (shouldTrackSource(kwd)) { | ||
| hasSyntheticSourceCompatibleKeywordField = true; | ||
| kwd.setTracksSourceForSyntheticSource(true); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -213,6 +213,11 @@ public static final class Builder extends FieldMapper.DimensionBuilder { | |
| private final boolean forceDocValuesSkipper; | ||
| private final SourceKeepMode indexSourceKeepMode; | ||
|
|
||
| private boolean withinMultiField; | ||
|
|
||
| // used when in a multi-field - indicates whether this keyword field should store source for synthetic source purposes | ||
| private boolean tracksSourceForSyntheticSource = false; | ||
|
|
||
| public Builder(final String name, final MappingParserContext mappingParserContext) { | ||
| this( | ||
| name, | ||
|
|
@@ -327,6 +332,11 @@ public static Builder buildWithDocValuesSkipper( | |
| ); | ||
| } | ||
|
|
||
| public Builder shouldTrackSourceForSyntheticSource(boolean shouldTrackSourceForSyntheticSource) { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same - |
||
| this.tracksSourceForSyntheticSource = shouldTrackSourceForSyntheticSource; | ||
| return this; | ||
| } | ||
|
|
||
| public Builder ignoreAbove(int ignoreAbove) { | ||
| this.ignoreAbove.setValue(ignoreAbove); | ||
| return this; | ||
|
|
@@ -1117,6 +1127,9 @@ public String originalName() { | |
| private final ScriptCompiler scriptCompiler; | ||
| private final IndexVersion indexCreatedVersion; | ||
| private final boolean isSyntheticSource; | ||
| private final boolean isWithinMultiField; | ||
|
|
||
| private boolean tracksSourceForSyntheticSource; | ||
|
|
||
| private final IndexAnalyzers indexAnalyzers; | ||
| private final int ignoreAboveDefault; | ||
|
|
@@ -1159,6 +1172,8 @@ private KeywordFieldMapper( | |
| this.offsetsFieldName = offsetsFieldName; | ||
| this.indexSourceKeepMode = indexSourceKeepMode; | ||
| this.originalName = mappedFieldType.originalName(); | ||
| this.isWithinMultiField = builder.withinMultiField; | ||
| this.tracksSourceForSyntheticSource = builder.tracksSourceForSyntheticSource; | ||
| } | ||
|
|
||
| @Override | ||
|
|
@@ -1202,6 +1217,14 @@ private boolean indexValue(DocumentParserContext context, String value) { | |
| return indexValue(context, new Text(value)); | ||
| } | ||
|
|
||
| /** | ||
| * Returns whether this field should be stored separately as a {@link StoredField} for supporting synthetic source. | ||
| */ | ||
| private boolean shouldStoreThisFieldForSyntheticSource() { | ||
| // skip all fields that are multi-fields unless they've been explicitly designated as a source tracker | ||
| return isSyntheticSource && (isWithinMultiField == false || tracksSourceForSyntheticSource); | ||
| } | ||
|
|
||
| private boolean indexValue(DocumentParserContext context, XContentString value) { | ||
| if (value == null) { | ||
| return false; | ||
|
|
@@ -1212,10 +1235,11 @@ private boolean indexValue(DocumentParserContext context, XContentString value) | |
| return false; | ||
| } | ||
|
|
||
| // if this field's value exceeds ignore_above, then don't index this field | ||
| if (value.stringLength() > fieldType().ignoreAbove()) { | ||
| context.addIgnoredField(fullPath()); | ||
| if (isSyntheticSource) { | ||
| // Save a copy of the field so synthetic source can load it | ||
| // unless synthetic source is enabled, then we need to save a copy of the field so synthetic source can load it | ||
| if (shouldStoreThisFieldForSyntheticSource()) { | ||
| var utfBytes = value.bytes(); | ||
| var bytesRef = new BytesRef(utfBytes.bytes(), utfBytes.offset(), utfBytes.length()); | ||
| context.doc().add(new StoredField(originalName, bytesRef)); | ||
|
|
@@ -1334,6 +1358,10 @@ boolean hasNormalizer() { | |
| return normalizerName != null; | ||
| } | ||
|
|
||
| void setTracksSourceForSyntheticSource(boolean tracksSourceForSyntheticSource) { | ||
| this.tracksSourceForSyntheticSource = tracksSourceForSyntheticSource; | ||
| } | ||
|
|
||
| @Override | ||
| protected SyntheticSourceSupport syntheticSourceSupport() { | ||
| if (hasNormalizer()) { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am struggling with this name. Isn't this something like
isUsedForSyntheticSourceByParent?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yeah... my thinking was to not expose this edge case as much through naming, but I can also see someone misunderstanding what this is used for. I'll change it