diff --git a/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java index a43575b8f990c..a1de3a4a63ed8 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java @@ -612,12 +612,46 @@ public static class Builder { private boolean hasSyntheticSourceCompatibleKeywordField; + /** + * Returns whether the given builder should be used to track source for synthetic source purposes. + */ + private boolean shouldTrackSource(KeywordFieldMapper.Builder keywordBuilder) { + // we only need one compatible keyword multi-field, so if one was already found, ignore the rest + return hasSyntheticSourceCompatibleKeywordField == false && isSyntheticSourceCompatible(keywordBuilder); + } + + /** + * Returns whether the given builder is synthetic source compatible. To be compatible, the builder must + * track source in some way, whether that be via the "store" param or using doc values. + */ + private boolean isSyntheticSourceCompatible(KeywordFieldMapper.Builder keywordBuilder) { + return keywordBuilder.hasNormalizer() == false && (keywordBuilder.hasDocValues() || keywordBuilder.isStored()); + } + + /** + * Same as {@link #shouldTrackSource(KeywordFieldMapper.Builder)} but with a + * {@link KeywordFieldMapper} instead. + */ + private boolean shouldTrackSource(KeywordFieldMapper keywordMapper) { + return hasSyntheticSourceCompatibleKeywordField == false && isSyntheticSourceCompatible(keywordMapper); + } + + /** + * Same as {@link #isSyntheticSourceCompatible(KeywordFieldMapper.Builder)} but with a + * {@link KeywordFieldMapper} instead. + */ + private boolean isSyntheticSourceCompatible(KeywordFieldMapper keyworddMapper) { + return keyworddMapper.hasNormalizer() == false + && (keyworddMapper.fieldType().hasDocValues() || keyworddMapper.fieldType().isStored()); + } + public Builder add(FieldMapper.Builder builder) { mapperBuilders.put(builder.leafName(), builder::build); if (builder instanceof KeywordFieldMapper.Builder kwd) { - if (kwd.hasNormalizer() == false && (kwd.hasDocValues() || kwd.isStored())) { + if (shouldTrackSource(kwd)) { hasSyntheticSourceCompatibleKeywordField = true; + kwd.shouldTrackSourceForSyntheticSource(true); } } @@ -628,8 +662,9 @@ private void add(FieldMapper mapper) { mapperBuilders.put(mapper.leafName(), context -> mapper); if (mapper instanceof KeywordFieldMapper kwd) { - if (kwd.hasNormalizer() == false && (kwd.fieldType().hasDocValues() || kwd.fieldType().isStored())) { + if (shouldTrackSource(kwd)) { hasSyntheticSourceCompatibleKeywordField = true; + kwd.setTracksSourceForSyntheticSource(true); } } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java index e26c969f7d495..7d6a740c3c19e 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java @@ -213,6 +213,11 @@ public static final class Builder extends FieldMapper.DimensionBuilder { private final boolean forceDocValuesSkipper; private final SourceKeepMode indexSourceKeepMode; + private boolean withinMultiField; + + // used when in a multi-field - indicates whether this keyword field should store source for synthetic source purposes + private boolean tracksSourceForSyntheticSource = false; + public Builder(final String name, final MappingParserContext mappingParserContext) { this( name, @@ -327,6 +332,11 @@ public static Builder buildWithDocValuesSkipper( ); } + public Builder shouldTrackSourceForSyntheticSource(boolean shouldTrackSourceForSyntheticSource) { + this.tracksSourceForSyntheticSource = shouldTrackSourceForSyntheticSource; + return this; + } + public Builder ignoreAbove(int ignoreAbove) { this.ignoreAbove.setValue(ignoreAbove); return this; @@ -1117,6 +1127,9 @@ public String originalName() { private final ScriptCompiler scriptCompiler; private final IndexVersion indexCreatedVersion; private final boolean isSyntheticSource; + private final boolean isWithinMultiField; + + private boolean tracksSourceForSyntheticSource; private final IndexAnalyzers indexAnalyzers; private final int ignoreAboveDefault; @@ -1159,6 +1172,8 @@ private KeywordFieldMapper( this.offsetsFieldName = offsetsFieldName; this.indexSourceKeepMode = indexSourceKeepMode; this.originalName = mappedFieldType.originalName(); + this.isWithinMultiField = builder.withinMultiField; + this.tracksSourceForSyntheticSource = builder.tracksSourceForSyntheticSource; } @Override @@ -1202,6 +1217,14 @@ private boolean indexValue(DocumentParserContext context, String value) { return indexValue(context, new Text(value)); } + /** + * Returns whether this field should be stored separately as a {@link StoredField} for supporting synthetic source. + */ + private boolean shouldStoreThisFieldForSyntheticSource() { + // skip all fields that are multi-fields unless they've been explicitly designated as a source tracker + return isSyntheticSource && (isWithinMultiField == false || tracksSourceForSyntheticSource); + } + private boolean indexValue(DocumentParserContext context, XContentString value) { if (value == null) { return false; @@ -1212,10 +1235,11 @@ private boolean indexValue(DocumentParserContext context, XContentString value) return false; } + // if this field's value exceeds ignore_above, then don't index this field if (value.stringLength() > fieldType().ignoreAbove()) { context.addIgnoredField(fullPath()); - if (isSyntheticSource) { - // Save a copy of the field so synthetic source can load it + // unless synthetic source is enabled, then we need to save a copy of the field so synthetic source can load it + if (shouldStoreThisFieldForSyntheticSource()) { var utfBytes = value.bytes(); var bytesRef = new BytesRef(utfBytes.bytes(), utfBytes.offset(), utfBytes.length()); context.doc().add(new StoredField(originalName, bytesRef)); @@ -1334,6 +1358,10 @@ boolean hasNormalizer() { return normalizerName != null; } + void setTracksSourceForSyntheticSource(boolean tracksSourceForSyntheticSource) { + this.tracksSourceForSyntheticSource = tracksSourceForSyntheticSource; + } + @Override protected SyntheticSourceSupport syntheticSourceSupport() { if (hasNormalizer()) {