Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -612,12 +612,46 @@ public static class Builder {

private boolean hasSyntheticSourceCompatibleKeywordField;

/**
* Returns whether the given builder should be used to track source for synthetic source purposes.
*/
private boolean shouldTrackSource(KeywordFieldMapper.Builder keywordBuilder) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am struggling with this name. Isn't this something like isUsedForSyntheticSourceByParent?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah... my thinking was to not expose this edge case as much through naming, but I can also see someone misunderstanding what this is used for. I'll change it

// we only need one compatible keyword multi-field, so if one was already found, ignore the rest
return hasSyntheticSourceCompatibleKeywordField == false && isSyntheticSourceCompatible(keywordBuilder);
}

/**
* Returns whether the given builder is synthetic source compatible. To be compatible, the builder must
* track source in some way, whether that be via the "store" param or using doc values.
*/
private boolean isSyntheticSourceCompatible(KeywordFieldMapper.Builder keywordBuilder) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you think we can make MultiFields.Builder an interface and do this stuff in a decorator or something inside TextFieldMapper? I know we have hasSyntheticSourceCompatibleKeywordField already but this is growing more and more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree that its growing too much. I was also thinking about moving this logic somewhere closer to where it belongs. I'll see what I can do.

return keywordBuilder.hasNormalizer() == false && (keywordBuilder.hasDocValues() || keywordBuilder.isStored());
}

/**
* Same as {@link #shouldTrackSource(KeywordFieldMapper.Builder)} but with a
* {@link KeywordFieldMapper} instead.
*/
private boolean shouldTrackSource(KeywordFieldMapper keywordMapper) {
return hasSyntheticSourceCompatibleKeywordField == false && isSyntheticSourceCompatible(keywordMapper);
}

/**
* Same as {@link #isSyntheticSourceCompatible(KeywordFieldMapper.Builder)} but with a
* {@link KeywordFieldMapper} instead.
*/
private boolean isSyntheticSourceCompatible(KeywordFieldMapper keyworddMapper) {
return keyworddMapper.hasNormalizer() == false
&& (keyworddMapper.fieldType().hasDocValues() || keyworddMapper.fieldType().isStored());
}

public Builder add(FieldMapper.Builder builder) {
mapperBuilders.put(builder.leafName(), builder::build);

if (builder instanceof KeywordFieldMapper.Builder kwd) {
if (kwd.hasNormalizer() == false && (kwd.hasDocValues() || kwd.isStored())) {
if (shouldTrackSource(kwd)) {
hasSyntheticSourceCompatibleKeywordField = true;
kwd.shouldTrackSourceForSyntheticSource(true);
}
}

Expand All @@ -628,8 +662,9 @@ private void add(FieldMapper mapper) {
mapperBuilders.put(mapper.leafName(), context -> mapper);

if (mapper instanceof KeywordFieldMapper kwd) {
if (kwd.hasNormalizer() == false && (kwd.fieldType().hasDocValues() || kwd.fieldType().isStored())) {
if (shouldTrackSource(kwd)) {
hasSyntheticSourceCompatibleKeywordField = true;
kwd.setTracksSourceForSyntheticSource(true);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,11 @@ public static final class Builder extends FieldMapper.DimensionBuilder {
private final boolean forceDocValuesSkipper;
private final SourceKeepMode indexSourceKeepMode;

private boolean withinMultiField;

// used when in a multi-field - indicates whether this keyword field should store source for synthetic source purposes
private boolean tracksSourceForSyntheticSource = false;

public Builder(final String name, final MappingParserContext mappingParserContext) {
this(
name,
Expand Down Expand Up @@ -327,6 +332,11 @@ public static Builder buildWithDocValuesSkipper(
);
}

public Builder shouldTrackSourceForSyntheticSource(boolean shouldTrackSourceForSyntheticSource) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same - usedByParentFieldForSyntheticSource?

this.tracksSourceForSyntheticSource = shouldTrackSourceForSyntheticSource;
return this;
}

public Builder ignoreAbove(int ignoreAbove) {
this.ignoreAbove.setValue(ignoreAbove);
return this;
Expand Down Expand Up @@ -1117,6 +1127,9 @@ public String originalName() {
private final ScriptCompiler scriptCompiler;
private final IndexVersion indexCreatedVersion;
private final boolean isSyntheticSource;
private final boolean isWithinMultiField;

private boolean tracksSourceForSyntheticSource;

private final IndexAnalyzers indexAnalyzers;
private final int ignoreAboveDefault;
Expand Down Expand Up @@ -1159,6 +1172,8 @@ private KeywordFieldMapper(
this.offsetsFieldName = offsetsFieldName;
this.indexSourceKeepMode = indexSourceKeepMode;
this.originalName = mappedFieldType.originalName();
this.isWithinMultiField = builder.withinMultiField;
this.tracksSourceForSyntheticSource = builder.tracksSourceForSyntheticSource;
}

@Override
Expand Down Expand Up @@ -1202,6 +1217,14 @@ private boolean indexValue(DocumentParserContext context, String value) {
return indexValue(context, new Text(value));
}

/**
* Returns whether this field should be stored separately as a {@link StoredField} for supporting synthetic source.
*/
private boolean shouldStoreThisFieldForSyntheticSource() {
// skip all fields that are multi-fields unless they've been explicitly designated as a source tracker
return isSyntheticSource && (isWithinMultiField == false || tracksSourceForSyntheticSource);
}

private boolean indexValue(DocumentParserContext context, XContentString value) {
if (value == null) {
return false;
Expand All @@ -1212,10 +1235,11 @@ private boolean indexValue(DocumentParserContext context, XContentString value)
return false;
}

// if this field's value exceeds ignore_above, then don't index this field
if (value.stringLength() > fieldType().ignoreAbove()) {
context.addIgnoredField(fullPath());
if (isSyntheticSource) {
// Save a copy of the field so synthetic source can load it
// unless synthetic source is enabled, then we need to save a copy of the field so synthetic source can load it
if (shouldStoreThisFieldForSyntheticSource()) {
var utfBytes = value.bytes();
var bytesRef = new BytesRef(utfBytes.bytes(), utfBytes.offset(), utfBytes.length());
context.doc().add(new StoredField(originalName, bytesRef));
Expand Down Expand Up @@ -1334,6 +1358,10 @@ boolean hasNormalizer() {
return normalizerName != null;
}

void setTracksSourceForSyntheticSource(boolean tracksSourceForSyntheticSource) {
this.tracksSourceForSyntheticSource = tracksSourceForSyntheticSource;
}

@Override
protected SyntheticSourceSupport syntheticSourceSupport() {
if (hasNormalizer()) {
Expand Down