Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/132962.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 132962
summary: Don't store keyword multi fields when they trip `ignore_above`
area: Mapping
type: bug
issues: []

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ public void testRangeIntervals() {
);
}

public void test_block_loader_uses_stored_fields_for_loading_when_synthetic_source_delegate_is_absent() {
public void testBlockLoaderUsesStoredFieldsForLoadingWhenSyntheticSourceDelegateIsAbsent() {
// given
MatchOnlyTextFieldMapper.MatchOnlyTextFieldType ft = new MatchOnlyTextFieldMapper.MatchOnlyTextFieldType(
"parent",
Expand All @@ -246,7 +246,7 @@ public void test_block_loader_uses_stored_fields_for_loading_when_synthetic_sour
assertThat(blockLoader, Matchers.instanceOf(MatchOnlyTextFieldType.BytesFromMixedStringsBytesRefBlockLoader.class));
}

public void test_block_loader_uses_synthetic_source_delegate_when_ignore_above_is_not_set() {
public void testBlockLoaderUsesSyntheticSourceDelegateWhenIgnoreAboveIsNotSet() {
// given
KeywordFieldMapper.KeywordFieldType syntheticSourceDelegate = new KeywordFieldMapper.KeywordFieldType(
"child",
Expand Down Expand Up @@ -274,7 +274,7 @@ public void test_block_loader_uses_synthetic_source_delegate_when_ignore_above_i
assertThat(blockLoader, Matchers.instanceOf(BlockLoader.Delegating.class));
}

public void test_block_loader_does_not_use_synthetic_source_delegate_when_ignore_above_is_set() {
public void testBlockLoaderDoesNotUseSyntheticSourceDelegateWhenIgnoreAboveIsSet() {
// given
Settings settings = Settings.builder()
.put(IndexMetadata.SETTING_VERSION_CREATED, IndexVersion.current())
Expand Down Expand Up @@ -322,7 +322,7 @@ public void test_block_loader_does_not_use_synthetic_source_delegate_when_ignore
assertThat(blockLoader, Matchers.not(Matchers.instanceOf(BlockLoader.Delegating.class)));
}

public void test_block_loader_does_not_use_synthetic_source_delegate_when_ignore_above_is_set_at_index_level() {
public void testBlockLoaderDoesNotUseSyntheticSourceDelegateWhenIgnoreAboveIsSetAtIndexLevel() {
// given
Settings settings = Settings.builder()
.put(IndexMetadata.SETTING_VERSION_CREATED, IndexVersion.current())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -650,6 +650,114 @@ synthetic_source match_only_text with ignored multi-field:
- match:
hits.hits.0._source.foo: "Apache Lucene powers Elasticsearch"

---
synthetic_source match_only_text with ignored multi-field and multiple values in the same doc:
- requires:
cluster_features: [ "mapper.source.mode_from_index_setting" ]
reason: "Source mode configured through index setting"

- do:
indices.create:
index: synthetic_source_test
body:
settings:
index:
mapping.source.mode: synthetic
mappings:
properties:
foo:
type: match_only_text
fields:
raw:
type: keyword
ignore_above: 20

- do:
index:
index: synthetic_source_test
id: "1"
refresh: true
body:
foo: [ "This value is too long and will be ignored", "This value is short" ]

- do:
search:
index: synthetic_source_test
body:
query:
match_phrase:
foo: this value is

- match: { "hits.total.value": 1 }
- match: { hits.hits.0._source.foo.0: "This value is too long and will be ignored" }
- match: { hits.hits.0._source.foo.1: "This value is short" }

# now, flip the values around
- do:
index:
index: synthetic_source_test
id: "1"
refresh: true
body:
foo: [ "This value is short", "This value is too long and will be ignored" ]

- do:
search:
index: synthetic_source_test
body:
query:
match_phrase:
foo: this value is

- match: { "hits.total.value": 1 }
# the order will be the same since text fields currently don't take offsets into account
- match: { hits.hits.0._source.foo.0: "This value is too long and will be ignored" }
- match: { hits.hits.0._source.foo.1: "This value is short" }

---
synthetic_source match_only_text with ignored multi-field and multiple values in the same doc and preserved order:
- requires:
cluster_features: [ "mapper.source.mode_from_index_setting" ]
reason: "Source mode configured through index setting"

- do:
indices.create:
index: synthetic_source_test
body:
settings:
index:
mapping.source.mode: synthetic
mappings:
properties:
foo:
type: match_only_text
# this will force the order to be preserved
synthetic_source_keep: arrays
fields:
raw:
type: keyword
ignore_above: 20

- do:
index:
index: synthetic_source_test
id: "1"
refresh: true
body:
foo: [ "This value is short", "This value is too long and will be ignored" ]

- do:
search:
index: synthetic_source_test
body:
query:
match_phrase:
foo: this value is

- match: { "hits.total.value": 1 }
- match: { hits.hits.0._source.foo.0: "This value is short" }
- match: { hits.hits.0._source.foo.1: "This value is too long and will be ignored" }

---
synthetic_source match_only_text with stored multi-field:
- requires:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,20 @@
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.IndexOptions;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.index.analysis.AnalyzerScope;
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.mapper.CompositeSyntheticFieldLoader;
import org.elasticsearch.index.mapper.DocumentParserContext;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.MapperBuilderContext;
import org.elasticsearch.index.mapper.SourceFieldMapper;
import org.elasticsearch.index.mapper.SourceLoader;
import org.elasticsearch.index.mapper.StringStoredFieldFieldLoader;
import org.elasticsearch.index.mapper.TextFieldMapper;
import org.elasticsearch.index.mapper.TextParams;
Expand Down Expand Up @@ -78,7 +81,7 @@ private static NamedAnalyzer wrapAnalyzer(NamedAnalyzer in) {
);
}

public static class Builder extends FieldMapper.Builder {
public static class Builder extends BuilderWithSyntheticSourceContext {

final Parameter<SimilarityProvider> similarity = TextParams.similarity(m -> builder(m).similarity.getValue());
final Parameter<String> indexOptions = TextParams.textIndexOptions(m -> builder(m).indexOptions.getValue());
Expand All @@ -87,25 +90,29 @@ public static class Builder extends FieldMapper.Builder {

private final Parameter<Map<String, String>> meta = Parameter.metaParam();

private final IndexVersion indexCreatedVersion;
private final TextParams.Analyzers analyzers;
private final boolean isSyntheticSourceEnabled;
private final Parameter<Boolean> store;

public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers, boolean isSyntheticSourceEnabled) {
super(name);
this.indexCreatedVersion = indexCreatedVersion;
public Builder(
String name,
IndexVersion indexCreatedVersion,
IndexAnalyzers indexAnalyzers,
boolean isSyntheticSourceEnabled,
boolean isWithinMultiField
) {
super(name, indexCreatedVersion, isSyntheticSourceEnabled, isWithinMultiField);
this.analyzers = new TextParams.Analyzers(
indexAnalyzers,
m -> builder(m).analyzers.getIndexAnalyzer(),
m -> builder(m).analyzers.positionIncrementGap.getValue(),
indexCreatedVersion
);
this.isSyntheticSourceEnabled = isSyntheticSourceEnabled;
this.store = Parameter.storeParam(
m -> builder(m).store.getValue(),
() -> isSyntheticSourceEnabled && multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false
);
this.store = Parameter.storeParam(m -> builder(m).store.getValue(), () -> {
if (TextFieldMapper.keywordMultiFieldsNotStoredWhenIgnoredIndexVersionCheck(indexCreatedVersion())) {
return false;
}
return isSyntheticSourceEnabled() && multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false;
});
}

@Override
Expand Down Expand Up @@ -135,6 +142,7 @@ private AnnotatedTextFieldType buildFieldType(FieldType fieldType, MapperBuilder
store.getValue(),
tsi,
context.isSourceSynthetic(),
isWithinMultiField(),
TextFieldMapper.SyntheticSourceHelper.syntheticSourceDelegate(fieldType.stored(), multiFields),
meta.getValue()
);
Expand Down Expand Up @@ -165,7 +173,13 @@ public AnnotatedTextFieldMapper build(MapperBuilderContext context) {
}

public static final TypeParser PARSER = new TypeParser(
(n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers(), SourceFieldMapper.isSynthetic(c.getIndexSettings()))
(n, c) -> new Builder(
n,
c.indexVersionCreated(),
c.getIndexAnalyzers(),
SourceFieldMapper.isSynthetic(c.getIndexSettings()),
c.isWithinMultiField()
)
);

/**
Expand Down Expand Up @@ -484,15 +498,17 @@ private void emitAnnotation(int firstSpannedTextPosInc, int annotationPosLen) th
}

public static final class AnnotatedTextFieldType extends TextFieldMapper.TextFieldType {

private AnnotatedTextFieldType(
String name,
boolean store,
TextSearchInfo tsi,
boolean isSyntheticSource,
boolean isWithinMultiField,
KeywordFieldMapper.KeywordFieldType syntheticSourceDelegate,
Map<String, String> meta
) {
super(name, true, store, tsi, isSyntheticSource, syntheticSourceDelegate, meta, false, false);
super(name, true, store, tsi, isSyntheticSource, isWithinMultiField, syntheticSourceDelegate, meta, false, false);
}

public AnnotatedTextFieldType(String name, Map<String, String> meta) {
Expand All @@ -505,9 +521,9 @@ public String typeName() {
}
}

private final IndexVersion indexCreatedVersion;
private final FieldType fieldType;
private final Builder builder;

private final NamedAnalyzer indexAnalyzer;

protected AnnotatedTextFieldMapper(
Expand All @@ -518,10 +534,18 @@ protected AnnotatedTextFieldMapper(
Builder builder
) {
super(simpleName, mappedFieldType, builderParams);

assert fieldType.tokenized();

this.fieldType = freezeAndDeduplicateFieldType(fieldType);
this.builder = builder;
this.indexAnalyzer = wrapAnalyzer(builder.analyzers.getIndexAnalyzer());
this.indexCreatedVersion = builder.indexCreatedVersion();
}

@Override
public AnnotatedTextFieldType fieldType() {
return (AnnotatedTextFieldType) super.fieldType();
}

@Override
Expand All @@ -544,6 +568,26 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
context.addToFieldNames(fieldType().name());
}
}

// if synthetic source needs to be supported, yet the field isn't stored, then we need to rely on something else
if (needsToSupportSyntheticSource() && fieldType.stored() == false) {
// if we can rely on the synthetic source delegate for synthetic source, then return
if (fieldType().canUseSyntheticSourceDelegateForSyntheticSource(value)) {
return;
}

// otherwise, we need to store a copy of this value so that synthetic source can load it
final String fieldName = fieldType().syntheticSourceFallbackFieldName();
context.doc().add(new StoredField(fieldName, value));
}
}

private boolean needsToSupportSyntheticSource() {
if (TextFieldMapper.multiFieldsNotStoredByDefaultIndexVersionCheck(indexCreatedVersion)) {
// if we're within a multi field, then supporting synthetic source isn't necessary as that's the responsibility of the parent
return fieldType().isSyntheticSourceEnabled() && fieldType().isWithinMultiField() == false;
}
return fieldType().isSyntheticSourceEnabled();
}

@Override
Expand All @@ -553,8 +597,13 @@ protected String contentType() {

@Override
public FieldMapper.Builder getMergeBuilder() {
return new Builder(leafName(), builder.indexCreatedVersion, builder.analyzers.indexAnalyzers, builder.isSyntheticSourceEnabled)
.init(this);
return new Builder(
leafName(),
builder.indexCreatedVersion(),
builder.analyzers.indexAnalyzers,
fieldType().isSyntheticSourceEnabled(),
fieldType().isWithinMultiField()
).init(this);
}

@Override
Expand All @@ -568,11 +617,32 @@ protected void write(XContentBuilder b, Object value) throws IOException {
});
}

return new SyntheticSourceSupport.Native(() -> syntheticFieldLoader(fullPath(), leafName()));
}

private SourceLoader.SyntheticFieldLoader syntheticFieldLoader(String fullFieldName, String leafFieldName) {
// since we don't know whether the delegate field loader can be used for synthetic source until parsing, we need to check both this
// field and the delegate

// first field loader - to check whether the field's value was stored under this match_only_text field
final String fieldName = fieldType().syntheticSourceFallbackFieldName();
final var thisFieldLayer = new CompositeSyntheticFieldLoader.StoredFieldLayer(fieldName) {
@Override
protected void writeValue(Object value, XContentBuilder b) throws IOException {
b.value(value.toString());
}
};

final CompositeSyntheticFieldLoader fieldLoader = new CompositeSyntheticFieldLoader(leafFieldName, fullFieldName, thisFieldLayer);

// second loader - to check whether the field's value was stored by a keyword delegate field
var kwd = TextFieldMapper.SyntheticSourceHelper.getKeywordFieldMapperForSyntheticSource(this);
if (kwd != null) {
return new SyntheticSourceSupport.Native(() -> kwd.syntheticFieldLoader(fullPath(), leafName()));
// merge the two field loaders into one
var kwdFieldLoader = kwd.syntheticFieldLoader(fullPath(), leafName());
return fieldLoader.mergedWith(kwdFieldLoader);
}

return super.syntheticSourceSupport();
return fieldLoader;
}
}
Loading