diff --git a/docs/reference/mapping/params/ignore-above.asciidoc b/docs/reference/mapping/params/ignore-above.asciidoc index 7d04bc82dcbb3..526f2d6205961 100644 --- a/docs/reference/mapping/params/ignore-above.asciidoc +++ b/docs/reference/mapping/params/ignore-above.asciidoc @@ -57,3 +57,33 @@ NOTE: The value for `ignore_above` is the _character count_, but Lucene counts bytes. If you use UTF-8 text with many non-ASCII characters, you may want to set the limit to `32766 / 4 = 8191` since UTF-8 characters may occupy at most 4 bytes. + +[[index-mapping-ignore-above]] +=== `index.mapping.ignore_above` + +The `ignore_above` setting, typically used at the field level, can also be applied at the index level using +`index.mapping.ignore_above`. This setting lets you define a maximum string length for all applicable fields across +the index, including `keyword`, `wildcard`, and keyword values in `flattened` fields. Any values that exceed this +limit will be ignored during indexing and won’t be stored. + +This index-wide setting ensures a consistent approach to managing excessively long values. It works the same as the +field-level setting—if a string’s length goes over the specified limit, that string won’t be indexed or stored. +When dealing with arrays, each element is evaluated separately, and only the elements that exceed the limit are ignored. + +[source,console] +-------------------------------------------------- +PUT my-index-000001 +{ + "settings": { + "index.mapping.ignore_above": 256 + } +} +-------------------------------------------------- + +In this example, all applicable fields in `my-index-000001` will ignore any strings longer than 256 characters. + +TIP: You can override this index-wide setting for specific fields by specifying a custom `ignore_above` value in the +field mapping. + +NOTE: Just like the field-level `ignore_above`, this setting only affects indexing and storage. 
The original values +are still available in the `_source` field if `_source` is enabled, which is the default behavior in Elasticsearch. diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/530_ignore_above_stored_source.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/530_ignore_above_stored_source.yml new file mode 100644 index 0000000000000..1730a49f743d9 --- /dev/null +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/530_ignore_above_stored_source.yml @@ -0,0 +1,214 @@ +--- +ignore_above mapping level setting: + - requires: + cluster_features: [ "mapper.ignore_above_index_level_setting" ] + reason: introduce ignore_above index level setting + - do: + indices.create: + index: test + body: + settings: + index: + mapping: + ignore_above: 10 + mappings: + properties: + keyword: + type: keyword + flattened: + type: flattened + + - do: + index: + index: test + refresh: true + id: "1" + body: { "keyword": "foo bar", "flattened": { "value": "the quick brown fox" } } + + - do: + search: + body: + fields: + - keyword + - flattened + query: + match_all: {} + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.keyword: "foo bar" } + - match: { hits.hits.0._source.flattened.value: "the quick brown fox" } + - match: { hits.hits.0.fields.keyword.0: "foo bar" } + - match: { hits.hits.0.fields.flattened: null } + +--- +ignore_above mapping level setting on arrays: + - requires: + cluster_features: [ "mapper.ignore_above_index_level_setting" ] + reason: introduce ignore_above index level setting + - do: + indices.create: + index: test + body: + settings: + index: + mapping: + ignore_above: 10 + mappings: + properties: + keyword: + type: keyword + flattened: + type: flattened + + - do: + index: + index: test + refresh: true + id: "1" + body: { "keyword": ["foo bar", "the quick brown fox"], "flattened": { "value": ["the quick brown fox", "jumps over"] } } + + - do: + search: + body: + fields: + - 
keyword + - flattened + query: + match_all: {} + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.keyword: ["foo bar", "the quick brown fox"] } + - match: { hits.hits.0._source.flattened.value: ["the quick brown fox", "jumps over"] } + - match: { hits.hits.0.fields.keyword.0: "foo bar" } + - match: { hits.hits.0.fields.flattened.0.value: "jumps over" } + +--- +ignore_above mapping overrides setting: + - requires: + cluster_features: [ "mapper.ignore_above_index_level_setting" ] + reason: introduce ignore_above index level setting + - do: + indices.create: + index: test + body: + settings: + index: + mapping: + ignore_above: 10 + mappings: + properties: + keyword: + type: keyword + ignore_above: 100 + flattened: + type: flattened + ignore_above: 100 + + - do: + index: + index: test + refresh: true + id: "1" + body: { "keyword": "foo bar baz foo bar baz", "flattened": { "value": "the quick brown fox" } } + + - do: + search: + body: + fields: + - keyword + - flattened + query: + match_all: { } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.keyword: "foo bar baz foo bar baz" } + - match: { hits.hits.0._source.flattened.value: "the quick brown fox" } + - match: { hits.hits.0.fields.keyword.0: "foo bar baz foo bar baz" } + - match: { hits.hits.0.fields.flattened.0.value: "the quick brown fox" } + +--- +ignore_above mapping overrides setting on arrays: + - requires: + cluster_features: [ "mapper.ignore_above_index_level_setting" ] + reason: introduce ignore_above index level setting + - do: + indices.create: + index: test + body: + settings: + index: + mapping: + ignore_above: 10 + mappings: + properties: + keyword: + type: keyword + ignore_above: 100 + flattened: + type: flattened + ignore_above: 100 + + - do: + index: + index: test + refresh: true + id: "1" + body: { "keyword": ["foo bar baz foo bar baz", "the quick brown fox jumps over"], "flattened": { "value": ["the quick brown fox", "jumps over the lazy dog"] } } + + - do: + search: + 
body: + fields: + - keyword + - flattened + query: + match_all: { } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.keyword: ["foo bar baz foo bar baz", "the quick brown fox jumps over"] } + - match: { hits.hits.0._source.flattened.value: ["the quick brown fox", "jumps over the lazy dog"] } + - match: { hits.hits.0.fields.keyword: ["foo bar baz foo bar baz", "the quick brown fox jumps over"] } + - match: { hits.hits.0.fields.flattened.0.value: ["the quick brown fox", "jumps over the lazy dog"] } + +--- +date ignore_above index level setting: + - requires: + cluster_features: [ "mapper.ignore_above_index_level_setting" ] + reason: introduce ignore_above index level setting + - do: + indices.create: + index: test + body: + settings: + index: + mapping: + ignore_above: 10 + mappings: + properties: + keyword: + type: keyword + date: + type: date + format: "yyyy-MM-dd'T'HH:mm:ss" + + - do: + index: + index: test + refresh: true + id: "1" + body: { "keyword": ["2023-09-17T15:30:00", "2023-09-17T15:31:00"], "date": ["2023-09-17T15:30:00", "2023-09-17T15:31:00"] } + + - do: + search: + body: + fields: + - keyword + - date + query: + match_all: {} + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.keyword: ["2023-09-17T15:30:00", "2023-09-17T15:31:00"] } + - match: { hits.hits.0._source.date: ["2023-09-17T15:30:00", "2023-09-17T15:31:00"] } + - match: { hits.hits.0.fields.keyword: null } + - match: { hits.hits.0.fields.date: ["2023-09-17T15:30:00","2023-09-17T15:31:00"] } diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/540_ignore_above_synthetic_source.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/540_ignore_above_synthetic_source.yml new file mode 100644 index 0000000000000..defdc8467bf8d --- /dev/null +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/540_ignore_above_synthetic_source.yml @@ -0,0 +1,179 @@ +--- +ignore_above mapping level setting: + - requires: + 
cluster_features: [ "mapper.ignore_above_index_level_setting" ] + reason: introduce ignore_above index level setting + - do: + indices.create: + index: test + body: + settings: + index: + mapping: + ignore_above: 10 + mappings: + _source: + mode: synthetic + properties: + keyword: + type: keyword + flattened: + type: flattened + + - do: + index: + index: test + refresh: true + id: "1" + body: { "keyword": "foo bar", "flattened": { "value": "the quick brown fox" } } + + - do: + search: + body: + fields: + - keyword + - flattened + query: + match_all: {} + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.keyword: "foo bar" } + - match: { hits.hits.0._source.flattened.value: "the quick brown fox" } + - match: { hits.hits.0.fields.keyword.0: "foo bar" } + +--- +ignore_above mapping level setting on arrays: + - requires: + cluster_features: [ "mapper.ignore_above_index_level_setting" ] + reason: introduce ignore_above index level setting + - do: + indices.create: + index: test + body: + settings: + index: + mapping: + ignore_above: 10 + mappings: + _source: + mode: synthetic + properties: + keyword: + type: keyword + flattened: + type: flattened + + - do: + index: + index: test + refresh: true + id: "1" + body: { "keyword": ["foo bar", "the quick brown fox"], "flattened": { "value": ["the quick brown fox", "jumps over"] } } + + - do: + search: + body: + fields: + - keyword + - flattened + query: + match_all: {} + + - length: { hits.hits: 1 } + #TODO: synthetic source field reconstruction bug (TBD: add link to the issue here) + #- match: { hits.hits.0._source.keyword: ["foo bar", "the quick brown fox"] } + - match: { hits.hits.0._source.flattened.value: ["the quick brown fox", "jumps over"] } + - match: { hits.hits.0.fields.keyword.0: "foo bar" } + - match: { hits.hits.0.fields.flattened.0.value: "jumps over" } + +--- +ignore_above mapping overrides setting: + - requires: + cluster_features: [ "mapper.ignore_above_index_level_setting" ] + reason: introduce 
ignore_above index level setting + - do: + indices.create: + index: test + body: + settings: + index: + mapping: + ignore_above: 10 + mappings: + _source: + mode: synthetic + properties: + keyword: + type: keyword + ignore_above: 100 + flattened: + type: flattened + ignore_above: 100 + + - do: + index: + index: test + refresh: true + id: "1" + body: { "keyword": "foo bar baz foo bar baz", "flattened": { "value": "the quick brown fox" } } + + - do: + search: + body: + fields: + - keyword + - flattened + query: + match_all: { } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.keyword: "foo bar baz foo bar baz" } + - match: { hits.hits.0._source.flattened.value: "the quick brown fox" } + - match: { hits.hits.0.fields.keyword.0: "foo bar baz foo bar baz" } + - match: { hits.hits.0.fields.flattened.0.value: "the quick brown fox" } + +--- +ignore_above mapping overrides setting on arrays: + - requires: + cluster_features: [ "mapper.ignore_above_index_level_setting" ] + reason: introduce ignore_above index level setting + - do: + indices.create: + index: test + body: + settings: + index: + mapping: + ignore_above: 10 + mappings: + _source: + mode: synthetic + properties: + keyword: + type: keyword + ignore_above: 100 + flattened: + type: flattened + ignore_above: 100 + + - do: + index: + index: test + refresh: true + id: "1" + body: { "keyword": ["foo bar baz foo bar baz", "the quick brown fox jumps over"], "flattened": { "value": ["the quick brown fox", "jumps over the lazy dog"] } } + + - do: + search: + body: + fields: + - keyword + - flattened + query: + match_all: { } + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.keyword: ["foo bar baz foo bar baz", "the quick brown fox jumps over"] } + - match: { hits.hits.0._source.flattened.value: ["jumps over the lazy dog", "the quick brown fox"] } + - match: { hits.hits.0.fields.keyword: ["foo bar baz foo bar baz", "the quick brown fox jumps over"] } + - match: { 
hits.hits.0.fields.flattened.0.value: ["jumps over the lazy dog", "the quick brown fox"] } diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/550_ignore_above_invalid.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/550_ignore_above_invalid.yml new file mode 100644 index 0000000000000..3c29845871fe7 --- /dev/null +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/550_ignore_above_invalid.yml @@ -0,0 +1,63 @@ +--- +ignore_above index setting negative value: + - do: + catch: bad_request + indices.create: + index: test + body: + settings: + index: + mapping: + ignore_above: -1 + mappings: + properties: + keyword: + type: keyword + +--- +keyword ignore_above mapping setting negative value: + - requires: + cluster_features: [ "mapper.ignore_above_index_level_setting" ] + reason: introduce ignore_above index level setting + - do: + catch: bad_request + indices.create: + index: test + body: + mappings: + properties: + keyword: + ignore_above: -2 + type: keyword + +--- +flattened ignore_above mapping setting negative value: + - requires: + cluster_features: [ "mapper.ignore_above_index_level_setting" ] + reason: introduce ignore_above index level setting + - do: + catch: bad_request + indices.create: + index: test + body: + mappings: + properties: + flattened: + ignore_above: -2 + type: flattened + +--- +wildcard ignore_above mapping setting negative value: + - requires: + cluster_features: [ "mapper.ignore_above_index_level_setting" ] + reason: introduce ignore_above index level setting + - do: + catch: bad_request + indices.create: + index: test + body: + mappings: + properties: + wildcard: + ignore_above: -2 + type: wildcard diff --git a/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java b/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java index 69abb59689c00..ad3d7d7f1c2ec 100644 --- 
a/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java +++ b/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java @@ -151,6 +151,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings { IndexSettings.INDEX_SEARCH_IDLE_AFTER, IndexSettings.INDEX_SEARCH_THROTTLED, IndexFieldDataService.INDEX_FIELDDATA_CACHE_KEY, + IndexSettings.IGNORE_ABOVE_SETTING, FieldMapper.IGNORE_MALFORMED_SETTING, FieldMapper.COERCE_SETTING, Store.INDEX_STORE_STATS_REFRESH_INTERVAL_SETTING, diff --git a/server/src/main/java/org/elasticsearch/index/IndexSettings.java b/server/src/main/java/org/elasticsearch/index/IndexSettings.java index 92abdb39acf56..f6ad5e22b9ed7 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexSettings.java +++ b/server/src/main/java/org/elasticsearch/index/IndexSettings.java @@ -25,6 +25,7 @@ import org.elasticsearch.common.unit.ByteSizeUnit; import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.core.TimeValue; +import org.elasticsearch.features.NodeFeature; import org.elasticsearch.index.mapper.IgnoredSourceFieldMapper; import org.elasticsearch.index.mapper.Mapper; import org.elasticsearch.index.translog.Translog; @@ -697,6 +698,31 @@ public Iterator> settings() { Property.IndexSettingDeprecatedInV7AndRemovedInV8 ); + /** + * The `index.mapping.ignore_above` setting defines the maximum length for the content of a field that will be indexed + * or stored. If the length of the field’s content exceeds this limit, the field value will be ignored during indexing. + * This setting is useful for `keyword`, `flattened`, and `wildcard` fields where very large values are undesirable. + * It allows users to manage the size of indexed data by skipping fields with excessively long content. As an index-level + * setting, it applies to all `keyword` and `wildcard` fields, as well as to keyword values within `flattened` fields. 
+ * When it comes to arrays, the `ignore_above` setting applies individually to each element of the array. If any element's + * length exceeds the specified limit, only that element will be ignored during indexing, while the rest of the array will + * still be processed. This behavior is consistent with the field-level `ignore_above` setting. + * This setting can be overridden at the field level by specifying a custom `ignore_above` value in the field mapping. + *

+ * Example usage: + * <pre>
+     * "index.mapping.ignore_above": 256
+     * </pre>
+ */ + public static final Setting IGNORE_ABOVE_SETTING = Setting.intSetting( + "index.mapping.ignore_above", + Integer.MAX_VALUE, + 0, + Property.IndexScope, + Property.ServerlessPublic + ); + public static final NodeFeature IGNORE_ABOVE_INDEX_LEVEL_SETTING = new NodeFeature("mapper.ignore_above_index_level_setting"); + private final Index index; private final IndexVersion version; private final Logger logger; diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java index 2da8d32773733..46b1dbdce4c4b 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java @@ -79,6 +79,7 @@ import static org.apache.lucene.index.IndexWriter.MAX_TERM_LENGTH; import static org.elasticsearch.core.Strings.format; +import static org.elasticsearch.index.IndexSettings.IGNORE_ABOVE_SETTING; /** * A field mapper for keywords. This mapper accepts strings and indexes them as-is. 
@@ -110,8 +111,6 @@ public static class Defaults { Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER ); - - public static final int IGNORE_ABOVE = Integer.MAX_VALUE; } public static class KeywordField extends Field { @@ -158,12 +157,8 @@ public static final class Builder extends FieldMapper.DimensionBuilder { m -> toType(m).fieldType().eagerGlobalOrdinals(), false ); - private final Parameter ignoreAbove = Parameter.intParam( - "ignore_above", - true, - m -> toType(m).fieldType().ignoreAbove(), - Defaults.IGNORE_ABOVE - ); + private final Parameter ignoreAbove; + private final int ignoreAboveDefault; private final Parameter indexOptions = TextParams.keywordIndexOptions(m -> toType(m).indexOptions); private final Parameter hasNorms = TextParams.norms(false, m -> toType(m).fieldType.omitNorms() == false); @@ -193,7 +188,23 @@ public static final class Builder extends FieldMapper.DimensionBuilder { private final ScriptCompiler scriptCompiler; private final IndexVersion indexCreatedVersion; - public Builder(String name, IndexAnalyzers indexAnalyzers, ScriptCompiler scriptCompiler, IndexVersion indexCreatedVersion) { + public Builder(final String name, final MappingParserContext mappingParserContext) { + this( + name, + mappingParserContext.getIndexAnalyzers(), + mappingParserContext.scriptCompiler(), + IGNORE_ABOVE_SETTING.get(mappingParserContext.getSettings()), + mappingParserContext.getIndexSettings().getIndexVersionCreated() + ); + } + + Builder( + String name, + IndexAnalyzers indexAnalyzers, + ScriptCompiler scriptCompiler, + int ignoreAboveDefault, + IndexVersion indexCreatedVersion + ) { super(name); this.indexAnalyzers = indexAnalyzers; this.scriptCompiler = Objects.requireNonNull(scriptCompiler); @@ -220,10 +231,17 @@ public Builder(String name, IndexAnalyzers indexAnalyzers, ScriptCompiler script ); } }).precludesParameters(normalizer); + this.ignoreAboveDefault = ignoreAboveDefault; + this.ignoreAbove = Parameter.intParam("ignore_above", true, m -> 
toType(m).fieldType().ignoreAbove(), ignoreAboveDefault) + .addValidator(v -> { + if (v < 0) { + throw new IllegalArgumentException("[ignore_above] must be positive, got [" + v + "]"); + } + }); } public Builder(String name, IndexVersion indexCreatedVersion) { - this(name, null, ScriptCompiler.NONE, indexCreatedVersion); + this(name, null, ScriptCompiler.NONE, Integer.MAX_VALUE, indexCreatedVersion); } public Builder ignoreAbove(int ignoreAbove) { @@ -370,10 +388,7 @@ public KeywordFieldMapper build(MapperBuilderContext context) { private static final IndexVersion MINIMUM_COMPATIBILITY_VERSION = IndexVersion.fromId(5000099); - public static final TypeParser PARSER = new TypeParser( - (n, c) -> new Builder(n, c.getIndexAnalyzers(), c.scriptCompiler(), c.indexVersionCreated()), - MINIMUM_COMPATIBILITY_VERSION - ); + public static final TypeParser PARSER = new TypeParser(Builder::new, MINIMUM_COMPATIBILITY_VERSION); public static final class KeywordFieldType extends StringFieldType { @@ -865,6 +880,8 @@ public boolean hasNormalizer() { private final boolean isSyntheticSource; private final IndexAnalyzers indexAnalyzers; + private final int ignoreAboveDefault; + private final int ignoreAbove; private KeywordFieldMapper( String simpleName, @@ -887,6 +904,8 @@ private KeywordFieldMapper( this.scriptCompiler = builder.scriptCompiler; this.indexCreatedVersion = builder.indexCreatedVersion; this.isSyntheticSource = isSyntheticSource; + this.ignoreAboveDefault = builder.ignoreAboveDefault; + this.ignoreAbove = builder.ignoreAbove.getValue(); } @Override @@ -1004,7 +1023,9 @@ public Map indexAnalyzers() { @Override public FieldMapper.Builder getMergeBuilder() { - return new Builder(leafName(), indexAnalyzers, scriptCompiler, indexCreatedVersion).dimension(fieldType().isDimension()).init(this); + return new Builder(leafName(), indexAnalyzers, scriptCompiler, ignoreAboveDefault, indexCreatedVersion).dimension( + fieldType().isDimension() + ).init(this); } @Override @@ -1072,7 
+1093,7 @@ protected BytesRef preserve(BytesRef value) { }); } - if (fieldType().ignoreAbove != Defaults.IGNORE_ABOVE) { + if (fieldType().ignoreAbove != ignoreAboveDefault) { layers.add(new CompositeSyntheticFieldLoader.StoredFieldLayer(originalName()) { @Override protected void writeValue(Object value, XContentBuilder b) throws IOException { diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java index d18c3283ef909..d2ca7a24a78fd 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java @@ -11,6 +11,7 @@ import org.elasticsearch.features.FeatureSpecification; import org.elasticsearch.features.NodeFeature; +import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.mapper.flattened.FlattenedFieldMapper; import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; @@ -41,6 +42,7 @@ public Set getFeatures() { SourceFieldMapper.SYNTHETIC_SOURCE_WITH_COPY_TO_AND_DOC_VALUES_FALSE_SUPPORT, SourceFieldMapper.SYNTHETIC_SOURCE_COPY_TO_FIX, FlattenedFieldMapper.IGNORE_ABOVE_SUPPORT, + IndexSettings.IGNORE_ABOVE_INDEX_LEVEL_SETTING, SourceFieldMapper.SYNTHETIC_SOURCE_COPY_TO_INSIDE_OBJECTS_FIX ); } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldMapper.java index 43890abf25aa7..2c504262c35ad 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldMapper.java @@ -82,6 +82,8 @@ import java.util.Set; import java.util.function.Function; +import static org.elasticsearch.index.IndexSettings.IGNORE_ABOVE_SETTING; + /** * A field mapper that accepts a JSON object and flattens it into a single 
field. This data type * can be a useful alternative to an 'object' mapping when the object has a large, unknown set @@ -123,6 +125,9 @@ private static Builder builder(Mapper in) { return ((FlattenedFieldMapper) in).builder; } + private final int ignoreAboveDefault; + private final int ignoreAbove; + public static class Builder extends FieldMapper.Builder { final Parameter depthLimit = Parameter.intParam( @@ -148,12 +153,8 @@ public static class Builder extends FieldMapper.Builder { m -> builder(m).eagerGlobalOrdinals.get(), false ); - private final Parameter ignoreAbove = Parameter.intParam( - "ignore_above", - true, - m -> builder(m).ignoreAbove.get(), - Integer.MAX_VALUE - ); + private final int ignoreAboveDefault; + private final Parameter ignoreAbove; private final Parameter indexOptions = TextParams.keywordIndexOptions(m -> builder(m).indexOptions.get()); private final Parameter similarity = TextParams.similarity(m -> builder(m).similarity.get()); @@ -176,7 +177,7 @@ public static class Builder extends FieldMapper.Builder { + "] are true" ); } - }).precludesParameters(ignoreAbove); + }); private final Parameter> meta = Parameter.metaParam(); @@ -184,8 +185,20 @@ public static FieldMapper.Parameter> dimensionsParam(Function builder(m).ignoreAbove.get(), ignoreAboveDefault) + .addValidator(v -> { + if (v < 0) { + throw new IllegalArgumentException("[ignore_above] must be positive, got [" + v + "]"); + } + }); + this.dimensions.precludesParameters(ignoreAbove); } @Override @@ -223,11 +236,11 @@ public FlattenedFieldMapper build(MapperBuilderContext context) { dimensions.get(), ignoreAbove.getValue() ); - return new FlattenedFieldMapper(leafName(), ft, builderParams(this, context), this); + return new FlattenedFieldMapper(leafName(), ft, builderParams(this, context), ignoreAboveDefault, this); } } - public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n)); + public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, 
IGNORE_ABOVE_SETTING.get(c.getSettings()))); /** * A field type that represents the values under a particular JSON key, used @@ -808,9 +821,17 @@ public void validateMatchedRoutingPath(final String routingPath) { private final FlattenedFieldParser fieldParser; private final Builder builder; - private FlattenedFieldMapper(String leafName, MappedFieldType mappedFieldType, BuilderParams builderParams, Builder builder) { + private FlattenedFieldMapper( + String leafName, + MappedFieldType mappedFieldType, + BuilderParams builderParams, + int ignoreAboveDefault, + Builder builder + ) { super(leafName, mappedFieldType, builderParams); + this.ignoreAboveDefault = ignoreAboveDefault; this.builder = builder; + this.ignoreAbove = builder.ignoreAbove.get(); this.fieldParser = new FlattenedFieldParser( mappedFieldType.name(), mappedFieldType.name() + KEYED_FIELD_SUFFIX, @@ -835,8 +856,8 @@ int depthLimit() { return builder.depthLimit.get(); } - int ignoreAbove() { - return builder.ignoreAbove.get(); + public int ignoreAbove() { + return ignoreAbove; } @Override @@ -876,7 +897,7 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio @Override public FieldMapper.Builder getMergeBuilder() { - return new Builder(leafName()).init(this); + return new Builder(leafName(), ignoreAboveDefault).init(this); } @Override diff --git a/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldTypeTests.java b/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldTypeTests.java index 7e5cc5045c100..b4c7ea0ed9508 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldTypeTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldTypeTests.java @@ -243,6 +243,7 @@ public void testFetchSourceValue() throws IOException { "field", createIndexAnalyzers(), ScriptCompiler.NONE, + Integer.MAX_VALUE, IndexVersion.current() ).normalizer("lowercase").build(MapperBuilderContext.root(false, false)).fieldType(); 
assertEquals(List.of("value"), fetchSourceValue(normalizerMapper, "VALUE")); diff --git a/server/src/test/java/org/elasticsearch/index/mapper/MultiFieldsTests.java b/server/src/test/java/org/elasticsearch/index/mapper/MultiFieldsTests.java index 06c3125648309..fd024c5d23e28 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/MultiFieldsTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/MultiFieldsTests.java @@ -63,6 +63,7 @@ private KeywordFieldMapper.Builder getKeywordFieldMapperBuilder(boolean isStored "field", IndexAnalyzers.of(Map.of(), Map.of("normalizer", Lucene.STANDARD_ANALYZER), Map.of()), ScriptCompiler.NONE, + Integer.MAX_VALUE, IndexVersion.current() ); if (isStored) { diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/wildcard/20_ignore_above_stored_source.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/wildcard/20_ignore_above_stored_source.yml new file mode 100644 index 0000000000000..252bafbdbe15a --- /dev/null +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/wildcard/20_ignore_above_stored_source.yml @@ -0,0 +1,56 @@ +--- +wildcard field type ignore_above: + - requires: + cluster_features: [ "mapper.ignore_above_index_level_setting" ] + reason: introduce ignore_above index level setting + - do: + indices.create: + index: test + body: + settings: + index: + mapping: + ignore_above: 10 + mappings: + properties: + a_wildcard: + type: wildcard + b_wildcard: + type: wildcard + ignore_above: 20 + c_wildcard: + type: wildcard + d_wildcard: + type: wildcard + ignore_above: 5 + + + + - do: + index: + index: test + refresh: true + id: "1" + body: { "a_wildcard": "foo bar", "b_wildcard": "the quick brown", "c_wildcard": ["foo", "bar", "jumps over the lazy dog"], "d_wildcard": ["foo", "bar", "the quick"]} + + - do: + search: + body: + fields: + - a_wildcard + - b_wildcard + - c_wildcard + - d_wildcard + query: + match_all: {} + + - length: { hits.hits: 1 } + - match: { 
hits.hits.0._source.a_wildcard: "foo bar" } + - match: { hits.hits.0._source.b_wildcard: "the quick brown" } + - match: { hits.hits.0._source.c_wildcard: ["foo", "bar", "jumps over the lazy dog"] } + - match: { hits.hits.0._source.d_wildcard: ["foo", "bar", "the quick"] } + - match: { hits.hits.0.fields.a_wildcard.0: "foo bar" } + - match: { hits.hits.0.fields.b_wildcard.0: "the quick brown" } + - match: { hits.hits.0.fields.c_wildcard: ["foo", "bar"] } + - match: { hits.hits.0.fields.d_wildcard: ["foo", "bar"] } + diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/wildcard/30_ignore_above_synthetic_source.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/wildcard/30_ignore_above_synthetic_source.yml new file mode 100644 index 0000000000000..f5c9f3d92369a --- /dev/null +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/wildcard/30_ignore_above_synthetic_source.yml @@ -0,0 +1,58 @@ +--- +wildcard field type ignore_above: + - requires: + cluster_features: [ "mapper.ignore_above_index_level_setting" ] + reason: introduce ignore_above index level setting + - do: + indices.create: + index: test + body: + settings: + index: + mapping: + ignore_above: 10 + mappings: + _source: + mode: synthetic + properties: + a_wildcard: + type: wildcard + b_wildcard: + type: wildcard + ignore_above: 20 + c_wildcard: + type: wildcard + d_wildcard: + type: wildcard + ignore_above: 5 + + + + - do: + index: + index: test + refresh: true + id: "1" + body: { "a_wildcard": "foo bar", "b_wildcard": "the quick brown", "c_wildcard": ["foo", "bar", "jumps over the lazy dog"], "d_wildcard": ["foo", "bar", "the quick"]} + + - do: + search: + body: + fields: + - a_wildcard + - b_wildcard + - c_wildcard + - d_wildcard + query: + match_all: {} + + - length: { hits.hits: 1 } + - match: { hits.hits.0._source.a_wildcard: "foo bar" } + - match: { hits.hits.0._source.b_wildcard: "the quick brown" } + - match: { hits.hits.0._source.c_wildcard: ["bar", 
"foo"] } + - match: { hits.hits.0._source.d_wildcard: ["bar", "foo", "the quick"] } + - match: { hits.hits.0.fields.a_wildcard.0: "foo bar" } + - match: { hits.hits.0.fields.b_wildcard.0: "the quick brown" } + - match: { hits.hits.0.fields.c_wildcard: ["bar", "foo"] } + - match: { hits.hits.0.fields.d_wildcard: ["bar", "foo"] } + diff --git a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java b/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java index 8e4f56e299587..1e97e64371586 100644 --- a/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java +++ b/x-pack/plugin/wildcard/src/main/java/org/elasticsearch/xpack/wildcard/mapper/WildcardFieldMapper.java @@ -87,6 +87,8 @@ import java.util.Map; import java.util.Set; +import static org.elasticsearch.index.IndexSettings.IGNORE_ABOVE_SETTING; + /** * A {@link FieldMapper} for indexing fields with ngrams for efficient wildcard matching */ @@ -191,7 +193,6 @@ public static class Defaults { Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER ); - public static final int IGNORE_ABOVE = Integer.MAX_VALUE; } private static WildcardFieldMapper toType(FieldMapper in) { @@ -200,21 +201,28 @@ private static WildcardFieldMapper toType(FieldMapper in) { public static class Builder extends FieldMapper.Builder { - final Parameter ignoreAbove = Parameter.intParam("ignore_above", true, m -> toType(m).ignoreAbove, Defaults.IGNORE_ABOVE) - .addValidator(v -> { - if (v < 0) { - throw new IllegalArgumentException("[ignore_above] must be positive, got [" + v + "]"); - } - }); + final Parameter ignoreAbove; final Parameter nullValue = Parameter.stringParam("null_value", false, m -> toType(m).nullValue, null).acceptsNull(); final Parameter> meta = Parameter.metaParam(); final IndexVersion indexVersionCreated; - public Builder(String name, IndexVersion indexVersionCreated) { + final int 
ignoreAboveDefault; + + public Builder(final String name, IndexVersion indexVersionCreated) { + this(name, Integer.MAX_VALUE, indexVersionCreated); + } + + private Builder(String name, int ignoreAboveDefault, IndexVersion indexVersionCreated) { super(name); this.indexVersionCreated = indexVersionCreated; + this.ignoreAboveDefault = ignoreAboveDefault; + this.ignoreAbove = Parameter.intParam("ignore_above", true, m -> toType(m).ignoreAbove, ignoreAboveDefault).addValidator(v -> { + if (v < 0) { + throw new IllegalArgumentException("[ignore_above] must be positive, got [" + v + "]"); + } + }); } @Override @@ -236,23 +244,18 @@ Builder nullValue(String nullValue) { public WildcardFieldMapper build(MapperBuilderContext context) { return new WildcardFieldMapper( leafName(), - new WildcardFieldType( - context.buildFullName(leafName()), - nullValue.get(), - ignoreAbove.get(), - indexVersionCreated, - meta.get() - ), - ignoreAbove.get(), + new WildcardFieldType(context.buildFullName(leafName()), indexVersionCreated, meta.get(), this), context.isSourceSynthetic(), builderParams(this, context), - nullValue.get(), - indexVersionCreated + indexVersionCreated, + this ); } } - public static TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.indexVersionCreated())); + public static TypeParser PARSER = new TypeParser( + (n, c) -> new Builder(n, IGNORE_ABOVE_SETTING.get(c.getSettings()), c.indexVersionCreated()) + ); public static final char TOKEN_START_OR_END_CHAR = 0; public static final String TOKEN_START_STRING = Character.toString(TOKEN_START_OR_END_CHAR); @@ -263,18 +266,18 @@ public static final class WildcardFieldType extends MappedFieldType { static Analyzer lowercaseNormalizer = new LowercaseNormalizer(); private final String nullValue; - private final int ignoreAbove; private final NamedAnalyzer analyzer; + private final int ignoreAbove; - private WildcardFieldType(String name, String nullValue, int ignoreAbove, IndexVersion version, Map meta) { + private 
WildcardFieldType(String name, IndexVersion version, Map meta, Builder builder) { super(name, true, false, true, Defaults.TEXT_SEARCH_INFO, meta); if (version.onOrAfter(IndexVersions.V_7_10_0)) { this.analyzer = WILDCARD_ANALYZER_7_10; } else { this.analyzer = WILDCARD_ANALYZER_7_9; } - this.nullValue = nullValue; - this.ignoreAbove = ignoreAbove; + this.nullValue = builder.nullValue.getValue(); + this.ignoreAbove = builder.ignoreAbove.getValue(); } @Override @@ -889,26 +892,27 @@ protected String parseSourceValue(Object value) { NGRAM_FIELD_TYPE = freezeAndDeduplicateFieldType(ft); assert NGRAM_FIELD_TYPE.indexOptions() == IndexOptions.DOCS; } - - private final int ignoreAbove; private final String nullValue; private final IndexVersion indexVersionCreated; + + private final int ignoreAbove; + private final int ignoreAboveDefault; private final boolean storeIgnored; private WildcardFieldMapper( String simpleName, WildcardFieldType mappedFieldType, - int ignoreAbove, boolean storeIgnored, BuilderParams builderParams, - String nullValue, - IndexVersion indexVersionCreated + IndexVersion indexVersionCreated, + Builder builder ) { super(simpleName, mappedFieldType, builderParams); - this.nullValue = nullValue; - this.ignoreAbove = ignoreAbove; + this.nullValue = builder.nullValue.getValue(); this.storeIgnored = storeIgnored; this.indexVersionCreated = indexVersionCreated; + this.ignoreAbove = builder.ignoreAbove.getValue(); + this.ignoreAboveDefault = builder.ignoreAboveDefault; } @Override @@ -983,14 +987,14 @@ protected String contentType() { @Override public FieldMapper.Builder getMergeBuilder() { - return new Builder(leafName(), indexVersionCreated).init(this); + return new Builder(leafName(), ignoreAboveDefault, indexVersionCreated).init(this); } @Override protected SyntheticSourceSupport syntheticSourceSupport() { var layers = new ArrayList(); layers.add(new WildcardSyntheticFieldLoader()); - if (ignoreAbove != Defaults.IGNORE_ABOVE) { + if (ignoreAbove != 
ignoreAboveDefault) { layers.add(new CompositeSyntheticFieldLoader.StoredFieldLayer(originalName()) { @Override protected void writeValue(Object value, XContentBuilder b) throws IOException { diff --git a/x-pack/qa/runtime-fields/src/main/java/org/elasticsearch/xpack/runtimefields/test/CoreTestTranslater.java b/x-pack/qa/runtime-fields/src/main/java/org/elasticsearch/xpack/runtimefields/test/CoreTestTranslater.java index 2bea4bb247d8f..d34303ea803d6 100644 --- a/x-pack/qa/runtime-fields/src/main/java/org/elasticsearch/xpack/runtimefields/test/CoreTestTranslater.java +++ b/x-pack/qa/runtime-fields/src/main/java/org/elasticsearch/xpack/runtimefields/test/CoreTestTranslater.java @@ -222,10 +222,32 @@ public boolean modifySections(List executables) { */ protected abstract boolean modifySearch(ApiCallSection search); + private static Object getSetting(final Object map, final String... keys) { + Map current = (Map) map; + for (final String key : keys) { + if (current != null) { + current = (Map) current.get(key); + } else { + return null; + } + } + return current; + } + private boolean modifyCreateIndex(ApiCallSection createIndex) { String index = createIndex.getParams().get("index"); for (Map body : createIndex.getBodies()) { - Object settings = body.get("settings"); + final Object settings = body.get("settings"); + final Object indexMapping = getSetting(settings, "index", "mapping"); + if (indexMapping instanceof Map m) { + final Object ignoreAbove = m.get("ignore_above"); + if (ignoreAbove instanceof Integer ignoreAboveValue) { + if (ignoreAboveValue >= 0) { + // Scripts don't support ignore_above so we skip those fields + continue; + } + } + } if (settings instanceof Map && ((Map) settings).containsKey("sort.field")) { /* * You can't sort the index on a runtime field