diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml index c80a9137a401d..667c30d32bee4 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml @@ -1,3 +1,4 @@ +--- object with unmapped fields: - requires: cluster_features: ["mapper.ignored_source.dont_expand_dots"] diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/logsdb/10_settings.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/logsdb/10_settings.yml index 07cb154449a70..499d805c6e500 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/logsdb/10_settings.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/logsdb/10_settings.yml @@ -600,3 +600,284 @@ end time not allowed in logs mode: - match: { error.root_cause.0.type: "illegal_argument_exception" } - match: { error.type: "illegal_argument_exception" } - match: { error.reason: "[index.time_series.end_time] requires [index.mode=time_series]" } + +--- +ignore dynamic beyond limit logsdb default value: + - requires: + cluster_features: [ "mapper.logsdb_default_ignore_dynamic_beyond_limit" ] + reason: requires logsdb default value for `index.mapping.total_fields.ignore_dynamic_beyond_limit` + + - do: + indices.create: + index: test-ignore-dynamic-default + body: + settings: + index: + mode: logsdb + + - do: + indices.get_settings: + index: test-ignore-dynamic-default + include_defaults: true + + - match: { test-ignore-dynamic-default.settings.index.mode: "logsdb" } + - match: { test-ignore-dynamic-default.defaults.index.mapping.total_fields.limit: "1000" } + - match: { test-ignore-dynamic-default.defaults.index.mapping.total_fields.ignore_dynamic_beyond_limit: "true" } + +--- +ignore dynamic beyond limit logsdb override value: + - requires: + cluster_features: [ "mapper.logsdb_default_ignore_dynamic_beyond_limit" ] + reason: requires logsdb default value for `index.mapping.total_fields.ignore_dynamic_beyond_limit` + + - do: + indices.create: + index: test-ignore-dynamic-override + body: + settings: + index: + mode: logsdb + mapping: + total_fields: + ignore_dynamic_beyond_limit: false + + - do: + indices.get_settings: + index: test-ignore-dynamic-override + + - match: { test-ignore-dynamic-override.settings.index.mode: "logsdb" } + - match: { test-ignore-dynamic-override.settings.index.mapping.total_fields.ignore_dynamic_beyond_limit: "false" } + +--- +logsdb with default ignore dynamic beyond limit and default sorting: + - requires: + cluster_features: ["mapper.logsdb_default_ignore_dynamic_beyond_limit"] + reason: requires default value for ignore_dynamic_beyond_limit + + - do: + indices.create: + index: test-logsdb-default-sort + body: + settings: + index: + mode: logsdb + mapping: + # NOTE: When the index mode is set to `logsdb`, the `host.name` field is automatically injected if + # sort settings are not overridden. + # With `subobjects` set to `true` (default), this creates a `host` object field and a nested `name` + # keyword field (`host.name`). + # + # As a result, there are always at least 4 statically mapped fields (`@timestamp`, `host`, `host.name` + # and `name`). We cannot use a field limit lower than 4 because these fields are always present. + # + # Indeed, if `index.mapping.total_fields.ignore_dynamic_beyond_limit` is `true`, any dynamically + # mapped fields beyond the limit `index.mapping.total_fields.limit` are ignored, but the statically + # mapped fields are always counted. + total_fields: + limit: 4 + mappings: + properties: + "@timestamp": + type: date + name: + type: keyword + + - do: + indices.get_settings: + index: test-logsdb-default-sort + + - match: { test-logsdb-default-sort.settings.index.mode: "logsdb" } + + - do: + bulk: + index: test-logsdb-default-sort + refresh: true + body: + - '{ "index": { } }' + - '{ "@timestamp": "2024-08-13T12:30:00Z", "name": "foo", "host.name": "92f4a67c", "value": 10, "message": "the quick brown fox", "region": "us-west", "pid": 153462 }' + - '{ "index": { } }' + - '{ "@timestamp": "2024-08-13T12:01:00Z", "name": "bar", "host.name": "24eea278", "value": 20, "message": "jumps over the lazy dog", "region": "us-central", "pid": 674972 }' + - match: { errors: false } + + - do: + search: + index: test-logsdb-default-sort + body: + query: + match_all: {} + + - match: { hits.total.value: 2 } + - match: { hits.hits.0._source.name: "bar" } + - match: { hits.hits.0._source.value: 20 } + - match: { hits.hits.0._source.message: "jumps over the lazy dog" } + - match: { hits.hits.0._ignored: [ "message", "pid", "region", "value" ] } + - match: { hits.hits.1._source.name: "foo" } + - match: { hits.hits.1._source.value: 10 } + - match: { hits.hits.1._source.message: "the quick brown fox" } + - match: { hits.hits.1._ignored: [ "message", "pid", "region", "value" ] } + +--- +logsdb with default ignore dynamic beyond limit and non-default sorting: + - requires: + cluster_features: ["mapper.logsdb_default_ignore_dynamic_beyond_limit"] + reason: requires default value for ignore_dynamic_beyond_limit + + - do: + indices.create: + index: test-logsdb-non-default-sort + body: + settings: + index: + sort.field: [ "name" ] + sort.order: [ "desc" ] + mode: logsdb + mapping: + # NOTE: Here sort settings are overridden and we do not have any additional statically mapped field other + # than `name` and `timestamp`. As a result, there are only 2 statically mapped fields. + total_fields: + limit: 2 + mappings: + properties: + "@timestamp": + type: date + name: + type: keyword + + - do: + indices.get_settings: + index: test-logsdb-non-default-sort + + - match: { test-logsdb-non-default-sort.settings.index.mode: "logsdb" } + + - do: + bulk: + index: test-logsdb-non-default-sort + refresh: true + body: + - '{ "index": { } }' + - '{ "@timestamp": "2024-08-13T12:30:00Z", "name": "foo", "host.name": "92f4a67c", "value": 10, "message": "the quick brown fox", "region": "us-west", "pid": 153462 }' + - '{ "index": { } }' + - '{ "@timestamp": "2024-08-13T12:01:00Z", "name": "bar", "host.name": "24eea278", "value": 20, "message": "jumps over the lazy dog", "region": "us-central", "pid": 674972 }' + - match: { errors: false } + + - do: + search: + index: test-logsdb-non-default-sort + body: + query: + match_all: {} + + - match: { hits.total.value: 2 } + - match: { hits.hits.0._source.name: "foo" } + - match: { hits.hits.0._source.value: 10 } + - match: { hits.hits.0._source.message: "the quick brown fox" } + - match: { hits.hits.0._ignored: [ "host", "message", "pid", "region", "value" ] } + - match: { hits.hits.1._source.name: "bar" } + - match: { hits.hits.1._source.value: 20 } + - match: { hits.hits.1._source.message: "jumps over the lazy dog" } + - match: { hits.hits.1._ignored: [ "host", "message", "pid", "region", "value" ] } + +--- +logsdb with default ignore dynamic beyond limit and too low limit: + - requires: + cluster_features: ["mapper.logsdb_default_ignore_dynamic_beyond_limit"] + reason: requires default value for ignore_dynamic_beyond_limit + + - do: + catch: bad_request + indices.create: + index: test-logsdb-low-limit + body: + settings: + index: + mode: logsdb + mapping: + # NOTE: When the index mode is set to `logsdb`, the `host.name` field is automatically injected if + # sort settings are not overridden. + # With `subobjects` set to `true` (default), this creates a `host` object field and a nested `name` + # keyword field (`host.name`). + # + # As a result, there are always at least 4 statically mapped fields (`@timestamp`, `host`, `host.name` + # and `name`). We cannot use a field limit lower than 4 because these fields are always present. + # + # Indeed, if `index.mapping.total_fields.ignore_dynamic_beyond_limit` is `true`, any dynamically + # mapped fields beyond the limit `index.mapping.total_fields.limit` are ignored, but the statically + # mapped fields are always counted. + total_fields: + limit: 3 + mappings: + properties: + "@timestamp": + type: date + name: + type: keyword + - match: { error.type: "illegal_argument_exception" } + - match: { error.reason: "Limit of total fields [3] has been exceeded" } + +--- +logsdb with default ignore dynamic beyond limit and subobjects false: + - requires: + cluster_features: ["mapper.logsdb_default_ignore_dynamic_beyond_limit"] + reason: requires default value for ignore_dynamic_beyond_limit + + - do: + indices.create: + index: test-logsdb-subobjects-false + body: + settings: + index: + mode: logsdb + mapping: + # NOTE: When the index mode is set to `logsdb`, the `host.name` field is automatically injected if + # sort settings are not overridden. + # With `subobjects` set to `false` anyway, a single `host.name` keyword field is automatically mapped. + # + # As a result, there are just 3 statically mapped fields (`@timestamp`, `host.name` and `name`). + # We cannot use a field limit lower than 3 because these fields are always present. + # + # Indeed, if `index.mapping.total_fields.ignore_dynamic_beyond_limit` is `true`, any dynamically + # mapped fields beyond the limit `index.mapping.total_fields.limit` are ignored, but the statically + # mapped fields are always counted. + total_fields: + limit: 3 + mappings: + subobjects: false + properties: + "@timestamp": + type: date + name: + type: keyword + + - do: + indices.get_settings: + index: test-logsdb-subobjects-false + + - match: { test-logsdb-subobjects-false.settings.index.mode: "logsdb" } + + - do: + bulk: + index: test-logsdb-subobjects-false + refresh: true + body: + - '{ "index": { } }' + - '{ "@timestamp": "2024-08-13T12:30:00Z", "name": "foo", "host.name": "92f4a67c", "value": 10, "message": "the quick brown fox", "region": "us-west", "pid": 153462 }' + - '{ "index": { } }' + - '{ "@timestamp": "2024-08-13T12:01:00Z", "name": "bar", "host.name": "24eea278", "value": 20, "message": "jumps over the lazy dog", "region": "us-central", "pid": 674972 }' + - match: { errors: false } + + - do: + search: + index: test-logsdb-subobjects-false + body: + query: + match_all: {} + + - match: { hits.total.value: 2 } + - match: { hits.hits.0._source.name: "bar" } + - match: { hits.hits.0._source.value: 20 } + - match: { hits.hits.0._source.message: "jumps over the lazy dog" } + - match: { hits.hits.0._ignored: [ "message", "pid", "region", "value" ] } + - match: { hits.hits.1._source.name: "foo" } + - match: { hits.hits.1._source.value: 10 } + - match: { hits.hits.1._source.message: "the quick brown fox" } + - match: { hits.hits.1._ignored: [ "message", "pid", "region", "value" ] } diff --git a/server/src/main/java/org/elasticsearch/index/IndexVersions.java b/server/src/main/java/org/elasticsearch/index/IndexVersions.java index f55d78ad7a550..ac6fe42255fd2 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexVersions.java +++ b/server/src/main/java/org/elasticsearch/index/IndexVersions.java @@ -119,6 +119,7 @@ private static IndexVersion def(int id, Version luceneVersion) { public static final IndexVersion UPGRADE_TO_LUCENE_9_12 = def(8_516_00_0, Version.LUCENE_9_12_0); public static final IndexVersion ENABLE_IGNORE_ABOVE_LOGSDB = def(8_517_00_0, Version.LUCENE_9_12_0); public static final IndexVersion ADD_ROLE_MAPPING_CLEANUP_MIGRATION = def(8_518_00_0, Version.LUCENE_9_12_0); + public static final IndexVersion LOGSDB_DEFAULT_IGNORE_DYNAMIC_BEYOND_LIMIT_BACKPORT = def(8_519_00_0, Version.LUCENE_9_12_0); /* * STOP! READ THIS FIRST! No, really, * ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _ diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java index d0aa672863bea..cebab843856b9 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java @@ -57,7 +57,8 @@ public Set getTestFeatures() { RangeFieldMapper.DATE_RANGE_INDEXING_FIX, IgnoredSourceFieldMapper.DONT_EXPAND_DOTS_IN_IGNORED_SOURCE, SourceFieldMapper.REMOVE_SYNTHETIC_SOURCE_ONLY_VALIDATION, - IgnoredSourceFieldMapper.ALWAYS_STORE_OBJECT_ARRAYS_IN_NESTED_OBJECTS + IgnoredSourceFieldMapper.ALWAYS_STORE_OBJECT_ARRAYS_IN_NESTED_OBJECTS, + MapperService.LOGSDB_DEFAULT_IGNORE_DYNAMIC_BEYOND_LIMIT ); } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperService.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperService.java index d57475a562539..39dbc19f0d56c 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MapperService.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperService.java @@ -23,9 +23,12 @@ import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.core.Nullable; import org.elasticsearch.core.UpdateForV9; +import org.elasticsearch.features.NodeFeature; import org.elasticsearch.index.AbstractIndexComponent; +import org.elasticsearch.index.IndexMode; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.IndexVersions; import org.elasticsearch.index.analysis.AnalysisRegistry; import org.elasticsearch.index.analysis.IndexAnalyzers; import org.elasticsearch.index.analysis.NamedAnalyzer; @@ -122,9 +125,18 @@ public boolean isAutoUpdate() { Property.IndexScope, Property.ServerlessPublic ); + + public static final NodeFeature LOGSDB_DEFAULT_IGNORE_DYNAMIC_BEYOND_LIMIT = new NodeFeature( + "mapper.logsdb_default_ignore_dynamic_beyond_limit" + ); public static final Setting INDEX_MAPPING_IGNORE_DYNAMIC_BEYOND_LIMIT_SETTING = Setting.boolSetting( "index.mapping.total_fields.ignore_dynamic_beyond_limit", - false, + settings -> { + boolean isLogsDBIndexMode = IndexSettings.MODE.get(settings) == IndexMode.LOGSDB; + boolean isNewIndexVersion = IndexMetadata.SETTING_INDEX_VERSION_CREATED.get(settings) + .onOrAfter(IndexVersions.LOGSDB_DEFAULT_IGNORE_DYNAMIC_BEYOND_LIMIT_BACKPORT); + return String.valueOf(isLogsDBIndexMode && isNewIndexVersion); + }, Property.Dynamic, Property.IndexScope );