elastic · elasticsearchmachine · Sep 23, 2024 · Sep 23, 2024
diff --git a/docs/reference/mapping/params/ignore-above.asciidoc b/docs/reference/mapping/params/ignore-above.asciidoc
@@ -57,3 +57,33 @@ NOTE: The value for `ignore_above` is the _character count_, but Lucene counts
 bytes. If you use UTF-8 text with many non-ASCII characters, you may want to
 set the limit to `32766 / 4 = 8191` since UTF-8 characters may occupy at most
 4 bytes.
+
+[[index-mapping-ignore-above]]
+=== `index.mapping.ignore_above`
+
+The `ignore_above` setting, typically used at the field level, can also be applied at the index level using
+`index.mapping.ignore_above`. This setting lets you define a maximum string length for all applicable fields across
+the index, including `keyword`, `wildcard`, and keyword values in `flattened` fields. Any values that exceed this
+limit will be ignored during indexing and won't be stored.
+
+This index-wide setting ensures a consistent approach to managing excessively long values. It works the same as the
+field-level setting: if a string's length exceeds the specified limit, that string won't be indexed or stored.
+When dealing with arrays, each element is evaluated separately, and only the elements that exceed the limit are ignored.
+
+[source,console]
+--------------------------------------------------
+PUT my-index-000001
+{
+  "settings": {
+    "index.mapping.ignore_above": 256
+  }
+}
+--------------------------------------------------
+
+In this example, all applicable fields in `my-index-000001` will ignore any strings longer than 256 characters.
+
+TIP: You can override this index-wide setting for specific fields by specifying a custom `ignore_above` value in the
+field mapping.
+
+NOTE: Just like the field-level `ignore_above`, this setting only affects indexing and storage. The original values
+are still available in the `_source` field if `_source` is enabled, which is the default behavior in Elasticsearch.
diff --git a/...c/src/yamlRestTest/resources/rest-api-spec/test/search/530_ignore_above_stored_source.yml b/...c/src/yamlRestTest/resources/rest-api-spec/test/search/530_ignore_above_stored_source.yml
@@ -0,0 +1,214 @@
+---
+ignore_above mapping level setting:  # index-level limit checked against scalar keyword and flattened values
+  - requires:
+      cluster_features: [ "mapper.ignore_above_index_level_setting" ]
+      reason: introduce ignore_above index level setting
+  - do:
+      indices.create:
+        index:  test
+        body:
+          settings:
+            index:
+              mapping:
+                ignore_above: 10  # index-wide limit; neither field below declares its own ignore_above
+          mappings:
+            properties:
+              keyword:
+                type: keyword
+              flattened:
+                type: flattened
+
+  - do:
+      index:
+        index:  test
+        refresh: true
+        id: "1"
+        body:   { "keyword": "foo bar", "flattened": { "value": "the quick brown fox" } }  # 7 chars vs. 19 chars
+
+  - do:
+      search:
+        body:
+          fields:
+            - keyword
+            - flattened
+          query:
+            match_all: {}
+
+  - length: { hits.hits: 1 }
+  - match: { hits.hits.0._source.keyword: "foo bar" }  # _source is expected to carry both original values
+  - match: { hits.hits.0._source.flattened.value: "the quick brown fox" }
+  - match: { hits.hits.0.fields.keyword.0: "foo bar" }  # 7 <= 10: expected back from `fields`
+  - match: { hits.hits.0.fields.flattened: null }  # 19 > 10: expected to be absent from `fields`
+
+---
+ignore_above mapping level setting on arrays:  # index-level limit checked per array element
+  - requires:
+      cluster_features: [ "mapper.ignore_above_index_level_setting" ]
+      reason: introduce ignore_above index level setting
+  - do:
+      indices.create:
+        index:  test
+        body:
+          settings:
+            index:
+              mapping:
+                ignore_above: 10  # index-wide limit; neither field below declares its own ignore_above
+          mappings:
+            properties:
+              keyword:
+                type: keyword
+              flattened:
+                type: flattened
+
+  - do:
+      index:
+        index:  test
+        refresh: true
+        id: "1"
+        body:   { "keyword": ["foo bar", "the quick brown fox"], "flattened": { "value": ["the quick brown fox", "jumps over"] } }  # lengths: 7, 19 / 19, 10
+
+  - do:
+      search:
+        body:
+          fields:
+            - keyword
+            - flattened
+          query:
+            match_all: {}
+
+  - length: { hits.hits: 1 }
+  - match: { hits.hits.0._source.keyword: ["foo bar", "the quick brown fox"] }  # _source is expected to keep every element
+  - match: { hits.hits.0._source.flattened.value: ["the quick brown fox", "jumps over"] }
+  - match: { hits.hits.0.fields.keyword.0: "foo bar" }  # the 7-char element is expected first in `fields`
+  - match: { hits.hits.0.fields.flattened.0.value: "jumps over" }  # exactly 10 chars: expected to be kept, 19-char sibling dropped
+
+---
+ignore_above mapping overrides setting:  # field-level ignore_above (100) is expected to win over the index-level one (10)
+  - requires:
+      cluster_features: [ "mapper.ignore_above_index_level_setting" ]
+      reason: introduce ignore_above index level setting
+  - do:
+      indices.create:
+        index:  test
+        body:
+          settings:
+            index:
+              mapping:
+                ignore_above: 10  # index-wide limit, shorter than both values indexed below
+          mappings:
+            properties:
+              keyword:
+                type: keyword
+                ignore_above: 100  # per-field limit for the keyword field
+              flattened:
+                type: flattened
+                ignore_above: 100  # per-field limit for the flattened field
+
+  - do:
+      index:
+        index:  test
+        refresh: true
+        id: "1"
+        body:   { "keyword": "foo bar baz foo bar baz", "flattened": { "value": "the quick brown fox" } }  # 23 chars vs. 19 chars
+
+  - do:
+      search:
+        body:
+          fields:
+            - keyword
+            - flattened
+          query:
+            match_all: { }
+
+  - length: { hits.hits: 1 }
+  - match: { hits.hits.0._source.keyword: "foo bar baz foo bar baz" }
+  - match: { hits.hits.0._source.flattened.value: "the quick brown fox" }
+  - match: { hits.hits.0.fields.keyword.0: "foo bar baz foo bar baz" }  # 23 > 10 but <= 100: expected back from `fields`
+  - match: { hits.hits.0.fields.flattened.0.value: "the quick brown fox" }  # 19 > 10 but <= 100: expected back from `fields`
+
+---
+ignore_above mapping overrides setting on arrays:  # field-level limit (100) vs. index-level limit (10), array values
+  - requires:
+      cluster_features: [ "mapper.ignore_above_index_level_setting" ]
+      reason: introduce ignore_above index level setting
+  - do:
+      indices.create:
+        index: test
+        body:
+          settings:
+            index:
+              mapping:
+                ignore_above: 10  # index-wide limit, shorter than every element indexed below
+          mappings:
+            properties:
+              keyword:
+                type: keyword
+                ignore_above: 100  # per-field limit for the keyword field
+              flattened:
+                type: flattened
+                ignore_above: 100  # per-field limit for the flattened field
+
+  - do:
+      index:
+        index: test
+        refresh: true
+        id: "1"
+        body: { "keyword": ["foo bar baz foo bar baz", "the quick brown fox jumps over"], "flattened": { "value": ["the quick brown fox", "jumps over the lazy dog"] } }  # lengths: 23, 30 / 19, 23
+
+  - do:
+      search:
+        body:
+          fields:
+            - keyword
+            - flattened
+          query:
+            match_all: { }
+
+  - length: { hits.hits: 1 }
+  - match: { hits.hits.0._source.keyword: ["foo bar baz foo bar baz", "the quick brown fox jumps over"] }
+  - match: { hits.hits.0._source.flattened.value: ["the quick brown fox", "jumps over the lazy dog"] }
+  - match: { hits.hits.0.fields.keyword: ["foo bar baz foo bar baz", "the quick brown fox jumps over"] }  # every element <= 100: all expected back
+  - match: { hits.hits.0.fields.flattened.0.value: ["the quick brown fox", "jumps over the lazy dog"] }  # every element <= 100: all expected back
+
+---
+date ignore_above index level setting:  # same strings sent to a keyword field and a date field under an index-level limit
+  - requires:
+      cluster_features: [ "mapper.ignore_above_index_level_setting" ]
+      reason: introduce ignore_above index level setting
+  - do:
+      indices.create:
+        index:  test
+        body:
+          settings:
+            index:
+              mapping:
+                ignore_above: 10  # index-wide limit, shorter than the 19-char timestamps indexed below
+          mappings:
+            properties:
+              keyword:
+                type: keyword
+              date:
+                type: date
+                format: "yyyy-MM-dd'T'HH:mm:ss"
+
+  - do:
+      index:
+        index:  test
+        refresh: true
+        id: "1"
+        body:   { "keyword": ["2023-09-17T15:30:00", "2023-09-17T15:31:00"], "date": ["2023-09-17T15:30:00", "2023-09-17T15:31:00"] }  # identical 19-char strings in both fields
+
+  - do:
+      search:
+        body:
+          fields:
+            - keyword
+            - date
+          query:
+            match_all: {}
+
+  - length: { hits.hits: 1 }
+  - match: { hits.hits.0._source.keyword: ["2023-09-17T15:30:00", "2023-09-17T15:31:00"] }
+  - match: { hits.hits.0._source.date: ["2023-09-17T15:30:00", "2023-09-17T15:31:00"] }
+  - match: { hits.hits.0.fields.keyword: null }  # 19 > 10: keyword values are expected to be absent from `fields`
+  - match: { hits.hits.0.fields.date: ["2023-09-17T15:30:00","2023-09-17T15:31:00"] }  # date values are expected back in full despite the limit