elastic · elasticsearchmachine · Sep 23, 2024 · Sep 23, 2024 · Sep 23, 2024 · Sep 23, 2024
diff --git a/docs/changelog/112645.yaml b/docs/changelog/112645.yaml
@@ -0,0 +1,6 @@
+pr: 112645
+summary: Add support for multi-value dimensions
+area: Mapping
+type: enhancement
+issues:
+ - 110387
diff --git a/docs/reference/mapping/types/keyword.asciidoc b/docs/reference/mapping/types/keyword.asciidoc
@@ -163,7 +163,6 @@ index setting limits the number of dimensions in an index.
 Dimension fields have the following constraints:
 
 * The `doc_values` and `index` mapping parameters must be `true`.
-* Field values cannot be an <<array,array or multi-value>>.
 // end::dimension[]
 * Dimension values are used to identify a document’s time series. If dimension values are altered in any way during indexing, the document will be stored as belonging to different from intended time series. As a result there are additional constraints:
 ** The field cannot use a <<normalizer,`normalizer`>>.

diff --git a/modules/data-streams/src/yamlRestTest/resources/rest-api-spec/test/data_stream/150_tsdb.yml b/modules/data-streams/src/yamlRestTest/resources/rest-api-spec/test/data_stream/150_tsdb.yml
@@ -1230,3 +1230,83 @@ non string dimension fields:
   - match: { .$idx0name.mappings.properties.attributes.properties.double.time_series_dimension: true }
   - match: { .$idx0name.mappings.properties.attributes.properties.host\.ip.type: 'ip' }
   - match: { .$idx0name.mappings.properties.attributes.properties.host\.ip.time_series_dimension: true }
+
+---
+multi value dimensions:
+  - requires:
+      cluster_features: ["routing.multi_value_routing_path"]
+      reason: support for multi-value dimensions
+
+  - do:
+      allowed_warnings:
+        - "index template [my-dynamic-template] has index patterns [k9s*] matching patterns from existing older templates [global] with patterns (global => [*]); this template [my-dynamic-template] will take precedence during new index creation"
+      indices.put_index_template:
+        name: my-dynamic-template
+        body:
+          index_patterns: [k9s*]
+          data_stream: {}
+          template:
+            settings:
+              index:
+                number_of_shards: 1
+                mode: time_series
+                time_series:
+                  start_time: 2023-08-31T13:03:08.138Z
+
+            mappings:
+              properties:
+                attributes:
+                  type: passthrough
+                  dynamic: true
+                  time_series_dimension: true
+                  priority: 1
+              dynamic_templates:
+                - counter_metric:
+                    mapping:
+                      type: integer
+                      time_series_metric: counter
+
+  - do:
+      bulk:
+        index: k9s
+        refresh: true
+        body:
+          - '{ "create": { "dynamic_templates": { "data": "counter_metric" } } }'
+          - '{ "@timestamp": "2023-09-01T13:03:08.138Z","data": "10", "attributes": { "dim1": ["a" , "b"], "dim2": [1, 2] } }'
+          - '{ "create": { "dynamic_templates": { "data": "counter_metric" } } }'
+          - '{ "@timestamp": "2023-09-01T13:03:08.138Z","data": "20", "attributes": { "dim1": ["b" , "a"], "dim2": [1, 2] } }'
+          - '{ "create": { "dynamic_templates": { "data": "counter_metric" } } }'
+          - '{ "@timestamp": "2023-09-01T13:03:08.138Z","data": "20", "attributes": { "dim1": ["c" , "b"], "dim2": [1, 2] } }'
+  - is_false: errors
+
+  - do:
+      search:
+        index: k9s
+        body:
+          size: 0
+          aggs:
+            tsids:
+              terms:
+                field: _tsid
+
+  - length: { aggregations.tsids.buckets: 3 } # docs 1 and 2 differ only in the order of the dim1 values and doc 3 has different dim1 values, so we expect three distinct time series
+
+  - do:
+      search:
+        index: k9s
+        body:
+          size: 0
+          aggs:
+            dims:
+              terms:
+                field: dim1
+                order:
+                  _key: asc
+
+  - length: { aggregations.dims.buckets: 3 }
+  - match: { aggregations.dims.buckets.0.key: a }
+  - match: { aggregations.dims.buckets.0.doc_count: 2 }
+  - match: { aggregations.dims.buckets.1.key: b }
+  - match: { aggregations.dims.buckets.1.doc_count: 3 }
+  - match: { aggregations.dims.buckets.2.key: c }
+  - match: { aggregations.dims.buckets.2.doc_count: 1 }
diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/tsdb/140_routing_path.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/tsdb/140_routing_path.yml
@@ -119,11 +119,11 @@ missing dimension on routing path field:
                 type: keyword
 
 ---
-multi-value routing path field:
+multi-value routing path field succeeds:
   - requires:
       test_runner_features: close_to
-      cluster_features: ["gte_v8.13.0"]
-      reason: _tsid hashing introduced in 8.13
+      cluster_features: ["routing.multi_value_routing_path"]
+      reason: support for multi-value dimensions
 
   - do:
       indices.create:
@@ -172,12 +172,7 @@ multi-value routing path field:
           - '{"index": {}}'
           - '{"@timestamp": "2021-04-28T18:35:54.467Z", "uid": "df3145b3-0563-4d3b-a0f7-897eb2876ea9", "voltage": 6.8, "unmapped_field": 40, "tag": [ "one", "three" ] }'
 
-  - is_true: errors
-
-  - match: {items.1.index.error.reason: "Error extracting routing: Routing values must be strings but found [START_ARRAY]" }
-  - match: {items.3.index.error.reason: "Error extracting routing: Routing values must be strings but found [START_ARRAY]" }
-  - match: {items.4.index.error.reason: "Error extracting routing: Routing values must be strings but found [START_ARRAY]" }
-  - match: {items.7.index.error.reason: "Error extracting routing: Routing values must be strings but found [START_ARRAY]" }
+  - is_false: errors
 
   - do:
       search:
@@ -195,13 +190,21 @@ multi-value routing path field:
                   avg:
                     field: voltage
 
-  - match: {hits.total.value: 4}
-  - length: {aggregations.tsids.buckets: 2}
+  - match: {hits.total.value: 8}
+  - length: {aggregations.tsids.buckets: 4}
 
-  - match: {aggregations.tsids.buckets.0.key: "KDODRmbj7vu4rLWvjrJbpUuaET_vOYoRw6ImzKEcF4sEaGKnXSaKfM0" }
+  - match: {aggregations.tsids.buckets.0.key: "KDODRmbj7vu4rLWvjrJbpUtt0uPSOYoRw_LI4DD7DFEGEJ3NR3eQkMY" }
   - match: {aggregations.tsids.buckets.0.doc_count: 2 }
   - close_to: {aggregations.tsids.buckets.0.voltage.value: { value: 6.70, error: 0.01 }}
 
-  - match: { aggregations.tsids.buckets.1.key: "KDODRmbj7vu4rLWvjrJbpUvcUWJEddqA4Seo8jbBBBFxwC0lrefCb6A" }
+  - match: { aggregations.tsids.buckets.1.key: "KDODRmbj7vu4rLWvjrJbpUtt0uPSddqA4WYKglGPR_C0cJe8QGaiC2c" }
   - match: {aggregations.tsids.buckets.1.doc_count: 2 }
-  - close_to: {aggregations.tsids.buckets.1.voltage.value: { value: 7.30, error: 0.01 }}
+  - close_to: {aggregations.tsids.buckets.1.voltage.value: { value: 7.15, error: 0.01 }}
+
+  - match: { aggregations.tsids.buckets.2.key: "KDODRmbj7vu4rLWvjrJbpUuaET_vOYoRw6ImzKEcF4sEaGKnXSaKfM0" }
+  - match: {aggregations.tsids.buckets.2.doc_count: 2 }
+  - close_to: {aggregations.tsids.buckets.2.voltage.value: { value: 6.70, error: 0.01 }}
+
+  - match: { aggregations.tsids.buckets.3.key: "KDODRmbj7vu4rLWvjrJbpUvcUWJEddqA4Seo8jbBBBFxwC0lrefCb6A" }
+  - match: {aggregations.tsids.buckets.3.doc_count: 2 }
+  - close_to: {aggregations.tsids.buckets.3.voltage.value: { value: 7.30, error: 0.01 }}
diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/IndexRouting.java b/server/src/main/java/org/elasticsearch/cluster/routing/IndexRouting.java
@@ -35,7 +35,6 @@
 import java.util.ArrayList;
 import java.util.Base64;
 import java.util.Collections;
-import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -45,13 +44,15 @@
 import java.util.function.Predicate;
 
 import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken;
+import static org.elasticsearch.common.xcontent.XContentParserUtils.expectValueToken;
 
 /**
  * Generates the shard id for {@code (id, routing)} pairs.
  */
 public abstract class IndexRouting {
 
     static final NodeFeature BOOLEAN_ROUTING_PATH = new NodeFeature("routing.boolean_routing_path");
+    static final NodeFeature MULTI_VALUE_ROUTING_PATH = new NodeFeature("routing.multi_value_routing_path");
 
     /**
      * Build the routing from {@link IndexMetadata}.
@@ -301,7 +302,13 @@ public String createId(Map<String, Object> flat, byte[] suffix) {
             Builder b = builder();
             for (Map.Entry<String, Object> e : flat.entrySet()) {
                 if (isRoutingPath.test(e.getKey())) {
-                    b.hashes.add(new NameAndHash(new BytesRef(e.getKey()), hash(new BytesRef(e.getValue().toString()))));
+                    if (e.getValue() instanceof List<?> listValue) {
+                        for (Object v : listValue) {
+                            b.addHash(e.getKey(), new BytesRef(v.toString()));
+                        }
+                    } else {
+                        b.addHash(e.getKey(), new BytesRef(e.getValue().toString()));
+                    }
                 }
             }
             return b.createId(suffix, IndexRouting.ExtractFromSource::defaultOnEmpty);
@@ -336,7 +343,7 @@ public class Builder {
 
             public void addMatching(String fieldName, BytesRef string) {
                 if (isRoutingPath.test(fieldName)) {
-                    hashes.add(new NameAndHash(new BytesRef(fieldName), hash(string)));
+                    addHash(fieldName, string); // shared helper also supplies the insertion order NameAndHash now carries
                 }
             }
 
@@ -357,6 +364,13 @@ private void extractObject(@Nullable String path, XContentParser source) throws
                 }
             }
 
+            private void extractArray(@Nullable String path, XContentParser source) throws IOException {
+                for (Token element = source.currentToken(); element != Token.END_ARRAY; element = source.currentToken()) {
+                    expectValueToken(element, source); // array elements must be value tokens, otherwise this throws
+                    extractItem(path, source); // hashes the element; extractItem is relied on to advance the parser
+                }
+            }
+
             private void extractItem(String path, XContentParser source) throws IOException {
                 switch (source.currentToken()) {
                     case START_OBJECT:
@@ -367,7 +381,12 @@ private void extractItem(String path, XContentParser source) throws IOException
                     case VALUE_STRING:
                     case VALUE_NUMBER:
                     case VALUE_BOOLEAN:
-                        hashes.add(new NameAndHash(new BytesRef(path), hash(new BytesRef(source.text()))));
+                        addHash(path, new BytesRef(source.text()));
+                        source.nextToken();
+                        break;
+                    case START_ARRAY:
+                        source.nextToken();
+                        extractArray(path, source);
                         source.nextToken();
                         break;
                     case VALUE_NULL:
@@ -376,28 +395,24 @@ private void extractItem(String path, XContentParser source) throws IOException
                     default:
                         throw new ParsingException(
                             source.getTokenLocation(),
-                            "Routing values must be strings but found [{}]",
+                            "Cannot extract routing path due to unexpected token [{}]",
                             source.currentToken()
                         );
                 }
             }
 
+            private void addHash(String path, BytesRef value) { // records one (field name, hash of value) entry
+                hashes.add(new NameAndHash(new BytesRef(path), hash(value), hashes.size())); // size() before add = insertion index
+            }
+
             private int buildHash(IntSupplier onEmpty) {
-                Collections.sort(hashes);
-                Iterator<NameAndHash> itr = hashes.iterator();
-                if (itr.hasNext() == false) {
+                if (hashes.isEmpty()) { // no entries were collected: use the caller-supplied fallback
                     return onEmpty.getAsInt();
                 }
-                NameAndHash prev = itr.next();
-                int hash = hash(prev.name) ^ prev.hash;
-                while (itr.hasNext()) {
-                    NameAndHash next = itr.next();
-                    if (prev.name.equals(next.name)) {
-                        throw new IllegalArgumentException("Duplicate routing dimension for [" + next.name + "]");
-                    }
-                    int thisHash = hash(next.name) ^ next.hash;
-                    hash = 31 * hash + thisHash;
-                    prev = next;
+                Collections.sort(hashes); // by name, then by insertion order (see NameAndHash#compareTo)
+                int hash = 0; // with a 0 seed the first iteration yields hash(name) ^ hash, like the removed code
+                for (NameAndHash nah : hashes) {
+                    hash = 31 * hash + (hash(nah.name) ^ nah.hash); // repeated names are folded in, no longer rejected
                 }
                 return hash;
             }
@@ -458,10 +473,13 @@ private String error(String operation) {
         }
     }
 
-    private record NameAndHash(BytesRef name, int hash) implements Comparable<NameAndHash> {
+    private record NameAndHash(BytesRef name, int hash, int order) implements Comparable<NameAndHash> {
         @Override
         public int compareTo(NameAndHash o) {
-            return name.compareTo(o.name);
+            int i = name.compareTo(o.name);
+            if (i != 0) return i;
+            // same name: fall back to insertion order so that array values keep the order in which they appear in the source
+            return Integer.compare(order, o.order);
         }
     }
 }
diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/RoutingFeatures.java b/server/src/main/java/org/elasticsearch/cluster/routing/RoutingFeatures.java
@@ -18,6 +18,6 @@ public class RoutingFeatures implements FeatureSpecification {
 
     @Override
     public Set<NodeFeature> getFeatures() {
-        return Set.of(IndexRouting.BOOLEAN_ROUTING_PATH);
+        return Set.of(IndexRouting.BOOLEAN_ROUTING_PATH, IndexRouting.MULTI_VALUE_ROUTING_PATH); // both routing features
     }
 }
diff --git a/server/src/main/java/org/elasticsearch/common/xcontent/XContentParserUtils.java b/server/src/main/java/org/elasticsearch/common/xcontent/XContentParserUtils.java
@@ -72,6 +72,20 @@ public static void ensureExpectedToken(Token expected, Token actual, XContentPar
         }
     }
 
+    /**
+     * Verifies that the supplied token {@linkplain Token#isValue() is a value token}.
+     * @param actual the token to check
+     * @param parser the parser whose token location is reported on failure
+     * @throws ParsingException if {@code actual} is not a value token
+     */
+    public static void expectValueToken(Token actual, XContentParser parser) {
+        if (actual.isValue()) {
+            return;
+        }
+        String message = String.format(Locale.ROOT, "Failed to parse object: expecting value token but found [%s]", actual);
+        throw new ParsingException(parser.getTokenLocation(), message);
+    }
+
     private static ParsingException parsingException(XContentParser parser, Token expected, Token actual) {
         return new ParsingException(
             parser.getTokenLocation(),