Skip to content

Commit 8d223cb

Browse files
authored
Add support for multi-value dimensions (#112645)
Closes #110387 Having this in now affords us not having to introduce version checks in the ES exporter later. We can simply use the same serialization logic for metric attributes as we do for other signals. This also enables us to properly map `*.ip` fields to the ip field type as ip fields containing a list of IPs are not converted to a comma-separated list.
1 parent 71eb158 commit 8d223cb

File tree

19 files changed

+349
-83
lines changed

19 files changed

+349
-83
lines changed

docs/changelog/112645.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 112645
2+
summary: Add support for multi-value dimensions
3+
area: Mapping
4+
type: enhancement
5+
issues:
6+
- 110387

docs/reference/mapping/types/keyword.asciidoc

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,6 @@ index setting limits the number of dimensions in an index.
163163
Dimension fields have the following constraints:
164164

165165
* The `doc_values` and `index` mapping parameters must be `true`.
166-
* Field values cannot be an <<array,array or multi-value>>.
167166
// end::dimension[]
168167
* Dimension values are used to identify a document’s time series. If dimension values are altered in any way during indexing, the document will be stored as belonging to different from intended time series. As a result there are additional constraints:
169168
** The field cannot use a <<normalizer,`normalizer`>>.

modules/data-streams/src/yamlRestTest/resources/rest-api-spec/test/data_stream/150_tsdb.yml

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1230,3 +1230,83 @@ non string dimension fields:
12301230
- match: { .$idx0name.mappings.properties.attributes.properties.double.time_series_dimension: true }
12311231
- match: { .$idx0name.mappings.properties.attributes.properties.host\.ip.type: 'ip' }
12321232
- match: { .$idx0name.mappings.properties.attributes.properties.host\.ip.time_series_dimension: true }
1233+
1234+
---
1235+
multi value dimensions:
1236+
- requires:
1237+
cluster_features: ["routing.multi_value_routing_path"]
1238+
reason: support for multi-value dimensions
1239+
1240+
- do:
1241+
allowed_warnings:
1242+
- "index template [my-dynamic-template] has index patterns [k9s*] matching patterns from existing older templates [global] with patterns (global => [*]); this template [my-dynamic-template] will take precedence during new index creation"
1243+
indices.put_index_template:
1244+
name: my-dynamic-template
1245+
body:
1246+
index_patterns: [k9s*]
1247+
data_stream: {}
1248+
template:
1249+
settings:
1250+
index:
1251+
number_of_shards: 1
1252+
mode: time_series
1253+
time_series:
1254+
start_time: 2023-08-31T13:03:08.138Z
1255+
1256+
mappings:
1257+
properties:
1258+
attributes:
1259+
type: passthrough
1260+
dynamic: true
1261+
time_series_dimension: true
1262+
priority: 1
1263+
dynamic_templates:
1264+
- counter_metric:
1265+
mapping:
1266+
type: integer
1267+
time_series_metric: counter
1268+
1269+
- do:
1270+
bulk:
1271+
index: k9s
1272+
refresh: true
1273+
body:
1274+
- '{ "create": { "dynamic_templates": { "data": "counter_metric" } } }'
1275+
- '{ "@timestamp": "2023-09-01T13:03:08.138Z","data": "10", "attributes": { "dim1": ["a" , "b"], "dim2": [1, 2] } }'
1276+
- '{ "create": { "dynamic_templates": { "data": "counter_metric" } } }'
1277+
- '{ "@timestamp": "2023-09-01T13:03:08.138Z","data": "20", "attributes": { "dim1": ["b" , "a"], "dim2": [1, 2] } }'
1278+
- '{ "create": { "dynamic_templates": { "data": "counter_metric" } } }'
1279+
- '{ "@timestamp": "2023-09-01T13:03:08.138Z","data": "20", "attributes": { "dim1": ["c" , "b"], "dim2": [1, 2] } }'
1280+
- is_false: errors
1281+
1282+
- do:
1283+
search:
1284+
index: k9s
1285+
body:
1286+
size: 0
1287+
aggs:
1288+
tsids:
1289+
terms:
1290+
field: _tsid
1291+
1292+
- length: { aggregations.tsids.buckets: 3 } # only the order of the dim1 attribute is different, yet we expect to have two distinct time series
1293+
1294+
- do:
1295+
search:
1296+
index: k9s
1297+
body:
1298+
size: 0
1299+
aggs:
1300+
dims:
1301+
terms:
1302+
field: dim1
1303+
order:
1304+
_key: asc
1305+
1306+
- length: { aggregations.dims.buckets: 3 }
1307+
- match: { aggregations.dims.buckets.0.key: a }
1308+
- match: { aggregations.dims.buckets.0.doc_count: 2 }
1309+
- match: { aggregations.dims.buckets.1.key: b }
1310+
- match: { aggregations.dims.buckets.1.doc_count: 3 }
1311+
- match: { aggregations.dims.buckets.2.key: c }
1312+
- match: { aggregations.dims.buckets.2.doc_count: 1 }

rest-api-spec/build.gradle

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,3 +53,7 @@ tasks.register('enforceYamlTestConvention').configure {
5353
tasks.named("precommit").configure {
5454
dependsOn 'enforceYamlTestConvention'
5555
}
56+
57+
tasks.named("yamlRestCompatTestTransform").configure({ task ->
58+
task.skipTest("tsdb/140_routing_path/multi-value routing path field", "Multi-value routing paths are allowed now. See #112645")
59+
})

rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/tsdb/140_routing_path.yml

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -119,11 +119,11 @@ missing dimension on routing path field:
119119
type: keyword
120120

121121
---
122-
multi-value routing path field:
122+
multi-value routing path field succeeds:
123123
- requires:
124124
test_runner_features: close_to
125-
cluster_features: ["gte_v8.13.0"]
126-
reason: _tsid hashing introduced in 8.13
125+
cluster_features: ["routing.multi_value_routing_path"]
126+
reason: support for multi-value dimensions
127127

128128
- do:
129129
indices.create:
@@ -172,12 +172,7 @@ multi-value routing path field:
172172
- '{"index": {}}'
173173
- '{"@timestamp": "2021-04-28T18:35:54.467Z", "uid": "df3145b3-0563-4d3b-a0f7-897eb2876ea9", "voltage": 6.8, "unmapped_field": 40, "tag": [ "one", "three" ] }'
174174

175-
- is_true: errors
176-
177-
- match: {items.1.index.error.reason: "Error extracting routing: Routing values must be strings but found [START_ARRAY]" }
178-
- match: {items.3.index.error.reason: "Error extracting routing: Routing values must be strings but found [START_ARRAY]" }
179-
- match: {items.4.index.error.reason: "Error extracting routing: Routing values must be strings but found [START_ARRAY]" }
180-
- match: {items.7.index.error.reason: "Error extracting routing: Routing values must be strings but found [START_ARRAY]" }
175+
- is_false: errors
181176

182177
- do:
183178
search:
@@ -195,13 +190,21 @@ multi-value routing path field:
195190
avg:
196191
field: voltage
197192

198-
- match: {hits.total.value: 4}
199-
- length: {aggregations.tsids.buckets: 2}
193+
- match: {hits.total.value: 8}
194+
- length: {aggregations.tsids.buckets: 4}
200195

201-
- match: {aggregations.tsids.buckets.0.key: "KDODRmbj7vu4rLWvjrJbpUuaET_vOYoRw6ImzKEcF4sEaGKnXSaKfM0" }
196+
- match: {aggregations.tsids.buckets.0.key: "KDODRmbj7vu4rLWvjrJbpUtt0uPSOYoRw_LI4DD7DFEGEJ3NR3eQkMY" }
202197
- match: {aggregations.tsids.buckets.0.doc_count: 2 }
203198
- close_to: {aggregations.tsids.buckets.0.voltage.value: { value: 6.70, error: 0.01 }}
204199

205-
- match: { aggregations.tsids.buckets.1.key: "KDODRmbj7vu4rLWvjrJbpUvcUWJEddqA4Seo8jbBBBFxwC0lrefCb6A" }
200+
- match: { aggregations.tsids.buckets.1.key: "KDODRmbj7vu4rLWvjrJbpUtt0uPSddqA4WYKglGPR_C0cJe8QGaiC2c" }
206201
- match: {aggregations.tsids.buckets.1.doc_count: 2 }
207-
- close_to: {aggregations.tsids.buckets.1.voltage.value: { value: 7.30, error: 0.01 }}
202+
- close_to: {aggregations.tsids.buckets.1.voltage.value: { value: 7.15, error: 0.01 }}
203+
204+
- match: { aggregations.tsids.buckets.2.key: "KDODRmbj7vu4rLWvjrJbpUuaET_vOYoRw6ImzKEcF4sEaGKnXSaKfM0" }
205+
- match: {aggregations.tsids.buckets.2.doc_count: 2 }
206+
- close_to: {aggregations.tsids.buckets.2.voltage.value: { value: 6.70, error: 0.01 }}
207+
208+
- match: { aggregations.tsids.buckets.3.key: "KDODRmbj7vu4rLWvjrJbpUvcUWJEddqA4Seo8jbBBBFxwC0lrefCb6A" }
209+
- match: {aggregations.tsids.buckets.3.doc_count: 2 }
210+
- close_to: {aggregations.tsids.buckets.3.voltage.value: { value: 7.30, error: 0.01 }}

server/src/main/java/org/elasticsearch/cluster/routing/IndexRouting.java

Lines changed: 38 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@
3636
import java.util.ArrayList;
3737
import java.util.Base64;
3838
import java.util.Collections;
39-
import java.util.Iterator;
4039
import java.util.List;
4140
import java.util.Map;
4241
import java.util.Set;
@@ -46,13 +45,15 @@
4645
import java.util.function.Predicate;
4746

4847
import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken;
48+
import static org.elasticsearch.common.xcontent.XContentParserUtils.expectValueToken;
4949

5050
/**
5151
* Generates the shard id for {@code (id, routing)} pairs.
5252
*/
5353
public abstract class IndexRouting {
5454

5555
static final NodeFeature BOOLEAN_ROUTING_PATH = new NodeFeature("routing.boolean_routing_path");
56+
static final NodeFeature MULTI_VALUE_ROUTING_PATH = new NodeFeature("routing.multi_value_routing_path");
5657

5758
/**
5859
* Build the routing from {@link IndexMetadata}.
@@ -302,7 +303,13 @@ public String createId(Map<String, Object> flat, byte[] suffix) {
302303
Builder b = builder();
303304
for (Map.Entry<String, Object> e : flat.entrySet()) {
304305
if (isRoutingPath.test(e.getKey())) {
305-
b.hashes.add(new NameAndHash(new BytesRef(e.getKey()), hash(new BytesRef(e.getValue().toString()))));
306+
if (e.getValue() instanceof List<?> listValue) {
307+
for (Object v : listValue) {
308+
b.addHash(e.getKey(), new BytesRef(v.toString()));
309+
}
310+
} else {
311+
b.addHash(e.getKey(), new BytesRef(e.getValue().toString()));
312+
}
306313
}
307314
}
308315
return b.createId(suffix, IndexRouting.ExtractFromSource::defaultOnEmpty);
@@ -337,7 +344,7 @@ public class Builder {
337344

338345
public void addMatching(String fieldName, BytesRef string) {
339346
if (isRoutingPath.test(fieldName)) {
340-
hashes.add(new NameAndHash(new BytesRef(fieldName), hash(string)));
347+
addHash(fieldName, string);
341348
}
342349
}
343350

@@ -358,6 +365,13 @@ private void extractObject(@Nullable String path, XContentParser source) throws
358365
}
359366
}
360367

368+
private void extractArray(@Nullable String path, XContentParser source) throws IOException {
369+
while (source.currentToken() != Token.END_ARRAY) {
370+
expectValueToken(source.currentToken(), source);
371+
extractItem(path, source);
372+
}
373+
}
374+
361375
private void extractItem(String path, XContentParser source) throws IOException {
362376
switch (source.currentToken()) {
363377
case START_OBJECT:
@@ -368,7 +382,12 @@ private void extractItem(String path, XContentParser source) throws IOException
368382
case VALUE_STRING:
369383
case VALUE_NUMBER:
370384
case VALUE_BOOLEAN:
371-
hashes.add(new NameAndHash(new BytesRef(path), hash(new BytesRef(source.text()))));
385+
addHash(path, new BytesRef(source.text()));
386+
source.nextToken();
387+
break;
388+
case START_ARRAY:
389+
source.nextToken();
390+
extractArray(path, source);
372391
source.nextToken();
373392
break;
374393
case VALUE_NULL:
@@ -377,28 +396,24 @@ private void extractItem(String path, XContentParser source) throws IOException
377396
default:
378397
throw new ParsingException(
379398
source.getTokenLocation(),
380-
"Routing values must be strings but found [{}]",
399+
"Cannot extract routing path due to unexpected token [{}]",
381400
source.currentToken()
382401
);
383402
}
384403
}
385404

405+
private void addHash(String path, BytesRef value) {
406+
hashes.add(new NameAndHash(new BytesRef(path), hash(value), hashes.size()));
407+
}
408+
386409
private int buildHash(IntSupplier onEmpty) {
387-
Collections.sort(hashes);
388-
Iterator<NameAndHash> itr = hashes.iterator();
389-
if (itr.hasNext() == false) {
410+
if (hashes.isEmpty()) {
390411
return onEmpty.getAsInt();
391412
}
392-
NameAndHash prev = itr.next();
393-
int hash = hash(prev.name) ^ prev.hash;
394-
while (itr.hasNext()) {
395-
NameAndHash next = itr.next();
396-
if (prev.name.equals(next.name)) {
397-
throw new IllegalArgumentException("Duplicate routing dimension for [" + next.name + "]");
398-
}
399-
int thisHash = hash(next.name) ^ next.hash;
400-
hash = 31 * hash + thisHash;
401-
prev = next;
413+
Collections.sort(hashes);
414+
int hash = 0;
415+
for (NameAndHash nah : hashes) {
416+
hash = 31 * hash + (hash(nah.name) ^ nah.hash);
402417
}
403418
return hash;
404419
}
@@ -459,10 +474,13 @@ private String error(String operation) {
459474
}
460475
}
461476

462-
private record NameAndHash(BytesRef name, int hash) implements Comparable<NameAndHash> {
477+
private record NameAndHash(BytesRef name, int hash, int order) implements Comparable<NameAndHash> {
463478
@Override
464479
public int compareTo(NameAndHash o) {
465-
return name.compareTo(o.name);
480+
int i = name.compareTo(o.name);
481+
if (i != 0) return i;
482+
// ensures array values are in the order as they appear in the source
483+
return Integer.compare(order, o.order);
466484
}
467485
}
468486
}

server/src/main/java/org/elasticsearch/cluster/routing/RoutingFeatures.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,6 @@ public class RoutingFeatures implements FeatureSpecification {
1818

1919
@Override
2020
public Set<NodeFeature> getFeatures() {
21-
return Set.of(IndexRouting.BOOLEAN_ROUTING_PATH);
21+
return Set.of(IndexRouting.BOOLEAN_ROUTING_PATH, IndexRouting.MULTI_VALUE_ROUTING_PATH);
2222
}
2323
}

server/src/main/java/org/elasticsearch/common/xcontent/XContentParserUtils.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,20 @@ public static void ensureExpectedToken(Token expected, Token actual, XContentPar
7272
}
7373
}
7474

75+
/**
76+
* Makes sure the provided token {@linkplain Token#isValue() is a value type}
77+
*
78+
* @throws ParsingException if the token is not a value type
79+
*/
80+
public static void expectValueToken(Token actual, XContentParser parser) {
81+
if (actual.isValue() == false) {
82+
throw new ParsingException(
83+
parser.getTokenLocation(),
84+
String.format(Locale.ROOT, "Failed to parse object: expecting value token but found [%s]", actual)
85+
);
86+
}
87+
}
88+
7589
private static ParsingException parsingException(XContentParser parser, Token expected, Token actual) {
7690
return new ParsingException(
7791
parser.getTokenLocation(),

0 commit comments

Comments
 (0)