Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/changelog/112645.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 112645
summary: Add support for multi-value dimensions
area: Mapping
type: enhancement
issues:
- 110387
1 change: 0 additions & 1 deletion docs/reference/mapping/types/keyword.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,6 @@ index setting limits the number of dimensions in an index.
Dimension fields have the following constraints:

* The `doc_values` and `index` mapping parameters must be `true`.
* Field values cannot be an <<array,array or multi-value>>.
// end::dimension[]
* Dimension values are used to identify a document’s time series. If dimension values are altered in any way during indexing, the document will be stored as belonging to a different time series than intended. As a result, there are additional constraints:
** The field cannot use a <<normalizer,`normalizer`>>.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1230,3 +1230,83 @@ non string dimension fields:
- match: { .$idx0name.mappings.properties.attributes.properties.double.time_series_dimension: true }
- match: { .$idx0name.mappings.properties.attributes.properties.host\.ip.type: 'ip' }
- match: { .$idx0name.mappings.properties.attributes.properties.host\.ip.time_series_dimension: true }

---
multi value dimensions:
- requires:
cluster_features: ["routing.multi_value_routing_path"]
reason: support for multi-value dimensions

- do:
allowed_warnings:
- "index template [my-dynamic-template] has index patterns [k9s*] matching patterns from existing older templates [global] with patterns (global => [*]); this template [my-dynamic-template] will take precedence during new index creation"
indices.put_index_template:
name: my-dynamic-template
body:
index_patterns: [k9s*]
data_stream: {}
template:
settings:
index:
number_of_shards: 1
mode: time_series
time_series:
start_time: 2023-08-31T13:03:08.138Z

mappings:
properties:
attributes:
type: passthrough
dynamic: true
time_series_dimension: true
priority: 1
dynamic_templates:
- counter_metric:
mapping:
type: integer
time_series_metric: counter

- do:
bulk:
index: k9s
refresh: true
body:
- '{ "create": { "dynamic_templates": { "data": "counter_metric" } } }'
- '{ "@timestamp": "2023-09-01T13:03:08.138Z","data": "10", "attributes": { "dim1": ["a" , "b"], "dim2": [1, 2] } }'
- '{ "create": { "dynamic_templates": { "data": "counter_metric" } } }'
- '{ "@timestamp": "2023-09-01T13:03:08.138Z","data": "20", "attributes": { "dim1": ["b" , "a"], "dim2": [1, 2] } }'
- '{ "create": { "dynamic_templates": { "data": "counter_metric" } } }'
- '{ "@timestamp": "2023-09-01T13:03:08.138Z","data": "20", "attributes": { "dim1": ["c" , "b"], "dim2": [1, 2] } }'
- is_false: errors

- do:
search:
index: k9s
body:
size: 0
aggs:
tsids:
terms:
field: _tsid

- length: { aggregations.tsids.buckets: 3 } # the first two documents differ only in the order of their dim1 values, yet each gets its own time series; the third document adds one more

- do:
search:
index: k9s
body:
size: 0
aggs:
dims:
terms:
field: dim1
order:
_key: asc

- length: { aggregations.dims.buckets: 3 }
- match: { aggregations.dims.buckets.0.key: a }
- match: { aggregations.dims.buckets.0.doc_count: 2 }
- match: { aggregations.dims.buckets.1.key: b }
- match: { aggregations.dims.buckets.1.doc_count: 3 }
- match: { aggregations.dims.buckets.2.key: c }
- match: { aggregations.dims.buckets.2.doc_count: 1 }
Original file line number Diff line number Diff line change
Expand Up @@ -119,11 +119,11 @@ missing dimension on routing path field:
type: keyword

---
multi-value routing path field:
multi-value routing path field succeeds:
- requires:
test_runner_features: close_to
cluster_features: ["gte_v8.13.0"]
reason: _tsid hashing introduced in 8.13
cluster_features: ["routing.multi_value_routing_path"]
reason: support for multi-value dimensions

- do:
indices.create:
Expand Down Expand Up @@ -172,12 +172,7 @@ multi-value routing path field:
- '{"index": {}}'
- '{"@timestamp": "2021-04-28T18:35:54.467Z", "uid": "df3145b3-0563-4d3b-a0f7-897eb2876ea9", "voltage": 6.8, "unmapped_field": 40, "tag": [ "one", "three" ] }'

- is_true: errors

- match: {items.1.index.error.reason: "Error extracting routing: Routing values must be strings but found [START_ARRAY]" }
- match: {items.3.index.error.reason: "Error extracting routing: Routing values must be strings but found [START_ARRAY]" }
- match: {items.4.index.error.reason: "Error extracting routing: Routing values must be strings but found [START_ARRAY]" }
- match: {items.7.index.error.reason: "Error extracting routing: Routing values must be strings but found [START_ARRAY]" }
- is_false: errors

- do:
search:
Expand All @@ -195,13 +190,21 @@ multi-value routing path field:
avg:
field: voltage

- match: {hits.total.value: 4}
- length: {aggregations.tsids.buckets: 2}
- match: {hits.total.value: 8}
- length: {aggregations.tsids.buckets: 4}

- match: {aggregations.tsids.buckets.0.key: "KDODRmbj7vu4rLWvjrJbpUuaET_vOYoRw6ImzKEcF4sEaGKnXSaKfM0" }
- match: {aggregations.tsids.buckets.0.key: "KDODRmbj7vu4rLWvjrJbpUtt0uPSOYoRw_LI4DD7DFEGEJ3NR3eQkMY" }
- match: {aggregations.tsids.buckets.0.doc_count: 2 }
- close_to: {aggregations.tsids.buckets.0.voltage.value: { value: 6.70, error: 0.01 }}

- match: { aggregations.tsids.buckets.1.key: "KDODRmbj7vu4rLWvjrJbpUvcUWJEddqA4Seo8jbBBBFxwC0lrefCb6A" }
- match: { aggregations.tsids.buckets.1.key: "KDODRmbj7vu4rLWvjrJbpUtt0uPSddqA4WYKglGPR_C0cJe8QGaiC2c" }
- match: {aggregations.tsids.buckets.1.doc_count: 2 }
- close_to: {aggregations.tsids.buckets.1.voltage.value: { value: 7.30, error: 0.01 }}
- close_to: {aggregations.tsids.buckets.1.voltage.value: { value: 7.15, error: 0.01 }}

- match: { aggregations.tsids.buckets.2.key: "KDODRmbj7vu4rLWvjrJbpUuaET_vOYoRw6ImzKEcF4sEaGKnXSaKfM0" }
- match: {aggregations.tsids.buckets.2.doc_count: 2 }
- close_to: {aggregations.tsids.buckets.2.voltage.value: { value: 6.70, error: 0.01 }}

- match: { aggregations.tsids.buckets.3.key: "KDODRmbj7vu4rLWvjrJbpUvcUWJEddqA4Seo8jbBBBFxwC0lrefCb6A" }
- match: {aggregations.tsids.buckets.3.doc_count: 2 }
- close_to: {aggregations.tsids.buckets.3.voltage.value: { value: 7.30, error: 0.01 }}
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
import java.util.ArrayList;
import java.util.Base64;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
Expand All @@ -45,13 +44,15 @@
import java.util.function.Predicate;

import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken;
import static org.elasticsearch.common.xcontent.XContentParserUtils.expectValueToken;

/**
* Generates the shard id for {@code (id, routing)} pairs.
*/
public abstract class IndexRouting {

static final NodeFeature BOOLEAN_ROUTING_PATH = new NodeFeature("routing.boolean_routing_path");
static final NodeFeature MULTI_VALUE_ROUTING_PATH = new NodeFeature("routing.multi_value_routing_path");

/**
* Build the routing from {@link IndexMetadata}.
Expand Down Expand Up @@ -301,7 +302,13 @@ public String createId(Map<String, Object> flat, byte[] suffix) {
Builder b = builder();
for (Map.Entry<String, Object> e : flat.entrySet()) {
if (isRoutingPath.test(e.getKey())) {
b.hashes.add(new NameAndHash(new BytesRef(e.getKey()), hash(new BytesRef(e.getValue().toString()))));
if (e.getValue() instanceof List<?> listValue) {
for (Object v : listValue) {
b.addHash(e.getKey(), new BytesRef(v.toString()));
}
} else {
b.addHash(e.getKey(), new BytesRef(e.getValue().toString()));
}
}
}
return b.createId(suffix, IndexRouting.ExtractFromSource::defaultOnEmpty);
Expand Down Expand Up @@ -336,7 +343,7 @@ public class Builder {

public void addMatching(String fieldName, BytesRef string) {
if (isRoutingPath.test(fieldName)) {
hashes.add(new NameAndHash(new BytesRef(fieldName), hash(string)));
addHash(fieldName, string);
}
}

Expand All @@ -357,6 +364,13 @@ private void extractObject(@Nullable String path, XContentParser source) throws
}
}

/**
 * Extracts routing hashes from the elements of an array, one element at a time.
 * Iteration stops when the parser's current token is {@code END_ARRAY}; that token
 * itself is not consumed here. Every element must be a value token, so nested
 * objects and nested arrays are rejected by {@code expectValueToken}.
 *
 * @param path   the field path each element is recorded under
 * @param source the parser, expected to be positioned on the first element of the array
 * @throws IOException if thrown while handling an element
 */
private void extractArray(@Nullable String path, XContentParser source) throws IOException {
    for (Token current = source.currentToken(); current != Token.END_ARRAY; current = source.currentToken()) {
        expectValueToken(current, source);
        // extractItem is responsible for moving the parser past the element it handled
        extractItem(path, source);
    }
}

private void extractItem(String path, XContentParser source) throws IOException {
switch (source.currentToken()) {
case START_OBJECT:
Expand All @@ -367,7 +381,12 @@ private void extractItem(String path, XContentParser source) throws IOException
case VALUE_STRING:
case VALUE_NUMBER:
case VALUE_BOOLEAN:
hashes.add(new NameAndHash(new BytesRef(path), hash(new BytesRef(source.text()))));
addHash(path, new BytesRef(source.text()));
source.nextToken();
break;
case START_ARRAY:
source.nextToken();
extractArray(path, source);
source.nextToken();
break;
case VALUE_NULL:
Expand All @@ -376,28 +395,24 @@ private void extractItem(String path, XContentParser source) throws IOException
default:
throw new ParsingException(
source.getTokenLocation(),
"Routing values must be strings but found [{}]",
"Cannot extract routing path due to unexpected token [{}]",
source.currentToken()
);
}
}

/**
 * Appends one routing value to {@code hashes}.
 * The entry stores the field name, the hash of the value, and the number of
 * entries collected so far, which {@code NameAndHash#compareTo} uses to keep
 * values of the same field in the order they were added.
 *
 * @param path  the field path the value belongs to
 * @param value the raw value to hash
 */
private void addHash(String path, BytesRef value) {
    final BytesRef name = new BytesRef(path);
    final int valueHash = hash(value);
    final int position = hashes.size();
    hashes.add(new NameAndHash(name, valueHash, position));
}

private int buildHash(IntSupplier onEmpty) {
Collections.sort(hashes);
Iterator<NameAndHash> itr = hashes.iterator();
if (itr.hasNext() == false) {
if (hashes.isEmpty()) {
return onEmpty.getAsInt();
}
NameAndHash prev = itr.next();
int hash = hash(prev.name) ^ prev.hash;
while (itr.hasNext()) {
NameAndHash next = itr.next();
if (prev.name.equals(next.name)) {
throw new IllegalArgumentException("Duplicate routing dimension for [" + next.name + "]");
}
int thisHash = hash(next.name) ^ next.hash;
hash = 31 * hash + thisHash;
prev = next;
Collections.sort(hashes);
int hash = 0;
for (NameAndHash nah : hashes) {
hash = 31 * hash + (hash(nah.name) ^ nah.hash);
}
return hash;
}
Expand Down Expand Up @@ -458,10 +473,13 @@ private String error(String operation) {
}
}

private record NameAndHash(BytesRef name, int hash) implements Comparable<NameAndHash> {
private record NameAndHash(BytesRef name, int hash, int order) implements Comparable<NameAndHash> {
@Override
public int compareTo(NameAndHash o) {
return name.compareTo(o.name);
int i = name.compareTo(o.name);
if (i != 0) return i;
// ensures array values are in the order as they appear in the source
return Integer.compare(order, o.order);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,6 @@ public class RoutingFeatures implements FeatureSpecification {

@Override
public Set<NodeFeature> getFeatures() {
return Set.of(IndexRouting.BOOLEAN_ROUTING_PATH);
return Set.of(IndexRouting.BOOLEAN_ROUTING_PATH, IndexRouting.MULTI_VALUE_ROUTING_PATH);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,20 @@ public static void ensureExpectedToken(Token expected, Token actual, XContentPar
}
}

/**
 * Verifies that the provided token {@linkplain Token#isValue() is a value type}.
 *
 * @param actual the token to check
 * @param parser the parser, used to report the token location when the check fails
 * @throws ParsingException if the token is not a value type
 */
public static void expectValueToken(Token actual, XContentParser parser) {
    if (actual.isValue()) {
        return;
    }
    final String message = String.format(Locale.ROOT, "Failed to parse object: expecting value token but found [%s]", actual);
    throw new ParsingException(parser.getTokenLocation(), message);
}

private static ParsingException parsingException(XContentParser parser, Token expected, Token actual) {
return new ParsingException(
parser.getTokenLocation(),
Expand Down
Loading