From fb45fa3a9438cb07b0434f35f1daab7cc5db5f59 Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Mon, 23 Sep 2024 09:31:18 +0200 Subject: [PATCH 1/2] Add support for multi-value dimensions (#112645) Closes https://github.com/elastic/elasticsearch/issues/110387 Having this in now affords us not having to introduce version checks in the ES exporter later. We can simply use the same serialization logic for metric attributes as we do for other signals. This also enables us to properly map `*.ip` fields to the ip field type as ip fields containing a list of IPs are not converted to a comma-separated list. (cherry picked from commit 8d223cbf7a097765e9e18c80d9abafd17eb93336) # Conflicts: # server/src/main/java/org/elasticsearch/index/mapper/TimeSeriesIdFieldMapper.java --- docs/changelog/112645.yaml | 6 ++ docs/reference/mapping/types/keyword.asciidoc | 1 - .../test/data_stream/150_tsdb.yml | 80 +++++++++++++++++++ rest-api-spec/build.gradle | 4 + .../test/tsdb/140_routing_path.yml | 31 +++---- .../cluster/routing/IndexRouting.java | 58 +++++++++----- .../cluster/routing/RoutingFeatures.java | 2 +- .../common/xcontent/XContentParserUtils.java | 14 ++++ .../index/mapper/TimeSeriesIdFieldMapper.java | 58 +++++++++----- .../cluster/routing/IndexRoutingTests.java | 31 ++++++- .../index/mapper/BooleanFieldMapperTests.java | 7 +- .../index/mapper/IpFieldMapperTests.java | 10 +-- .../index/mapper/KeywordFieldMapperTests.java | 7 +- .../mapper/TimeSeriesIdFieldMapperTests.java | 47 +++++++++++ .../flattened/FlattenedFieldMapperTests.java | 11 +-- .../mapper/WholeNumberFieldMapperTests.java | 10 +-- .../UnsignedLongFieldMapperTests.java | 11 +-- .../metrics-otel@template.yaml | 5 ++ .../rest-api-spec/test/20_metrics_tests.yml | 39 ++++++++- 19 files changed, 349 insertions(+), 83 deletions(-) create mode 100644 docs/changelog/112645.yaml diff --git a/docs/changelog/112645.yaml b/docs/changelog/112645.yaml new file mode 100644 index 0000000000000..cf4ef4609a1f3 --- /dev/null +++ b/docs/changelog/112645.yaml @@ -0,0 +1,6 @@ +pr: 112645 +summary: Add support for multi-value dimensions +area: Mapping +type: enhancement +issues: + - 110387 diff --git a/docs/reference/mapping/types/keyword.asciidoc b/docs/reference/mapping/types/keyword.asciidoc index 59d307c4df0ad..a4be7026dffcd 100644 --- a/docs/reference/mapping/types/keyword.asciidoc +++ b/docs/reference/mapping/types/keyword.asciidoc @@ -163,7 +163,6 @@ index setting limits the number of dimensions in an index. Dimension fields have the following constraints: * The `doc_values` and `index` mapping parameters must be `true`. -* Field values cannot be an <>. // end::dimension[] * Dimension values are used to identify a document’s time series. If dimension values are altered in any way during indexing, the document will be stored as belonging to different from intended time series. As a result there are additional constraints: ** The field cannot use a <>. diff --git a/modules/data-streams/src/yamlRestTest/resources/rest-api-spec/test/data_stream/150_tsdb.yml b/modules/data-streams/src/yamlRestTest/resources/rest-api-spec/test/data_stream/150_tsdb.yml index d20231a6d6cf2..56f387c016261 100644 --- a/modules/data-streams/src/yamlRestTest/resources/rest-api-spec/test/data_stream/150_tsdb.yml +++ b/modules/data-streams/src/yamlRestTest/resources/rest-api-spec/test/data_stream/150_tsdb.yml @@ -1230,3 +1230,83 @@ non string dimension fields: - match: { .$idx0name.mappings.properties.attributes.properties.double.time_series_dimension: true } - match: { .$idx0name.mappings.properties.attributes.properties.host\.ip.type: 'ip' } - match: { .$idx0name.mappings.properties.attributes.properties.host\.ip.time_series_dimension: true } + +--- +multi value dimensions: + - requires: + cluster_features: ["routing.multi_value_routing_path"] + reason: support for multi-value dimensions + + - do: + allowed_warnings: + - "index template [my-dynamic-template] has index patterns [k9s*] matching patterns from existing older templates [global] with patterns (global => [*]); this template [my-dynamic-template] will take precedence during new index creation" + indices.put_index_template: + name: my-dynamic-template + body: + index_patterns: [k9s*] + data_stream: {} + template: + settings: + index: + number_of_shards: 1 + mode: time_series + time_series: + start_time: 2023-08-31T13:03:08.138Z + + mappings: + properties: + attributes: + type: passthrough + dynamic: true + time_series_dimension: true + priority: 1 + dynamic_templates: + - counter_metric: + mapping: + type: integer + time_series_metric: counter + + - do: + bulk: + index: k9s + refresh: true + body: + - '{ "create": { "dynamic_templates": { "data": "counter_metric" } } }' + - '{ "@timestamp": "2023-09-01T13:03:08.138Z","data": "10", "attributes": { "dim1": ["a" , "b"], "dim2": [1, 2] } }' + - '{ "create": { "dynamic_templates": { "data": "counter_metric" } } }' + - '{ "@timestamp": "2023-09-01T13:03:08.138Z","data": "20", "attributes": { "dim1": ["b" , "a"], "dim2": [1, 2] } }' + - '{ "create": { "dynamic_templates": { "data": "counter_metric" } } }' + - '{ "@timestamp": "2023-09-01T13:03:08.138Z","data": "20", "attributes": { "dim1": ["c" , "b"], "dim2": [1, 2] } }' + - is_false: errors + + - do: + search: + index: k9s + body: + size: 0 + aggs: + tsids: + terms: + field: _tsid + + - length: { aggregations.tsids.buckets: 3 } # only the order of the dim1 attribute is different, yet we expect to have two distinct time series + + - do: + search: + index: k9s + body: + size: 0 + aggs: + dims: + terms: + field: dim1 + order: + _key: asc + + - length: { aggregations.dims.buckets: 3 } + - match: { aggregations.dims.buckets.0.key: a } + - match: { aggregations.dims.buckets.0.doc_count: 2 } + - match: { aggregations.dims.buckets.1.key: b } + - match: { aggregations.dims.buckets.1.doc_count: 3 } + - match: { aggregations.dims.buckets.2.key: c } + - match: { aggregations.dims.buckets.2.doc_count: 1 } diff --git a/rest-api-spec/build.gradle b/rest-api-spec/build.gradle index 21dff41d2ef3e..d4650c7aa1a4a 100644 --- a/rest-api-spec/build.gradle +++ b/rest-api-spec/build.gradle @@ -243,3 +243,7 @@ tasks.register('enforceYamlTestConvention').configure { tasks.named("precommit").configure { dependsOn 'enforceYamlTestConvention' } + +tasks.named("yamlRestCompatTestTransform").configure({ task -> + task.skipTest("tsdb/140_routing_path/multi-value routing path field", "Multi-value routing paths are allowed now. See #112645") +}) diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/tsdb/140_routing_path.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/tsdb/140_routing_path.yml index 6eb7a8dcad7aa..616afd3cf67ad 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/tsdb/140_routing_path.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/tsdb/140_routing_path.yml @@ -119,11 +119,11 @@ missing dimension on routing path field: type: keyword --- -multi-value routing path field: +multi-value routing path field succeeds: - requires: test_runner_features: close_to - cluster_features: ["gte_v8.13.0"] - reason: _tsid hashing introduced in 8.13 + cluster_features: ["routing.multi_value_routing_path"] + reason: support for multi-value dimensions - do: indices.create: @@ -172,12 +172,7 @@ multi-value routing path field: - '{"index": {}}' - '{"@timestamp": "2021-04-28T18:35:54.467Z", "uid": "df3145b3-0563-4d3b-a0f7-897eb2876ea9", "voltage": 6.8, "unmapped_field": 40, "tag": [ "one", "three" ] }' - - is_true: errors - - - match: {items.1.index.error.reason: "Error extracting routing: Routing values must be strings but found [START_ARRAY]" } - - match: {items.3.index.error.reason: "Error extracting routing: Routing values must be strings but found [START_ARRAY]" } - - match: {items.4.index.error.reason: "Error extracting routing: Routing values must be strings but found [START_ARRAY]" } - - match: {items.7.index.error.reason: "Error extracting routing: Routing values must be strings but found [START_ARRAY]" } + - is_false: errors - do: search: @@ -195,13 +190,21 @@ multi-value routing path field: avg: field: voltage - - match: {hits.total.value: 4} - - length: {aggregations.tsids.buckets: 2} + - match: {hits.total.value: 8} + - length: {aggregations.tsids.buckets: 4} - - match: {aggregations.tsids.buckets.0.key: "KDODRmbj7vu4rLWvjrJbpUuaET_vOYoRw6ImzKEcF4sEaGKnXSaKfM0" } + - match: {aggregations.tsids.buckets.0.key: "KDODRmbj7vu4rLWvjrJbpUtt0uPSOYoRw_LI4DD7DFEGEJ3NR3eQkMY" } - match: {aggregations.tsids.buckets.0.doc_count: 2 } - close_to: {aggregations.tsids.buckets.0.voltage.value: { value: 6.70, error: 0.01 }} - - match: { aggregations.tsids.buckets.1.key: "KDODRmbj7vu4rLWvjrJbpUvcUWJEddqA4Seo8jbBBBFxwC0lrefCb6A" } + - match: { aggregations.tsids.buckets.1.key: "KDODRmbj7vu4rLWvjrJbpUtt0uPSddqA4WYKglGPR_C0cJe8QGaiC2c" } - match: {aggregations.tsids.buckets.1.doc_count: 2 } - - close_to: {aggregations.tsids.buckets.1.voltage.value: { value: 7.30, error: 0.01 }} + - close_to: {aggregations.tsids.buckets.1.voltage.value: { value: 7.15, error: 0.01 }} + + - match: { aggregations.tsids.buckets.2.key: "KDODRmbj7vu4rLWvjrJbpUuaET_vOYoRw6ImzKEcF4sEaGKnXSaKfM0" } + - match: {aggregations.tsids.buckets.2.doc_count: 2 } + - close_to: {aggregations.tsids.buckets.2.voltage.value: { value: 6.70, error: 0.01 }} + + - match: { aggregations.tsids.buckets.3.key: "KDODRmbj7vu4rLWvjrJbpUvcUWJEddqA4Seo8jbBBBFxwC0lrefCb6A" } + - match: {aggregations.tsids.buckets.3.doc_count: 2 } + - close_to: {aggregations.tsids.buckets.3.voltage.value: { value: 7.30, error: 0.01 }} diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/IndexRouting.java b/server/src/main/java/org/elasticsearch/cluster/routing/IndexRouting.java index f4ed7ec6c264c..cb55e65abbd18 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/IndexRouting.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/IndexRouting.java @@ -35,7 +35,6 @@ import java.util.ArrayList; import java.util.Base64; import java.util.Collections; -import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; @@ -45,6 +44,7 @@ import java.util.function.Predicate; import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken; +import static org.elasticsearch.common.xcontent.XContentParserUtils.expectValueToken; /** * Generates the shard id for {@code (id, routing)} pairs. @@ -52,6 +52,7 @@ public abstract class IndexRouting { static final NodeFeature BOOLEAN_ROUTING_PATH = new NodeFeature("routing.boolean_routing_path"); + static final NodeFeature MULTI_VALUE_ROUTING_PATH = new NodeFeature("routing.multi_value_routing_path"); /** * Build the routing from {@link IndexMetadata}. @@ -301,7 +302,13 @@ public String createId(Map flat, byte[] suffix) { Builder b = builder(); for (Map.Entry e : flat.entrySet()) { if (isRoutingPath.test(e.getKey())) { - b.hashes.add(new NameAndHash(new BytesRef(e.getKey()), hash(new BytesRef(e.getValue().toString())))); + if (e.getValue() instanceof List listValue) { + for (Object v : listValue) { + b.addHash(e.getKey(), new BytesRef(v.toString())); + } + } else { + b.addHash(e.getKey(), new BytesRef(e.getValue().toString())); + } } } return b.createId(suffix, IndexRouting.ExtractFromSource::defaultOnEmpty); @@ -336,7 +343,7 @@ public class Builder { public void addMatching(String fieldName, BytesRef string) { if (isRoutingPath.test(fieldName)) { - hashes.add(new NameAndHash(new BytesRef(fieldName), hash(string))); + addHash(fieldName, string); } } @@ -357,6 +364,13 @@ private void extractObject(@Nullable String path, XContentParser source) throws } } + private void extractArray(@Nullable String path, XContentParser source) throws IOException { + while (source.currentToken() != Token.END_ARRAY) { + expectValueToken(source.currentToken(), source); + extractItem(path, source); + } + } + private void extractItem(String path, XContentParser source) throws IOException { switch (source.currentToken()) { case START_OBJECT: @@ -367,7 +381,12 @@ private void extractItem(String path, XContentParser source) throws IOException case VALUE_STRING: case VALUE_NUMBER: case VALUE_BOOLEAN: - hashes.add(new NameAndHash(new BytesRef(path), hash(new BytesRef(source.text())))); + addHash(path, new BytesRef(source.text())); + source.nextToken(); + break; + case START_ARRAY: + source.nextToken(); + extractArray(path, source); source.nextToken(); break; case VALUE_NULL: @@ -376,28 +395,24 @@ private void extractItem(String path, XContentParser source) throws IOException default: throw new ParsingException( source.getTokenLocation(), - "Routing values must be strings but found [{}]", + "Cannot extract routing path due to unexpected token [{}]", source.currentToken() ); } } + private void addHash(String path, BytesRef value) { + hashes.add(new NameAndHash(new BytesRef(path), hash(value), hashes.size())); + } + private int buildHash(IntSupplier onEmpty) { - Collections.sort(hashes); - Iterator itr = hashes.iterator(); - if (itr.hasNext() == false) { + if (hashes.isEmpty()) { return onEmpty.getAsInt(); } - NameAndHash prev = itr.next(); - int hash = hash(prev.name) ^ prev.hash; - while (itr.hasNext()) { - NameAndHash next = itr.next(); - if (prev.name.equals(next.name)) { - throw new IllegalArgumentException("Duplicate routing dimension for [" + next.name + "]"); - } - int thisHash = hash(next.name) ^ next.hash; - hash = 31 * hash + thisHash; - prev = next; + Collections.sort(hashes); + int hash = 0; + for (NameAndHash nah : hashes) { + hash = 31 * hash + (hash(nah.name) ^ nah.hash); } return hash; } @@ -458,10 +473,13 @@ private String error(String operation) { } } - private record NameAndHash(BytesRef name, int hash) implements Comparable { + private record NameAndHash(BytesRef name, int hash, int order) implements Comparable { @Override public int compareTo(NameAndHash o) { - return name.compareTo(o.name); + int i = name.compareTo(o.name); + if (i != 0) return i; + // ensures array values are in the order as they appear in the source + return Integer.compare(order, o.order); } } } diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/RoutingFeatures.java b/server/src/main/java/org/elasticsearch/cluster/routing/RoutingFeatures.java index 8885410bd0530..f8028ce7f9d68 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/RoutingFeatures.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/RoutingFeatures.java @@ -18,6 +18,6 @@ public class RoutingFeatures implements FeatureSpecification { @Override public Set getFeatures() { - return Set.of(IndexRouting.BOOLEAN_ROUTING_PATH); + return Set.of(IndexRouting.BOOLEAN_ROUTING_PATH, IndexRouting.MULTI_VALUE_ROUTING_PATH); } } diff --git a/server/src/main/java/org/elasticsearch/common/xcontent/XContentParserUtils.java b/server/src/main/java/org/elasticsearch/common/xcontent/XContentParserUtils.java index cd7cf5ddbd893..6390e62f9758f 100644 --- a/server/src/main/java/org/elasticsearch/common/xcontent/XContentParserUtils.java +++ b/server/src/main/java/org/elasticsearch/common/xcontent/XContentParserUtils.java @@ -72,6 +72,20 @@ public static void ensureExpectedToken(Token expected, Token actual, XContentPar } } + /** + * Makes sure the provided token {@linkplain Token#isValue() is a value type} + * + * @throws ParsingException if the token is not a value type + */ + public static void expectValueToken(Token actual, XContentParser parser) { + if (actual.isValue() == false) { + throw new ParsingException( + parser.getTokenLocation(), + String.format(Locale.ROOT, "Failed to parse object: expecting value token but found [%s]", actual) + ); + } + } + private static ParsingException parsingException(XContentParser parser, Token expected, Token actual) { return new ParsingException( parser.getTokenLocation(), diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TimeSeriesIdFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TimeSeriesIdFieldMapper.java index 80535be39a4c4..d660b5b952645 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TimeSeriesIdFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TimeSeriesIdFieldMapper.java @@ -41,13 +41,14 @@ import java.io.IOException; import java.net.InetAddress; import java.time.ZoneId; +import java.util.ArrayList; import java.util.Base64; import java.util.Collections; -import java.util.Comparator; import java.util.LinkedHashMap; +import java.util.List; import java.util.Map; -import java.util.SortedSet; -import java.util.TreeSet; +import java.util.SortedMap; +import java.util.TreeMap; /** * Mapper for {@code _tsid} field included generated when the index is @@ -176,16 +177,14 @@ public static class TimeSeriesIdBuilder implements DocumentDimensions { public static final int MAX_DIMENSIONS = 512; - private record Dimension(BytesRef name, BytesReference value) {} - private final Murmur3Hasher tsidHasher = new Murmur3Hasher(0); /** - * A sorted set of the serialized values of dimension fields that will be used + * A map of the serialized values of dimension fields that will be used * for generating the _tsid field. The map will be used by {@link TimeSeriesIdFieldMapper} * to build the _tsid field for the document. */ - private final SortedSet dimensions = new TreeSet<>(Comparator.comparing(o -> o.name)); + private final SortedMap> dimensions = new TreeMap<>(); /** * Builds the routing. Used for building {@code _id}. If null then skipped. */ @@ -203,9 +202,17 @@ public BytesReference buildLegacyTsid() throws IOException { try (BytesStreamOutput out = new BytesStreamOutput()) { out.writeVInt(dimensions.size()); - for (Dimension entry : dimensions) { - out.writeBytesRef(entry.name); - entry.value.writeTo(out); + for (Map.Entry> entry : dimensions.entrySet()) { + out.writeBytesRef(entry.getKey()); + List value = entry.getValue(); + if (value.size() > 1) { + // multi-value dimensions are only supported for newer indices that use buildTsidHash + throw new IllegalArgumentException( + "Dimension field [" + entry.getKey().utf8ToString() + "] cannot be a multi-valued field." + ); + } + assert value.isEmpty() == false : "dimension value is empty"; + value.get(0).writeTo(out); } return out.bytes(); } @@ -237,18 +244,19 @@ public BytesReference buildTsidHash() { int tsidHashIndex = StreamOutput.putVInt(tsidHash, len, 0); tsidHasher.reset(); - for (final Dimension dimension : dimensions) { - tsidHasher.update(dimension.name.bytes); + for (final BytesRef name : dimensions.keySet()) { + tsidHasher.update(name.bytes); } tsidHashIndex = writeHash128(tsidHasher.digestHash(), tsidHash, tsidHashIndex); // NOTE: concatenate all dimension value hashes up to a certain number of dimensions int tsidHashStartIndex = tsidHashIndex; - for (final Dimension dimension : dimensions) { + for (final List values : dimensions.values()) { if ((tsidHashIndex - tsidHashStartIndex) >= 4 * numberOfDimensions) { break; } - final BytesRef dimensionValueBytesRef = dimension.value.toBytesRef(); + assert values.isEmpty() == false : "dimension values are empty"; + final BytesRef dimensionValueBytesRef = values.get(0).toBytesRef(); ByteUtils.writeIntLE( StringHelper.murmurhash3_x86_32( dimensionValueBytesRef.bytes, @@ -264,8 +272,10 @@ public BytesReference buildTsidHash() { // NOTE: hash all dimension field allValues tsidHasher.reset(); - for (final Dimension dimension : dimensions) { - tsidHasher.update(dimension.value.toBytesRef().bytes); + for (final List values : dimensions.values()) { + for (BytesReference v : values) { + tsidHasher.update(v.toBytesRef().bytes); + } } tsidHashIndex = writeHash128(tsidHasher.digestHash(), tsidHash, tsidHashIndex); @@ -368,8 +378,20 @@ public DocumentDimensions validate(final IndexSettings settings) { } private void add(String fieldName, BytesReference encoded) throws IOException { - if (dimensions.add(new Dimension(new BytesRef(fieldName), encoded)) == false) { - throw new IllegalArgumentException("Dimension field [" + fieldName + "] cannot be a multi-valued field."); + BytesRef name = new BytesRef(fieldName); + List values = dimensions.get(name); + if (values == null) { + // optimize for the common case where dimensions are not multi-valued + dimensions.put(name, List.of(encoded)); + } else { + if (values.size() == 1) { + // converts the immutable list that's optimized for the common case of having only one value to a mutable list + BytesReference previousValue = values.get(0); + values = new ArrayList<>(4); + values.add(previousValue); + dimensions.put(name, values); + } + values.add(encoded); } } } diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/IndexRoutingTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/IndexRoutingTests.java index 6ac8e6a853f27..e39ccdf7af5e2 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/IndexRoutingTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/IndexRoutingTests.java @@ -595,7 +595,32 @@ public void testRoutingPathBooleansInSource() throws IOException { IndexRouting routing = indexRoutingForPath(shards, "foo"); assertIndexShard(routing, Map.of("foo", true), Math.floorMod(hash(List.of("foo", "true")), shards)); assertIndexShard(routing, Map.of("foo", false), Math.floorMod(hash(List.of("foo", "false")), shards)); + } + public void testRoutingPathArraysInSource() throws IOException { + int shards = between(2, 1000); + IndexRouting routing = indexRoutingForPath(shards, "a,b,c,d"); + assertIndexShard( + routing, + Map.of("c", List.of(true), "d", List.of(), "a", List.of("foo", "bar", "foo"), "b", List.of(21, 42)), + // Note that the fields are sorted + Math.floorMod(hash(List.of("a", "foo", "a", "bar", "a", "foo", "b", "21", "b", "42", "c", "true")), shards) + ); + } + + public void testRoutingPathObjectArraysInSource() throws IOException { + int shards = between(2, 1000); + IndexRouting routing = indexRoutingForPath(shards, "a"); + + BytesReference source = source(Map.of("a", List.of("foo", Map.of("foo", "bar")))); + Exception e = expectThrows( + IllegalArgumentException.class, + () -> routing.indexShard(randomAlphaOfLength(5), null, XContentType.JSON, source, s -> {}) + ); + assertThat( + e.getMessage(), + equalTo("Error extracting routing: Failed to parse object: expecting value token but found [START_OBJECT]") + ); } public void testRoutingPathBwc() throws IOException { @@ -668,7 +693,11 @@ private void assertIndexShard(IndexRouting routing, Map source, IndexRouting.ExtractFromSource.Builder b = r.builder(); for (Map.Entry e : flattened.entrySet()) { - b.addMatching(e.getKey(), new BytesRef(e.getValue().toString())); + if (e.getValue() instanceof List listValue) { + listValue.forEach(v -> b.addMatching(e.getKey(), new BytesRef(v.toString()))); + } else { + b.addMatching(e.getKey(), new BytesRef(e.getValue().toString())); + } } String idFromBuilder = b.createId(suffix, () -> { throw new AssertionError(); }); assertThat(idFromBuilder, equalTo(idFromSource)); diff --git a/server/src/test/java/org/elasticsearch/index/mapper/BooleanFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/BooleanFieldMapperTests.java index b0a9a9e9fe7dc..83553503c3c5e 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/BooleanFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/BooleanFieldMapperTests.java @@ -268,8 +268,11 @@ public void testDimensionMultiValuedFieldTSDB() throws IOException { b.field("time_series_dimension", true); }), IndexMode.TIME_SERIES); - Exception e = expectThrows(DocumentParsingException.class, () -> mapper.parse(source(b -> b.array("field", true, false)))); - assertThat(e.getCause().getMessage(), containsString("Dimension field [field] cannot be a multi-valued field")); + ParsedDocument doc = mapper.parse(source(null, b -> { + b.array("field", true, false); + b.field("@timestamp", Instant.now()); + }, TimeSeriesRoutingHashFieldMapper.encode(randomInt()))); + assertThat(doc.docs().get(0).getFields("field"), hasSize(greaterThan(1))); } public void testDimensionMultiValuedFieldNonTSDB() throws IOException { diff --git a/server/src/test/java/org/elasticsearch/index/mapper/IpFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/IpFieldMapperTests.java index f92df7a421cf9..1b8a2d68cd930 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/IpFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/IpFieldMapperTests.java @@ -266,11 +266,11 @@ public void testDimensionMultiValuedFieldTSDB() throws IOException { b.field("time_series_dimension", true); }), IndexMode.TIME_SERIES); - Exception e = expectThrows( - DocumentParsingException.class, - () -> mapper.parse(source(b -> b.array("field", "192.168.1.1", "192.168.1.1"))) - ); - assertThat(e.getCause().getMessage(), containsString("Dimension field [field] cannot be a multi-valued field")); + ParsedDocument doc = mapper.parse(source(null, b -> { + b.array("field", "192.168.1.1", "192.168.1.1"); + b.field("@timestamp", Instant.now()); + }, TimeSeriesRoutingHashFieldMapper.encode(randomInt()))); + assertThat(doc.docs().get(0).getFields("field"), hasSize(greaterThan(1))); } public void testDimensionMultiValuedFieldNonTSDB() throws IOException { diff --git a/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java index 2d2fad23b3831..5b218fb077d32 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java @@ -384,8 +384,11 @@ public void testDimensionMultiValuedFieldTSDB() throws IOException { b.field("time_series_dimension", true); }), IndexMode.TIME_SERIES); - Exception e = expectThrows(DocumentParsingException.class, () -> mapper.parse(source(b -> b.array("field", "1234", "45678")))); - assertThat(e.getCause().getMessage(), containsString("Dimension field [field] cannot be a multi-valued field")); + ParsedDocument doc = mapper.parse(source(null, b -> { + b.array("field", "1234", "45678"); + b.field("@timestamp", Instant.now()); + }, TimeSeriesRoutingHashFieldMapper.encode(randomInt()))); + assertThat(doc.docs().get(0).getFields("field"), hasSize(greaterThan(1))); } public void testDimensionMultiValuedFieldNonTSDB() throws IOException { diff --git a/server/src/test/java/org/elasticsearch/index/mapper/TimeSeriesIdFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/TimeSeriesIdFieldMapperTests.java index b27b8bd9766f3..9d56938f185de 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/TimeSeriesIdFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/TimeSeriesIdFieldMapperTests.java @@ -28,10 +28,13 @@ import org.elasticsearch.xcontent.XContentType; import java.io.IOException; +import java.util.List; +import java.util.stream.Collectors; import static org.elasticsearch.index.seqno.SequenceNumbers.UNASSIGNED_SEQ_NO; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.not; import static org.hamcrest.Matchers.nullValue; @@ -63,13 +66,19 @@ protected IndexVersion getVersion() { } private DocumentMapper createDocumentMapper(String routingPath, XContentBuilder mappings) throws IOException { + return createDocumentMapper(getVersion(), routingPath, mappings); + } + + private DocumentMapper createDocumentMapper(IndexVersion version, String routingPath, XContentBuilder mappings) throws IOException { return createMapperService( + version, getIndexSettingsBuilder().put(IndexSettings.MODE.getKey(), IndexMode.TIME_SERIES.name()) .put(MapperService.INDEX_MAPPING_DIMENSION_FIELDS_LIMIT_SETTING.getKey(), 200) // Allow tests that use many dimensions .put(IndexMetadata.INDEX_ROUTING_PATH.getKey(), routingPath) .put(IndexSettings.TIME_SERIES_START_TIME.getKey(), "2021-04-28T00:00:00Z") .put(IndexSettings.TIME_SERIES_END_TIME.getKey(), "2021-10-29T00:00:00Z") .build(), + () -> true, mappings ).documentMapper(); } @@ -644,6 +653,44 @@ public void testDifferentDimensions() throws IOException { assertThat(doc1.rootDoc().getBinaryValue("_tsid").bytes, not(doc2.rootDoc().getBinaryValue("_tsid").bytes)); } + public void testMultiValueDimensionsNotSupportedBeforeTsidHashing() throws IOException { + IndexVersion priorToTsidHashing = IndexVersionUtils.getPreviousVersion(IndexVersions.TIME_SERIES_ID_HASHING); + DocumentMapper docMapper = createDocumentMapper( + priorToTsidHashing, + "a", + mapping(b -> b.startObject("a").field("type", "keyword").field("time_series_dimension", true).endObject()) + ); + + String a1 = randomAlphaOfLength(10); + String a2 = randomAlphaOfLength(10); + CheckedConsumer fields = d -> d.field("a", new String[] { a1, a2 }); + DocumentParsingException exception = assertThrows(DocumentParsingException.class, () -> parseDocument(docMapper, fields)); + assertThat(exception.getMessage(), containsString("Dimension field [a] cannot be a multi-valued field")); + } + + public void testMultiValueDimensions() throws IOException { + DocumentMapper docMapper = createDocumentMapper( + IndexVersions.TIME_SERIES_ID_HASHING, + "a", + mapping(b -> b.startObject("a").field("type", "keyword").field("time_series_dimension", true).endObject()) + ); + + String a1 = randomAlphaOfLength(10); + String a2 = randomAlphaOfLength(10); + List docs = List.of( + parseDocument(docMapper, d -> d.field("a", new String[] { a1 })), + parseDocument(docMapper, d -> d.field("a", new String[] { a1, a2 })), + parseDocument(docMapper, d -> d.field("a", new String[] { a2, a1 })), + parseDocument(docMapper, d -> d.field("a", new String[] { a1, a2, a1 })), + parseDocument(docMapper, d -> d.field("a", new String[] { a2, a1, a2 })) + ); + List tsids = docs.stream() + .map(doc -> doc.rootDoc().getBinaryValue("_tsid").toString()) + .distinct() + .collect(Collectors.toList()); + assertThat(tsids, hasSize(docs.size())); + } + /** * Documents with fewer dimensions have a different value. */ diff --git a/server/src/test/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldMapperTests.java index 2f245a319f8cc..285431b881add 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldMapperTests.java @@ -29,6 +29,7 @@ import org.elasticsearch.index.mapper.MapperTestCase; import org.elasticsearch.index.mapper.ParsedDocument; import org.elasticsearch.index.mapper.SourceToParse; +import org.elasticsearch.index.mapper.TimeSeriesRoutingHashFieldMapper; import org.elasticsearch.index.mapper.flattened.FlattenedFieldMapper.KeyedFlattenedFieldType; import org.elasticsearch.index.mapper.flattened.FlattenedFieldMapper.RootFlattenedFieldType; import org.elasticsearch.xcontent.XContentBuilder; @@ -200,11 +201,11 @@ public void testDimensionMultiValuedFieldTSDB() throws IOException { b.field("time_series_dimensions", List.of("key1", "key2", "field3.key3")); }), IndexMode.TIME_SERIES); - Exception e = expectThrows( - DocumentParsingException.class, - () -> mapper.parse(source(b -> b.array("field.key1", "value1", "value2"))) - ); - assertThat(e.getCause().getMessage(), containsString("Dimension field [field.key1] cannot be a multi-valued field")); + ParsedDocument doc = mapper.parse(source(null, b -> { + b.array("field.key1", "value1", "value2"); + b.field("@timestamp", Instant.now()); + }, TimeSeriesRoutingHashFieldMapper.encode(randomInt()))); + assertThat(doc.docs().get(0).getFields("field"), hasSize(greaterThan(1))); } public void testDimensionMultiValuedFieldNonTSDB() throws IOException { diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/WholeNumberFieldMapperTests.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/WholeNumberFieldMapperTests.java index bb17983264072..a297f5d13254b 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/WholeNumberFieldMapperTests.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/WholeNumberFieldMapperTests.java @@ -80,11 +80,11 @@ public void testDimensionMultiValuedFieldTSDB() throws IOException { b.field("time_series_dimension", true); }), IndexMode.TIME_SERIES); - Exception e = expectThrows( - DocumentParsingException.class, - () -> mapper.parse(source(b -> b.array("field", randomNumber(), randomNumber(), randomNumber()))) - ); - assertThat(e.getCause().getMessage(), containsString("Dimension field [field] cannot be a multi-valued field")); + ParsedDocument doc = mapper.parse(source(null, b -> { + b.array("field", randomNumber(), randomNumber(), randomNumber()); + b.field("@timestamp", Instant.now()); + }, TimeSeriesRoutingHashFieldMapper.encode(randomInt()))); + assertThat(doc.docs().get(0).getFields("field"), hasSize(greaterThan(1))); } public void testDimensionMultiValuedFieldNonTSDB() throws IOException { diff --git a/x-pack/plugin/mapper-unsigned-long/src/test/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongFieldMapperTests.java b/x-pack/plugin/mapper-unsigned-long/src/test/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongFieldMapperTests.java index cdb25fbe995b2..f554a84048fde 100644 --- a/x-pack/plugin/mapper-unsigned-long/src/test/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongFieldMapperTests.java +++ b/x-pack/plugin/mapper-unsigned-long/src/test/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongFieldMapperTests.java @@ -21,6 +21,7 @@ import org.elasticsearch.index.mapper.NumberTypeOutOfRangeSpec; import org.elasticsearch.index.mapper.ParsedDocument; import org.elasticsearch.index.mapper.TimeSeriesParams; +import org.elasticsearch.index.mapper.TimeSeriesRoutingHashFieldMapper; import org.elasticsearch.index.mapper.WholeNumberFieldMapperTests; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.xcontent.XContentBuilder; @@ -268,11 +269,11 @@ public void testDimensionMultiValuedFieldTSDB() throws IOException { b.field("time_series_dimension", true); }), IndexMode.TIME_SERIES); - Exception e = expectThrows( - DocumentParsingException.class, - () -> mapper.parse(source(b -> b.array("field", randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()))) - ); - assertThat(e.getCause().getMessage(), containsString("Dimension field [field] cannot be a multi-valued field")); + ParsedDocument doc = mapper.parse(source(null, b -> { + b.array("field", randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()); + b.field("@timestamp", Instant.now()); + }, TimeSeriesRoutingHashFieldMapper.encode(randomInt()))); + assertThat(doc.docs().get(0).getFields("field"), hasSize(greaterThan(1))); } public void testDimensionMultiValuedFieldNonTSDB() throws IOException { diff --git a/x-pack/plugin/otel-data/src/main/resources/index-templates/metrics-otel@template.yaml b/x-pack/plugin/otel-data/src/main/resources/index-templates/metrics-otel@template.yaml index 89ff28249aabb..a4413d266181d 100644 --- a/x-pack/plugin/otel-data/src/main/resources/index-templates/metrics-otel@template.yaml +++ b/x-pack/plugin/otel-data/src/main/resources/index-templates/metrics-otel@template.yaml @@ -28,6 +28,11 @@ template: type: constant_keyword value: metrics dynamic_templates: + - ecs_ip: + mapping: + type: ip + path_match: [ "ip", "*.ip", "*_ip" ] + match_mapping_type: string - all_strings_to_keywords: mapping: ignore_above: 1024 diff --git a/x-pack/plugin/otel-data/src/yamlRestTest/resources/rest-api-spec/test/20_metrics_tests.yml b/x-pack/plugin/otel-data/src/yamlRestTest/resources/rest-api-spec/test/20_metrics_tests.yml index a6591d6c32210..1823dfab7e716 100644 --- a/x-pack/plugin/otel-data/src/yamlRestTest/resources/rest-api-spec/test/20_metrics_tests.yml +++ b/x-pack/plugin/otel-data/src/yamlRestTest/resources/rest-api-spec/test/20_metrics_tests.yml @@ -123,11 +123,10 @@ setup: start_time: 2024-07-01T13:03:08.138Z mappings: dynamic_templates: - - ip_fields: + - no_ip_fields: mapping: - type: ip + type: keyword match_mapping_type: string - path_match: "*.ip" - do: bulk: index: metrics-generic.otel-default @@ -145,5 +144,37 @@ setup: indices.get_mapping: index: $idx0name expand_wildcards: hidden - - match: { .$idx0name.mappings.properties.attributes.properties.host\.ip.type: 'ip' } + - match: { .$idx0name.mappings.properties.attributes.properties.host\.ip.type: 'keyword' } - match: { .$idx0name.mappings.properties.attributes.properties.foo.type: "keyword" } +--- +IP dimensions: + - requires: + cluster_features: ["routing.multi_value_routing_path"] + reason: support for multi-value dimensions + - do: + bulk: + index: metrics-generic.otel-default + refresh: true + body: + - create: {"dynamic_templates":{"metrics.foo.bar":"counter_long"}} + - "@timestamp": 2024-07-18T14:48:33.467654000Z + resource: + attributes: + host.ip: [ "127.0.0.1", "0.0.0.0" ] + attributes: + philip: [ a, b, c ] + metrics: + foo.bar: 42 + - is_false: errors + + - do: + indices.get_data_stream: + name: metrics-generic.otel-default + - set: { data_streams.0.indices.0.index_name: idx0name } + + - do: + indices.get_mapping: + index: $idx0name + expand_wildcards: hidden + - match: { .$idx0name.mappings.properties.resource.properties.attributes.properties.host\.ip.type: 'ip' } + - match: { .$idx0name.mappings.properties.attributes.properties.philip.type: "keyword" } From f0507c7fd91dfaa48b51efcef56ab021b71fdf4b Mon Sep 17 00:00:00 2001 From: Felix Barnsteiner Date: Mon, 23 Sep 2024 11:54:06 +0200 Subject: [PATCH 2/2] Remove skip test for 8.x This was just needed for 8.x to 9.0 compatibility tests --- rest-api-spec/build.gradle | 4 ---- 1 file changed, 4 deletions(-) diff --git a/rest-api-spec/build.gradle b/rest-api-spec/build.gradle index d4650c7aa1a4a..21dff41d2ef3e 100644 --- a/rest-api-spec/build.gradle +++ b/rest-api-spec/build.gradle @@ -243,7 +243,3 @@ tasks.register('enforceYamlTestConvention').configure { tasks.named("precommit").configure { dependsOn 'enforceYamlTestConvention' } - -tasks.named("yamlRestCompatTestTransform").configure({ task -> - task.skipTest("tsdb/140_routing_path/multi-value routing path field", "Multi-value routing paths are allowed now. See #112645") -})