diff --git a/docs/changelog/112645.yaml b/docs/changelog/112645.yaml new file mode 100644 index 0000000000000..cf4ef4609a1f3 --- /dev/null +++ b/docs/changelog/112645.yaml @@ -0,0 +1,6 @@ +pr: 112645 +summary: Add support for multi-value dimensions +area: Mapping +type: enhancement +issues: + - 110387 diff --git a/docs/reference/mapping/types/keyword.asciidoc b/docs/reference/mapping/types/keyword.asciidoc index 59d307c4df0ad..a4be7026dffcd 100644 --- a/docs/reference/mapping/types/keyword.asciidoc +++ b/docs/reference/mapping/types/keyword.asciidoc @@ -163,7 +163,6 @@ index setting limits the number of dimensions in an index. Dimension fields have the following constraints: * The `doc_values` and `index` mapping parameters must be `true`. -* Field values cannot be an <>. // end::dimension[] * Dimension values are used to identify a document’s time series. If dimension values are altered in any way during indexing, the document will be stored as belonging to different from intended time series. As a result there are additional constraints: ** The field cannot use a <>. diff --git a/modules/data-streams/src/yamlRestTest/resources/rest-api-spec/test/data_stream/150_tsdb.yml b/modules/data-streams/src/yamlRestTest/resources/rest-api-spec/test/data_stream/150_tsdb.yml index d20231a6d6cf2..56f387c016261 100644 --- a/modules/data-streams/src/yamlRestTest/resources/rest-api-spec/test/data_stream/150_tsdb.yml +++ b/modules/data-streams/src/yamlRestTest/resources/rest-api-spec/test/data_stream/150_tsdb.yml @@ -1230,3 +1230,83 @@ non string dimension fields: - match: { .$idx0name.mappings.properties.attributes.properties.double.time_series_dimension: true } - match: { .$idx0name.mappings.properties.attributes.properties.host\.ip.type: 'ip' } - match: { .$idx0name.mappings.properties.attributes.properties.host\.ip.time_series_dimension: true } + +--- +multi value dimensions: + - requires: + cluster_features: ["routing.multi_value_routing_path"] + reason: support for multi-value dimensions + + - do: + allowed_warnings: + - "index template [my-dynamic-template] has index patterns [k9s*] matching patterns from existing older templates [global] with patterns (global => [*]); this template [my-dynamic-template] will take precedence during new index creation" + indices.put_index_template: + name: my-dynamic-template + body: + index_patterns: [k9s*] + data_stream: {} + template: + settings: + index: + number_of_shards: 1 + mode: time_series + time_series: + start_time: 2023-08-31T13:03:08.138Z + + mappings: + properties: + attributes: + type: passthrough + dynamic: true + time_series_dimension: true + priority: 1 + dynamic_templates: + - counter_metric: + mapping: + type: integer + time_series_metric: counter + + - do: + bulk: + index: k9s + refresh: true + body: + - '{ "create": { "dynamic_templates": { "data": "counter_metric" } } }' + - '{ "@timestamp": "2023-09-01T13:03:08.138Z","data": "10", "attributes": { "dim1": ["a" , "b"], "dim2": [1, 2] } }' + - '{ "create": { "dynamic_templates": { "data": "counter_metric" } } }' + - '{ "@timestamp": "2023-09-01T13:03:08.138Z","data": "20", "attributes": { "dim1": ["b" , "a"], "dim2": [1, 2] } }' + - '{ "create": { "dynamic_templates": { "data": "counter_metric" } } }' + - '{ "@timestamp": "2023-09-01T13:03:08.138Z","data": "20", "attributes": { "dim1": ["c" , "b"], "dim2": [1, 2] } }' + - is_false: errors + + - do: + search: + index: k9s + body: + size: 0 + aggs: + tsids: + terms: + field: _tsid + + - length: { aggregations.tsids.buckets: 3 } # only the order of the dim1 attribute is different, yet we expect to have two distinct time series + + - do: + search: + index: k9s + body: + size: 0 + aggs: + dims: + terms: + field: dim1 + order: + _key: asc + + - length: { aggregations.dims.buckets: 3 } + - match: { aggregations.dims.buckets.0.key: a } + - match: { aggregations.dims.buckets.0.doc_count: 2 } + - match: { aggregations.dims.buckets.1.key: b } + - match: { aggregations.dims.buckets.1.doc_count: 3 } + - match: { aggregations.dims.buckets.2.key: c } + - match: { aggregations.dims.buckets.2.doc_count: 1 } diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/tsdb/140_routing_path.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/tsdb/140_routing_path.yml index 6eb7a8dcad7aa..616afd3cf67ad 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/tsdb/140_routing_path.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/tsdb/140_routing_path.yml @@ -119,11 +119,11 @@ missing dimension on routing path field: type: keyword --- -multi-value routing path field: +multi-value routing path field succeeds: - requires: test_runner_features: close_to - cluster_features: ["gte_v8.13.0"] - reason: _tsid hashing introduced in 8.13 + cluster_features: ["routing.multi_value_routing_path"] + reason: support for multi-value dimensions - do: indices.create: @@ -172,12 +172,7 @@ multi-value routing path field: - '{"index": {}}' - '{"@timestamp": "2021-04-28T18:35:54.467Z", "uid": "df3145b3-0563-4d3b-a0f7-897eb2876ea9", "voltage": 6.8, "unmapped_field": 40, "tag": [ "one", "three" ] }' - - is_true: errors - - - match: {items.1.index.error.reason: "Error extracting routing: Routing values must be strings but found [START_ARRAY]" } - - match: {items.3.index.error.reason: "Error extracting routing: Routing values must be strings but found [START_ARRAY]" } - - match: {items.4.index.error.reason: "Error extracting routing: Routing values must be strings but found [START_ARRAY]" } - - match: {items.7.index.error.reason: "Error extracting routing: Routing values must be strings but found [START_ARRAY]" } + - is_false: errors - do: search: @@ -195,13 +190,21 @@ multi-value routing path field: avg: field: voltage - - match: {hits.total.value: 4} - - length: {aggregations.tsids.buckets: 2} + - match: {hits.total.value: 8} + - length: {aggregations.tsids.buckets: 4} - - match: {aggregations.tsids.buckets.0.key: "KDODRmbj7vu4rLWvjrJbpUuaET_vOYoRw6ImzKEcF4sEaGKnXSaKfM0" } + - match: {aggregations.tsids.buckets.0.key: "KDODRmbj7vu4rLWvjrJbpUtt0uPSOYoRw_LI4DD7DFEGEJ3NR3eQkMY" } - match: {aggregations.tsids.buckets.0.doc_count: 2 } - close_to: {aggregations.tsids.buckets.0.voltage.value: { value: 6.70, error: 0.01 }} - - match: { aggregations.tsids.buckets.1.key: "KDODRmbj7vu4rLWvjrJbpUvcUWJEddqA4Seo8jbBBBFxwC0lrefCb6A" } + - match: { aggregations.tsids.buckets.1.key: "KDODRmbj7vu4rLWvjrJbpUtt0uPSddqA4WYKglGPR_C0cJe8QGaiC2c" } - match: {aggregations.tsids.buckets.1.doc_count: 2 } - - close_to: {aggregations.tsids.buckets.1.voltage.value: { value: 7.30, error: 0.01 }} + - close_to: {aggregations.tsids.buckets.1.voltage.value: { value: 7.15, error: 0.01 }} + + - match: { aggregations.tsids.buckets.2.key: "KDODRmbj7vu4rLWvjrJbpUuaET_vOYoRw6ImzKEcF4sEaGKnXSaKfM0" } + - match: {aggregations.tsids.buckets.2.doc_count: 2 } + - close_to: {aggregations.tsids.buckets.2.voltage.value: { value: 6.70, error: 0.01 }} + + - match: { aggregations.tsids.buckets.3.key: "KDODRmbj7vu4rLWvjrJbpUvcUWJEddqA4Seo8jbBBBFxwC0lrefCb6A" } + - match: {aggregations.tsids.buckets.3.doc_count: 2 } + - close_to: {aggregations.tsids.buckets.3.voltage.value: { value: 7.30, error: 0.01 }} diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/IndexRouting.java b/server/src/main/java/org/elasticsearch/cluster/routing/IndexRouting.java index f4ed7ec6c264c..cb55e65abbd18 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/IndexRouting.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/IndexRouting.java @@ -35,7 +35,6 @@ import java.util.ArrayList; import java.util.Base64; import java.util.Collections; -import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; @@ -45,6 +44,7 @@ import java.util.function.Predicate; import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken; +import static org.elasticsearch.common.xcontent.XContentParserUtils.expectValueToken; /** * Generates the shard id for {@code (id, routing)} pairs. @@ -52,6 +52,7 @@ public abstract class IndexRouting { static final NodeFeature BOOLEAN_ROUTING_PATH = new NodeFeature("routing.boolean_routing_path"); + static final NodeFeature MULTI_VALUE_ROUTING_PATH = new NodeFeature("routing.multi_value_routing_path"); /** * Build the routing from {@link IndexMetadata}. @@ -301,7 +302,13 @@ public String createId(Map flat, byte[] suffix) { Builder b = builder(); for (Map.Entry e : flat.entrySet()) { if (isRoutingPath.test(e.getKey())) { - b.hashes.add(new NameAndHash(new BytesRef(e.getKey()), hash(new BytesRef(e.getValue().toString())))); + if (e.getValue() instanceof List listValue) { + for (Object v : listValue) { + b.addHash(e.getKey(), new BytesRef(v.toString())); + } + } else { + b.addHash(e.getKey(), new BytesRef(e.getValue().toString())); + } } } return b.createId(suffix, IndexRouting.ExtractFromSource::defaultOnEmpty); @@ -336,7 +343,7 @@ public class Builder { public void addMatching(String fieldName, BytesRef string) { if (isRoutingPath.test(fieldName)) { - hashes.add(new NameAndHash(new BytesRef(fieldName), hash(string))); + addHash(fieldName, string); } } @@ -357,6 +364,13 @@ private void extractObject(@Nullable String path, XContentParser source) throws } } + private void extractArray(@Nullable String path, XContentParser source) throws IOException { + while (source.currentToken() != Token.END_ARRAY) { + expectValueToken(source.currentToken(), source); + extractItem(path, source); + } + } + private void extractItem(String path, XContentParser source) throws IOException { switch (source.currentToken()) { case START_OBJECT: @@ -367,7 +381,12 @@ private void extractItem(String path, XContentParser source) throws IOException case VALUE_STRING: case VALUE_NUMBER: case VALUE_BOOLEAN: - hashes.add(new NameAndHash(new BytesRef(path), hash(new BytesRef(source.text())))); + addHash(path, new BytesRef(source.text())); + source.nextToken(); + break; + case START_ARRAY: + source.nextToken(); + extractArray(path, source); source.nextToken(); break; case VALUE_NULL: @@ -376,28 +395,24 @@ private void extractItem(String path, XContentParser source) throws IOException default: throw new ParsingException( source.getTokenLocation(), - "Routing values must be strings but found [{}]", + "Cannot extract routing path due to unexpected token [{}]", source.currentToken() ); } } + private void addHash(String path, BytesRef value) { + hashes.add(new NameAndHash(new BytesRef(path), hash(value), hashes.size())); + } + private int buildHash(IntSupplier onEmpty) { - Collections.sort(hashes); - Iterator itr = hashes.iterator(); - if (itr.hasNext() == false) { + if (hashes.isEmpty()) { return onEmpty.getAsInt(); } - NameAndHash prev = itr.next(); - int hash = hash(prev.name) ^ prev.hash; - while (itr.hasNext()) { - NameAndHash next = itr.next(); - if (prev.name.equals(next.name)) { - throw new IllegalArgumentException("Duplicate routing dimension for [" + next.name + "]"); - } - int thisHash = hash(next.name) ^ next.hash; - hash = 31 * hash + thisHash; - prev = next; + Collections.sort(hashes); + int hash = 0; + for (NameAndHash nah : hashes) { + hash = 31 * hash + (hash(nah.name) ^ nah.hash); } return hash; } @@ -458,10 +473,13 @@ private String error(String operation) { } } - private record NameAndHash(BytesRef name, int hash) implements Comparable { + private record NameAndHash(BytesRef name, int hash, int order) implements Comparable { @Override public int compareTo(NameAndHash o) { - return name.compareTo(o.name); + int i = name.compareTo(o.name); + if (i != 0) return i; + // ensures array values are in the order as they appear in the source + return Integer.compare(order, o.order); } } } diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/RoutingFeatures.java b/server/src/main/java/org/elasticsearch/cluster/routing/RoutingFeatures.java index 8885410bd0530..f8028ce7f9d68 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/RoutingFeatures.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/RoutingFeatures.java @@ -18,6 +18,6 @@ public class RoutingFeatures implements FeatureSpecification { @Override public Set getFeatures() { - return Set.of(IndexRouting.BOOLEAN_ROUTING_PATH); + return Set.of(IndexRouting.BOOLEAN_ROUTING_PATH, IndexRouting.MULTI_VALUE_ROUTING_PATH); } } diff --git a/server/src/main/java/org/elasticsearch/common/xcontent/XContentParserUtils.java b/server/src/main/java/org/elasticsearch/common/xcontent/XContentParserUtils.java index cd7cf5ddbd893..6390e62f9758f 100644 --- a/server/src/main/java/org/elasticsearch/common/xcontent/XContentParserUtils.java +++ b/server/src/main/java/org/elasticsearch/common/xcontent/XContentParserUtils.java @@ -72,6 +72,20 @@ public static void ensureExpectedToken(Token expected, Token actual, XContentPar } } + /** + * Makes sure the provided token {@linkplain Token#isValue() is a value type} + * + * @throws ParsingException if the token is not a value type + */ + public static void expectValueToken(Token actual, XContentParser parser) { + if (actual.isValue() == false) { + throw new ParsingException( + parser.getTokenLocation(), + String.format(Locale.ROOT, "Failed to parse object: expecting value token but found [%s]", actual) + ); + } + } + private static ParsingException parsingException(XContentParser parser, Token expected, Token actual) { return new ParsingException( parser.getTokenLocation(), diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TimeSeriesIdFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TimeSeriesIdFieldMapper.java index 80535be39a4c4..d660b5b952645 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TimeSeriesIdFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TimeSeriesIdFieldMapper.java @@ -41,13 +41,14 @@ import java.io.IOException; import java.net.InetAddress; import java.time.ZoneId; +import java.util.ArrayList; import java.util.Base64; import java.util.Collections; -import java.util.Comparator; import java.util.LinkedHashMap; +import java.util.List; import java.util.Map; -import java.util.SortedSet; -import java.util.TreeSet; +import java.util.SortedMap; +import java.util.TreeMap; /** * Mapper for {@code _tsid} field included generated when the index is @@ -176,16 +177,14 @@ public static class TimeSeriesIdBuilder implements DocumentDimensions { public static final int MAX_DIMENSIONS = 512; - private record Dimension(BytesRef name, BytesReference value) {} - private final Murmur3Hasher tsidHasher = new Murmur3Hasher(0); /** - * A sorted set of the serialized values of dimension fields that will be used + * A map of the serialized values of dimension fields that will be used * for generating the _tsid field. The map will be used by {@link TimeSeriesIdFieldMapper} * to build the _tsid field for the document. */ - private final SortedSet dimensions = new TreeSet<>(Comparator.comparing(o -> o.name)); + private final SortedMap> dimensions = new TreeMap<>(); /** * Builds the routing. Used for building {@code _id}. If null then skipped. */ @@ -203,9 +202,17 @@ public BytesReference buildLegacyTsid() throws IOException { try (BytesStreamOutput out = new BytesStreamOutput()) { out.writeVInt(dimensions.size()); - for (Dimension entry : dimensions) { - out.writeBytesRef(entry.name); - entry.value.writeTo(out); + for (Map.Entry> entry : dimensions.entrySet()) { + out.writeBytesRef(entry.getKey()); + List value = entry.getValue(); + if (value.size() > 1) { + // multi-value dimensions are only supported for newer indices that use buildTsidHash + throw new IllegalArgumentException( + "Dimension field [" + entry.getKey().utf8ToString() + "] cannot be a multi-valued field." + ); + } + assert value.isEmpty() == false : "dimension value is empty"; + value.get(0).writeTo(out); } return out.bytes(); } @@ -237,18 +244,19 @@ public BytesReference buildTsidHash() { int tsidHashIndex = StreamOutput.putVInt(tsidHash, len, 0); tsidHasher.reset(); - for (final Dimension dimension : dimensions) { - tsidHasher.update(dimension.name.bytes); + for (final BytesRef name : dimensions.keySet()) { + tsidHasher.update(name.bytes); } tsidHashIndex = writeHash128(tsidHasher.digestHash(), tsidHash, tsidHashIndex); // NOTE: concatenate all dimension value hashes up to a certain number of dimensions int tsidHashStartIndex = tsidHashIndex; - for (final Dimension dimension : dimensions) { + for (final List values : dimensions.values()) { if ((tsidHashIndex - tsidHashStartIndex) >= 4 * numberOfDimensions) { break; } - final BytesRef dimensionValueBytesRef = dimension.value.toBytesRef(); + assert values.isEmpty() == false : "dimension values are empty"; + final BytesRef dimensionValueBytesRef = values.get(0).toBytesRef(); ByteUtils.writeIntLE( StringHelper.murmurhash3_x86_32( dimensionValueBytesRef.bytes, @@ -264,8 +272,10 @@ public BytesReference buildTsidHash() { // NOTE: hash all dimension field allValues tsidHasher.reset(); - for (final Dimension dimension : dimensions) { - tsidHasher.update(dimension.value.toBytesRef().bytes); + for (final List values : dimensions.values()) { + for (BytesReference v : values) { + tsidHasher.update(v.toBytesRef().bytes); + } } tsidHashIndex = writeHash128(tsidHasher.digestHash(), tsidHash, tsidHashIndex); @@ -368,8 +378,20 @@ public DocumentDimensions validate(final IndexSettings settings) { } private void add(String fieldName, BytesReference encoded) throws IOException { - if (dimensions.add(new Dimension(new BytesRef(fieldName), encoded)) == false) { - throw new IllegalArgumentException("Dimension field [" + fieldName + "] cannot be a multi-valued field."); + BytesRef name = new BytesRef(fieldName); + List values = dimensions.get(name); + if (values == null) { + // optimize for the common case where dimensions are not multi-valued + dimensions.put(name, List.of(encoded)); + } else { + if (values.size() == 1) { + // converts the immutable list that's optimized for the common case of having only one value to a mutable list + BytesReference previousValue = values.get(0); + values = new ArrayList<>(4); + values.add(previousValue); + dimensions.put(name, values); + } + values.add(encoded); } } } diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/IndexRoutingTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/IndexRoutingTests.java index 6ac8e6a853f27..e39ccdf7af5e2 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/IndexRoutingTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/IndexRoutingTests.java @@ -595,7 +595,32 @@ public void testRoutingPathBooleansInSource() throws IOException { IndexRouting routing = indexRoutingForPath(shards, "foo"); assertIndexShard(routing, Map.of("foo", true), Math.floorMod(hash(List.of("foo", "true")), shards)); assertIndexShard(routing, Map.of("foo", false), Math.floorMod(hash(List.of("foo", "false")), shards)); + } + public void testRoutingPathArraysInSource() throws IOException { + int shards = between(2, 1000); + IndexRouting routing = indexRoutingForPath(shards, "a,b,c,d"); + assertIndexShard( + routing, + Map.of("c", List.of(true), "d", List.of(), "a", List.of("foo", "bar", "foo"), "b", List.of(21, 42)), + // Note that the fields are sorted + Math.floorMod(hash(List.of("a", "foo", "a", "bar", "a", "foo", "b", "21", "b", "42", "c", "true")), shards) + ); + } + + public void testRoutingPathObjectArraysInSource() throws IOException { + int shards = between(2, 1000); + IndexRouting routing = indexRoutingForPath(shards, "a"); + + BytesReference source = source(Map.of("a", List.of("foo", Map.of("foo", "bar")))); + Exception e = expectThrows( + IllegalArgumentException.class, + () -> routing.indexShard(randomAlphaOfLength(5), null, XContentType.JSON, source, s -> {}) + ); + assertThat( + e.getMessage(), + equalTo("Error extracting routing: Failed to parse object: expecting value token but found [START_OBJECT]") + ); } public void testRoutingPathBwc() throws IOException { @@ -668,7 +693,11 @@ private void assertIndexShard(IndexRouting routing, Map source, IndexRouting.ExtractFromSource.Builder b = r.builder(); for (Map.Entry e : flattened.entrySet()) { - b.addMatching(e.getKey(), new BytesRef(e.getValue().toString())); + if (e.getValue() instanceof List listValue) { + listValue.forEach(v -> b.addMatching(e.getKey(), new BytesRef(v.toString()))); + } else { + b.addMatching(e.getKey(), new BytesRef(e.getValue().toString())); + } } String idFromBuilder = b.createId(suffix, () -> { throw new AssertionError(); }); assertThat(idFromBuilder, equalTo(idFromSource)); diff --git a/server/src/test/java/org/elasticsearch/index/mapper/BooleanFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/BooleanFieldMapperTests.java index b0a9a9e9fe7dc..83553503c3c5e 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/BooleanFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/BooleanFieldMapperTests.java @@ -268,8 +268,11 @@ public void testDimensionMultiValuedFieldTSDB() throws IOException { b.field("time_series_dimension", true); }), IndexMode.TIME_SERIES); - Exception e = expectThrows(DocumentParsingException.class, () -> mapper.parse(source(b -> b.array("field", true, false)))); - assertThat(e.getCause().getMessage(), containsString("Dimension field [field] cannot be a multi-valued field")); + ParsedDocument doc = mapper.parse(source(null, b -> { + b.array("field", true, false); + b.field("@timestamp", Instant.now()); + }, TimeSeriesRoutingHashFieldMapper.encode(randomInt()))); + assertThat(doc.docs().get(0).getFields("field"), hasSize(greaterThan(1))); } public void testDimensionMultiValuedFieldNonTSDB() throws IOException { diff --git a/server/src/test/java/org/elasticsearch/index/mapper/IpFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/IpFieldMapperTests.java index f92df7a421cf9..1b8a2d68cd930 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/IpFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/IpFieldMapperTests.java @@ -266,11 +266,11 @@ public void testDimensionMultiValuedFieldTSDB() throws IOException { b.field("time_series_dimension", true); }), IndexMode.TIME_SERIES); - Exception e = expectThrows( - DocumentParsingException.class, - () -> mapper.parse(source(b -> b.array("field", "192.168.1.1", "192.168.1.1"))) - ); - assertThat(e.getCause().getMessage(), containsString("Dimension field [field] cannot be a multi-valued field")); + ParsedDocument doc = mapper.parse(source(null, b -> { + b.array("field", "192.168.1.1", "192.168.1.1"); + b.field("@timestamp", Instant.now()); + }, TimeSeriesRoutingHashFieldMapper.encode(randomInt()))); + assertThat(doc.docs().get(0).getFields("field"), hasSize(greaterThan(1))); } public void testDimensionMultiValuedFieldNonTSDB() throws IOException { diff --git a/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java index 2d2fad23b3831..5b218fb077d32 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java @@ -384,8 +384,11 @@ public void testDimensionMultiValuedFieldTSDB() throws IOException { b.field("time_series_dimension", true); }), IndexMode.TIME_SERIES); - Exception e = expectThrows(DocumentParsingException.class, () -> mapper.parse(source(b -> b.array("field", "1234", "45678")))); - assertThat(e.getCause().getMessage(), containsString("Dimension field [field] cannot be a multi-valued field")); + ParsedDocument doc = mapper.parse(source(null, b -> { + b.array("field", "1234", "45678"); + b.field("@timestamp", Instant.now()); + }, TimeSeriesRoutingHashFieldMapper.encode(randomInt()))); + assertThat(doc.docs().get(0).getFields("field"), hasSize(greaterThan(1))); } public void testDimensionMultiValuedFieldNonTSDB() throws IOException { diff --git a/server/src/test/java/org/elasticsearch/index/mapper/TimeSeriesIdFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/TimeSeriesIdFieldMapperTests.java index b27b8bd9766f3..9d56938f185de 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/TimeSeriesIdFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/TimeSeriesIdFieldMapperTests.java @@ -28,10 +28,13 @@ import org.elasticsearch.xcontent.XContentType; import java.io.IOException; +import java.util.List; +import java.util.stream.Collectors; import static org.elasticsearch.index.seqno.SequenceNumbers.UNASSIGNED_SEQ_NO; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.not; import static org.hamcrest.Matchers.nullValue; @@ -63,13 +66,19 @@ protected IndexVersion getVersion() { } private DocumentMapper createDocumentMapper(String routingPath, XContentBuilder mappings) throws IOException { + return createDocumentMapper(getVersion(), routingPath, mappings); + } + + private DocumentMapper createDocumentMapper(IndexVersion version, String routingPath, XContentBuilder mappings) throws IOException { return createMapperService( + version, getIndexSettingsBuilder().put(IndexSettings.MODE.getKey(), IndexMode.TIME_SERIES.name()) .put(MapperService.INDEX_MAPPING_DIMENSION_FIELDS_LIMIT_SETTING.getKey(), 200) // Allow tests that use many dimensions .put(IndexMetadata.INDEX_ROUTING_PATH.getKey(), routingPath) .put(IndexSettings.TIME_SERIES_START_TIME.getKey(), "2021-04-28T00:00:00Z") .put(IndexSettings.TIME_SERIES_END_TIME.getKey(), "2021-10-29T00:00:00Z") .build(), + () -> true, mappings ).documentMapper(); } @@ -644,6 +653,44 @@ public void testDifferentDimensions() throws IOException { assertThat(doc1.rootDoc().getBinaryValue("_tsid").bytes, not(doc2.rootDoc().getBinaryValue("_tsid").bytes)); } + public void testMultiValueDimensionsNotSupportedBeforeTsidHashing() throws IOException { + IndexVersion priorToTsidHashing = IndexVersionUtils.getPreviousVersion(IndexVersions.TIME_SERIES_ID_HASHING); + DocumentMapper docMapper = createDocumentMapper( + priorToTsidHashing, + "a", + mapping(b -> b.startObject("a").field("type", "keyword").field("time_series_dimension", true).endObject()) + ); + + String a1 = randomAlphaOfLength(10); + String a2 = randomAlphaOfLength(10); + CheckedConsumer fields = d -> d.field("a", new String[] { a1, a2 }); + DocumentParsingException exception = assertThrows(DocumentParsingException.class, () -> parseDocument(docMapper, fields)); + assertThat(exception.getMessage(), containsString("Dimension field [a] cannot be a multi-valued field")); + } + + public void testMultiValueDimensions() throws IOException { + DocumentMapper docMapper = createDocumentMapper( + IndexVersions.TIME_SERIES_ID_HASHING, + "a", + mapping(b -> b.startObject("a").field("type", "keyword").field("time_series_dimension", true).endObject()) + ); + + String a1 = randomAlphaOfLength(10); + String a2 = randomAlphaOfLength(10); + List docs = List.of( + parseDocument(docMapper, d -> d.field("a", new String[] { a1 })), + parseDocument(docMapper, d -> d.field("a", new String[] { a1, a2 })), + parseDocument(docMapper, d -> d.field("a", new String[] { a2, a1 })), + parseDocument(docMapper, d -> d.field("a", new String[] { a1, a2, a1 })), + parseDocument(docMapper, d -> d.field("a", new String[] { a2, a1, a2 })) + ); + List tsids = docs.stream() + .map(doc -> doc.rootDoc().getBinaryValue("_tsid").toString()) + .distinct() + .collect(Collectors.toList()); + assertThat(tsids, hasSize(docs.size())); + } + /** * Documents with fewer dimensions have a different value. */ diff --git a/server/src/test/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldMapperTests.java index 2f245a319f8cc..285431b881add 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/flattened/FlattenedFieldMapperTests.java @@ -29,6 +29,7 @@ import org.elasticsearch.index.mapper.MapperTestCase; import org.elasticsearch.index.mapper.ParsedDocument; import org.elasticsearch.index.mapper.SourceToParse; +import org.elasticsearch.index.mapper.TimeSeriesRoutingHashFieldMapper; import org.elasticsearch.index.mapper.flattened.FlattenedFieldMapper.KeyedFlattenedFieldType; import org.elasticsearch.index.mapper.flattened.FlattenedFieldMapper.RootFlattenedFieldType; import org.elasticsearch.xcontent.XContentBuilder; @@ -200,11 +201,11 @@ public void testDimensionMultiValuedFieldTSDB() throws IOException { b.field("time_series_dimensions", List.of("key1", "key2", "field3.key3")); }), IndexMode.TIME_SERIES); - Exception e = expectThrows( - DocumentParsingException.class, - () -> mapper.parse(source(b -> b.array("field.key1", "value1", "value2"))) - ); - assertThat(e.getCause().getMessage(), containsString("Dimension field [field.key1] cannot be a multi-valued field")); + ParsedDocument doc = mapper.parse(source(null, b -> { + b.array("field.key1", "value1", "value2"); + b.field("@timestamp", Instant.now()); + }, TimeSeriesRoutingHashFieldMapper.encode(randomInt()))); + assertThat(doc.docs().get(0).getFields("field"), hasSize(greaterThan(1))); } public void testDimensionMultiValuedFieldNonTSDB() throws IOException { diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/WholeNumberFieldMapperTests.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/WholeNumberFieldMapperTests.java index bb17983264072..a297f5d13254b 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/WholeNumberFieldMapperTests.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/WholeNumberFieldMapperTests.java @@ -80,11 +80,11 @@ public void testDimensionMultiValuedFieldTSDB() throws IOException { b.field("time_series_dimension", true); }), IndexMode.TIME_SERIES); - Exception e = expectThrows( - DocumentParsingException.class, - () -> mapper.parse(source(b -> b.array("field", randomNumber(), randomNumber(), randomNumber()))) - ); - assertThat(e.getCause().getMessage(), containsString("Dimension field [field] cannot be a multi-valued field")); + ParsedDocument doc = mapper.parse(source(null, b -> { + b.array("field", randomNumber(), randomNumber(), randomNumber()); + b.field("@timestamp", Instant.now()); + }, TimeSeriesRoutingHashFieldMapper.encode(randomInt()))); + assertThat(doc.docs().get(0).getFields("field"), hasSize(greaterThan(1))); } public void testDimensionMultiValuedFieldNonTSDB() throws IOException { diff --git a/x-pack/plugin/mapper-unsigned-long/src/test/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongFieldMapperTests.java b/x-pack/plugin/mapper-unsigned-long/src/test/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongFieldMapperTests.java index cdb25fbe995b2..f554a84048fde 100644 --- a/x-pack/plugin/mapper-unsigned-long/src/test/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongFieldMapperTests.java +++ b/x-pack/plugin/mapper-unsigned-long/src/test/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongFieldMapperTests.java @@ -21,6 +21,7 @@ import org.elasticsearch.index.mapper.NumberTypeOutOfRangeSpec; import org.elasticsearch.index.mapper.ParsedDocument; import org.elasticsearch.index.mapper.TimeSeriesParams; +import org.elasticsearch.index.mapper.TimeSeriesRoutingHashFieldMapper; import org.elasticsearch.index.mapper.WholeNumberFieldMapperTests; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.xcontent.XContentBuilder; @@ -268,11 +269,11 @@ public void testDimensionMultiValuedFieldTSDB() throws IOException { b.field("time_series_dimension", true); }), IndexMode.TIME_SERIES); - Exception e = expectThrows( - DocumentParsingException.class, - () -> mapper.parse(source(b -> b.array("field", randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()))) - ); - assertThat(e.getCause().getMessage(), containsString("Dimension field [field] cannot be a multi-valued field")); + ParsedDocument doc = mapper.parse(source(null, b -> { + b.array("field", randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong()); + b.field("@timestamp", Instant.now()); + }, TimeSeriesRoutingHashFieldMapper.encode(randomInt()))); + assertThat(doc.docs().get(0).getFields("field"), hasSize(greaterThan(1))); } public void testDimensionMultiValuedFieldNonTSDB() throws IOException { diff --git a/x-pack/plugin/otel-data/src/main/resources/index-templates/metrics-otel@template.yaml b/x-pack/plugin/otel-data/src/main/resources/index-templates/metrics-otel@template.yaml index 89ff28249aabb..a4413d266181d 100644 --- a/x-pack/plugin/otel-data/src/main/resources/index-templates/metrics-otel@template.yaml +++ b/x-pack/plugin/otel-data/src/main/resources/index-templates/metrics-otel@template.yaml @@ -28,6 +28,11 @@ template: type: constant_keyword value: metrics dynamic_templates: + - ecs_ip: + mapping: + type: ip + path_match: [ "ip", "*.ip", "*_ip" ] + match_mapping_type: string - all_strings_to_keywords: mapping: ignore_above: 1024 diff --git a/x-pack/plugin/otel-data/src/yamlRestTest/resources/rest-api-spec/test/20_metrics_tests.yml b/x-pack/plugin/otel-data/src/yamlRestTest/resources/rest-api-spec/test/20_metrics_tests.yml index a6591d6c32210..1823dfab7e716 100644 --- a/x-pack/plugin/otel-data/src/yamlRestTest/resources/rest-api-spec/test/20_metrics_tests.yml +++ b/x-pack/plugin/otel-data/src/yamlRestTest/resources/rest-api-spec/test/20_metrics_tests.yml @@ -123,11 +123,10 @@ setup: start_time: 2024-07-01T13:03:08.138Z mappings: dynamic_templates: - - ip_fields: + - no_ip_fields: mapping: - type: ip + type: keyword match_mapping_type: string - path_match: "*.ip" - do: bulk: index: metrics-generic.otel-default @@ -145,5 +144,37 @@ setup: indices.get_mapping: index: $idx0name expand_wildcards: hidden - - match: { .$idx0name.mappings.properties.attributes.properties.host\.ip.type: 'ip' } + - match: { .$idx0name.mappings.properties.attributes.properties.host\.ip.type: 'keyword' } - match: { .$idx0name.mappings.properties.attributes.properties.foo.type: "keyword" } +--- +IP dimensions: + - requires: + cluster_features: ["routing.multi_value_routing_path"] + reason: support for multi-value dimensions + - do: + bulk: + index: metrics-generic.otel-default + refresh: true + body: + - create: {"dynamic_templates":{"metrics.foo.bar":"counter_long"}} + - "@timestamp": 2024-07-18T14:48:33.467654000Z + resource: + attributes: + host.ip: [ "127.0.0.1", "0.0.0.0" ] + attributes: + philip: [ a, b, c ] + metrics: + foo.bar: 42 + - is_false: errors + + - do: + indices.get_data_stream: + name: metrics-generic.otel-default + - set: { data_streams.0.indices.0.index_name: idx0name } + + - do: + indices.get_mapping: + index: $idx0name + expand_wildcards: hidden + - match: { .$idx0name.mappings.properties.resource.properties.attributes.properties.host\.ip.type: 'ip' } + - match: { .$idx0name.mappings.properties.attributes.properties.philip.type: "keyword" }