diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/TsidBuilder.java b/server/src/main/java/org/elasticsearch/cluster/routing/TsidBuilder.java index d29dced2adb28..5757dbfa5e533 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/TsidBuilder.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/TsidBuilder.java @@ -31,7 +31,6 @@ */ public class TsidBuilder { - private static final int MAX_TSID_VALUE_FIELDS = 16; private final BufferedMurmur3Hasher murmur3Hasher = new BufferedMurmur3Hasher(0L); private final List dimensions = new ArrayList<>(); @@ -205,19 +204,21 @@ public MurmurHash3.Hash128 hash() { /** * Builds a time series identifier (TSID) based on the dimensions added to this builder. - * This is a slight adaptation of {@link RoutingPathFields#buildHash()} but creates shorter tsids. + * This is an adaptation of {@link RoutingPathFields#buildHash()} but creates shorter TSIDs with a fixed size of 16 bytes. * The TSID is a hash that includes: * @@ -227,44 +228,40 @@ public MurmurHash3.Hash128 hash() { */ public BytesRef buildTsid() { throwIfEmpty(); - int numberOfValues = Math.min(MAX_TSID_VALUE_FIELDS, dimensions.size()); - byte[] hash = new byte[4 + numberOfValues + 16]; - int index = 0; + byte[] hash = new byte[16]; Collections.sort(dimensions); MurmurHash3.Hash128 hashBuffer = new MurmurHash3.Hash128(); murmur3Hasher.reset(); + // full hash for all dimension names and values for uniqueness for (int i = 0; i < dimensions.size(); i++) { Dimension dim = dimensions.get(i); - murmur3Hasher.addLong(dim.pathHash.h1 ^ dim.pathHash.h2); + murmur3Hasher.addLongs(dim.pathHash.h1, dim.pathHash.h2, dim.valueHash.h1, dim.valueHash.h2); } - ByteUtils.writeIntLE((int) murmur3Hasher.digestHash(hashBuffer).h1, hash, index); - index += 4; + MurmurHash3.Hash128 hash128 = murmur3Hasher.digestHash(hashBuffer); + ByteUtils.writeLongLE(hash128.h1, hash, 0); + ByteUtils.writeLongLE(hash128.h2, hash, 8); - // similarity hash for values - 
String previousPath = null; - for (int i = 0; i < numberOfValues; i++) { + // similarity hash for dimension names + murmur3Hasher.reset(); + for (int i = 0; i < dimensions.size(); i++) { Dimension dim = dimensions.get(i); - String path = dim.path(); - if (path.equals(previousPath)) { - // only add the first value for array fields - continue; - } - MurmurHash3.Hash128 valueHash = dim.valueHash(); - murmur3Hasher.reset(); - murmur3Hasher.addLong(valueHash.h1 ^ valueHash.h2); - hash[index++] = (byte) murmur3Hasher.digestHash(hashBuffer).h1; - previousPath = path; + murmur3Hasher.addLong(dim.pathHash.h1 ^ dim.pathHash.h2); } + hash[0] = (byte) murmur3Hasher.digestHash(hashBuffer).h1; + // similarity hash for first two dimensions murmur3Hasher.reset(); - for (int i = 0; i < dimensions.size(); i++) { - Dimension dim = dimensions.get(i); - murmur3Hasher.addLongs(dim.pathHash.h1, dim.pathHash.h2, dim.valueHash.h1, dim.valueHash.h2); + murmur3Hasher.addLong(dimensions.get(0).valueHash().hashCode()); + int valueSimilarityHash = (int) murmur3Hasher.digestHash(hashBuffer).h1 & 0x0F; + if (dimensions.size() > 1) { + murmur3Hasher.reset(); + murmur3Hasher.addLong(dimensions.get(1).valueHash().hashCode()); + valueSimilarityHash = (valueSimilarityHash << 4) | (byte) murmur3Hasher.digestHash(hashBuffer).h1 & 0x0F; } - index = writeHash128(murmur3Hasher.digestHash(hashBuffer), hash, index); - return new BytesRef(hash, 0, index); + hash[1] = (byte) valueSimilarityHash; + return new BytesRef(hash); } private void throwIfEmpty() { @@ -273,14 +270,6 @@ private void throwIfEmpty() { } } - private static int writeHash128(MurmurHash3.Hash128 hash128, byte[] buffer, int index) { - ByteUtils.writeLongLE(hash128.h2, buffer, index); - index += 8; - ByteUtils.writeLongLE(hash128.h1, buffer, index); - index += 8; - return index; - } - /** * A functional interface that describes how objects of a complex type are added to a TSID. 
* diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/TsidBuilderTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/TsidBuilderTests.java index 6ed1eca4fa75d..48a66d2d0a8d9 100644 --- a/server/src/test/java/org/elasticsearch/cluster/routing/TsidBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/routing/TsidBuilderTests.java @@ -37,14 +37,13 @@ public void testAddDimensions() { // if these change, we'll need a new index version // because it means existing time series will get a new _tsid and will be routed to a different shard - assertThat(builder.hash().toString(), equalTo("0xd4de1356065d297a2be489781e15d256")); // used to make shard routing decisions + assertThat(builder.hash().toString(), equalTo("0xd4de1356065d297a2be489781e15d256")); BytesRef bytesRef = builder.buildTsid(); assertThat(bytesRef, notNullValue()); - // 4 bytes for path hash + 1 byte per value (up to 16, only first value for arrays) + 16 bytes for hash - assertThat(bytesRef.length, equalTo(26)); + assertThat(bytesRef.length, equalTo(16)); assertThat( HexFormat.of().formatHex(bytesRef.bytes, bytesRef.offset, bytesRef.length), - equalTo("bf438ddaa0a8d663fdbb56d2151e7889e42b7a295d065613ded4") // _tsid in hex format + equalTo("bfa45d065613ded456d2151e7889e42b") // _tsid in hex format ); } @@ -111,22 +110,13 @@ public void testExceptionWhenNoDimensions() { assertTrue(tsidException.getMessage().contains("Dimensions are empty")); } - public void testTsidMinSize() { - BytesRef tsid = TsidBuilder.newBuilder().addIntDimension("test_int", 42).buildTsid(); - - // The TSID format should be: 4 bytes for path hash + 1 byte per value (up to 16) + 16 bytes for hash - // Since we only added one dimension, we expect: 4 + 1 + 16 = 21 bytes - assertEquals(21, tsid.length); - } - - public void testTsidMaxSize() { + public void testTsidSize() { TsidBuilder tsidBuilder = TsidBuilder.newBuilder(); - for (int i = 0; i < 32; i++) { + int dimensions = randomIntBetween(1, 
64); + for (int i = 0; i < dimensions; i++) { tsidBuilder.addStringDimension("dimension_" + i, "value_" + i); } - // The TSID format should be: 4 bytes for path hash + 1 byte per value (up to 16) + 16 bytes for hash - // Since we added 32 dimensions, we expect: 4 + 16 + 16 = 36 bytes - assertEquals(36, tsidBuilder.buildTsid().length); + assertEquals(16, tsidBuilder.buildTsid().length); } }