Skip to content

Commit 9846b8e

Browse files
authored
Introduce single prefix byte TSID layout (elastic#143955)
The current TSID layout: ``` byte 0 = hash(dimension_names) bytes 1–4 = hash(value_0) … hash(value_3) bytes 5–20 = 128-bit hash (uniqueness) ``` The first 5 bytes are derived from dimension names and a subset of values. Since these are often static for a given metric, partitioning by prefix typically yields only a single partition. This change introduces a 16-byte TSID layout with a single prefix byte to better partition time-series of a single metric: ``` byte 0 = hash(metric_name) for OTel, hash(labels.__name__) for Prometheus, hash(all dimension names and values) for generic TSDB bytes 1–15 = 15-byte hash (uniqueness + within-metric ordering) --- Total: 16 bytes (2 longs) ``` The new layout groups time-series by metric, enabling partitioned rate aggregations across at least 256 partitions.
1 parent 26e62f9 commit 9846b8e

File tree

12 files changed

+302
-74
lines changed

12 files changed

+302
-74
lines changed

server/src/main/java/org/elasticsearch/cluster/routing/IndexRouting.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -581,7 +581,7 @@ public BytesRef buildTsid(XContentType sourceType, BytesReference source) {
581581
} catch (IOException | ParsingException e) {
582582
throw new IllegalArgumentException("Error extracting tsid: " + e.getMessage(), e);
583583
}
584-
return b.buildTsid();
584+
return b.buildTsid(creationVersion);
585585
}
586586
}
587587
}

server/src/main/java/org/elasticsearch/cluster/routing/TsidBuilder.java

Lines changed: 73 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@
1313
import org.elasticsearch.common.hash.BufferedMurmur3Hasher;
1414
import org.elasticsearch.common.hash.MurmurHash3;
1515
import org.elasticsearch.common.util.ByteUtils;
16+
import org.elasticsearch.common.util.FeatureFlag;
17+
import org.elasticsearch.index.IndexVersion;
18+
import org.elasticsearch.index.IndexVersions;
1619
import org.elasticsearch.index.mapper.RoutingPathFields;
1720
import org.elasticsearch.xcontent.XContentString;
1821

@@ -31,12 +34,17 @@
3134
*/
3235
public class TsidBuilder {
3336

37+
public static final boolean SINGLE_PREFIX_BYTE_ENABLED = new FeatureFlag("tsid_layout_single_prefix_byte").isEnabled();
38+
3439
/**
3540
* The maximum number of fields to use for the value similarity part of the TSID.
3641
* This is a trade-off between clustering similar time series together and the size of the TSID.
3742
* More fields improve clustering but also increase the size of the TSID.
3843
*/
3944
private static final int MAX_TSID_VALUE_SIMILARITY_FIELDS = 4;
45+
static final String OTEL_METRIC_FIELD = "_metric_names_hash";
46+
static final String PROMETHEUS_LABEL_FIELD = "labels.__name__";
47+
4048
private final BufferedMurmur3Hasher murmur3Hasher = new BufferedMurmur3Hasher(0L);
4149

4250
private final List<Dimension> dimensions;
@@ -215,6 +223,18 @@ public MurmurHash3.Hash128 hash() {
215223
return murmur3Hasher.digestHash();
216224
}
217225

226+
public final BytesRef buildTsid(IndexVersion indexVersion) {
227+
if (useSingleBytePrefixLayout(indexVersion)) {
228+
return buildSingleBytePrefixTsid();
229+
} else {
230+
return buildMultiBytePrefixTsid();
231+
}
232+
}
233+
234+
public static boolean useSingleBytePrefixLayout(IndexVersion indexVersion) {
235+
return SINGLE_PREFIX_BYTE_ENABLED && indexVersion.onOrAfter(IndexVersions.TSID_SINGLE_PREFIX_BYTE_FEATURE_FLAG);
236+
}
237+
218238
/**
219239
* Builds a time series identifier (TSID) based on the dimensions added to this builder.
220240
* This is a slight adaptation of {@link RoutingPathFields#buildHash()} but creates shorter tsids.
@@ -233,18 +253,16 @@ public MurmurHash3.Hash128 hash() {
233253
* This is to avoid hash collisions.
234254
* </li>
235255
* </ul>
236-
*
237-
* @return a BytesRef containing the TSID
238-
* @throws IllegalArgumentException if no dimensions have been added
256+
* Note that this layout has been used with indices created before {@link IndexVersions#TSID_SINGLE_PREFIX_BYTE_FEATURE_FLAG}
239257
*/
240-
public BytesRef buildTsid() {
258+
private BytesRef buildMultiBytePrefixTsid() {
241259
throwIfEmpty();
260+
Collections.sort(dimensions);
261+
242262
int numberOfValues = Math.min(MAX_TSID_VALUE_SIMILARITY_FIELDS, dimensions.size());
243263
byte[] hash = new byte[1 + numberOfValues + 16];
244264
int index = 0;
245265

246-
Collections.sort(dimensions);
247-
248266
MurmurHash3.Hash128 hashBuffer = new MurmurHash3.Hash128();
249267
murmur3Hasher.reset();
250268
// similarity hash for dimension names
@@ -280,6 +298,55 @@ public BytesRef buildTsid() {
280298
return new BytesRef(hash, 0, index);
281299
}
282300

301+
private BytesRef buildSingleBytePrefixTsid() {
302+
throwIfEmpty();
303+
Collections.sort(dimensions);
304+
305+
final byte[] tsid = new byte[16];
306+
murmur3Hasher.reset();
307+
MurmurHash3.Hash128 hashBuffer = new MurmurHash3.Hash128();
308+
// hash of all dimension names and values for uniqueness
309+
for (Dimension dim : dimensions) {
310+
murmur3Hasher.addLongs(dim.pathHash.h1, dim.pathHash.h2, dim.valueHash.h1, dim.valueHash.h2);
311+
}
312+
murmur3Hasher.digestHash(hashBuffer);
313+
ByteUtils.writeLongLE(hashBuffer.h2, tsid, 0);
314+
ByteUtils.writeLongLE(hashBuffer.h1, tsid, 8);
315+
tsid[0] = computeSingleBytePrefix(hashBuffer);
316+
return new BytesRef(tsid);
317+
}
318+
319+
private byte computeSingleBytePrefix(MurmurHash3.Hash128 scratch) {
320+
murmur3Hasher.reset();
321+
Dimension otelMetric = findDimension(dimensions, OTEL_METRIC_FIELD);
322+
if (otelMetric != null) {
323+
murmur3Hasher.addLong(otelMetric.valueHash().h1 ^ otelMetric.valueHash().h2);
324+
return (byte) murmur3Hasher.digestHash(scratch).h1;
325+
}
326+
Dimension prometheusLabel = findDimension(dimensions, PROMETHEUS_LABEL_FIELD);
327+
if (prometheusLabel != null) {
328+
murmur3Hasher.addLong(prometheusLabel.valueHash().h1 ^ prometheusLabel.valueHash().h2);
329+
return (byte) murmur3Hasher.digestHash(scratch).h1;
330+
}
331+
// similarity hash for dimension names
332+
for (Dimension dim : dimensions) {
333+
murmur3Hasher.addLong(dim.pathHash.h1 ^ dim.pathHash.h2);
334+
}
335+
return (byte) murmur3Hasher.digestHash(scratch).h1;
336+
}
337+
338+
private static Dimension findDimension(List<Dimension> sortedDimensions, String name) {
339+
for (Dimension dim : sortedDimensions) {
340+
int cmp = dim.path.compareTo(name);
341+
if (cmp > 0) {
342+
return null;
343+
} else if (cmp == 0) {
344+
return dim;
345+
}
346+
}
347+
return null;
348+
}
349+
283350
private void throwIfEmpty() {
284351
if (dimensions.isEmpty()) {
285352
throw new IllegalArgumentException("Dimensions are empty");

server/src/main/java/org/elasticsearch/index/IndexVersions.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,7 @@ private static Version parseUnchecked(String version) {
238238
public static final IndexVersion FLATTENED_FIELD_NO_ROOT_DOC_VALUES = def(9_077_0_00, Version.LUCENE_10_4_0);
239239
public static final IndexVersion IGNORED_SOURCE_AS_DOC_VALUES = def(9_078_0_00, Version.LUCENE_10_4_0);
240240
public static final IndexVersion TIME_SERIES_USE_SYNTHETIC_ID_DEFAULT = def(9_079_0_00, Version.LUCENE_10_4_0);
241-
241+
public static final IndexVersion TSID_SINGLE_PREFIX_BYTE_FEATURE_FLAG = def(9_080_00_0, Version.LUCENE_10_4_0);
242242
/*
243243
* STOP! READ THIS FIRST! No, really,
244244
* ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _

server/src/test/java/org/elasticsearch/cluster/routing/IndexRoutingTests.java

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import org.elasticsearch.index.mapper.TsidExtractingIdFieldMapper;
2929
import org.elasticsearch.index.shard.ShardId;
3030
import org.elasticsearch.test.ESTestCase;
31+
import org.elasticsearch.test.index.IndexVersionUtils;
3132
import org.elasticsearch.xcontent.DeprecationHandler;
3233
import org.elasticsearch.xcontent.NamedXContentRegistry;
3334
import org.elasticsearch.xcontent.XContentType;
@@ -686,11 +687,15 @@ public void testRoutingPathBwc() throws IOException {
686687
public void testRoutingPathBwcAfterTsidBasedRouting() throws IOException {
687688
boolean useSyntheticId = IndexSettings.TSDB_SYNTHETIC_ID_FEATURE_FLAG && randomBoolean();
688689
TimeSeriesRoutingFixture fixture = indexRoutingForTimeSeriesDimensions(
689-
IndexVersion.current(),
690+
IndexVersionUtils.randomVersionBetween(
691+
IndexVersions.TIME_SERIES_USE_SYNTHETIC_ID_94,
692+
IndexVersionUtils.getPreviousVersion(IndexVersions.TSID_SINGLE_PREFIX_BYTE_FEATURE_FLAG)
693+
),
690694
8,
691695
"dim.*,other.*,top",
692696
useSyntheticId
693697
);
698+
assertFalse(TsidBuilder.useSingleBytePrefixLayout(fixture.routing.creationVersion));
694699
/*
695700
* These are the expected shards after tsid based routing. If these values change
696701
* time series will be routed to unexpected shards. You may modify
@@ -712,6 +717,28 @@ public void testRoutingPathBwcAfterTsidBasedRouting() throws IOException {
712717
assertIndexShard(fixture, Map.of("dim.a", "true"), 6);
713718
}
714719

720+
public void testRoutingPathWithSingleBytePrefixTsid() throws IOException {
721+
boolean useSyntheticId = IndexSettings.TSDB_SYNTHETIC_ID_FEATURE_FLAG && randomBoolean();
722+
TimeSeriesRoutingFixture fixture = indexRoutingForTimeSeriesDimensions(
723+
IndexVersionUtils.randomVersionOnOrAfter(IndexVersions.TSID_SINGLE_PREFIX_BYTE_FEATURE_FLAG),
724+
8,
725+
"dim.*,other.*,top",
726+
useSyntheticId
727+
);
728+
assumeTrue("require single-byte-prefix tsid", TsidBuilder.useSingleBytePrefixLayout(fixture.routing.creationVersion));
729+
assertIndexShard(fixture, Map.of("dim", Map.of("a", "a")), 5);
730+
assertIndexShard(fixture, Map.of("dim", Map.of("a", "b")), 3);
731+
assertIndexShard(fixture, Map.of("dim", Map.of("c", "d")), 7);
732+
assertIndexShard(fixture, Map.of("other", Map.of("a", "a")), 1);
733+
assertIndexShard(fixture, Map.of("top", "a"), 6);
734+
assertIndexShard(fixture, Map.of("dim", Map.of("c", "d"), "top", "b"), 0);
735+
assertIndexShard(fixture, Map.of("dim.a", "a"), 5);
736+
assertIndexShard(fixture, Map.of("dim.a", 1), 2);
737+
assertIndexShard(fixture, Map.of("dim.a", "1"), 0);
738+
assertIndexShard(fixture, Map.of("dim.a", true), 3);
739+
assertIndexShard(fixture, Map.of("dim.a", "true"), 1);
740+
}
741+
715742
public void testRoutingPathReadWithInvalidString() {
716743
int shards = between(2, 1000);
717744
IndexRouting indexRouting = indexRoutingForPath(shards, "foo").routing();
@@ -1236,7 +1263,7 @@ private int expectedShard(IndexRouting routing, List<Object> keysAndValues, int
12361263
*/
12371264
private int hash(IndexRouting routing, List<Object> keysAndValues) {
12381265
if (routing instanceof IndexRouting.ExtractFromSource.ForIndexDimensions) {
1239-
return tsidBasedRoutingHash(keysAndValues);
1266+
return tsidBasedRoutingHash(keysAndValues, routing.creationVersion);
12401267
}
12411268
return legacyRoutingHash(keysAndValues);
12421269
}
@@ -1252,7 +1279,7 @@ private int legacyRoutingHash(List<Object> keysAndValues) {
12521279
return hash;
12531280
}
12541281

1255-
private static int tsidBasedRoutingHash(List<Object> keysAndValues) {
1282+
private static int tsidBasedRoutingHash(List<Object> keysAndValues, IndexVersion indexVersion) {
12561283
TsidBuilder tsidBuilder = new TsidBuilder();
12571284
for (int i = 0; i < keysAndValues.size(); i += 2) {
12581285
String key = keysAndValues.get(i).toString();
@@ -1271,6 +1298,6 @@ private static int tsidBasedRoutingHash(List<Object> keysAndValues) {
12711298
throw new IllegalArgumentException("Unsupported value type for TSID routing: " + value.getClass());
12721299
}
12731300
}
1274-
return StringHelper.murmurhash3_x86_32(tsidBuilder.buildTsid(), 0);
1301+
return StringHelper.murmurhash3_x86_32(tsidBuilder.buildTsid(indexVersion), 0);
12751302
}
12761303
}

0 commit comments

Comments
 (0)