|
| 1 | +/* |
| 2 | + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one |
| 3 | + * or more contributor license agreements. Licensed under the "Elastic License |
| 4 | + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side |
| 5 | + * Public License v 1"; you may not use this file except in compliance with, at |
| 6 | + * your election, the "Elastic License 2.0", the "GNU Affero General Public |
| 7 | + * License v3.0 only", or the "Server Side Public License, v 1". |
| 8 | + */ |
| 9 | + |
| 10 | +package org.elasticsearch.cluster.routing; |
| 11 | + |
| 12 | +import org.apache.lucene.util.BytesRef; |
| 13 | +import org.elasticsearch.common.hash.Murmur3Hasher; |
| 14 | +import org.elasticsearch.common.hash.MurmurHash3; |
| 15 | +import org.elasticsearch.common.util.ByteUtils; |
| 16 | +import org.elasticsearch.index.mapper.RoutingPathFields; |
| 17 | +import org.elasticsearch.xcontent.XContentString; |
| 18 | + |
| 19 | +import java.util.ArrayList; |
| 20 | +import java.util.Collections; |
| 21 | +import java.util.List; |
| 22 | + |
| 23 | +/** |
| 24 | + * A builder for creating time series identifiers (TSIDs) based on dimensions. |
| 25 | + * This class allows adding various types of dimensions (int, long, double, boolean, string, bytes) |
| 26 | + * and builds a TSID that is a hash of the dimension names and values. |
| 27 | + * Important properties of TSIDs are that they cluster similar time series together, |
| 28 | + * which helps with storage efficiency, |
| 29 | + * and that they minimize the risk of hash collisions. |
| 30 | + * At the same time, they should be short to be efficient in terms of storage and processing. |
| 31 | + */ |
| 32 | +public class TsidBuilder { |
| 33 | + |
| 34 | + private static final int MAX_TSID_VALUE_FIELDS = 16; |
| 35 | + private final Murmur3Hasher murmur3Hasher = new Murmur3Hasher(0L); |
| 36 | + |
| 37 | + private final List<Dimension> dimensions = new ArrayList<>(); |
| 38 | + |
| 39 | + public static TsidBuilder newBuilder() { |
| 40 | + return new TsidBuilder(); |
| 41 | + } |
| 42 | + |
| 43 | + /** |
| 44 | + * Adds an integer dimension to the TSID. |
| 45 | + * |
| 46 | + * @param path the path of the dimension |
| 47 | + * @param value the integer value of the dimension |
| 48 | + * @return the TsidBuilder instance for method chaining |
| 49 | + */ |
| 50 | + public TsidBuilder addIntDimension(String path, int value) { |
| 51 | + addDimension(path, new MurmurHash3.Hash128(1, value)); |
| 52 | + return this; |
| 53 | + } |
| 54 | + |
| 55 | + /** |
| 56 | + * Adds a long dimension to the TSID. |
| 57 | + * |
| 58 | + * @param path the path of the dimension |
| 59 | + * @param value the long value of the dimension |
| 60 | + * @return the TsidBuilder instance for method chaining |
| 61 | + */ |
| 62 | + public TsidBuilder addLongDimension(String path, long value) { |
| 63 | + addDimension(path, new MurmurHash3.Hash128(1, value)); |
| 64 | + return this; |
| 65 | + } |
| 66 | + |
| 67 | + /** |
| 68 | + * Adds a double dimension to the TSID. |
| 69 | + * |
| 70 | + * @param path the path of the dimension |
| 71 | + * @param value the double value of the dimension |
| 72 | + * @return the TsidBuilder instance for method chaining |
| 73 | + */ |
| 74 | + public TsidBuilder addDoubleDimension(String path, double value) { |
| 75 | + addDimension(path, new MurmurHash3.Hash128(2, Double.doubleToLongBits(value))); |
| 76 | + return this; |
| 77 | + } |
| 78 | + |
| 79 | + /** |
| 80 | + * Adds a boolean dimension to the TSID. |
| 81 | + * |
| 82 | + * @param path the path of the dimension |
| 83 | + * @param value the boolean value of the dimension |
| 84 | + * @return the TsidBuilder instance for method chaining |
| 85 | + */ |
| 86 | + public TsidBuilder addBooleanDimension(String path, boolean value) { |
| 87 | + addDimension(path, new MurmurHash3.Hash128(3, value ? 1 : 0)); |
| 88 | + return this; |
| 89 | + } |
| 90 | + |
| 91 | + /** |
| 92 | + * Adds a string dimension to the TSID. |
| 93 | + * |
| 94 | + * @param path the path of the dimension |
| 95 | + * @param value the string value of the dimension |
| 96 | + * @return the TsidBuilder instance for method chaining |
| 97 | + */ |
| 98 | + public TsidBuilder addStringDimension(String path, String value) { |
| 99 | + addStringDimension(path, new BytesRef(value)); |
| 100 | + return this; |
| 101 | + } |
| 102 | + |
| 103 | + private void addStringDimension(String path, BytesRef value) { |
| 104 | + addStringDimension(path, value.bytes, value.offset, value.length); |
| 105 | + } |
| 106 | + |
| 107 | + /** |
| 108 | + * Adds a string dimension to the TSID. |
| 109 | + * |
| 110 | + * @param path the path of the dimension |
| 111 | + * @param value the UTF8Bytes value of the dimension |
| 112 | + * @return the TsidBuilder instance for method chaining |
| 113 | + */ |
| 114 | + public TsidBuilder addStringDimension(String path, XContentString.UTF8Bytes value) { |
| 115 | + addStringDimension(path, value.bytes(), value.offset(), value.length()); |
| 116 | + return this; |
| 117 | + } |
| 118 | + |
| 119 | + /** |
| 120 | + * Adds a string dimension to the TSID using a byte array. |
| 121 | + * The value is provided as UTF-8 encoded bytes[]. |
| 122 | + * |
| 123 | + * @param path the path of the dimension |
| 124 | + * @param utf8Bytes the UTF-8 encoded bytes of the string value |
| 125 | + * @param offset the offset in the byte array where the string starts |
| 126 | + * @param length the length of the string in bytes |
| 127 | + * @return the TsidBuilder instance for method chaining |
| 128 | + */ |
| 129 | + public TsidBuilder addStringDimension(String path, byte[] utf8Bytes, int offset, int length) { |
| 130 | + murmur3Hasher.reset(); |
| 131 | + murmur3Hasher.update(utf8Bytes, offset, length); |
| 132 | + MurmurHash3.Hash128 hash128 = murmur3Hasher.digestHash(); |
| 133 | + addDimension(path, hash128); |
| 134 | + return this; |
| 135 | + } |
| 136 | + |
| 137 | + /** |
| 138 | + * Adds a value to the TSID using a funnel. |
| 139 | + * This allows for complex types to be added to the TSID. |
| 140 | + * |
| 141 | + * @param value the value to add |
| 142 | + * @param funnel the funnel that describes how to add the value |
| 143 | + * @param <T> the type of the value |
| 144 | + * @return the TsidBuilder instance for method chaining |
| 145 | + */ |
| 146 | + public <T> TsidBuilder add(T value, TsidFunnel<T> funnel) { |
| 147 | + funnel.add(value, this); |
| 148 | + return this; |
| 149 | + } |
| 150 | + |
| 151 | + /** |
| 152 | + * Adds a value to the TSID using a funnel that can throw exceptions. |
| 153 | + * This allows for complex types to be added to the TSID. |
| 154 | + * |
| 155 | + * @param value the value to add |
| 156 | + * @param funnel the funnel that describes how to add the value |
| 157 | + * @param <T> the type of the value |
| 158 | + * @param <E> the type of exception that can be thrown |
| 159 | + * @return the TsidBuilder instance for method chaining |
| 160 | + * @throws E if an exception occurs while adding the value |
| 161 | + */ |
| 162 | + public <T, E extends Exception> TsidBuilder add(T value, ThrowingTsidFunnel<T, E> funnel) throws E { |
| 163 | + funnel.add(value, this); |
| 164 | + return this; |
| 165 | + } |
| 166 | + |
| 167 | + private void addDimension(String path, MurmurHash3.Hash128 valueHash) { |
| 168 | + murmur3Hasher.reset(); |
| 169 | + addString(murmur3Hasher, path); |
| 170 | + MurmurHash3.Hash128 pathHash = murmur3Hasher.digestHash(); |
| 171 | + dimensions.add(new Dimension(path, pathHash, valueHash, dimensions.size())); |
| 172 | + } |
| 173 | + |
| 174 | + /** |
| 175 | + * Adds all dimensions from another TsidBuilder to this one. |
| 176 | + * If the other builder is null or has no dimensions, this method does nothing. |
| 177 | + * |
| 178 | + * @param other the other TsidBuilder to add dimensions from |
| 179 | + * @return this TsidBuilder instance for method chaining |
| 180 | + */ |
| 181 | + public TsidBuilder addAll(TsidBuilder other) { |
| 182 | + if (other == null || other.dimensions.isEmpty()) { |
| 183 | + return this; |
| 184 | + } |
| 185 | + dimensions.addAll(other.dimensions); |
| 186 | + return this; |
| 187 | + } |
| 188 | + |
| 189 | + /** |
| 190 | + * Computes the hash of the dimensions added to this builder. |
| 191 | + * The hash is a 128-bit value that is computed based on the dimension names and values. |
| 192 | + * |
| 193 | + * @return a HashValue128 representing the hash of the dimensions |
| 194 | + * @throws IllegalArgumentException if no dimensions have been added |
| 195 | + */ |
| 196 | + public MurmurHash3.Hash128 hash() { |
| 197 | + throwIfEmpty(); |
| 198 | + Collections.sort(dimensions); |
| 199 | + murmur3Hasher.reset(); |
| 200 | + for (Dimension dim : dimensions) { |
| 201 | + addLongs(murmur3Hasher, dim.pathHash.h1, dim.pathHash.h2, dim.valueHash.h1, dim.valueHash.h2); |
| 202 | + } |
| 203 | + return murmur3Hasher.digestHash(); |
| 204 | + } |
| 205 | + |
| 206 | + /** |
| 207 | + * Builds a time series identifier (TSID) based on the dimensions added to this builder. |
| 208 | + * This is a slight adaptation of {@link RoutingPathFields#buildHash()} but creates shorter tsids. |
| 209 | + * The TSID is a hash that includes: |
| 210 | + * <ul> |
| 211 | + * <li> |
| 212 | + * A hash of the dimension field names (4 bytes). |
| 213 | + * This is to cluster time series that are using the same dimensions together, which makes the encodings more effective. |
| 214 | + * </li> |
| 215 | + * <li> |
| 216 | + * A hash of the dimension field values (1 byte each, up to a maximum of 16 fields). |
| 217 | + * This is to cluster time series with similar values together, also helping with making encodings more effective. |
| 218 | + * </li> |
| 219 | + * <li> |
| 220 | + * A hash of all names and values combined (16 bytes). |
| 221 | + * This is to avoid hash collisions. |
| 222 | + * </li> |
| 223 | + * </ul> |
| 224 | + * |
| 225 | + * @return a BytesRef containing the TSID |
| 226 | + * @throws IllegalArgumentException if no dimensions have been added |
| 227 | + */ |
| 228 | + public BytesRef buildTsid() { |
| 229 | + throwIfEmpty(); |
| 230 | + int numberOfValues = Math.min(MAX_TSID_VALUE_FIELDS, dimensions.size()); |
| 231 | + byte[] hash = new byte[4 + numberOfValues + 16]; |
| 232 | + int index = 0; |
| 233 | + |
| 234 | + Collections.sort(dimensions); |
| 235 | + |
| 236 | + MurmurHash3.Hash128 hashBuffer = new MurmurHash3.Hash128(); |
| 237 | + murmur3Hasher.reset(); |
| 238 | + for (int i = 0; i < dimensions.size(); i++) { |
| 239 | + Dimension dim = dimensions.get(i); |
| 240 | + addLong(murmur3Hasher, dim.pathHash.h1 ^ dim.pathHash.h2); |
| 241 | + } |
| 242 | + ByteUtils.writeIntLE((int) murmur3Hasher.digestHash(hashBuffer).h1, hash, index); |
| 243 | + index += 4; |
| 244 | + |
| 245 | + // similarity hash for values |
| 246 | + String previousPath = null; |
| 247 | + for (int i = 0; i < numberOfValues; i++) { |
| 248 | + Dimension dim = dimensions.get(i); |
| 249 | + String path = dim.path(); |
| 250 | + if (path.equals(previousPath)) { |
| 251 | + // only add the first value for array fields |
| 252 | + continue; |
| 253 | + } |
| 254 | + MurmurHash3.Hash128 valueHash = dim.valueHash(); |
| 255 | + murmur3Hasher.reset(); |
| 256 | + addLong(murmur3Hasher, valueHash.h1 ^ valueHash.h2); |
| 257 | + hash[index++] = (byte) murmur3Hasher.digestHash(hashBuffer).h1; |
| 258 | + previousPath = path; |
| 259 | + } |
| 260 | + |
| 261 | + murmur3Hasher.reset(); |
| 262 | + for (int i = 0; i < dimensions.size(); i++) { |
| 263 | + Dimension dim = dimensions.get(i); |
| 264 | + addLongs(murmur3Hasher, dim.pathHash.h1, dim.pathHash.h2, dim.valueHash.h1, dim.valueHash.h2); |
| 265 | + } |
| 266 | + index = writeHash128(murmur3Hasher.digestHash(hashBuffer), hash, index); |
| 267 | + return new BytesRef(hash, 0, index); |
| 268 | + } |
| 269 | + |
| 270 | + private void throwIfEmpty() { |
| 271 | + if (dimensions.isEmpty()) { |
| 272 | + throw new IllegalArgumentException("Dimensions are empty"); |
| 273 | + } |
| 274 | + } |
| 275 | + |
| 276 | + private static int writeHash128(MurmurHash3.Hash128 hash128, byte[] buffer, int index) { |
| 277 | + ByteUtils.writeLongLE(hash128.h2, buffer, index); |
| 278 | + index += 8; |
| 279 | + ByteUtils.writeLongLE(hash128.h1, buffer, index); |
| 280 | + index += 8; |
| 281 | + return index; |
| 282 | + } |
| 283 | + |
| 284 | + /** |
| 285 | + * A functional interface that describes how objects of a complex type are added to a TSID. |
| 286 | + * |
| 287 | + * @param <T> the type of the value |
| 288 | + */ |
| 289 | + @FunctionalInterface |
| 290 | + public interface TsidFunnel<T> { |
| 291 | + void add(T value, TsidBuilder tsidBuilder); |
| 292 | + } |
| 293 | + |
| 294 | + /** |
| 295 | + * A functional interface that describes how objects of a complex type are added to a TSID, |
| 296 | + * allowing for exceptions to be thrown during the process. |
| 297 | + * |
| 298 | + * @param <T> the type of the value |
| 299 | + * @param <E> the type of exception that can be thrown |
| 300 | + */ |
| 301 | + @FunctionalInterface |
| 302 | + public interface ThrowingTsidFunnel<T, E extends Exception> { |
| 303 | + void add(T value, TsidBuilder tsidBuilder) throws E; |
| 304 | + } |
| 305 | + |
| 306 | + private record Dimension(String path, MurmurHash3.Hash128 pathHash, MurmurHash3.Hash128 valueHash, int insertionOrder) |
| 307 | + implements |
| 308 | + Comparable<Dimension> { |
| 309 | + @Override |
| 310 | + public int compareTo(Dimension o) { |
| 311 | + int i = path.compareTo(o.path); |
| 312 | + if (i != 0) return i; |
| 313 | + // ensures array values are in the order as they appear in the source |
| 314 | + return Integer.compare(insertionOrder, o.insertionOrder); |
| 315 | + } |
| 316 | + } |
| 317 | + |
| 318 | + // these methods will be replaced with a more optimized version when https://github.com/elastic/elasticsearch/pull/133226 is merged |
| 319 | + |
| 320 | + private static void addString(Murmur3Hasher murmur3Hasher, String path) { |
| 321 | + BytesRef bytesRef = new BytesRef(path); |
| 322 | + murmur3Hasher.update(bytesRef.bytes, bytesRef.offset, bytesRef.length); |
| 323 | + } |
| 324 | + |
| 325 | + private static void addLong(Murmur3Hasher murmur3Hasher, long value) { |
| 326 | + byte[] bytes = new byte[8]; |
| 327 | + ByteUtils.writeLongLE(value, bytes, 0); |
| 328 | + murmur3Hasher.update(bytes); |
| 329 | + } |
| 330 | + |
| 331 | + private static void addLongs(Murmur3Hasher murmur3Hasher, long v1, long v2) { |
| 332 | + byte[] bytes = new byte[16]; |
| 333 | + ByteUtils.writeLongLE(v1, bytes, 0); |
| 334 | + ByteUtils.writeLongLE(v2, bytes, 8); |
| 335 | + murmur3Hasher.update(bytes); |
| 336 | + } |
| 337 | + |
| 338 | + private static void addLongs(Murmur3Hasher murmur3Hasher, long v1, long v2, long v3, long v4) { |
| 339 | + byte[] bytes = new byte[32]; |
| 340 | + ByteUtils.writeLongLE(v1, bytes, 0); |
| 341 | + ByteUtils.writeLongLE(v2, bytes, 8); |
| 342 | + ByteUtils.writeLongLE(v3, bytes, 16); |
| 343 | + ByteUtils.writeLongLE(v4, bytes, 24); |
| 344 | + murmur3Hasher.update(bytes); |
| 345 | + } |
| 346 | +} |
0 commit comments