Skip to content

Commit eeff618

Browse files
Save allocations and copying in TimeSeriesIdFieldMapper#buildTsidHash (#105582)
There is no point in copying the bytes multiple times here. Just presize the array correctly (wasting at most a single byte) and serialize into it. This saves a couple of GB of allocations during the TSDB Rally track indexing step.
1 parent 9e5fe19 commit eeff618

File tree

2 files changed

+18
-9
lines changed

2 files changed

+18
-9
lines changed

server/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ public void writeVInt(int i) throws IOException {
216216
writeBytes(buffer, 0, index);
217217
}
218218

219-
private static int putVInt(byte[] buffer, int i, int off) {
219+
public static int putVInt(byte[] buffer, int i, int off) {
220220
if (Integer.numberOfLeadingZeros(i) >= 25) {
221221
buffer[off] = (byte) i;
222222
return 1;

server/src/main/java/org/elasticsearch/index/mapper/TimeSeriesIdFieldMapper.java

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import org.elasticsearch.common.hash.MurmurHash3;
2020
import org.elasticsearch.common.io.stream.BytesStreamOutput;
2121
import org.elasticsearch.common.io.stream.StreamInput;
22+
import org.elasticsearch.common.io.stream.StreamOutput;
2223
import org.elasticsearch.common.network.NetworkAddress;
2324
import org.elasticsearch.common.util.ByteUtils;
2425
import org.elasticsearch.core.Nullable;
@@ -208,6 +209,12 @@ public BytesReference buildLegacyTsid() throws IOException {
208209
}
209210
}
210211

212+
private static final int MAX_HASH_LEN_BYTES = 2;
213+
214+
static {
215+
assert MAX_HASH_LEN_BYTES == StreamOutput.putVInt(new byte[2], tsidHashLen(MAX_DIMENSIONS), 0);
216+
}
217+
211218
/**
212219
* Here we build the hash of the tsid using a similarity function so that we have a result
213220
* with the following pattern:
@@ -219,11 +226,13 @@ public BytesReference buildLegacyTsid() throws IOException {
219226
* The idea is to be able to place 'similar' time series close to each other. Two time series
220227
* are considered 'similar' if they share the same dimensions (names and values).
221228
*/
222-
public BytesReference buildTsidHash() throws IOException {
229+
public BytesReference buildTsidHash() {
223230
// NOTE: hash all dimension field names
224231
int numberOfDimensions = Math.min(MAX_DIMENSIONS, dimensions.size());
225-
int tsidHashIndex = 0;
226-
byte[] tsidHash = new byte[16 + 16 + 4 * numberOfDimensions];
232+
int len = tsidHashLen(numberOfDimensions);
233+
// either one or two bytes are occupied by the vint since we're bounded by #MAX_DIMENSIONS
234+
byte[] tsidHash = new byte[MAX_HASH_LEN_BYTES + len];
235+
int tsidHashIndex = StreamOutput.putVInt(tsidHash, len, 0);
227236

228237
tsidHasher.reset();
229238
for (final Dimension dimension : dimensions) {
@@ -258,11 +267,11 @@ public BytesReference buildTsidHash() throws IOException {
258267
}
259268
tsidHashIndex = writeHash128(tsidHasher.digestHash(), tsidHash, tsidHashIndex);
260269

261-
assert tsidHashIndex == tsidHash.length;
262-
try (BytesStreamOutput out = new BytesStreamOutput(tsidHash.length)) {
263-
out.writeBytesRef(new BytesRef(tsidHash, 0, tsidHash.length));
264-
return out.bytes();
265-
}
270+
return new BytesArray(tsidHash, 0, tsidHashIndex);
271+
}
272+
273+
private static int tsidHashLen(int numberOfDimensions) {
274+
return 16 + 16 + 4 * numberOfDimensions;
266275
}
267276

268277
private int writeHash128(final MurmurHash3.Hash128 hash128, byte[] buffer, int tsidHashIndex) {

0 commit comments

Comments (0)