Skip to content

Commit eeff618

Browse files
Save allocations and copying in TimeSeriesIdFieldMapper#buildTsidHash (#105582)
There is no point in copying the bytes multiple times here. Just presize the array correctly (wasting at most a single byte) and serialize into it. This saves a couple of GB of allocations during the TSDB Rally track indexing step.
1 parent 9e5fe19 commit eeff618

File tree

2 files changed

+18
-9
lines changed

2 files changed

+18
-9
lines changed

server/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ public void writeVInt(int i) throws IOException {
216216
writeBytes(buffer, 0, index);
217217
}
218218

219-
private static int putVInt(byte[] buffer, int i, int off) {
219+
public static int putVInt(byte[] buffer, int i, int off) {
220220
if (Integer.numberOfLeadingZeros(i) >= 25) {
221221
buffer[off] = (byte) i;
222222
return 1;

server/src/main/java/org/elasticsearch/index/mapper/TimeSeriesIdFieldMapper.java

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import org.elasticsearch.common.hash.MurmurHash3;
2020
import org.elasticsearch.common.io.stream.BytesStreamOutput;
2121
import org.elasticsearch.common.io.stream.StreamInput;
22+
import org.elasticsearch.common.io.stream.StreamOutput;
2223
import org.elasticsearch.common.network.NetworkAddress;
2324
import org.elasticsearch.common.util.ByteUtils;
2425
import org.elasticsearch.core.Nullable;
@@ -208,6 +209,12 @@ public BytesReference buildLegacyTsid() throws IOException {
208209
}
209210
}
210211

212+
private static final int MAX_HASH_LEN_BYTES = 2;
213+
214+
static {
215+
assert MAX_HASH_LEN_BYTES == StreamOutput.putVInt(new byte[2], tsidHashLen(MAX_DIMENSIONS), 0);
216+
}
217+
211218
/**
212219
* Here we build the hash of the tsid using a similarity function so that we have a result
213220
* with the following pattern:
@@ -219,11 +226,13 @@ public BytesReference buildLegacyTsid() throws IOException {
219226
* The idea is to be able to place 'similar' time series close to each other. Two time series
220227
* are considered 'similar' if they share the same dimensions (names and values).
221228
*/
222-
public BytesReference buildTsidHash() throws IOException {
229+
public BytesReference buildTsidHash() {
223230
// NOTE: hash all dimension field names
224231
int numberOfDimensions = Math.min(MAX_DIMENSIONS, dimensions.size());
225-
int tsidHashIndex = 0;
226-
byte[] tsidHash = new byte[16 + 16 + 4 * numberOfDimensions];
232+
int len = tsidHashLen(numberOfDimensions);
233+
// either one or two bytes are occupied by the vint since we're bounded by #MAX_DIMENSIONS
234+
byte[] tsidHash = new byte[MAX_HASH_LEN_BYTES + len];
235+
int tsidHashIndex = StreamOutput.putVInt(tsidHash, len, 0);
227236

228237
tsidHasher.reset();
229238
for (final Dimension dimension : dimensions) {
@@ -258,11 +267,11 @@ public BytesReference buildTsidHash() throws IOException {
258267
}
259268
tsidHashIndex = writeHash128(tsidHasher.digestHash(), tsidHash, tsidHashIndex);
260269

261-
assert tsidHashIndex == tsidHash.length;
262-
try (BytesStreamOutput out = new BytesStreamOutput(tsidHash.length)) {
263-
out.writeBytesRef(new BytesRef(tsidHash, 0, tsidHash.length));
264-
return out.bytes();
265-
}
270+
return new BytesArray(tsidHash, 0, tsidHashIndex);
271+
}
272+
273+
private static int tsidHashLen(int numberOfDimensions) {
274+
return 16 + 16 + 4 * numberOfDimensions;
266275
}
267276

268277
private int writeHash128(final MurmurHash3.Hash128 hash128, byte[] buffer, int tsidHashIndex) {

0 commit comments

Comments (0)