elastic · felixbarny · Aug 22, 2025 · Aug 20, 2025 · Aug 20, 2025 · Aug 22, 2025
@@ -0,0 +1,107 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.common.hash;
+
+import org.apache.lucene.util.UnicodeUtil;
+import org.elasticsearch.common.util.ByteUtils;
+
+/**
+ * A {@link Murmur3Hasher} with a reusable scratch buffer for feeding strings and longs into the hash.
+ * Values are serialized into the scratch buffer and then handed to {@code update}, instead of allocating
+ * a temporary byte array for every value.
+ */
+public class BufferedMurmur3Hasher extends Murmur3Hasher {
+
+    /** Default scratch buffer size in bytes: 32 code points at up to 4 UTF-8 bytes each. */
+    public static final int DEFAULT_BUFFER_SIZE = 32 * 4;
+
+    /** Smallest allowed buffer size: the four-argument {@code addLongs} writes this many bytes. */
+    private static final int MIN_BUFFER_SIZE = 4 * Long.BYTES;
+
+    /**
+     * Scratch space that receives the serialized form of a value before it is passed to the hasher.
+     * Ideally large enough for the longest UTF-8 encoded string that will be hashed, so that it never
+     * has to be replaced by a bigger array, yet small enough not to waste memory on short keys.
+     */
+    private byte[] buffer;
+
+    /**
+     * Creates a hasher with the given seed and a buffer of {@link #DEFAULT_BUFFER_SIZE} bytes.
+     *
+     * @param seed the seed for the Murmur3 hash function
+     */
+    public BufferedMurmur3Hasher(long seed) {
+        this(seed, DEFAULT_BUFFER_SIZE);
+    }
+
+    /**
+     * Creates a hasher with the given seed and initial buffer size.
+     *
+     * @param seed        the seed for the Murmur3 hash function
+     * @param bufferSize  the initial size of the buffer in bytes, must be at least 32
+     * @throws IllegalArgumentException if {@code bufferSize} is smaller than 32
+     */
+    public BufferedMurmur3Hasher(long seed, int bufferSize) {
+        super(seed);
+        this.buffer = allocateBuffer(bufferSize);
+    }
+
+    /**
+     * Feeds the UTF-8 encoding of a string into the hasher.
+     * The buffer is replaced by a larger one first if it cannot hold the worst-case encoded length.
+     *
+     * @param value the string value to add
+     */
+    public void addString(String value) {
+        final int charCount = value.length();
+        growBufferTo(UnicodeUtil.maxUTF8Length(charCount));
+        final int encodedLength = UnicodeUtil.UTF16toUTF8(value, 0, charCount, buffer);
+        update(buffer, 0, encodedLength);
+    }
+
+    /**
+     * Feeds one long into the hasher, serialized in little-endian byte order.
+     *
+     * @param value the long value to add
+     */
+    public void addLong(long value) {
+        final byte[] scratch = buffer;
+        ByteUtils.writeLongLE(value, scratch, 0);
+        update(scratch, 0, Long.BYTES);
+    }
+
+    /**
+     * Feeds two longs into the hasher with a single update.
+     * Each long is serialized in little-endian byte order, {@code v1} first.
+     *
+     * @param v1 the first long value to add
+     * @param v2 the second long value to add
+     */
+    public void addLongs(long v1, long v2) {
+        final byte[] scratch = buffer;
+        ByteUtils.writeLongLE(v1, scratch, 0);
+        ByteUtils.writeLongLE(v2, scratch, Long.BYTES);
+        update(scratch, 0, 2 * Long.BYTES);
+    }
+
+    /**
+     * Feeds four longs into the hasher with a single update.
+     * Each long is serialized in little-endian byte order, in argument order.
+     *
+     * @param v1 the first long value to add
+     * @param v2 the second long value to add
+     * @param v3 the third long value to add
+     * @param v4 the fourth long value to add
+     */
+    public void addLongs(long v1, long v2, long v3, long v4) {
+        final byte[] scratch = buffer;
+        ByteUtils.writeLongLE(v1, scratch, 0);
+        ByteUtils.writeLongLE(v2, scratch, Long.BYTES);
+        ByteUtils.writeLongLE(v3, scratch, 2 * Long.BYTES);
+        ByteUtils.writeLongLE(v4, scratch, 3 * Long.BYTES);
+        update(scratch, 0, 4 * Long.BYTES);
+    }
+
+    /** Validates the requested size and allocates the initial scratch buffer. */
+    private static byte[] allocateBuffer(int bufferSize) {
+        if (bufferSize >= MIN_BUFFER_SIZE) {
+            return new byte[bufferSize];
+        }
+        throw new IllegalArgumentException("Buffer size must be at least 32 bytes");
+    }
+
+    /** Swaps in a fresh array of exactly {@code minLength} bytes when the current buffer is shorter; old content is dropped. */
+    private void growBufferTo(int minLength) {
+        if (minLength <= buffer.length) {
+            return;
+        }
+        buffer = new byte[minLength];
+    }
+}
@@ -12,7 +12,6 @@
 import org.elasticsearch.common.util.ByteUtils;
 
 import java.math.BigInteger;
-import java.util.Objects;
 
 /**
  * MurmurHash3 hashing functions.
@@ -29,6 +28,13 @@ public static class Hash128 {
         /** higher 64 bits part **/
         public long h2;
 
+        public Hash128() {} // no-arg form: assigns nothing to h1/h2
+
+        public Hash128(long h1, long h2) { // sets both parts of the hash in one call
+            this.h1 = h1; // NOTE(review): presumably the lower 64 bits part - confirm against the field declaration
+            this.h2 = h2; // the higher 64 bits part
+        }
+
         public byte[] getBytes() {
             byte[] hash = new byte[16];
             getBytes(hash, 0);
@@ -49,12 +55,12 @@ public boolean equals(Object other) {
                 return false;
             }
             Hash128 that = (Hash128) other;
-            return Objects.equals(this.h1, that.h1) && Objects.equals(this.h2, that.h2);
+            return this.h1 == that.h1 && this.h2 == that.h2;
         }
 
         @Override
         public int hashCode() {
-            return Objects.hash(h1, h2);
+            return (int) (h1 ^ h2); // XOR the two long parts; the int cast keeps only the low 32 bits of the result
         }
 
         @Override

diff --git a/server/src/test/java/org/elasticsearch/common/hash/BufferedMurmur3HasherTests.java b/server/src/test/java/org/elasticsearch/common/hash/BufferedMurmur3HasherTests.java
@@ -0,0 +1,77 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.common.hash;
+
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.util.ByteUtils;
+import org.elasticsearch.test.ESTestCase;
+
+/**
+ * Checks that {@link BufferedMurmur3Hasher} produces the same digest as a plain {@link Murmur3Hasher}
+ * that is fed the equivalent bytes directly.
+ */
+public class BufferedMurmur3HasherTests extends ESTestCase {
+
+    private final BufferedMurmur3Hasher buffered = new BufferedMurmur3Hasher(0);
+    private final Murmur3Hasher reference = new Murmur3Hasher(0);
+
+    public void testAddString() {
+        final String input = randomUnicodeOfLengthBetween(10, 100);
+        buffered.addString(input);
+
+        final BytesRef utf8 = new BytesRef(input);
+        reference.update(utf8.bytes, utf8.offset, utf8.length);
+
+        assertSameDigest();
+    }
+
+    public void testConstructorWithInvalidBufferSize() {
+        final IllegalArgumentException failure = expectThrows(
+            IllegalArgumentException.class,
+            () -> new BufferedMurmur3Hasher(0, 31)
+        );
+        assertEquals("Buffer size must be at least 32 bytes", failure.getMessage());
+    }
+
+    public void testAddLong() {
+        final long value = randomLong();
+        buffered.addLong(value);
+
+        updateReference(value);
+
+        assertSameDigest();
+    }
+
+    public void testAddTwoLongs() {
+        final long first = randomLong();
+        final long second = randomLong();
+
+        buffered.addLongs(first, second);
+
+        updateReference(first, second);
+
+        assertSameDigest();
+    }
+
+    public void testAddFourLongs() {
+        final long first = randomLong();
+        final long second = randomLong();
+        final long third = randomLong();
+        final long fourth = randomLong();
+
+        buffered.addLongs(first, second, third, fourth);
+
+        updateReference(first, second, third, fourth);
+
+        assertSameDigest();
+    }
+
+    /** Feeds each value into the reference hasher as its own little-endian 8-byte update, in argument order. */
+    private void updateReference(long... values) {
+        for (long value : values) {
+            final byte[] encoded = new byte[Long.BYTES];
+            ByteUtils.writeLongLE(value, encoded, 0);
+            reference.update(encoded, 0, Long.BYTES);
+        }
+    }
+
+    /** Asserts that the buffered hasher's digest equals the reference hasher's digest. */
+    private void assertSameDigest() {
+        assertEquals(reference.digestHash(), buffered.digestHash());
+    }
+}