diff --git a/server/src/main/java/org/elasticsearch/common/util/IntNHash.java b/server/src/main/java/org/elasticsearch/common/util/IntNHash.java
new file mode 100644
index 0000000000000..0e3ebb74adba3
--- /dev/null
+++ b/server/src/main/java/org/elasticsearch/common/util/IntNHash.java
@@ -0,0 +1,163 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.common.util;
+
+import com.carrotsearch.hppc.BitMixer;
+
+import org.elasticsearch.core.Releasables;
+
+/**
+ * Specialized hash table implementation that maps N int values to ids.
+ * Collisions are resolved with open addressing and
+ * linear probing, growth is smooth thanks to {@link BigArrays} and capacity
+ * is always a multiple of N for faster identification of buckets.
+ * This class is not thread-safe.
+ */
+// IDs are internally stored as id + 1 so that 0 encodes for an empty slot
+public final class IntNHash extends AbstractHash {
+    private IntArray keyArray;
+    private final int keySize;
+    private final int[] scratch;
+
+    // Constructor with configurable capacity and default maximum load factor.
+    public IntNHash(long capacity, int keySize, BigArrays bigArrays) {
+        this(capacity, keySize, DEFAULT_MAX_LOAD_FACTOR, bigArrays);
+    }
+
+    // Constructor with configurable capacity and load factor.
+    public IntNHash(long capacity, int keySize, float maxLoadFactor, BigArrays bigArrays) {
+        super(capacity, maxLoadFactor, bigArrays);
+        this.keySize = keySize;
+        this.scratch = new int[keySize];
+        try {
+            // `super` allocates a big array so we have to `close` if we fail here or we'll leak it.
+            keyArray = bigArrays.newIntArray(keySize * capacity, false);
+        } finally {
+            if (keyArray == null) {
+                close();
+            }
+        }
+    }
+
+    public int[] getKeys(long id) {
+        getKeys(id, scratch);
+        return scratch;
+    }
+
+    public void getKeys(long id, int[] dst) {
+        assert dst.length == keySize;
+        for (int i = 0; i < keySize; i++) {
+            dst[i] = keyArray.get(keySize * id + i);
+        }
+    }
+
+    private boolean keyEquals(long id, int[] keys) {
+        long keyOffset = keySize * id;
+        // TODO: fast equals in BigArray
+        for (int i = 0; i < keys.length; i++) {
+            if (keyArray.get(keyOffset + i) != keys[i]) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    public long find(int[] keys) {
+        final long slot = slot(hash(keys), mask);
+        for (long index = slot;; index = nextSlot(index, mask)) {
+            final long id = id(index);
+            if (id == -1) {
+                return id;
+            } else if (keyEquals(id, keys)) {
+                return id;
+            }
+        }
+    }
+
+    private long set(long id, int[] keys) {
+        assert size < maxSize;
+        long slot = slot(hash(keys), mask);
+        for (long index = slot;; index = nextSlot(index, mask)) {
+            final long curId = id(index);
+            if (curId == -1) { // means unset
+                setId(index, id);
+                append(id, keys);
+                ++size;
+                return id;
+            } else {
+                if (keyEquals(curId, keys)) {
+                    return -1 - curId;
+                }
+            }
+        }
+    }
+
+    private void append(long id, int[] keys) {
+        final long keyOffset = keySize * id;
+        keyArray = bigArrays.grow(keyArray, keyOffset + keySize);
+        for (int i = 0; i < keys.length; i++) {
+            keyArray.set(keyOffset + i, keys[i]);
+        }
+    }
+
+    private void reset(long id) {
+        final long slot = slot(hashFromKeyArray(id), mask);
+        for (long index = slot;; index = nextSlot(index, mask)) {
+            final long curId = id(index);
+            if (curId == -1) { // means unset
+                setId(index, id);
+                break;
+            }
+        }
+    }
+
+    /**
+     * Try to add {@code keys}. Return its newly allocated id if it wasn't in
+     * the hash table yet, or {@code -1-id} if it was already present in
+     * the hash table.
+     */
+    public long add(int[] keys) {
+        if (size >= maxSize) {
+            assert size == maxSize;
+            grow();
+        }
+        assert size < maxSize;
+        return set(size, keys);
+    }
+
+    @Override
+    protected void removeAndAdd(long index) {
+        final long id = getAndSetId(index, -1);
+        assert id >= 0;
+        reset(id);
+    }
+
+    @Override
+    public void close() {
+        Releasables.close(keyArray, super::close);
+    }
+
+    static long hash(int[] keys) {
+        long hash = BitMixer.mix(keys[0]);
+        for (int i = 1; i < keys.length; i++) {
+            hash = 31L * hash + BitMixer.mix(keys[i]);
+        }
+        return hash;
+    }
+
+    long hashFromKeyArray(long id) {
+        final long keyOffset = id * keySize;
+        long hash = BitMixer.mix(keyArray.get(keyOffset));
+        for (int i = 1; i < keySize; i++) {
+            hash = 31L * hash + BitMixer.mix(keyArray.get(keyOffset + i));
+        }
+        return hash;
+    }
+}
diff --git a/server/src/test/java/org/elasticsearch/common/util/IntNHashTests.java b/server/src/test/java/org/elasticsearch/common/util/IntNHashTests.java
new file mode 100644
index 0000000000000..ac566e27d71ef
--- /dev/null
+++ b/server/src/test/java/org/elasticsearch/common/util/IntNHashTests.java
@@ -0,0 +1,190 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.common.util;
+
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.core.Releasables;
+import org.elasticsearch.indices.breaker.NoneCircuitBreakerService;
+import org.elasticsearch.test.ESTestCase;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.hamcrest.Matchers.equalTo;
+
+public class IntNHashTests extends ESTestCase {
+    private BigArrays randombigArrays() {
+        return new MockBigArrays(new MockPageCacheRecycler(Settings.EMPTY), new NoneCircuitBreakerService());
+    }
+
+    public void test1Key() {
+        LongHash hash1 = new LongHash(between(1, 100), randombigArrays());
+        IntNHash hash2 = new IntNHash(randomIntBetween(0, 100), 1, randombigArrays());
+        IntNHash hash3 = new IntNHash(randomIntBetween(0, 100), 2, randombigArrays());
+        int padding = randomInt();
+        int values = between(10, 1000);
+        for (int n = 0; n < values; n++) {
+            int value = randomIntBetween(0, 1000);
+            long id1 = hash1.add(value);
+            long id2 = hash2.add(new int[] { value });
+            long id3 = hash3.add(new int[] { value, padding });
+            assertThat(id1, equalTo(id2));
+            assertThat(id1, equalTo(id3));
+        }
+        assertThat(hash1.size(), equalTo(hash2.size()));
+        assertThat(hash2.size(), equalTo(hash3.size()));
+        for (long l = 0; l < hash1.size(); l++) {
+            int v1 = (int) hash1.get(l);
+            int v2 = hash2.getKeys(l)[0];
+            int v3 = hash3.getKeys(l)[0];
+            assertThat(v1, equalTo(v2));
+            assertThat(v1, equalTo(v3));
+        }
+        Releasables.close(hash1, hash2, hash3);
+    }
+
+    public void test2Keys() {
+        LongHash hash1 = new LongHash(between(1, 100), randombigArrays());
+        IntNHash hash2 = new IntNHash(randomIntBetween(0, 100), 2, randombigArrays());
+        IntNHash hash3 = new IntNHash(randomIntBetween(0, 100), 3, randombigArrays());
+        int padding = randomInt();
+        int values = between(10, 1000);
+        for (int n = 0; n < values; n++) {
+            int first = randomIntBetween(0, 1000);
+            int second = randomIntBetween(0, 1000);
+            long v = (((long) first) << 32) | (second & 0xFFFFFFFFL);
+            long id1 = hash1.add(v);
+            long id2 = hash2.add(new int[] { first, second });
+            long id3 = hash3.add(new int[] { first, second, padding });
+            assertThat(id1, equalTo(id2));
+            assertThat(id1, equalTo(id3));
+        }
+        assertThat(hash1.size(), equalTo(hash2.size()));
+        assertThat(hash2.size(), equalTo(hash3.size()));
+        for (long l = 0; l < hash1.size(); l++) {
+            long v1 = hash1.get(l);
+            int first = (int) (v1 >>> 32);
+            int second = (int) (v1 & 0xFFFFFFFFL);
+            int[] v2 = hash2.getKeys(l);
+            assertThat(v2, equalTo(new int[] { first, second }));
+            int[] v3 = hash3.getKeys(l);
+            assertThat(v3, equalTo(new int[] { first, second, padding }));
+        }
+        Releasables.close(hash1, hash2, hash3);
+    }
+
+    public void test3Keys() {
+        Int3Hash hash1 = new Int3Hash(between(1, 100), randombigArrays());
+        IntNHash hash2 = new IntNHash(randomIntBetween(0, 100), 3, randombigArrays());
+        IntNHash hash3 = new IntNHash(randomIntBetween(0, 100), 4, randombigArrays());
+        int values = between(10, 1000);
+        int padding = randomInt();
+        for (int n = 0; n < values; n++) {
+            int v1 = randomIntBetween(0, 1000);
+            int v2 = randomIntBetween(0, 1000);
+            int v3 = randomIntBetween(0, 1000);
+            long id1 = hash1.add(v1, v2, v3);
+            long id2 = hash2.add(new int[] { v1, v2, v3 });
+            long id3 = hash3.add(new int[] { v1, v2, v3, padding });
+            assertThat(id1, equalTo(id2));
+            assertThat(id1, equalTo(id3));
+        }
+        assertThat(hash1.size(), equalTo(hash2.size()));
+        assertThat(hash2.size(), equalTo(hash3.size()));
+        for (long l = 0; l < hash1.size(); l++) {
+            int v1 = hash1.getKey1(l);
+            int v2 = hash1.getKey2(l);
+            int v3 = hash1.getKey3(l);
+            assertThat(hash2.getKeys(l), equalTo(new int[] { v1, v2, v3 }));
+            assertThat(hash3.getKeys(l), equalTo(new int[] { v1, v2, v3, padding }));
+        }
+        Releasables.close(hash1, hash2, hash3);
+    }
+
+    public void test4Keys() {
+        LongLongHash hash1 = new LongLongHash(between(1, 100), randombigArrays());
+        IntNHash hash2 = new IntNHash(randomIntBetween(0, 100), 4, randombigArrays());
+        IntNHash hash3 = new IntNHash(randomIntBetween(0, 100), 5, randombigArrays());
+        int padding = randomInt();
+        int values = between(10, 1000);
+        for (int n = 0; n < values; n++) {
+            int v1 = randomIntBetween(0, 1000);
+            int v2 = randomIntBetween(0, 1000);
+            int v3 = randomIntBetween(0, 1000);
+            int v4 = randomIntBetween(0, 1000);
+            long id1 = hash1.add((((long) v1) << 32) | (v2 & 0xFFFFFFFFL), (((long) v3) << 32) | (v4 & 0xFFFFFFFFL));
+            long id2 = hash2.add(new int[] { v1, v2, v3, v4 });
+            long id3 = hash3.add(new int[] { v1, v2, v3, v4, padding });
+            assertThat(id1, equalTo(id2));
+            assertThat(id1, equalTo(id3));
+        }
+        assertThat(hash1.size(), equalTo(hash2.size()));
+        assertThat(hash1.size(), equalTo(hash3.size()));
+        for (long l = 0; l < hash1.size(); l++) {
+            long k1 = hash1.getKey1(l);
+            long k2 = hash1.getKey2(l);
+            int v1 = (int) (k1 >>> 32);
+            int v2 = (int) (k1 & 0xFFFFFFFFL);
+            int v3 = (int) (k2 >>> 32);
+            int v4 = (int) (k2 & 0xFFFFFFFFL);
+            assertThat(hash2.getKeys(l), equalTo(new int[] { v1, v2, v3, v4 }));
+            assertThat(hash3.getKeys(l), equalTo(new int[] { v1, v2, v3, v4, padding }));
+        }
+        Releasables.close(hash1, hash2, hash3);
+    }
+
+    public void testLargeKeys() {
+        record Ints(int[] vs) {
+            @Override
+            public boolean equals(Object o) {
+                if (this == o) return true;
+                if (o == null || getClass() != o.getClass()) return false;
+                Ints key = (Ints) o;
+                return Arrays.equals(vs, key.vs);
+            }
+
+            @Override
+            public int hashCode() {
+                return Arrays.hashCode(vs);
+            }
+        }
+        Map<Ints, Long> maps = new HashMap<>();
+        int keySize = randomIntBetween(1, 50);
+        int values = between(1, 1000);
+        try (IntNHash hash = new IntNHash(randomIntBetween(0, 1000), keySize, randombigArrays())) {
+            for (int i = 0; i < values; i++) {
+                int[] keys = new int[keySize];
+                for (int k = 0; k < keySize; k++) {
+                    keys[k] = randomIntBetween(0, 1000);
+                }
+                long hashId = hash.add(keys);
+                Ints ints = new Ints(keys);
+                if (hashId < 0) {
+                    hashId = -1 - hashId;
+                    assertThat(maps.get(ints), equalTo(hashId));
+                } else {
+                    assertNull(maps.get(ints));
+                    maps.put(ints, hashId);
+                }
+                assertThat((int) hash.size(), equalTo(maps.size()));
+            }
+            for (long l = 0; l < hash.size(); l++) {
+                int[] key = hash.getKeys(l);
+                Ints ints = new Ints(key);
+                assertThat(maps.get(ints), equalTo(l));
+            }
+            for (var e : maps.entrySet()) {
+                long id = hash.find(e.getKey().vs);
+                assertThat(id, equalTo(e.getValue()));
+            }
+        }
+    }
+}
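For reviewers, here is a minimal usage sketch of the new class, mirroring the `add`/`find`/`getKeys` contract exercised by the tests above. It is not part of the patch: the wrapper class name is made up, and the use of `BigArrays.NON_RECYCLING_INSTANCE` is just a convenient assumption to keep the snippet self-contained without wiring a circuit breaker.

```java
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.common.util.IntNHash;

// Hypothetical wrapper class, for illustration only.
public class IntNHashUsageSketch {
    public static void main(String[] args) {
        // Assumption: NON_RECYCLING_INSTANCE is good enough for a throwaway example.
        try (IntNHash hash = new IntNHash(16, 3, BigArrays.NON_RECYCLING_INSTANCE)) {
            long id = hash.add(new int[] { 1, 2, 3 });   // new key: returns its freshly allocated id
            long dup = hash.add(new int[] { 1, 2, 3 });  // existing key: returns -1 - id
            assert dup == -1 - id;

            long found = hash.find(new int[] { 1, 2, 3 }); // lookup without inserting; -1 if absent
            assert found == id;

            // getKeys(id) returns a shared scratch array that the next call overwrites.
            int[] keys = hash.getKeys(id);
            assert keys.length == 3 && keys[0] == 1 && keys[1] == 2 && keys[2] == 3;
        } // close() releases the underlying big arrays
    }
}
```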