Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/137331.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 137331
summary: Add ES93BloomFilterStoredFieldsFormat for efficient field existence checks
area: TSDB
type: enhancement
issues: []
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

package org.elasticsearch.index.codec.bloomfilter;

import org.elasticsearch.common.util.ByteUtils;

import java.util.Objects;

/**
 * Static holder for the hash functions of the {@code bloomfilter} codec package. Not instantiable.
 */
public final class BloomFilterHashFunctions {
    private BloomFilterHashFunctions() {}

    //
    // The following Murmur3 implementation is borrowed from commons-codec.
    //
    /**
     * A trimmed-down port of the MurmurHash3 128-bit hash function that only exposes a 64-bit result.
     *
     * <p>
     * MurmurHash is a non-cryptographic hash function suitable for general hash-based lookup. The name comes from two basic
     * operations, multiply (MU) and rotate (R), used in its inner loop. Unlike cryptographic hash functions, it is not
     * specifically designed to be difficult to reverse by an adversary, making it unsuitable for cryptographic purposes.
     * </p>
     *
     * <p>
     * The code is derived from the commons-codec Java port (available there since commons-codec 1.13) of
     * {@code MurmurHash3_x64_128} from Austin Appleby's original {@code c++} code in SMHasher. Only the pieces needed by
     * {@link #hash64(byte[], int, int)} are kept in this class; the 32-bit variant and the other upstream entry points
     * are not included here.
     * </p>
     *
     * <p>
     * This is public domain code with no copyrights. From home page of
     * <a href="https://github.com/aappleby/smhasher">SMHasher</a>:
     * </p>
     *
     * <blockquote> "All MurmurHash versions are public domain software, and the author disclaims all copyright to their
     * code." </blockquote>
     *
     * <p>
     * The commons-codec class was originally adapted from Apache Hive.
     * </p>
     *
     * @see <a href="https://en.wikipedia.org/wiki/MurmurHash">MurmurHash</a>
     * @see <a href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp"> Original MurmurHash3 c++
     *      code</a>
     * @see <a href=
     *      "https://github.com/apache/hive/blob/master/storage-api/src/java/org/apache/hive/common/util/Murmur3.java">
     *      Apache Hive Murmur3</a>
     */
    public static final class MurmurHash3 {
        /**
         * A default seed to use for the murmur hash algorithm.
         * Has the value {@code 104729}.
         */
        public static final int DEFAULT_SEED = 104729;

        // Constants for 128-bit variant
        private static final long C1 = 0x87c37b91114253d5L;
        private static final long C2 = 0x4cf5ad432745937fL;
        private static final int R1 = 31;
        private static final int R2 = 27;
        private static final int R3 = 33;
        private static final int M = 5;
        private static final int N1 = 0x52dce729;
        private static final int N2 = 0x38495ab5;

        /** No instance methods. */
        private MurmurHash3() {}

        /**
         * Generates a 64-bit hash of {@code length} bytes of {@code data}, starting at {@code offset}, using
         * {@link #DEFAULT_SEED} as the seed.
         *
         * <p>
         * The input is consumed the way the 128-bit {@code MurmurHash3_x64_128} algorithm consumes it: full 16-byte blocks
         * (two longs read with {@code ByteUtils.readLongLE}), then a tail of up to 15 bytes assembled with the lowest
         * addressed byte in the lowest bits, then the finalization mix. Only the first 64-bit word is produced: the value
         * returned is {@code h1} after the last {@code h1 += h2}; the step that would produce the second word is never
         * executed.
         * </p>
         *
         * @param data The input byte array
         * @param offset The index in {@code data} of the first byte to hash
         * @param length The number of bytes to hash
         * @return The 64-bit hash of the given range
         * @throws NullPointerException if {@code data} is {@code null}
         * @throws IndexOutOfBoundsException if {@code offset} or {@code length} is negative, or if
         *         {@code offset + length} exceeds {@code data.length}
         */
        @SuppressWarnings("fallthrough")
        public static long hash64(final byte[] data, final int offset, final int length) {
            // Reject invalid ranges up front. Without this guard a negative length skips the block loop and makes the
            // tail switch read bytes located before offset (or hash nothing at all), silently returning a bogus value.
            Objects.checkFromIndexSize(offset, length, data.length);

            long h1 = DEFAULT_SEED;
            long h2 = DEFAULT_SEED;
            final int nblocks = length >> 4;

            // body: one iteration per full 16-byte block
            for (int i = 0; i < nblocks; i++) {
                final int index = offset + (i << 4);
                final long k1 = ByteUtils.readLongLE(data, index);
                final long k2 = ByteUtils.readLongLE(data, index + 8);

                h1 ^= mixK1(k1);
                h1 = Long.rotateLeft(h1, R2);
                h1 += h2;
                h1 = h1 * M + N1;

                h2 ^= mixK2(k2);
                h2 = Long.rotateLeft(h2, R1);
                h2 += h1;
                h2 = h2 * M + N2;
            }

            // tail: the remaining 0..15 bytes. Every case intentionally falls through to the next one so that each
            // remaining byte is folded into k2 (bytes 8..14) or k1 (bytes 0..7) at its little-endian position.
            long k1 = 0;
            long k2 = 0;
            final int index = offset + (nblocks << 4);
            // length is known to be non-negative here, so this is the number of bytes left after the full blocks
            switch (length & 15) {
                case 15:
                    k2 ^= ((long) data[index + 14] & 0xff) << 48;
                case 14:
                    k2 ^= ((long) data[index + 13] & 0xff) << 40;
                case 13:
                    k2 ^= ((long) data[index + 12] & 0xff) << 32;
                case 12:
                    k2 ^= ((long) data[index + 11] & 0xff) << 24;
                case 11:
                    k2 ^= ((long) data[index + 10] & 0xff) << 16;
                case 10:
                    k2 ^= ((long) data[index + 9] & 0xff) << 8;
                case 9:
                    k2 ^= data[index + 8] & 0xff;
                    h2 ^= mixK2(k2);

                case 8:
                    k1 ^= ((long) data[index + 7] & 0xff) << 56;
                case 7:
                    k1 ^= ((long) data[index + 6] & 0xff) << 48;
                case 6:
                    k1 ^= ((long) data[index + 5] & 0xff) << 40;
                case 5:
                    k1 ^= ((long) data[index + 4] & 0xff) << 32;
                case 4:
                    k1 ^= ((long) data[index + 3] & 0xff) << 24;
                case 3:
                    k1 ^= ((long) data[index + 2] & 0xff) << 16;
                case 2:
                    k1 ^= ((long) data[index + 1] & 0xff) << 8;
                case 1:
                    k1 ^= data[index] & 0xff;
                    h1 ^= mixK1(k1);
            }

            // finalization
            h1 ^= length;
            h2 ^= length;

            h1 += h2;
            h2 += h1;

            h1 = fmix64(h1);
            h2 = fmix64(h2);

            // Only the first 64-bit word is needed, so the matching "h2 += h1" step is omitted.
            h1 += h2;

            return h1;
        }

        /**
         * Scrambles the first 8 bytes of a block (or of the tail) before they are folded into {@code h1}.
         *
         * @param k1 The raw 64-bit input word
         * @return The mixed word
         */
        private static long mixK1(long k1) {
            k1 *= C1;
            k1 = Long.rotateLeft(k1, R1);
            k1 *= C2;
            return k1;
        }

        /**
         * Scrambles the second 8 bytes of a block (or of the tail) before they are folded into {@code h2}.
         *
         * @param k2 The raw 64-bit input word
         * @return The mixed word
         */
        private static long mixK2(long k2) {
            k2 *= C2;
            k2 = Long.rotateLeft(k2, R3);
            k2 *= C1;
            return k2;
        }

        /**
         * Performs the final avalanche mix step of the 64-bit hash function {@code MurmurHash3_x64_128}.
         *
         * @param hash The current hash
         * @return The final hash
         */
        private static long fmix64(long hash) {
            hash ^= (hash >>> 33);
            hash *= 0xff51afd7ed558ccdL;
            hash ^= (hash >>> 33);
            hash *= 0xc4ceb9fe1a85ec53L;
            hash ^= (hash >>> 33);
            return hash;
        }
    }
}
Loading