4747import org .elasticsearch .common .lucene .store .IndexOutputOutputStream ;
4848import org .elasticsearch .common .util .BigArrays ;
4949import org .elasticsearch .common .util .ByteArray ;
50- import org .elasticsearch .common .util .ByteUtils ;
5150import org .elasticsearch .core .IOUtils ;
5251
5352import java .io .Closeable ;
@@ -548,7 +547,7 @@ static int numBytesForBloomFilter(int bloomFilterSize) {
548547 // Uses MurmurHash3-128 to generate a 64-bit hash value, then picks 7 subsets of 31 bits each and returns the values in the
549548 // outputs array. This provides us with 7 reasonably independent hashes of the data for the cost of one MurmurHash3 calculation.
550549 static int [] hashTerm (BytesRef br , int [] outputs ) {
551- final long hash64 = MurmurHash3 .hash64 (br .bytes , br .offset , br .length );
550+ final long hash64 = BloomFilterHashFunctions . MurmurHash3 .hash64 (br .bytes , br .offset , br .length );
552551 final int upperHalf = (int ) (hash64 >> 32 );
553552 final int lowerHalf = (int ) hash64 ;
554553 // Derive 7 hash outputs by combining the two 32-bit halves of the 64-bit hash, adding the upper half multiplied with different small constants
@@ -562,183 +561,4 @@ static int[] hashTerm(BytesRef br, int[] outputs) {
562561 outputs [6 ] = (lowerHalf + 17 * upperHalf ) & 0x7FFF_FFFF ;
563562 return outputs ;
564563 }
565-
566- //
567- // The following Murmur3 implementation is borrowed from commons-codec.
568- //
569-
570- /**
571- * Implementation of the MurmurHash3 128-bit hash functions.
572- *
573- * <p>
574- * MurmurHash is a non-cryptographic hash function suitable for general hash-based lookup. The name comes from two basic
575- * operations, multiply (MU) and rotate (R), used in its inner loop. Unlike cryptographic hash functions, it is not
576- * specifically designed to be difficult to reverse by an adversary, making it unsuitable for cryptographic purposes.
577- * </p>
578- *
579- * <p>
580- * This class is derived from a Java port of the 128-bit hash function {@code MurmurHash3_x64_128} from Austin Appleby's
581- * original {@code c++} code in SMHasher; only the 64-bit {@code hash64} variant and its {@code fmix64} finalizer are kept here.
582- * </p>
583- *
584- * <p>
585- * This is public domain code with no copyrights. From the home page of
586- * <a href="https://github.com/aappleby/smhasher">SMHasher</a>:
587- * </p>
588- *
589- * <blockquote> "All MurmurHash versions are public domain software, and the author disclaims all copyright to their
590- * code." </blockquote>
591- *
592- * <p>
593- * Original adaptation from Apache Hive. That adaptation contains a {@code hash64} method that is not part of the original
594- * MurmurHash3 code. It is not recommended to use these methods. They will be removed in a future release. To obtain a
595- * 64-bit hash use half of the bits from the {@code hash128x64} methods using the input data converted to bytes.
596- * </p>
597- *
598- * @see <a href="https://en.wikipedia.org/wiki/MurmurHash">MurmurHash</a>
599- * @see <a href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp"> Original MurmurHash3 c++
600- * code</a>
601- * @see <a href=
602- * "https://github.com/apache/hive/blob/master/storage-api/src/java/org/apache/hive/common/util/Murmur3.java">
603- * Apache Hive Murmur3</a>
604- * @since 1.13
605- */
606- public static final class MurmurHash3 {
607- /**
608- * A default seed to use for the murmur hash algorithm.
609- * Has the value {@code 104729}.
610- */
611- public static final int DEFAULT_SEED = 104729 ;
612-
613- // Constants for 128-bit variant
614- private static final long C1 = 0x87c37b91114253d5L ;
615- private static final long C2 = 0x4cf5ad432745937fL ;
616- private static final int R1 = 31 ;
617- private static final int R2 = 27 ;
618- private static final int R3 = 33 ;
619- private static final int M = 5 ;
620- private static final int N1 = 0x52dce729 ;
621- private static final int N2 = 0x38495ab5 ;
622-
623- /** Static utility class; the private constructor prevents instantiation. */
624- private MurmurHash3 () {}
625-
626- /**
627- * Generates a 64-bit hash from {@code length} bytes of the byte array starting at {@code offset}, by discarding the second value
628- * of the 128-bit hash.
629- *
630- * Both internal state words are seeded with {@link #DEFAULT_SEED}; this method takes no seed argument.
631- *
632- * @param data The input byte array
633- * @param offset The index of the first byte of {@code data} to hash
634- * @param length The number of bytes to hash, starting at {@code offset}
635- * @return The sum of the two finalized 64-bit state words ({@code h1 + h2}) of the 128-bit computation
636- */
637- @ SuppressWarnings ("fallthrough" )
638- public static long hash64 (final byte [] data , final int offset , final int length ) {
639- long h1 = MurmurHash3 .DEFAULT_SEED ;
640- long h2 = MurmurHash3 .DEFAULT_SEED ;
641- final int nblocks = length >> 4 ;
642-
643- // body: consume the input in whole 16-byte blocks, each read as two longs (k1, k2) via ByteUtils.readLongLE
644- for (int i = 0 ; i < nblocks ; i ++) {
645- final int index = offset + (i << 4 );
646- long k1 = ByteUtils .readLongLE (data , index );
647- long k2 = ByteUtils .readLongLE (data , index + 8 );
648-
649- // mix functions for k1
650- k1 *= C1 ;
651- k1 = Long .rotateLeft (k1 , R1 );
652- k1 *= C2 ;
653- h1 ^= k1 ;
654- h1 = Long .rotateLeft (h1 , R2 );
655- h1 += h2 ;
656- h1 = h1 * M + N1 ;
657-
658- // mix functions for k2
659- k2 *= C2 ;
660- k2 = Long .rotateLeft (k2 , R3 );
661- k2 *= C1 ;
662- h2 ^= k2 ;
663- h2 = Long .rotateLeft (h2 , R1 );
664- h2 += h1 ;
665- h2 = h2 * M + N2 ;
666- }
667-
668- // tail: fold in the bytes left over after the last whole block; every case deliberately falls through to the ones below it
669- long k1 = 0 ;
670- long k2 = 0 ;
671- final int index = offset + (nblocks << 4 );
672- switch (offset + length - index ) {
673- case 15 :
674- k2 ^= ((long ) data [index + 14 ] & 0xff ) << 48 ;
675- case 14 :
676- k2 ^= ((long ) data [index + 13 ] & 0xff ) << 40 ;
677- case 13 :
678- k2 ^= ((long ) data [index + 12 ] & 0xff ) << 32 ;
679- case 12 :
680- k2 ^= ((long ) data [index + 11 ] & 0xff ) << 24 ;
681- case 11 :
682- k2 ^= ((long ) data [index + 10 ] & 0xff ) << 16 ;
683- case 10 :
684- k2 ^= ((long ) data [index + 9 ] & 0xff ) << 8 ;
685- case 9 :
686- k2 ^= data [index + 8 ] & 0xff ;
687- k2 *= C2 ;
688- k2 = Long .rotateLeft (k2 , R3 );
689- k2 *= C1 ;
690- h2 ^= k2 ;
691-
692- case 8 :
693- k1 ^= ((long ) data [index + 7 ] & 0xff ) << 56 ;
694- case 7 :
695- k1 ^= ((long ) data [index + 6 ] & 0xff ) << 48 ;
696- case 6 :
697- k1 ^= ((long ) data [index + 5 ] & 0xff ) << 40 ;
698- case 5 :
699- k1 ^= ((long ) data [index + 4 ] & 0xff ) << 32 ;
700- case 4 :
701- k1 ^= ((long ) data [index + 3 ] & 0xff ) << 24 ;
702- case 3 :
703- k1 ^= ((long ) data [index + 2 ] & 0xff ) << 16 ;
704- case 2 :
705- k1 ^= ((long ) data [index + 1 ] & 0xff ) << 8 ;
706- case 1 :
707- k1 ^= data [index ] & 0xff ;
708- k1 *= C1 ;
709- k1 = Long .rotateLeft (k1 , R1 );
710- k1 *= C2 ;
711- h1 ^= k1 ;
712- }
713-
714- // finalization: fold the input length into both words, cross-add them, avalanche each with fmix64, then sum into h1
715- h1 ^= length ;
716- h2 ^= length ;
717-
718- h1 += h2 ;
719- h2 += h1 ;
720-
721- h1 = fmix64 (h1 );
722- h2 = fmix64 (h2 );
723-
724- h1 += h2 ;
725-
726- return h1 ;
727- }
728-
729- /**
730- * Performs the final avalanche mix step of the 64-bit hash function {@code MurmurHash3_x64_128}.
731- *
732- * @param hash The current hash
733- * @return The final hash
734- */
735- private static long fmix64 (long hash ) {
736- hash ^= (hash >>> 33 );
737- hash *= 0xff51afd7ed558ccdL ;
738- hash ^= (hash >>> 33 );
739- hash *= 0xc4ceb9fe1a85ec53L ;
740- hash ^= (hash >>> 33 );
741- return hash ;
742- }
743- }
744564}
0 commit comments