Skip to content

Commit f52207a

Browse files
garimasi514 authored and gitster committed
bloom.c: add the murmur3 hash implementation
In preparation for computing changed paths Bloom filters, implement the Murmur3 hash algorithm as described in [1]. It hashes the given data using the given seed and produces a uniformly distributed hash value.

[1] https://en.wikipedia.org/wiki/MurmurHash#Algorithm

Helped-by: Derrick Stolee <[email protected]>
Helped-by: Szeder Gábor <[email protected]>
Reviewed-by: Jakub Narębski <[email protected]>
Signed-off-by: Garima Singh <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent 3be7efc commit f52207a

File tree

7 files changed

+133
-0
lines changed

7 files changed

+133
-0
lines changed

Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -695,6 +695,7 @@ X =
695695
PROGRAMS += $(patsubst %.o,git-%$X,$(PROGRAM_OBJS))
696696

697697
TEST_BUILTINS_OBJS += test-advise.o
698+
TEST_BUILTINS_OBJS += test-bloom.o
698699
TEST_BUILTINS_OBJS += test-chmtime.o
699700
TEST_BUILTINS_OBJS += test-config.o
700701
TEST_BUILTINS_OBJS += test-ctype.o
@@ -840,6 +841,7 @@ LIB_OBJS += base85.o
840841
LIB_OBJS += bisect.o
841842
LIB_OBJS += blame.o
842843
LIB_OBJS += blob.o
844+
LIB_OBJS += bloom.o
843845
LIB_OBJS += branch.o
844846
LIB_OBJS += bulk-checkin.o
845847
LIB_OBJS += bundle.o

bloom.c

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
#include "git-compat-util.h"
2+
#include "bloom.h"
3+
4+
/*
 * Rotate the 32-bit `value` left by `count` bit positions.
 *
 * `count` is reduced modulo 32, and the complementary right-shift amount
 * is masked the same way, so neither shift can reach the width of the
 * type (which would be undefined behavior) even when `count` is 0.
 */
static uint32_t rotate_left(uint32_t value, int32_t count)
{
	uint32_t mask = 8 * sizeof(uint32_t) - 1;
	count &= mask;
	return ((value << count) | (value >> ((-count) & mask)));
}

/*
 * Calculate the murmur3 32-bit hash value for the given data
 * using the given seed.
 *
 *   seed - initial value of the hash state
 *   data - bytes to hash; exactly `len` bytes are read, so the buffer
 *          does not need to be NUL-terminated
 *   len  - number of bytes in `data`
 *
 * Returns the 32-bit hash. Produces a uniformly distributed hash value.
 * Not considered to be cryptographically secure.
 * Implemented as described in https://en.wikipedia.org/wiki/MurmurHash#Algorithm
 *
 * Every input byte is read as an unsigned value in 0..255. Reading the
 * bytes through plain `char` would sign-extend bytes >= 0x80 on platforms
 * where `char` is signed, smearing one-bits into the upper bytes of the
 * block and yielding a hash that neither matches the reference algorithm
 * nor agrees between signed-char and unsigned-char platforms.
 * NOTE(review): hashes of inputs containing bytes >= 0x80 therefore differ
 * from what the previous sign-extending code produced on signed-char
 * platforms; confirm nothing depends on the old values.
 */
uint32_t murmur3_seeded(uint32_t seed, const char *data, size_t len)
{
	const uint32_t c1 = 0xcc9e2d51;
	const uint32_t c2 = 0x1b873593;
	const uint32_t r1 = 15;
	const uint32_t r2 = 13;
	const uint32_t m = 5;
	const uint32_t n = 0xe6546b64;
	const unsigned char *bytes = (const unsigned char *)data;
	const unsigned char *tail;
	/* size_t, not int: a large `len` must not be truncated. */
	size_t len4 = len / sizeof(uint32_t);
	size_t i;
	uint32_t k;
	uint32_t k1 = 0;

	/* Mix in every complete 4-byte block, assembled little-endian. */
	for (i = 0; i < len4; i++) {
		uint32_t byte1 = (uint32_t)bytes[4*i];
		uint32_t byte2 = ((uint32_t)bytes[4*i + 1]) << 8;
		uint32_t byte3 = ((uint32_t)bytes[4*i + 2]) << 16;
		uint32_t byte4 = ((uint32_t)bytes[4*i + 3]) << 24;
		k = byte1 | byte2 | byte3 | byte4;
		k *= c1;
		k = rotate_left(k, r1);
		k *= c2;

		seed ^= k;
		seed = rotate_left(seed, r2) * m + n;
	}

	/* Mix in the 1..3 bytes left over after the last complete block. */
	tail = bytes + len4 * sizeof(uint32_t);

	switch (len & (sizeof(uint32_t) - 1)) {
	case 3:
		k1 ^= ((uint32_t)tail[2]) << 16;
		/* fallthrough */
	case 2:
		k1 ^= ((uint32_t)tail[1]) << 8;
		/* fallthrough */
	case 1:
		k1 ^= ((uint32_t)tail[0]) << 0;
		k1 *= c1;
		k1 = rotate_left(k1, r1);
		k1 *= c2;
		seed ^= k1;
		break;
	}

	/* Finalization: fold in the length, then avalanche the bits. */
	seed ^= (uint32_t)len;
	seed ^= (seed >> 16);
	seed *= 0x85ebca6b;
	seed ^= (seed >> 13);
	seed *= 0xc2b2ae35;
	seed ^= (seed >> 16);

	return seed;
}

bloom.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#ifndef BLOOM_H
2+
#define BLOOM_H
3+
4+
/*
 * Calculate the murmur3 32-bit hash value for the given data
 * using the given seed.
 *
 *   seed - initial value of the hash state
 *   data - bytes to hash; the implementation reads exactly `len` bytes
 *   len  - number of bytes of `data` to hash
 *
 * Returns the 32-bit hash value.
 * Produces a uniformly distributed hash value.
 * Not considered to be cryptographically secure.
 * Implemented as described in https://en.wikipedia.org/wiki/MurmurHash#Algorithm
 *
 * This header includes nothing itself: uint32_t and size_t must already
 * be declared by the includer, e.g. by including "git-compat-util.h"
 * before this header, as bloom.c and t/helper/test-bloom.c do.
 */
11+
uint32_t murmur3_seeded(uint32_t seed, const char *data, size_t len);
12+
13+
#endif

t/helper/test-bloom.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#include "git-compat-util.h"
2+
#include "bloom.h"
3+
#include "test-tool.h"
4+
5+
/*
 * Entry point of the "bloom" test-tool subcommand.
 *
 * Usage: test-tool bloom get_murmur3 <string>
 *
 * "get_murmur3" hashes <string> (without its terminating NUL) using
 * murmur3_seeded() with seed 0 and prints the result to stdout.
 *
 * Returns 0 on success, and also for an unrecognized mode, which is
 * silently ignored. Returns 1 after printing a usage message to stderr
 * when a required argument is missing; argv[1] and argv[2] are only
 * dereferenced once argc shows they are present, so a bare invocation
 * no longer hands a NULL pointer to strcmp() or strlen().
 */
int cmd__bloom(int argc, const char **argv)
{
	static const char usage_msg[] =
		"usage: test-tool bloom get_murmur3 <string>\n";

	if (argc < 2) {
		fputs(usage_msg, stderr);
		return 1;
	}

	if (!strcmp(argv[1], "get_murmur3")) {
		uint32_t hashed;

		if (argc < 3) {
			fputs(usage_msg, stderr);
			return 1;
		}
		hashed = murmur3_seeded(0, argv[2], strlen(argv[2]));
		printf("Murmur3 Hash with seed=0:0x%08x\n", hashed);
	}

	return 0;
}

t/helper/test-tool.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ struct test_cmd {
1515

1616
static struct test_cmd cmds[] = {
1717
{ "advise", cmd__advise_if_enabled },
18+
{ "bloom", cmd__bloom },
1819
{ "chmtime", cmd__chmtime },
1920
{ "config", cmd__config },
2021
{ "ctype", cmd__ctype },

t/helper/test-tool.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include "git-compat-util.h"
66

77
int cmd__advise_if_enabled(int argc, const char **argv);
8+
int cmd__bloom(int argc, const char **argv);
89
int cmd__chmtime(int argc, const char **argv);
910
int cmd__config(int argc, const char **argv);
1011
int cmd__ctype(int argc, const char **argv);

t/t0095-bloom.sh

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
#!/bin/sh
2+
3+
test_description='Testing the various Bloom filter computations in bloom.c'
4+
. ./test-lib.sh
5+
6+
test_expect_success 'compute unseeded murmur3 hash for empty string' '
7+
cat >expect <<-\EOF &&
8+
Murmur3 Hash with seed=0:0x00000000
9+
EOF
10+
test-tool bloom get_murmur3 "" >actual &&
11+
test_cmp expect actual
12+
'
13+
14+
test_expect_success 'compute unseeded murmur3 hash for test string 1' '
15+
cat >expect <<-\EOF &&
16+
Murmur3 Hash with seed=0:0x627b0c2c
17+
EOF
18+
test-tool bloom get_murmur3 "Hello world!" >actual &&
19+
test_cmp expect actual
20+
'
21+
22+
test_expect_success 'compute unseeded murmur3 hash for test string 2' '
23+
cat >expect <<-\EOF &&
24+
Murmur3 Hash with seed=0:0x2e4ff723
25+
EOF
26+
test-tool bloom get_murmur3 "The quick brown fox jumps over the lazy dog" >actual &&
27+
test_cmp expect actual
28+
'
29+
30+
test_done

0 commit comments

Comments
 (0)