Skip to content

Commit 8364ab4

Browse files
authored
Add city 32-bit hashing (#3)
1 parent 0d85fc0 commit 8364ab4

15 files changed

+19609
-4
lines changed

Makefile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ DISTNAME = spqrhash-$(EXT_VERSION)
99

1010
# module description
1111
MODULE_big = spqrhash
12-
SRCS = src/spqrhash.c src/murmur3.c
12+
SRCS = src/spqrhash.c src/murmur3.c src/city.c
1313
OBJS = $(SRCS:.c=.o)
1414
EXTENSION = $(MODULE_big)
1515

@@ -20,8 +20,8 @@ REGRESS_OPTS = --inputdir=test
2020

2121
# different vars for extension and plain module
2222

23-
Regress_noext = test_init_noext test_int8_murmur test_string_murmur
24-
Regress_ext = test_init_ext test_int8_murmur test_string_murmur
23+
Regress_noext = test_init_noext test_int8_murmur test_string_murmur test_string_city32 test_string_varlen_city32 test_int8_city32
24+
Regress_ext = test_init_ext test_int8_murmur test_string_murmur test_string_city32 test_string_varlen_city32 test_int8_city32
2525

2626
Data_noext = sql/spqrhash.sql sql/uninstall_spqrhash.sql
2727
Data_ext = sql/spqrhash--1.0.sql sql/spqrhash--unpackaged--1.0.sql

debian/changelog

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
spqrhash (1.1) stable; urgency=medium
2+
3+
* Add city32 hashing
4+
5+
-- Yury Frolov <ein-krebs@yandex-team.ru> Fri, 22 Aug 2025 15:14:38 +0500
6+
17
spqrhash (1.0) stable; urgency=medium
28

39
* Initial release

sql/spqrhash--1.0.sql

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,12 @@ CREATE OR REPLACE FUNCTION spqrhash_murmur3(text) RETURNS int8
77

88
CREATE OR REPLACE FUNCTION spqrhash_murmur3(bytea) RETURNS int8
99
AS '$libdir/spqrhash', 'spqr_hash_murmur3_str' LANGUAGE C IMMUTABLE STRICT;
10+
11+
CREATE OR REPLACE FUNCTION spqrhash_city32(int8) RETURNS int8
12+
AS '$libdir/spqrhash', 'spqr_hash_city32_int64' LANGUAGE C IMMUTABLE STRICT;
13+
14+
CREATE OR REPLACE FUNCTION spqrhash_city32(text) RETURNS int8
15+
AS '$libdir/spqrhash', 'spqr_hash_city32_str' LANGUAGE C IMMUTABLE STRICT;
16+
17+
CREATE OR REPLACE FUNCTION spqrhash_city32(bytea) RETURNS int8
18+
AS '$libdir/spqrhash', 'spqr_hash_city32_str' LANGUAGE C IMMUTABLE STRICT;

sql/spqrhash--unpackaged--1.0.sql

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11

22
ALTER EXTENSION spqrhash ADD FUNCTION spqrhash_murmur3(int8);
33
ALTER EXTENSION spqrhash ADD FUNCTION spqrhash_murmur3(text);
4-
ALTER EXTENSION spqrhash ADD FUNCTION spqrhash_murmur3(bytea);
4+
ALTER EXTENSION spqrhash ADD FUNCTION spqrhash_murmur3(bytea);
5+
ALTER EXTENSION spqrhash ADD FUNCTION spqrhash_city32(int8);
6+
ALTER EXTENSION spqrhash ADD FUNCTION spqrhash_city32(text);
7+
ALTER EXTENSION spqrhash ADD FUNCTION spqrhash_city32(bytea);

sql/spqrhash.sql

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,12 @@ CREATE OR REPLACE FUNCTION spqrhash_murmur3(text) RETURNS int8
77

88
CREATE OR REPLACE FUNCTION spqrhash_murmur3(bytea) RETURNS int8
99
AS '$libdir/spqrhash', 'spqr_hash_murmur3_str' LANGUAGE C IMMUTABLE STRICT;
10+
11+
CREATE OR REPLACE FUNCTION spqrhash_city32(int8) RETURNS int8
12+
AS '$libdir/spqrhash', 'spqr_hash_city32_int64' LANGUAGE C IMMUTABLE STRICT;
13+
14+
CREATE OR REPLACE FUNCTION spqrhash_city32(text) RETURNS int8
15+
AS '$libdir/spqrhash', 'spqr_hash_city32_str' LANGUAGE C IMMUTABLE STRICT;
16+
17+
CREATE OR REPLACE FUNCTION spqrhash_city32(bytea) RETURNS int8
18+
AS '$libdir/spqrhash', 'spqr_hash_city32_str' LANGUAGE C IMMUTABLE STRICT;

sql/uninstall_spqrhash.sql

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,7 @@
22
DROP FUNCTION spqrhash_murmur3(int8);
33
DROP FUNCTION spqrhash_murmur3(text);
44
DROP FUNCTION spqrhash_murmur3(bytea);
5+
DROP FUNCTION spqrhash_city32(int8);
6+
DROP FUNCTION spqrhash_city32(text);
7+
DROP FUNCTION spqrhash_city32(bytea);
58

src/city.c

Lines changed: 291 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,291 @@
1+
// Copyright (c) 2011 Google, Inc.
2+
//
3+
// Permission is hereby granted, free of charge, to any person obtaining a copy
4+
// of this software and associated documentation files (the "Software"), to deal
5+
// in the Software without restriction, including without limitation the rights
6+
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7+
// copies of the Software, and to permit persons to whom the Software is
8+
// furnished to do so, subject to the following conditions:
9+
//
10+
// The above copyright notice and this permission notice shall be included in
11+
// all copies or substantial portions of the Software.
12+
//
13+
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14+
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15+
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16+
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17+
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18+
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19+
// THE SOFTWARE.
20+
//
21+
// CityHash, by Geoff Pike and Jyrki Alakuijala
22+
//
23+
// This file provides a few functions for hashing strings. On x86-64
24+
// hardware in 2011, CityHash64() is faster than other high-quality
25+
// hash functions, such as Murmur. This is largely due to higher
26+
// instruction-level parallelism. CityHash64() and CityHash128() also perform
27+
// well on hash-quality tests.
28+
//
29+
// CityHash128() is optimized for relatively long strings and returns
30+
// a 128-bit hash. For strings more than about 2000 bytes it can be
31+
// faster than CityHash64().
32+
//
33+
// Functions in the CityHash family are not suitable for cryptography.
34+
//
35+
// WARNING: This code has not been tested on big-endian platforms!
36+
// It is known to work well on little-endian platforms that have a small penalty
37+
// for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs.
38+
//
39+
// By the way, for some hash functions, given strings a and b, the hash
40+
// of a+b is easily derived from the hashes of a and b. This property
41+
// doesn't hold for any hash functions in this file.
42+
//
43+
// It's probably possible to create even faster hash functions by
44+
// writing a program that systematically explores some of the space of
45+
// possible hash functions, by using SIMD instructions, or by
46+
// compromising on hash quality.
47+
48+
#include "spqrhash.h"
49+
#include <byteswap.h>
50+
51+
// Feature probe: compilers that define __GNUC__ with a major version of 3 or
// newer are flagged as providing __builtin_expect. The flag is consumed by the
// LIKELY definition later in this file.
#if defined __GNUC__ && __GNUC__ >= 3
52+
#define HAVE_BUILTIN_EXPECT 1
53+
#endif
54+
55+
// Two 64-bit words carried together as one 128-bit quantity.
// `first` is what Uint128Low64 reports, `second` is what Uint128High64 reports.
typedef struct city_uint128 {
    uint64_t first;
    uint64_t second;
} city_uint128;

// Returns the `first` member of x (the low 64 bits).
static inline uint64_t Uint128Low64(const city_uint128 x)
{
    return x.first;
}

// Returns the `second` member of x (the high 64 bits).
static inline uint64_t Uint128High64(const city_uint128 x)
{
    return x.second;
}
63+
64+
// Byte-order adapters applied by Fetch32/Fetch64 to freshly loaded words.
// le32toh/le64toh convert a little-endian value to host order, so the input
// bytes are always interpreted as little-endian regardless of the host.
// NOTE(review): le32toh/le64toh are normally declared by <endian.h>, which this
// file does not include directly -- presumably it arrives via spqrhash.h; confirm.
#define uint32_in_expected_order(x) le32toh(x)
65+
#define uint64_in_expected_order(x) le64toh(x)
66+
67+
// Branch-prediction hint, defined only when no LIKELY macro exists already.
// With HAVE_BUILTIN_EXPECT set it wraps __builtin_expect (the !! normalises the
// condition to 0 or 1); otherwise it expands to the bare condition.
// No code in this file as shown invokes LIKELY.
#if !defined(LIKELY)
68+
#if HAVE_BUILTIN_EXPECT
69+
#define LIKELY(x) (__builtin_expect(!!(x), 1))
70+
#else
71+
#define LIKELY(x) (x)
72+
#endif
73+
#endif
74+
75+
// Copies the 8 bytes starting at p into a uint64_t and returns it unchanged
// (host byte order). Going through memcpy means p needs no particular alignment.
static uint64_t UNALIGNED_LOAD64(const char *p)
{
    uint64_t word;

    memcpy(&word, p, sizeof word);
    return word;
}
81+
82+
// Copies the 4 bytes starting at p into a uint32_t and returns it unchanged
// (host byte order). Going through memcpy means p needs no particular alignment.
static uint32_t UNALIGNED_LOAD32(const char *p)
{
    uint32_t word;

    memcpy(&word, p, sizeof word);
    return word;
}
88+
89+
// Loads the 8 bytes at p (any alignment) and returns them after passing the
// raw word through uint64_in_expected_order.
static inline uint64_t Fetch64(const char *p)
{
    const uint64_t raw = UNALIGNED_LOAD64(p);

    return uint64_in_expected_order(raw);
}
93+
94+
// Loads the 4 bytes at p (any alignment) and returns them after passing the
// raw word through uint32_in_expected_order.
static inline uint32_t Fetch32(const char *p)
{
    const uint32_t raw = UNALIGNED_LOAD32(p);

    return uint32_in_expected_order(raw);
}
98+
99+
// Magic numbers for 32-bit hashing. Copied from Murmur3.
// c1 and c2 are the multipliers applied by Mur and by the long-input path of
// CityHash32; c1 also drives the per-byte step in Hash32Len0to4.
100+
static const uint32_t c1 = 0xcc9e2d51;
101+
static const uint32_t c2 = 0x1b873593;
102+
103+
104+
// Some primes between 2^63 and 2^64 for various uses.
// None of k0..k3 is referenced by the functions in this file as shown.
105+
#define k0 0xc3a5c85c97cb3127ULL
106+
#define k1 0xb492b66fbe98f273ULL
107+
#define k2 0x9ae16a3b2f90404fULL
108+
#define k3 0xc949d7c7509e6557ULL
109+
110+
// Rotates val right by `shift` bit positions.
// Callers in this file pass shifts between 11 and 19; the code itself is only
// well defined for 0 <= shift < 32.
static inline uint32_t Rotate32(uint32_t val, int shift)
{
    // A zero rotation is returned as-is: the general formula would shift left
    // by 32, which C leaves undefined for a 32-bit operand.
    if (shift == 0) {
        return val;
    }

    const uint32_t low_part = val >> shift;
    const uint32_t high_part = val << (32 - shift);

    return low_part | high_part;
}
114+
115+
// A 32-bit to 32-bit integer hash copied from Murmur3.
116+
// Final 32-bit avalanche step: three xor-shifts (16, 13, 16) interleaved with
// two multiplications. Arithmetic wraps modulo 2^32.
static inline uint32_t fmix(uint32_t h)
{
    h = (h ^ (h >> 16)) * 0x85ebca6b;
    h = (h ^ (h >> 13)) * 0xc2b2ae35;
    return h ^ (h >> 16);
}
125+
126+
// Folds the 32-bit value a into the running state h and returns the new state:
// a is scrambled with c1, a 17-bit rotate and c2, xor-ed into h, and the result
// is rotated by 19, multiplied by 5 and offset by 0xe6546b64.
static uint32_t Mur(uint32_t a, uint32_t h)
{
    const uint32_t scrambled = Rotate32(a * c1, 17) * c2;
    const uint32_t turned = Rotate32(h ^ scrambled, 19);

    return turned * 5 + 0xe6546b64;
}
135+
136+
static inline uint32_t Hash32Len0to4(const char *s, size_t len) {
137+
uint32_t b = 0;
138+
uint32_t c = 9;
139+
for (size_t i = 0; i < len; i++) {
140+
signed char v = (signed char)(s[i]);
141+
b = b * c1 + (uint32_t)(v);
142+
c ^= b;
143+
}
144+
return fmix(Mur(b, Mur((uint32_t)(len), c)));
145+
}
146+
147+
// Hashes inputs that CityHash32 routes here (5 to 12 bytes). Three possibly
// overlapping 32-bit words are read: the first four bytes, the last four
// bytes, and a word at offset 0 or 4 chosen from the length.
static inline uint32_t Hash32Len5to12(const char *s, size_t len)
{
    const uint32_t n = (uint32_t)len;
    const uint32_t seed = n * 5;
    const uint32_t head = n + Fetch32(s);
    const uint32_t tail = seed + Fetch32(s + len - 4);
    const uint32_t middle = 9 + Fetch32(s + ((len >> 1) & 4));
    uint32_t state = Mur(head, seed);

    state = Mur(tail, state);
    state = Mur(middle, state);
    return fmix(state);
}
154+
155+
// Hashes inputs that CityHash32 routes here (13 to 24 bytes). Six 32-bit
// words, anchored at the start, the middle and the end of the input, are
// folded with Mur into a state seeded with the length.
static inline uint32_t Hash32Len13to24(const char *s, size_t len)
{
    const size_t half = len >> 1;
    // Listed in the order they are folded in.
    const uint32_t words[6] = {
        Fetch32(s + half - 4),
        Fetch32(s + 4),
        Fetch32(s + len - 8),
        Fetch32(s + half),
        Fetch32(s),
        Fetch32(s + len - 4),
    };
    uint32_t state = (uint32_t)len;

    for (size_t i = 0; i < sizeof words / sizeof words[0]; i++) {
        state = Mur(words[i], state);
    }
    return fmix(state);
}
166+
167+
// Computes the 32-bit CityHash of the len bytes starting at s.
//
// Inputs of at most 24 bytes are delegated to the length-specific helpers
// (0-4, 5-12, 13-24 bytes). Longer inputs use three 32-bit lanes h, g, f:
//   1. the lanes are seeded from len and from five words taken from the last
//      20 bytes of the input;
//   2. the input is consumed from the front in 20-byte chunks, (len - 1) / 20
//      times, so the loop never reads at or beyond s + len;
//   3. the lanes are folded together into h, which is returned.
// All arithmetic is on uint32_t and wraps modulo 2^32.
static uint32_t CityHash32(const char *s, size_t len) {
168+
if (len <= 24) {
169+
return len <= 12 ?
170+
(len <= 4 ? Hash32Len0to4(s, len) : Hash32Len5to12(s, len)) :
171+
Hash32Len13to24(s, len);
172+
}
173+
174+
// len > 24
175+
uint32_t h = (uint32_t)(len), g = c1 * h, f = g, tmp = 0;
// Pre-scrambled words from the tail of the input (offsets are from the end).
176+
uint32_t a0 = Rotate32(Fetch32(s + len - 4) * c1, 17) * c2;
177+
uint32_t a1 = Rotate32(Fetch32(s + len - 8) * c1, 17) * c2;
178+
uint32_t a2 = Rotate32(Fetch32(s + len - 16) * c1, 17) * c2;
179+
uint32_t a3 = Rotate32(Fetch32(s + len - 12) * c1, 17) * c2;
180+
uint32_t a4 = Rotate32(Fetch32(s + len - 20) * c1, 17) * c2;
// Seed lane h with a0 then a2, lane g with a1 then a3, lane f with a4.
181+
h ^= a0;
182+
h = Rotate32(h, 19);
183+
h = h * 5 + 0xe6546b64;
184+
h ^= a2;
185+
h = Rotate32(h, 19);
186+
h = h * 5 + 0xe6546b64;
187+
g ^= a1;
188+
g = Rotate32(g, 19);
189+
g = g * 5 + 0xe6546b64;
190+
g ^= a3;
191+
g = Rotate32(g, 19);
192+
g = g * 5 + 0xe6546b64;
193+
f += a4;
194+
f = Rotate32(f, 19);
195+
f = f * 5 + 0xe6546b64;
// len > 24 here, so iters is at least 1 and the do/while body is safe to enter.
196+
size_t iters = (len - 1) / 20;
197+
do {
// These a0..a4 shadow the tail words declared above; they hold the five
// words of the current 20-byte chunk (a1 and a4 are used unscrambled).
198+
uint32_t a0 = Rotate32(Fetch32(s) * c1, 17) * c2;
199+
uint32_t a1 = Fetch32(s + 4);
200+
uint32_t a2 = Rotate32(Fetch32(s + 8) * c1, 17) * c2;
201+
uint32_t a3 = Rotate32(Fetch32(s + 12) * c1, 17) * c2;
202+
uint32_t a4 = Fetch32(s + 16);
203+
h ^= a0;
204+
h = Rotate32(h, 18);
205+
h = h * 5 + 0xe6546b64;
206+
f += a1;
207+
f = Rotate32(f, 19);
208+
f = f * c1;
209+
g += a2;
210+
g = Rotate32(g, 18);
211+
g = g * 5 + 0xe6546b64;
212+
h ^= a3 + a1;
213+
h = Rotate32(h, 19);
214+
h = h * 5 + 0xe6546b64;
215+
g ^= a4;
216+
g = bswap_32(g) * 5;
217+
h += a4 * 5;
218+
h = bswap_32(h);
219+
f += a0;
// Rotate the three lanes: f takes g, g takes h, h takes the old f.
220+
tmp = f;
221+
f = g;
222+
g = h;
223+
h = tmp;
// Advance to the next 20-byte chunk.
224+
s += 20;
225+
} while (--iters != 0);
// Finalisation: scramble g and f, then fold both into h.
226+
g = Rotate32(g, 11) * c1;
227+
g = Rotate32(g, 17) * c1;
228+
f = Rotate32(f, 11) * c1;
229+
f = Rotate32(f, 17) * c1;
230+
h = Rotate32(h + g, 19);
231+
h = h * 5 + 0xe6546b64;
232+
h = Rotate32(h, 17) * c1;
233+
h = Rotate32(h + f, 19);
234+
h = h * 5 + 0xe6546b64;
235+
h = Rotate32(h, 17) * c1;
236+
return h;
237+
}
238+
239+
// Hash 128 input bits down to 64 bits of output.
240+
// This is intended to be a reasonably good hash function.
241+
// Mixes two 64-bit words into one 64-bit result using xor, multiplication by a
// fixed 64-bit constant and 47-bit right shifts. Arithmetic wraps modulo 2^64.
static inline uint64_t Hash128to64(uint64_t u, uint64_t v)
{
    const uint64_t mul = 0x9ddfea08eb382d69ULL;
    uint64_t lo = (u ^ v) * mul;

    lo ^= lo >> 47;

    uint64_t hi = (v ^ lo) * mul;

    hi ^= hi >> 47;
    return hi * mul;
}
253+
254+
/*
255+
* pghashlib API
256+
*/
257+
258+
/*
 * Hash the len bytes at s with CityHash32 and store the 32-bit result,
 * zero-extended to 64 bits, in io[0]. No other element of io is touched.
 */
void hlib_city32_str(const void *s, size_t len, uint64_t *io)
{
    const char *bytes = s;
    const uint32_t digest = CityHash32(bytes, len);

    io[0] = digest;
}
261+
262+
/* Ported from Go's encoding/binary.PutUvarint */
263+
/*
 * Write n into buf as a base-128 varint: seven payload bits per byte, least
 * significant group first, with the top bit set on every byte except the last.
 * Returns the number of bytes written (1..10 for a 64-bit value); buf must
 * have room for that many bytes.
 */
static int put_uvarint(uint8_t *buf, uint64_t n)
{
    int used = 0;

    for (; n >= 0x80; n >>= 7) {
        buf[used++] = (uint8_t)n | 0x80;
    }
    buf[used++] = (uint8_t)n;
    return used;
}
273+
274+
/*
 * Hash a 64-bit integer with CityHash32.
 *
 * The value is written as a base-128 varint (put_uvarint) at the start of a
 * zero-filled buffer, and the whole buffer -- trailing zero padding included --
 * is hashed. The buffer is 8 bytes long for input_data < 2^56 and 10 bytes
 * long otherwise. The 32-bit hash is returned zero-extended to uint64_t.
 *
 * NOTE(review): the fixed 8/10-byte widths presumably mirror an encoder
 * elsewhere that must produce the same hashes -- confirm before changing them.
 */
uint64_t hlib_city32_int64(uint64_t input_data)
{
    enum { ENCODING_BYTES = 8, ENCODING_BYTES_BIG = 10 };
    /* UINT64_C keeps the shift 64 bits wide even where unsigned long is 32 bits. */
    const uint64_t BOUND = UINT64_C(1) << 56;
    /* Largest case fits in 10 bytes, so a fixed zeroed array replaces alloca + memset. */
    uint8_t key[ENCODING_BYTES_BIG] = {0};
    const size_t len = (input_data >= BOUND) ? ENCODING_BYTES_BIG : ENCODING_BYTES;

    /*
     * Values below 2^56 need at most 8 varint bytes and any 64-bit value needs
     * at most 10, so the encoding always fits inside the first len bytes.
     */
    put_uvarint(key, input_data);
    return (uint64_t)CityHash32((const char *)key, len);
}
291+

0 commit comments

Comments
 (0)