Skip to content

Commit 9df3dfd

Browse files
committed
Switched to use XXHash64 instead of FNV-1a in ggml-rpc.cpp
1 parent f8f071f commit 9df3dfd

File tree

1 file changed

+85
-9
lines changed

1 file changed

+85
-9
lines changed

ggml/src/ggml-rpc/ggml-rpc.cpp

Lines changed: 85 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -248,15 +248,91 @@ struct ggml_backend_rpc_buffer_context {
248248

249249
// RPC helper functions
250250

251-
// Computes FNV-1a hash of the data
252-
static uint64_t fnv_hash(const uint8_t * data, size_t len) {
253-
const uint64_t fnv_prime = 0x100000001b3ULL;
254-
uint64_t hash = 0xcbf29ce484222325ULL;
251+
// Loads a 64-bit little-endian word from p.
// Built byte-by-byte so it is valid for any alignment and any host byte order.
static inline uint64_t xxh64_read_le64(const uint8_t * p) {
    uint64_t v = 0;
    for (int i = 7; i >= 0; --i) {
        v = (v << 8) | p[i];
    }
    return v;
}

// Loads a 32-bit little-endian word from p (any alignment, any host byte order).
static inline uint32_t xxh64_read_le32(const uint8_t * p) {
    return  static_cast<uint32_t>(p[0])        |
           (static_cast<uint32_t>(p[1]) <<  8) |
           (static_cast<uint32_t>(p[2]) << 16) |
           (static_cast<uint32_t>(p[3]) << 24);
}

// Rotates v left by r bits; r must be in [1, 63].
static inline uint64_t xxh64_rotl(uint64_t v, int r) {
    return (v << r) | (v >> (64 - r));
}

// Computes xxHash (64 bit) of data with seed 0, using 64-bit arithmetic.
// see: https://create.stephan-brumme.com/xxhash/ for more details
// see: https://github.com/Cyan4973/xxHash for benchmark comparisons
//
// data : pointer to the bytes to hash; no alignment is required, and it is
//        never dereferenced when len == 0
// len  : number of bytes to hash
// returns the 64-bit hash value
//
// Input words are assembled from individual bytes in little-endian order
// instead of being read through a casted pointer, so the function performs no
// unaligned or type-punned loads and yields the same value on every host.
static uint64_t generate_hash(const uint8_t* data, size_t len) {
    const uint64_t seed = 0ULL;

    const uint64_t prime1 = 11400714785074694791ULL;
    const uint64_t prime2 = 14029467366897019727ULL;
    const uint64_t prime3 =  1609587929392839161ULL;
    const uint64_t prime4 =  9650029242287828579ULL;
    const uint64_t prime5 =  2870177450012600261ULL;

    const uint8_t* p   = data;
    const uint8_t* end = data + len;

    uint64_t hash;

    if (len >= 32) {
        uint64_t state[4] = {
            seed + prime1 + prime2,
            seed + prime2,
            seed,
            seed - prime1
        };

        // process full 32-byte stripe(s): update state[0..3] with 4x64-bit words
        while (static_cast<size_t>(end - p) >= 32) {
            for (int i = 0; i < 4; ++i) {
                const uint64_t word = xxh64_read_le64(p + 8 * i);
                state[i] = xxh64_rotl(state[i] + word * prime2, 31) * prime1;
            }
            p += 32;
        }

        // fold 4x64-bit into 1x64-bit
        hash  = xxh64_rotl(state[0],  1);
        hash += xxh64_rotl(state[1],  7);
        hash += xxh64_rotl(state[2], 12);
        hash += xxh64_rotl(state[3], 18);

        // mix in state lanes
        for (int i = 0; i < 4; ++i) {
            hash ^= xxh64_rotl(state[i] * prime2, 31) * prime1;
            hash  = hash * prime1 + prime4;
        }
    } else {
        // short input: no stripe state, start from the seed directly
        hash = seed + prime5;
    }

    hash += static_cast<uint64_t>(len);

    // process any remaining 8-byte chunk(s)
    while (static_cast<size_t>(end - p) >= 8) {
        const uint64_t chunk = xxh64_read_le64(p);
        hash ^= xxh64_rotl(chunk * prime2, 31) * prime1;
        hash  = xxh64_rotl(hash, 27) * prime1 + prime4;
        p += 8;
    }

    // process any remaining 4-byte chunk
    if (static_cast<size_t>(end - p) >= 4) {
        // widen before multiplying so the product is computed in 64 bits
        const uint64_t chunk = xxh64_read_le32(p);
        hash ^= chunk * prime1;
        hash  = xxh64_rotl(hash, 23) * prime2 + prime3;
        p += 4;
    }

    // process any remaining byte(s)
    while (p < end) {
        const uint64_t chunk = *p++;
        hash ^= chunk * prime5;
        hash  = xxh64_rotl(hash, 11) * prime1;
    }

    // final avalanche
    hash ^= hash >> 33;
    hash *= prime2;
    hash ^= hash >> 29;
    hash *= prime3;
    hash ^= hash >> 32;

    return hash;
}
262338

@@ -602,7 +678,7 @@ static void ggml_backend_rpc_buffer_set_tensor(ggml_backend_buffer_t buffer, ggm
602678
rpc_msg_set_tensor_hash_req request;
603679
request.tensor = rpc_tensor;
604680
request.offset = offset;
605-
request.hash = fnv_hash((const uint8_t*)data, size);
681+
request.hash = generate_hash((const uint8_t*)data, size);
606682
rpc_msg_set_tensor_hash_rsp response;
607683
bool status = send_rpc_cmd(ctx->sock, RPC_CMD_SET_TENSOR_HASH, &request, sizeof(request), &response, sizeof(response));
608684
RPC_STATUS_ASSERT(status);
@@ -1159,7 +1235,7 @@ bool rpc_server::set_tensor(const std::vector<uint8_t> & input) {
11591235

11601236
const void * data = input.data() + sizeof(rpc_tensor) + sizeof(offset);
11611237
if (cache_dir && size > HASH_THRESHOLD) {
1162-
uint64_t hash = fnv_hash((const uint8_t*)data, size);
1238+
uint64_t hash = generate_hash((const uint8_t*)data, size);
11631239
char hash_str[17];
11641240
snprintf(hash_str, sizeof(hash_str), "%016" PRIx64, hash);
11651241
// save to cache_dir/hash_str

0 commit comments

Comments
 (0)