@@ -248,15 +248,91 @@ struct ggml_backend_rpc_buffer_context {
248248
249249// RPC helper functions
250250
// Computes the 64-bit xxHash (XXH64, seed 0) of the `len` bytes starting at `data`.
// see: https://create.stephan-brumme.com/xxhash/ for more details
// see: https://github.com/Cyan4973/xxHash for benchmark comparisons
//
// The input is consumed strictly byte-by-byte and assembled into little-endian
// words, so:
//   - `data` may have any alignment (no wide loads through a casted pointer),
//   - the result does not depend on the byte order of the host, which matters
//     because the hash is computed independently by the RPC client and by the
//     RPC server and the two values are compared.
//
// `data` may be null when `len` is 0.
static uint64_t generate_hash(const uint8_t * data, size_t len) {
    const uint64_t seed = 0ULL;

    const uint64_t prime1 = 11400714785074694791ULL;
    const uint64_t prime2 = 14029467366897019727ULL;
    const uint64_t prime3 =  1609587929392839161ULL;
    const uint64_t prime4 =  9650029242287828579ULL;
    const uint64_t prime5 =  2870177450012600261ULL;

    // rotate left; every call site uses 0 < r < 64, so neither shift is out of range
    const auto rotl = [](uint64_t v, int r) -> uint64_t {
        return (v << r) | (v >> (64 - r));
    };

    // alignment- and endianness-independent little-endian loads
    const auto read_le32 = [](const uint8_t * q) -> uint32_t {
        return  static_cast<uint32_t>(q[0])
             | (static_cast<uint32_t>(q[1]) <<  8)
             | (static_cast<uint32_t>(q[2]) << 16)
             | (static_cast<uint32_t>(q[3]) << 24);
    };
    const auto read_le64 = [&](const uint8_t * q) -> uint64_t {
        return static_cast<uint64_t>(read_le32(q))
            | (static_cast<uint64_t>(read_le32(q + 4)) << 32);
    };

    // core mixing step: fold one 64-bit input word into an accumulator lane
    const auto mix_lane = [&](uint64_t acc, uint64_t input) -> uint64_t {
        return rotl(acc + input * prime2, 31) * prime1;
    };

    const uint8_t * p         = data;
    size_t          remaining = len;

    uint64_t hash;

    if (len >= 32) {
        // unsigned wrap-around in `seed - prime1` is intentional
        uint64_t state[4] = {
            seed + prime1 + prime2,
            seed + prime2,
            seed,
            seed - prime1
        };

        // process full 32-byte stripe(s): update state[0..3] with 4x64-bit words
        for (; remaining >= 32; remaining -= 32, p += 32) {
            for (int i = 0; i < 4; ++i) {
                state[i] = mix_lane(state[i], read_le64(p + 8 * i));
            }
        }

        // fold 4x64-bit into 1x64-bit
        hash = rotl(state[0], 1) + rotl(state[1], 7) + rotl(state[2], 12) + rotl(state[3], 18);

        // mix in state lanes
        for (int i = 0; i < 4; ++i) {
            hash ^= mix_lane(0, state[i]);
            hash  = hash * prime1 + prime4;
        }
    } else {
        // short input: no stripe state, start from the seed directly
        hash = seed + prime5;
    }

    hash += static_cast<uint64_t>(len);

    // process any remaining 8-byte chunk(s)
    for (; remaining >= 8; remaining -= 8, p += 8) {
        hash ^= mix_lane(0, read_le64(p));
        hash  = rotl(hash, 27) * prime1 + prime4;
    }

    // process any remaining 4-byte chunk
    if (remaining >= 4) {
        hash ^= static_cast<uint64_t>(read_le32(p)) * prime1;
        hash  = rotl(hash, 23) * prime2 + prime3;
        p         += 4;
        remaining -= 4;
    }

    // process any remaining byte(s)
    for (; remaining > 0; --remaining, ++p) {
        hash ^= static_cast<uint64_t>(*p) * prime5;
        hash  = rotl(hash, 11) * prime1;
    }

    // final avalanche
    hash ^= hash >> 33;
    hash *= prime2;
    hash ^= hash >> 29;
    hash *= prime3;
    hash ^= hash >> 32;

    return hash;
}
262338
@@ -602,7 +678,7 @@ static void ggml_backend_rpc_buffer_set_tensor(ggml_backend_buffer_t buffer, ggm
602678 rpc_msg_set_tensor_hash_req request;
603679 request.tensor = rpc_tensor;
604680 request.offset = offset;
605- request.hash = fnv_hash ((const uint8_t *)data, size);
681+ request.hash = generate_hash ((const uint8_t *)data, size);
606682 rpc_msg_set_tensor_hash_rsp response;
607683 bool status = send_rpc_cmd (ctx->sock , RPC_CMD_SET_TENSOR_HASH, &request, sizeof (request), &response, sizeof (response));
608684 RPC_STATUS_ASSERT (status);
@@ -1159,7 +1235,7 @@ bool rpc_server::set_tensor(const std::vector<uint8_t> & input) {
11591235
11601236 const void * data = input.data () + sizeof (rpc_tensor) + sizeof (offset);
11611237 if (cache_dir && size > HASH_THRESHOLD) {
1162- uint64_t hash = fnv_hash ((const uint8_t *)data, size);
1238+ uint64_t hash = generate_hash ((const uint8_t *)data, size);
11631239 char hash_str[17 ];
11641240 snprintf (hash_str, sizeof (hash_str), " %016" PRIx64, hash);
11651241 // save to cache_dir/hash_str
0 commit comments