@@ -344,6 +344,293 @@ load_le(::Type{T}, ptr::Ptr{UInt8}, i) where {T <: Union{UInt32, UInt64}} =
344
344
return hash_mix (a ⊻ secret[4 ], b ⊻ secret[2 ] ⊻ i)
345
345
end
346
346
347
# Assemble a little-endian `UInt64` from the 8 consecutive elements of `arr`
# starting at index `idx`; `arr[idx]` becomes the least significant byte.
#
# The element reads are `@inbounds`, so the caller must guarantee that every
# index in `idx:(idx + 7)` is valid for `arr`.
@inline function load_le_array(::Type{UInt64}, arr::AbstractArray{UInt8}, idx)
    # n.b. for whatever reason, writing this as a loop ensures LLVM
    # optimizations (particularly SROA) don't make a disaster of this code
    # early on so it can actually emit the optimal result
    result = zero(UInt64)
    for i in 0:7
        byte = @inbounds arr[idx + i]
        # Byte `i` occupies bits `8i:(8i + 7)` of the result.
        result |= UInt64(byte) << (8 * i)
    end
    return result
end
358
+
359
# Assemble a little-endian `UInt32` from the 4 consecutive elements of `arr`
# starting at index `idx`; `arr[idx]` becomes the least significant byte.
#
# The element reads are `@inbounds`, so the caller must guarantee that every
# index in `idx:(idx + 3)` is valid for `arr`.
@inline function load_le_array(::Type{UInt32}, arr::AbstractArray{UInt8}, idx)
    result = zero(UInt32)
    for i in 0:3
        byte = @inbounds arr[idx + i]
        # Byte `i` occupies bits `8i:(8i + 7)` of the result.
        result |= UInt32(byte) << (8 * i)
    end
    return result
end
367
+
368
# Hash the bytes of `arr` with the given `seed` and `secret`, returning a `UInt64`.
#
# Layout of the computation:
#   * inputs of at most 16 bytes are folded directly into `a` and `b`;
#   * longer inputs are consumed in 48-byte chunks over three lanes
#     (`seed`, `see1`, `see2`), then up to two further 16-byte mixes, and
#     finally the last 16 bytes of the array are loaded into `a` and `b`.
#
# Elements are addressed as `firstindex(arr) + offset` and read without bounds
# checks (via `load_le_array` / `@inbounds`); all offsets are derived from
# `length(arr)`.
@assume_effects :terminates_globally function hash_bytes(
        arr::AbstractArray{UInt8},
        seed::UInt64,
        secret::NTuple{4, UInt64}
    )
    # Adapted with gratitude from [rapidhash](https://github.com/Nicoshev/rapidhash)
    n = length(arr)
    buflen = UInt64(n)
    seed = seed ⊻ hash_mix(seed ⊻ secret[3], secret[2])
    firstidx = firstindex(arr)

    a = zero(UInt64)
    b = zero(UInt64)
    # `i` counts the bytes not yet consumed by the 48-byte chunk loop; it is
    # also mixed into the final result.
    i = buflen

    if buflen ≤ 16
        if buflen ≥ 4
            seed ⊻= buflen
            if buflen ≥ 8
                # First and last 8 bytes (they overlap when `n < 16`).
                a = load_le_array(UInt64, arr, firstidx)
                b = load_le_array(UInt64, arr, firstidx + n - 8)
            else
                # First and last 4 bytes (they overlap when `n < 8`).
                a = UInt64(load_le_array(UInt32, arr, firstidx))
                b = UInt64(load_le_array(UInt32, arr, firstidx + n - 4))
            end
        elseif buflen > 0
            # 1 to 3 bytes: combine the first, last and middle byte.
            a = (UInt64(@inbounds arr[firstidx]) << 45) | UInt64(@inbounds arr[firstidx + n - 1])
            b = UInt64(@inbounds arr[firstidx + div(n, 2)])
        end
    else
        pos = 0
        if i > 48
            see1 = seed
            see2 = seed
            # Consume 48 bytes per iteration, 16 bytes per lane.
            while i > 48
                seed = hash_mix(
                    load_le_array(UInt64, arr, firstidx + pos) ⊻ secret[1],
                    load_le_array(UInt64, arr, firstidx + pos + 8) ⊻ seed
                )
                see1 = hash_mix(
                    load_le_array(UInt64, arr, firstidx + pos + 16) ⊻ secret[2],
                    load_le_array(UInt64, arr, firstidx + pos + 24) ⊻ see1
                )
                see2 = hash_mix(
                    load_le_array(UInt64, arr, firstidx + pos + 32) ⊻ secret[3],
                    load_le_array(UInt64, arr, firstidx + pos + 40) ⊻ see2
                )
                pos += 48
                i -= 48
            end
            # Fold the two extra lanes back into `seed`.
            seed ⊻= see1
            seed ⊻= see2
        end
        if i > 16
            seed = hash_mix(
                load_le_array(UInt64, arr, firstidx + pos) ⊻ secret[3],
                load_le_array(UInt64, arr, firstidx + pos + 8) ⊻ seed
            )
            if i > 32
                seed = hash_mix(
                    load_le_array(UInt64, arr, firstidx + pos + 16) ⊻ secret[3],
                    load_le_array(UInt64, arr, firstidx + pos + 24) ⊻ seed
                )
            end
        end

        # The final 16 bytes of the input, regardless of chunk alignment.
        a = load_le_array(UInt64, arr, firstidx + n - 16) ⊻ i
        b = load_le_array(UInt64, arr, firstidx + n - 8)
    end

    a = a ⊻ secret[2]
    b = b ⊻ seed
    b, a = mul_parts(a, b)
    return hash_mix(a ⊻ secret[4], b ⊻ secret[2] ⊻ i)
end
443
+
444
+
445
# Helper function to concatenate two UInt64 values with a byte shift.
# Returns `low` shifted right by `shift_bytes` bytes, with the vacated high
# bits filled by the low bits of `high`.
#
# A shift of 0 bytes returns `low` and a shift of 8 bytes returns `high`:
# Julia's `<<` / `>>` yield zero when the shift count reaches the bit width.
@inline function concat_shift(low::UInt64, high::UInt64, shift_bytes::Int)
    shift_bits = shift_bytes * 8
    return (low >> shift_bits) | (high << (64 - shift_bits))
end
452
+
453
# Continue iterating `iter` from `state`, pulling up to 8 elements and packing
# them little-endian into a `UInt64` (the first element read becomes the least
# significant byte).
#
# Returns `(value, state, bytes_read)`, where `state` is the iteration state
# after the last element actually consumed and `bytes_read` is in `0:8`;
# fewer than 8 means the iterator ran out.
#
# Defined through `@eval` so the `:loopinfo` expressions can be spliced into
# the loop body.
@eval @inline function read_uint64_from_uint8_iter(iter, state)
    value = zero(UInt64)
    bytes_read = 0
    shift = 0

    for _ in 1:8
        next_result = iterate(iter, state)
        next_result === nothing && break
        byte, state = next_result
        value |= UInt64(byte) << shift
        shift += 8
        bytes_read += 1
        # Loop metadata handed to LLVM for this loop.
        $(Expr(:loopinfo, (Symbol("llvm.loop.disable_nonforced"))))
        $(Expr(:loopinfo, (Symbol("llvm.loop.vectorize.enable"), false)))
    end

    return value, state, bytes_read
end
471
+
472
# Start iterating `iter` and pack up to 8 elements little-endian into a
# `UInt64` (the first element becomes the least significant byte).
#
# Returns `nothing` when `iter` is empty; otherwise `(value, state, bytes_read)`
# with `bytes_read` in `1:8` and `state` the iteration state after the last
# element consumed.
#
# Defined through `@eval` so the `:loopinfo` expressions can be spliced into
# the loop body.
@eval @inline function read_uint64_from_uint8_iter(iter)
    next_result = iterate(iter)
    next_result === nothing && return nothing

    byte, state = next_result
    value = UInt64(byte)
    bytes_read = 1

    # Loop for remaining bytes
    for i in 2:8
        next_result = iterate(iter, state)
        next_result === nothing && break
        byte, state = next_result
        value |= UInt64(byte::UInt8) << ((i - 1) * 8)
        bytes_read += 1
        # Loop metadata handed to LLVM for this loop.
        $(Expr(:loopinfo, (Symbol("llvm.loop.disable_nonforced"))))
        $(Expr(:loopinfo, (Symbol("llvm.loop.vectorize.enable"), false)))
    end

    return value, state, bytes_read
end
493
+
494
# Hash the bytes produced by iterating `iter`, with the given `seed` and
# `secret`, returning a `UInt64`. The total length is not known up front: the
# input is pulled 8 bytes at a time through `read_uint64_from_uint8_iter`.
#
# State kept while streaming:
#   * `l0`…`l5` hold the six 8-byte words of the current 48-byte chunk and
#     `b0`…`b5` the number of bytes actually read into each of them;
#   * `t0`, `t1` receive the final 16 bytes of the input, reassembled with
#     `concat_shift` from the last words read;
#   * `buflen` counts the bytes of fully mixed chunks until the tail is added.
#
# A full chunk is only mixed once at least one byte of the next chunk has been
# read, so an input ending exactly on a chunk boundary is handled as a
# 48-byte tail.
@assume_effects :terminates_globally function hash_bytes(
        iter,
        seed::UInt64,
        secret::NTuple{4, UInt64}
    )
    seed = seed ⊻ hash_mix(seed ⊻ secret[3], secret[2])

    a = zero(UInt64)
    b = zero(UInt64)
    buflen = zero(UInt64)

    see1 = seed
    see2 = seed
    l0 = zero(UInt64)
    l1 = zero(UInt64)
    l2 = zero(UInt64)
    l3 = zero(UInt64)
    l4 = zero(UInt64)
    l5 = zero(UInt64)
    b0 = 0
    b1 = 0
    b2 = 0
    b3 = 0
    b4 = 0
    b5 = 0
    t0 = zero(UInt64)
    t1 = zero(UInt64)

    # Handle first iteration separately
    read = read_uint64_from_uint8_iter(iter)
    if read !== nothing
        l0, state, b0 = read
        # Repeat hashing chunks until a short read
        while true
            l1, state, b1 = read_uint64_from_uint8_iter(iter, state)
            if b1 == 8
                l2, state, b2 = read_uint64_from_uint8_iter(iter, state)
                if b2 == 8
                    l3, state, b3 = read_uint64_from_uint8_iter(iter, state)
                    if b3 == 8
                        l4, state, b4 = read_uint64_from_uint8_iter(iter, state)
                        if b4 == 8
                            l5, state, b5 = read_uint64_from_uint8_iter(iter, state)
                            if b5 == 8
                                # Read start of next chunk
                                read = read_uint64_from_uint8_iter(iter, state)
                                if read[3] == 0
                                    # Read exactly 48 bytes
                                    t0 = l4
                                    t1 = l5
                                    break
                                else
                                    # Read more than 48 bytes - process and continue to next chunk
                                    seed = hash_mix(l0 ⊻ secret[1], l1 ⊻ seed)
                                    see1 = hash_mix(l2 ⊻ secret[2], l3 ⊻ see1)
                                    see2 = hash_mix(l4 ⊻ secret[3], l5 ⊻ see2)
                                    buflen += 48
                                    l0, state, b0 = read
                                    b1 = 0
                                    b2 = 0
                                    b3 = 0
                                    b4 = 0
                                    b5 = 0
                                    if b0 != 8
                                        # The new chunk is already short: the final 16 bytes
                                        # straddle the previous chunk's last words and `l0`.
                                        t0 = concat_shift(l4, l5, b0)
                                        t1 = concat_shift(l5, l0, b0)
                                        break
                                    end
                                end
                            else
                                # Extract final 16 bytes at the first short read
                                t0 = concat_shift(l3, l4, b5)
                                t1 = concat_shift(l4, l5, b5)
                                break
                            end
                        else
                            t0 = concat_shift(l2, l3, b4)
                            t1 = concat_shift(l3, l4, b4)
                            break
                        end
                    else
                        t0 = concat_shift(l1, l2, b3)
                        t1 = concat_shift(l2, l3, b3)
                        break
                    end
                else
                    t0 = concat_shift(l0, l1, b2)
                    t1 = concat_shift(l1, l2, b2)
                    break
                end
            else
                # `l5` still holds the last word of the previous chunk
                # (or zero while in the first chunk).
                t0 = concat_shift(l5, l0, b1)
                t1 = concat_shift(l0, l1, b1)
                break
            end
        end
    end

    # Partial chunk, handle based on size
    bytes_chunk = b0 + b1 + b2 + b3 + b4 + b5
    if buflen > 0
        # Finalize last full chunk
        seed ⊻= see1
        seed ⊻= see2
    end
    buflen += bytes_chunk
    if buflen ≤ 16
        if buflen ≥ 4
            seed ⊻= buflen
            if buflen ≥ 8
                a = l0
                b = t1
            else
                # First and last 4 bytes, both taken from `l0`.
                a = UInt64(l0 % UInt32)
                b = UInt64((l0 >>> (8 * (bytes_chunk - 4))) % UInt32)
            end
        elseif buflen > 0
            # 1 to 3 bytes: combine the first, last and middle byte. Fresh
            # locals are used here so the `Int` counters `b0`…`b2` keep a
            # single type throughout the function.
            first_byte = l0 % UInt8
            mid_byte = (l0 >>> (8 * div(buflen, 2))) % UInt8
            last_byte = (l0 >>> (8 * (buflen - 1))) % UInt8
            a = (UInt64(first_byte) << 45) | UInt64(last_byte)
            b = UInt64(mid_byte)
        end
    else
        if bytes_chunk > 16
            seed = hash_mix(l0 ⊻ secret[3], l1 ⊻ seed)
            if bytes_chunk > 32
                seed = hash_mix(l2 ⊻ secret[3], l3 ⊻ seed)
            end
        end
        a = t0 ⊻ bytes_chunk
        b = t1
    end

    a = a ⊻ secret[2]
    b = b ⊻ seed
    b, a = mul_parts(a, b)
    return hash_mix(a ⊻ secret[4], b ⊻ secret[2] ⊻ bytes_chunk)
end
633
+
347
634
# Hash a `String` by passing a pointer to its data and its size in bytes to
# `hash_bytes`, seeded with `h` and `HASH_SECRET`. `GC.@preserve` keeps `data`
# rooted while the raw pointer is in use.
@assume_effects :total hash(data::String, h::UInt) =
    GC.@preserve data hash_bytes(pointer(data), sizeof(data), UInt64(h), HASH_SECRET) % UInt
349
636
0 commit comments