Skip to content

Commit 1b3cdd5

Browse files
committed
Improve: Align inner hash-states
1 parent 763538e commit 1b3cdd5

File tree

2 files changed

+9
-6
lines changed

2 files changed

+9
-6
lines changed

include/stringzilla/hash.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -698,7 +698,7 @@ SZ_PUBLIC sz_u64_t sz_hash_serial(sz_cptr_t start, sz_size_t length, sz_u64_t se
698 698
else {
699 699
// Use a larger state to handle the main loop and add different offsets
700 700
// to different lanes of the register
701-
sz_hash_state_t state;
701+
_SZ_ALIGN64 sz_hash_state_t state;
702 702
sz_hash_state_init_serial(&state, seed);
703 703

704 704
for (; state.ins_length + 64 <= length; state.ins_length += 64) {
@@ -1039,7 +1039,7 @@ SZ_PUBLIC sz_u64_t sz_hash_haswell(sz_cptr_t start, sz_size_t length, sz_u64_t s
1039 1039
else {
1040 1040
// Use a larger state to handle the main loop and add different offsets
1041 1041
// to different lanes of the register
1042-
sz_hash_state_t state;
1042+
_SZ_ALIGN64 sz_hash_state_t state;
1043 1043
sz_hash_state_init_haswell(&state, seed);
1044 1044
for (; state.ins_length + 64 <= length; state.ins_length += 64) {
1045 1045
state.ins.xmms[0] = _mm_lddqu_si128((__m128i const *)(start + state.ins_length + 0));
@@ -1416,7 +1416,7 @@ SZ_PUBLIC sz_u64_t sz_hash_skylake(sz_cptr_t start, sz_size_t length, sz_u64_t s
1416 1416
else {
1417 1417
// Use a larger state to handle the main loop and add different offsets
1418 1418
// to different lanes of the register
1419-
sz_hash_state_t state;
1419+
_SZ_ALIGN64 sz_hash_state_t state;
1420 1420
sz_hash_state_init_skylake(&state, seed);
1421 1421

1422 1422
for (; state.ins_length + 64 <= length; state.ins_length += 64) {
@@ -1676,7 +1676,7 @@ SZ_PUBLIC sz_u64_t sz_hash_ice(sz_cptr_t start, sz_size_t length, sz_u64_t seed)
1676 1676
else {
1677 1677
// Use a larger state to handle the main loop and add different offsets
1678 1678
// to different lanes of the register
1679-
sz_hash_state_t state;
1679+
_SZ_ALIGN64 sz_hash_state_t state;
1680 1680
sz_hash_state_init_skylake(&state, seed);
1681 1681

1682 1682
for (; state.ins_length + 64 <= length; state.ins_length += 64) {
@@ -2107,7 +2107,7 @@ SZ_PUBLIC sz_u64_t sz_hash_neon(sz_cptr_t start, sz_size_t length, sz_u64_t seed
2107 2107
else {
2108 2108
// Use a larger state to handle the main loop and add different offsets
2109 2109
// to different lanes of the register
2110-
sz_hash_state_t state;
2110+
_SZ_ALIGN64 sz_hash_state_t state;
2111 2111
sz_hash_state_init_neon(&state, seed);
2112 2112
for (; state.ins_length + 64 <= length; state.ins_length += 64) {
2113 2113
state.ins.u8x16s[0] = vld1q_u8((sz_u8_t const *)(start + state.ins_length + 0));

rust/lib.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ pub mod sz {
33 33

34 34
#[repr(C)]
35 35
#[derive(Debug, Clone, Copy)]
36+
#[repr(align(64))] // For optimal performance we align to 64 bytes.
36 37
pub struct HashState {
37 38
aes: [u64; 8],
38 39
sum: [u64; 8],
@@ -2027,7 +2028,9 @@ mod tests {
2027 2028

2028 2029
#[test]
2029 2030
fn hash() {
2030-
assert_ne!(sz::hash("Hello"), sz::hash("World"));
2031+
let hash_hello = sz::hash("Hello");
2032+
let hash_world = sz::hash("World");
2033+
assert_ne!(hash_hello, hash_world);
2031 2034

2032 2035
// Hashing should work the same for any seed
2033 2036
for seed in [0u64, 42, 123456789].iter() {

0 commit comments

Comments
 (0)