@@ -698,7 +698,7 @@ SZ_PUBLIC sz_u64_t sz_hash_serial(sz_cptr_t start, sz_size_t length, sz_u64_t se
698698 else {
699699 // Use a larger state to handle the main loop and add different offsets
700700 // to different lanes of the register
701- sz_hash_state_t state;
701+ _SZ_ALIGN64 sz_hash_state_t state;
702702 sz_hash_state_init_serial (&state, seed);
703703
704704 for (; state.ins_length + 64 <= length; state.ins_length += 64 ) {
@@ -1039,7 +1039,7 @@ SZ_PUBLIC sz_u64_t sz_hash_haswell(sz_cptr_t start, sz_size_t length, sz_u64_t s
10391039 else {
10401040 // Use a larger state to handle the main loop and add different offsets
10411041 // to different lanes of the register
1042- sz_hash_state_t state;
1042+ _SZ_ALIGN64 sz_hash_state_t state;
10431043 sz_hash_state_init_haswell (&state, seed);
10441044 for (; state.ins_length + 64 <= length; state.ins_length += 64 ) {
10451045 state.ins .xmms [0 ] = _mm_lddqu_si128 ((__m128i const *)(start + state.ins_length + 0 ));
@@ -1416,7 +1416,7 @@ SZ_PUBLIC sz_u64_t sz_hash_skylake(sz_cptr_t start, sz_size_t length, sz_u64_t s
14161416 else {
14171417 // Use a larger state to handle the main loop and add different offsets
14181418 // to different lanes of the register
1419- sz_hash_state_t state;
1419+ _SZ_ALIGN64 sz_hash_state_t state;
14201420 sz_hash_state_init_skylake (&state, seed);
14211421
14221422 for (; state.ins_length + 64 <= length; state.ins_length += 64 ) {
@@ -1676,7 +1676,7 @@ SZ_PUBLIC sz_u64_t sz_hash_ice(sz_cptr_t start, sz_size_t length, sz_u64_t seed)
16761676 else {
16771677 // Use a larger state to handle the main loop and add different offsets
16781678 // to different lanes of the register
1679- sz_hash_state_t state;
1679+ _SZ_ALIGN64 sz_hash_state_t state;
16801680 sz_hash_state_init_skylake (&state, seed);
16811681
16821682 for (; state.ins_length + 64 <= length; state.ins_length += 64 ) {
@@ -2107,7 +2107,7 @@ SZ_PUBLIC sz_u64_t sz_hash_neon(sz_cptr_t start, sz_size_t length, sz_u64_t seed
21072107 else {
21082108 // Use a larger state to handle the main loop and add different offsets
21092109 // to different lanes of the register
2110- sz_hash_state_t state;
2110+ _SZ_ALIGN64 sz_hash_state_t state;
21112111 sz_hash_state_init_neon (&state, seed);
21122112 for (; state.ins_length + 64 <= length; state.ins_length += 64 ) {
21132113 state.ins .u8x16s [0 ] = vld1q_u8 ((sz_u8_t const *)(start + state.ins_length + 0 ));
0 commit comments