@@ -234,7 +234,7 @@ void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, voi
234234 rx_vec_i128 fill_state2 = rx_load_vec_i128 ((rx_vec_i128*)fill_state + 2 );
235235 rx_vec_i128 fill_state3 = rx_load_vec_i128 ((rx_vec_i128*)fill_state + 3 );
236236
237- constexpr int PREFETCH_DISTANCE = 4096 ;
237+ constexpr int PREFETCH_DISTANCE = 7168 ;
238238 const char * prefetchPtr = ((const char *)scratchpad) + PREFETCH_DISTANCE;
239239 scratchpadEnd -= PREFETCH_DISTANCE;
240240
@@ -258,8 +258,25 @@ void hashAndFillAes1Rx4(void *scratchpad, size_t scratchpadSize, void *hash, voi
258258
259259 rx_prefetch_t0 (prefetchPtr);
260260
261- scratchpadPtr += 64 ;
262- prefetchPtr += 64 ;
261+ hash_state0 = aesenc<softAes>(hash_state0, rx_load_vec_i128 ((rx_vec_i128*)scratchpadPtr + 4 ));
262+ hash_state1 = aesdec<softAes>(hash_state1, rx_load_vec_i128 ((rx_vec_i128*)scratchpadPtr + 5 ));
263+ hash_state2 = aesenc<softAes>(hash_state2, rx_load_vec_i128 ((rx_vec_i128*)scratchpadPtr + 6 ));
264+ hash_state3 = aesdec<softAes>(hash_state3, rx_load_vec_i128 ((rx_vec_i128*)scratchpadPtr + 7 ));
265+
266+ fill_state0 = aesdec<softAes>(fill_state0, key0);
267+ fill_state1 = aesenc<softAes>(fill_state1, key1);
268+ fill_state2 = aesdec<softAes>(fill_state2, key2);
269+ fill_state3 = aesenc<softAes>(fill_state3, key3);
270+
271+ rx_store_vec_i128 ((rx_vec_i128*)scratchpadPtr + 4 , fill_state0);
272+ rx_store_vec_i128 ((rx_vec_i128*)scratchpadPtr + 5 , fill_state1);
273+ rx_store_vec_i128 ((rx_vec_i128*)scratchpadPtr + 6 , fill_state2);
274+ rx_store_vec_i128 ((rx_vec_i128*)scratchpadPtr + 7 , fill_state3);
275+
276+ rx_prefetch_t0 (prefetchPtr + 64 );
277+
278+ scratchpadPtr += 128 ;
279+ prefetchPtr += 128 ;
263280 }
264281 prefetchPtr = (const char *) scratchpad;
265282 scratchpadEnd += PREFETCH_DISTANCE;
0 commit comments