26
26
27
27
import static jdk .graal .compiler .asm .amd64 .AMD64Assembler .ConditionFlag .LessEqual ;
28
28
import static jdk .graal .compiler .asm .amd64 .AMD64Assembler .ConditionFlag .NotEqual ;
29
+ import static jdk .graal .compiler .lir .amd64 .AMD64AESEncryptOp .asXMMRegister ;
29
30
import static jdk .graal .compiler .lir .amd64 .AMD64LIRHelper .pointerConstant ;
30
31
import static jdk .graal .compiler .lir .amd64 .AMD64LIRHelper .recordExternalAddress ;
31
32
import static jdk .vm .ci .amd64 .AMD64 .k1 ;
78
79
import jdk .vm .ci .meta .Value ;
79
80
80
81
// @formatter:off
81
- @ SyncPort (from = "https://github.com/openjdk/jdk/blob/a937f6db30ab55b98dae25d5b6d041cf4b7b7291 /src/hotspot/cpu/x86/stubGenerator_x86_64_sha3.cpp#L41-L337 " ,
82
- sha1 = "d9d050bb8e4213f750eae298d436ace9a086b233 " )
82
+ @ SyncPort (from = "https://github.com/openjdk/jdk/blob/c447a10225576bc59e1ba9477417367d2ac28511 /src/hotspot/cpu/x86/stubGenerator_x86_64_sha3.cpp#L43-L320 " ,
83
+ sha1 = "85dbee8cb0c0f6d8f37d07da6cf8b2f9f4fc8ce8 " )
83
84
// @formatter:on
84
85
public final class AMD64SHA3Op extends AMD64LIRInstruction {
85
86
@@ -231,29 +232,16 @@ public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
231
232
masm .kshiftrw (k1 , k5 , 4 );
232
233
233
234
// load the state
234
- masm .evmovdqu64 (xmm0 , k5 , new AMD64Address (state , 0 ));
235
- masm .evmovdqu64 (xmm1 , k5 , new AMD64Address (state , 40 ));
236
- masm .evmovdqu64 (xmm2 , k5 , new AMD64Address (state , 80 ));
237
- masm .evmovdqu64 (xmm3 , k5 , new AMD64Address (state , 120 ));
238
- masm .evmovdqu64 (xmm4 , k5 , new AMD64Address (state , 160 ));
235
+ for (int i = 0 ; i < 5 ; i ++) {
236
+ masm .evmovdqu64 (asXMMRegister (i ), k5 , new AMD64Address (state , i * 40 ));
237
+ }
239
238
240
239
// load the permutation and rotation constants
241
- masm .evmovdqu64 (xmm17 , new AMD64Address (permsAndRots , 0 ));
242
- masm .evmovdqu64 (xmm18 , new AMD64Address (permsAndRots , 64 ));
243
- masm .evmovdqu64 (xmm19 , new AMD64Address (permsAndRots , 128 ));
244
- masm .evmovdqu64 (xmm20 , new AMD64Address (permsAndRots , 192 ));
245
- masm .evmovdqu64 (xmm21 , new AMD64Address (permsAndRots , 256 ));
246
- masm .evmovdqu64 (xmm22 , new AMD64Address (permsAndRots , 320 ));
247
- masm .evmovdqu64 (xmm23 , new AMD64Address (permsAndRots , 384 ));
248
- masm .evmovdqu64 (xmm24 , new AMD64Address (permsAndRots , 448 ));
249
- masm .evmovdqu64 (xmm25 , new AMD64Address (permsAndRots , 512 ));
250
- masm .evmovdqu64 (xmm26 , new AMD64Address (permsAndRots , 576 ));
251
- masm .evmovdqu64 (xmm27 , new AMD64Address (permsAndRots , 640 ));
252
- masm .evmovdqu64 (xmm28 , new AMD64Address (permsAndRots , 704 ));
253
- masm .evmovdqu64 (xmm29 , new AMD64Address (permsAndRots , 768 ));
254
- masm .evmovdqu64 (xmm30 , new AMD64Address (permsAndRots , 832 ));
255
- masm .evmovdqu64 (xmm31 , new AMD64Address (permsAndRots , 896 ));
240
+ for (int i = 0 ; i < 15 ; i ++) {
241
+ masm .evmovdqu64 (asXMMRegister (i + 17 ), new AMD64Address (permsAndRots , i * 64 ));
242
+ }
256
243
244
+ masm .align (preferredLoopAlignment (crb ));
257
245
masm .bind (sha3Loop );
258
246
259
247
// there will be 24 keccak rounds
@@ -304,6 +292,7 @@ public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
304
292
// The implementation closely follows the Java version, with the state
305
293
// array "rows" in the lowest 5 64-bit slots of zmm0 - zmm4, i.e.
306
294
// each row of the SHA3 specification is located in one zmm register.
295
+ masm .align (preferredLoopAlignment (crb ));
307
296
masm .bind (rounds24Loop );
308
297
masm .subl (roundsLeft , 1 );
309
298
@@ -330,7 +319,7 @@ public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
330
319
331
320
// Do the cyclical permutation of the 24 moving state elements
332
321
// and the required rotations within each element (the combined
333
- // rho and sigma steps).
322
+ // rho and pi steps).
334
323
masm .evpermt2q (xmm4 , xmm17 , xmm3 );
335
324
masm .evpermt2q (xmm3 , xmm18 , xmm2 );
336
325
masm .evpermt2q (xmm2 , xmm17 , xmm1 );
@@ -352,7 +341,7 @@ public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
352
341
masm .evpermt2q (xmm2 , xmm24 , xmm4 );
353
342
masm .evpermt2q (xmm3 , xmm25 , xmm4 );
354
343
masm .evpermt2q (xmm4 , xmm26 , xmm5 );
355
- // The combined rho and sigma steps are done.
344
+ // The combined rho and pi steps are done.
356
345
357
346
// Do the chi step (the same operation on all 5 rows).
358
347
// vpternlogq(x, 180, y, z) does x = x ^ (y & ~z).
@@ -394,11 +383,9 @@ public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
394
383
}
395
384
396
385
// store the state
397
- masm .evmovdqu64 (new AMD64Address (state , 0 ), k5 , xmm0 );
398
- masm .evmovdqu64 (new AMD64Address (state , 40 ), k5 , xmm1 );
399
- masm .evmovdqu64 (new AMD64Address (state , 80 ), k5 , xmm2 );
400
- masm .evmovdqu64 (new AMD64Address (state , 120 ), k5 , xmm3 );
401
- masm .evmovdqu64 (new AMD64Address (state , 160 ), k5 , xmm4 );
386
+ for (int i = 0 ; i < 5 ; i ++) {
387
+ masm .evmovdqu64 (new AMD64Address (state , i * 40 ), k5 , asXMMRegister (i ));
388
+ }
402
389
403
390
masm .pop (r14 );
404
391
masm .pop (r13 );
0 commit comments