diff --git a/dev/aarch64_clean/src/rej_uniform_asm.S b/dev/aarch64_clean/src/rej_uniform_asm.S index 941ec842cd..a44456660d 100644 --- a/dev/aarch64_clean/src/rej_uniform_asm.S +++ b/dev/aarch64_clean/src/rej_uniform_asm.S @@ -44,14 +44,14 @@ // We save the output on the stack first, and copy to the actual // output buffer only in the end. This is because the main loop can overwrite // by up to 62 bytes, which we account for here (we use 64 bytes for alignment). -#define STACK_SIZE (2*MLKEM_N + 64) +#define MLK_STACK_SIZE (2*MLKEM_N + 64) .macro push_stack - sub sp, sp, #STACK_SIZE + sub sp, sp, #MLK_STACK_SIZE .endm .macro pop_stack - add sp, sp, #STACK_SIZE + add sp, sp, #MLK_STACK_SIZE .endm /* Parameters */ @@ -458,7 +458,7 @@ rej_uniform_return: /* To facilitate single-compilation-unit (SCU) builds, undefine all macros. * Don't modify by hand -- this is auto-generated by scripts/autogen. */ -#undef STACK_SIZE +#undef MLK_STACK_SIZE /* simpasm: footer-start */ #endif /* MLK_ARITH_BACKEND_AARCH64 && !MLK_CONFIG_MULTILEVEL_NO_SHARED */ diff --git a/dev/aarch64_opt/src/rej_uniform_asm.S b/dev/aarch64_opt/src/rej_uniform_asm.S index 941ec842cd..a44456660d 100644 --- a/dev/aarch64_opt/src/rej_uniform_asm.S +++ b/dev/aarch64_opt/src/rej_uniform_asm.S @@ -44,14 +44,14 @@ // We save the output on the stack first, and copy to the actual // output buffer only in the end. This is because the main loop can overwrite // by up to 62 bytes, which we account for here (we use 64 bytes for alignment). -#define STACK_SIZE (2*MLKEM_N + 64) +#define MLK_STACK_SIZE (2*MLKEM_N + 64) .macro push_stack - sub sp, sp, #STACK_SIZE + sub sp, sp, #MLK_STACK_SIZE .endm .macro pop_stack - add sp, sp, #STACK_SIZE + add sp, sp, #MLK_STACK_SIZE .endm /* Parameters */ @@ -458,7 +458,7 @@ rej_uniform_return: /* To facilitate single-compilation-unit (SCU) builds, undefine all macros. * Don't modify by hand -- this is auto-generated by scripts/autogen. */ -#undef STACK_SIZE +#undef MLK_STACK_SIZE /* simpasm: footer-start */ #endif /* MLK_ARITH_BACKEND_AARCH64 && !MLK_CONFIG_MULTILEVEL_NO_SHARED */ diff --git a/dev/fips202/aarch64/src/Makefile b/dev/fips202/aarch64/src/Makefile index 4ff963d037..f9f50665ce 100644 --- a/dev/fips202/aarch64/src/Makefile +++ b/dev/fips202/aarch64/src/Makefile @@ -10,6 +10,7 @@ keccak_f1600_x1_scalar_asm.S: ../../aarch64_symbolic/keccak_f1600_x1_scalar_symb slothy-cli Arm_AArch64 Arm_Cortex_A55 \ $^ -o $@ \ + -c constraints.spill_stack_loc_prefix=MLK_STACK_LOC \ -c reserved_regs="[x18,sp]" \ -c inputs_are_outputs \ -c variable_size \ @@ -23,10 +24,11 @@ keccak_f1600_x1_scalar_asm.S: ../../aarch64_symbolic/keccak_f1600_x1_scalar_symb slothy-cli Arm_AArch64 Arm_Cortex_A55 \ $@ -o $@ \ + -c constraints.spill_stack_loc_prefix=MLK_STACK_LOC \ -c reserved_regs="[x18,sp]" \ -c variable_size \ -c inputs_are_outputs \ - -c outputs="[hint_STACK_LOC_COUNT]" \ + -c outputs="[hint_MLK_STACK_LOC_COUNT]" \ -c constraints.stalls_first_attempt=64 \ -c constraints.allow_spills \ -c constraints.minimize_spills \ @@ -38,6 +40,7 @@ keccak_f1600_x1_scalar_asm.S: ../../aarch64_symbolic/keccak_f1600_x1_scalar_symb keccak_f1600_x4_v8a_scalar_hybrid_asm.S: ../../aarch64_symbolic/keccak_f1600_x4_v8a_scalar_hybrid_clean.S slothy-cli Arm_AArch64 Arm_Cortex_A55 \ $^ -o $@ \ + -c constraints.spill_stack_loc_prefix=MLK_STACK_LOC \ -c reserved_regs="[x18,sp]" \ -c inputs_are_outputs \ -c variable_size \ @@ -48,12 +51,13 @@ keccak_f1600_x4_v8a_scalar_hybrid_asm.S: ../../aarch64_symbolic/keccak_f1600_x4_ -c split_heuristic_preprocess_naive_interleaving_strategy="alternate" \ -c split_heuristic_estimate_performance=False \ -c absorb_spills=False \ - -c outputs="[hint_STACK_OFFSET_COUNT]" \ + -c outputs="[hint_MLK_STACK_OFFSET_COUNT]" \ -s keccak_f1600_x4_v8a_scalar_hybrid_initial \ -e keccak_f1600_x4_v8a_scalar_hybrid_loop slothy-cli Arm_AArch64 Arm_Cortex_A55 \ $@ -o $@ \ + -c constraints.spill_stack_loc_prefix=MLK_STACK_LOC \ -c reserved_regs="[x18,sp]" \ -c inputs_are_outputs \ -c variable_size \ @@ -63,7 +67,7 @@ keccak_f1600_x4_v8a_scalar_hybrid_asm.S: ../../aarch64_symbolic/keccak_f1600_x4_ -c split_heuristic_preprocess_naive_interleaving \ -c split_heuristic_preprocess_naive_interleaving_strategy="alternate" \ -c split_heuristic_estimate_performance=False \ - -c outputs="[hint_STACK_OFFSET_COUNT]" \ + -c outputs="[hint_MLK_STACK_OFFSET_COUNT]" \ -c absorb_spills=False \ -s keccak_f1600_x4_v8a_scalar_hybrid_loop \ -e keccak_f1600_x4_v8a_scalar_hybrid_loop_end @@ -71,6 +75,7 @@ keccak_f1600_x4_v8a_scalar_hybrid_asm.S: ../../aarch64_symbolic/keccak_f1600_x4_ keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S: ../../aarch64_symbolic/keccak_f1600_x4_v8a_v84a_scalar_hybrid_clean.S slothy-cli Arm_AArch64 Arm_Cortex_A55 \ $^ -o $@ \ + -c constraints.spill_stack_loc_prefix=MLK_STACK_LOC \ -c reserved_regs="[x18,sp]" \ -c inputs_are_outputs \ -c variable_size \ @@ -81,12 +86,13 @@ keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S: ../../aarch64_symbolic/keccak_f160 -c split_heuristic_preprocess_naive_interleaving_strategy="alternate" \ -c split_heuristic_estimate_performance=False \ -c absorb_spills=False \ - -c outputs="[hint_STACK_OFFSET_COUNT]" \ + -c outputs="[hint_MLK_STACK_OFFSET_COUNT]" \ -s keccak_f1600_x4_v8a_v84a_scalar_hybrid_initial \ -e keccak_f1600_x4_v8a_v84a_scalar_hybrid_loop slothy-cli Arm_AArch64 Arm_Cortex_A55 \ $@ -o $@ \ + -c constraints.spill_stack_loc_prefix=MLK_STACK_LOC \ -c reserved_regs="[x18,sp]" \ -c inputs_are_outputs \ -c variable_size \ @@ -96,7 +102,7 @@ keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S: ../../aarch64_symbolic/keccak_f160 -c split_heuristic_preprocess_naive_interleaving \ -c split_heuristic_preprocess_naive_interleaving_strategy="alternate" \ -c split_heuristic_estimate_performance=False \ - -c outputs="[hint_STACK_OFFSET_COUNT]" \ + -c outputs="[hint_MLK_STACK_OFFSET_COUNT]" \ -c absorb_spills=False \ -s keccak_f1600_x4_v8a_v84a_scalar_hybrid_loop \ -e keccak_f1600_x4_v8a_v84a_scalar_hybrid_loop_end diff --git a/dev/fips202/aarch64/src/keccak_f1600_x1_scalar_asm.S b/dev/fips202/aarch64/src/keccak_f1600_x1_scalar_asm.S index 3124f43161..ca83bd949a 100644 --- a/dev/fips202/aarch64/src/keccak_f1600_x1_scalar_asm.S +++ b/dev/fips202/aarch64/src/keccak_f1600_x1_scalar_asm.S @@ -71,39 +71,39 @@ /************************ MACROS ****************************/ -#define STACK_LOCS 4 +#define MLK_STACK_LOCS 4 -#define STACK_SIZE (16*6 + (STACK_LOCS) * 8) -#define STACK_BASE_GPRS (3*8+8) -#define STACK_LOC_INPUT (0*8) -#define STACK_LOC_CONST (1*8) -#define STACK_LOC_COUNT (2*8) -#define STACK_LOC_MISC0 (3*8) +#define MLK_STACK_SIZE (16*6 + (MLK_STACK_LOCS) * 8) +#define MLK_STACK_BASE_GPRS (3*8+8) +#define MLK_STACK_LOC_INPUT (0*8) +#define MLK_STACK_LOC_CONST (1*8) +#define MLK_STACK_LOC_COUNT (2*8) +#define MLK_STACK_LOC_MISC0 (3*8) .macro alloc_stack - sub sp, sp, #(STACK_SIZE) + sub sp, sp, #(MLK_STACK_SIZE) .endm .macro free_stack - add sp, sp, #(STACK_SIZE) + add sp, sp, #(MLK_STACK_SIZE) .endm .macro save_gprs - stp x19, x20, [sp, #(STACK_BASE_GPRS + 16*0)] - stp x21, x22, [sp, #(STACK_BASE_GPRS + 16*1)] - stp x23, x24, [sp, #(STACK_BASE_GPRS + 16*2)] - stp x25, x26, [sp, #(STACK_BASE_GPRS + 16*3)] - stp x27, x28, [sp, #(STACK_BASE_GPRS + 16*4)] - stp x29, x30, [sp, #(STACK_BASE_GPRS + 16*5)] + stp x19, x20, [sp, #(MLK_STACK_BASE_GPRS + 16*0)] + stp x21, x22, [sp, #(MLK_STACK_BASE_GPRS + 16*1)] + stp x23, x24, [sp, #(MLK_STACK_BASE_GPRS + 16*2)] + stp x25, x26, [sp, #(MLK_STACK_BASE_GPRS + 16*3)] + stp x27, x28, [sp, #(MLK_STACK_BASE_GPRS + 16*4)] + stp x29, x30, [sp, #(MLK_STACK_BASE_GPRS + 16*5)] .endm .macro restore_gprs - ldp x19, x20, [sp, #(STACK_BASE_GPRS + 16*0)] - ldp x21, x22, [sp, #(STACK_BASE_GPRS + 16*1)] - ldp x23, x24, [sp, #(STACK_BASE_GPRS + 16*2)] - ldp x25, x26, [sp, #(STACK_BASE_GPRS + 16*3)] - ldp x27, x28, [sp, #(STACK_BASE_GPRS + 16*4)] - ldp x29, x30, [sp, #(STACK_BASE_GPRS + 16*5)] + ldp x19, x20, [sp, #(MLK_STACK_BASE_GPRS + 16*0)] + ldp x21, x22, [sp, #(MLK_STACK_BASE_GPRS + 16*1)] + ldp x23, x24, [sp, #(MLK_STACK_BASE_GPRS + 16*2)] + ldp x25, x26, [sp, #(MLK_STACK_BASE_GPRS + 16*3)] + ldp x27, x28, [sp, #(MLK_STACK_BASE_GPRS + 16*4)] + ldp x29, x30, [sp, #(MLK_STACK_BASE_GPRS + 16*5)] .endm .macro load_state @@ -164,7 +164,7 @@ ror Asu, Asu,#(64-55) .endm -#define KECCAK_F1600_ROUNDS 24 +#define MLK_KECCAK_F1600_ROUNDS 24 .text .global MLK_ASM_NAMESPACE(keccak_f1600_x1_scalar_asm) @@ -175,9 +175,9 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x1_scalar_asm) keccak_f1600_x1_scalar_initial: mov const_addr, input_rc - str input_rc, [sp, #STACK_LOC_CONST] + str input_rc, [sp, #MLK_STACK_LOC_CONST] load_state - str input_addr, [sp, #STACK_LOC_INPUT] // @slothy:writes=STACK_LOC_INPUT + str input_addr, [sp, #MLK_STACK_LOC_INPUT] // @slothy:writes=MLK_STACK_LOC_INPUT // (Optimized for Cortex-A55) // Instructions: 107 @@ -209,7 +209,7 @@ keccak_f1600_x1_scalar_initial: eor x30, x30, x29, ror #63 // .........*............................................ eor x22, x22, x30 // ..........*........................................... eor x23, x23, x30 // ..........*........................................... - str x23, [sp, #STACK_LOC_MISC0] // ...........*.......................................... + str x23, [sp, #MLK_STACK_LOC_MISC0] // ...........*.......................................... eor x23, x14, x15 // ...........*.......................................... eor x14, x14, x0 // ............*......................................... eor x23, x23, x11 // ............*......................................... @@ -237,7 +237,7 @@ keccak_f1600_x1_scalar_initial: eor x12, x3, x27 // ........................*............................. bic x3, x13, x17, ror #19 // ........................*............................. eor x5, x5, x27 // .........................*............................ - ldr x27, [sp, #STACK_LOC_MISC0] // .........................*............................ + ldr x27, [sp, #MLK_STACK_LOC_MISC0] // .........................*............................ bic x25, x17, x2, ror #5 // ..........................*........................... eor x9, x9, x29 // ..........................*........................... eor x23, x25, x5, ror #52 // ...........................*.......................... @@ -267,12 +267,12 @@ keccak_f1600_x1_scalar_initial: eor x12, x15, x12, ror #58 // .......................................*.............. eor x15, x5, x27, ror #27 // .......................................*.............. eor x5, x20, x11, ror #41 // ........................................*............. - ldr x11, [sp, #STACK_LOC_CONST] // ........................................*............. + ldr x11, [sp, #MLK_STACK_LOC_CONST] // ........................................*............. eor x20, x17, x4, ror #21 // .........................................*............ eor x17, x24, x9, ror #47 // .........................................*............ mov x24, #1 // ..........................................*........... bic x9, x0, x16, ror #9 // ..........................................*........... - str x24, [sp, #STACK_LOC_COUNT] // ...........................................*.......... + str x24, [sp, #MLK_STACK_LOC_COUNT] // ...........................................*.......... bic x24, x29, x1, ror #44 // ...........................................*.......... bic x27, x1, x21, ror #50 // ............................................*......... bic x4, x26, x29, ror #63 // ............................................*......... @@ -323,7 +323,7 @@ keccak_f1600_x1_scalar_initial: ror x26, x26, #58 // ........*................................................ eor x16, x30, x16 // .........*............................................... eor x28, x30, x28, ror #63 // .........*............................................... - str x28, [sp, #STACK_LOC_MISC0] // ..........*.............................................. + str x28, [sp, #MLK_STACK_LOC_MISC0] // ..........*.............................................. eor x29, x29, x17, ror #36 // ..........*.............................................. eor x28, x1, x2, ror #61 // ...........*............................................. eor x19, x30, x19, ror #37 // ...........*............................................. @@ -377,9 +377,9 @@ keccak_f1600_x1_scalar_initial: eor x16, x21, x19, ror #43 // ....................................*.................... eor x21, x17, x25, ror #30 // ....................................*.................... bic x19, x25, x19, ror #57 // .....................................*................... - ldr x25, [sp, #STACK_LOC_COUNT] // .....................................*................... + ldr x25, [sp, #MLK_STACK_LOC_COUNT] // .....................................*................... eor x17, x10, x9, ror #47 // ......................................*.................. - ldr x9, [sp, #STACK_LOC_CONST] // ......................................*.................. + ldr x9, [sp, #MLK_STACK_LOC_CONST] // ......................................*.................. eor x15, x20, x28, ror #27 // .......................................*................. bic x20, x4, x28, ror #2 // .......................................*................. eor x10, x20, x1, ror #50 // ........................................*................ @@ -388,10 +388,10 @@ keccak_f1600_x1_scalar_initial: bic x4, x28, x1, ror #48 // .........................................*............... bic x1, x1, x11, ror #57 // ..........................................*.............. ldr x28, [x9, x25, LSL #3] // ..........................................*.............. - ldr x9, [sp, #STACK_LOC_MISC0] // ...........................................*............. + ldr x9, [sp, #MLK_STACK_LOC_MISC0] // ...........................................*............. add x25, x25, #1 // ...........................................*............. - str x25, [sp, #STACK_LOC_COUNT] // ............................................*............ - cmp x25, #(KECCAK_F1600_ROUNDS-1) // ............................................*............ + str x25, [sp, #MLK_STACK_LOC_COUNT] // ............................................*............ + cmp x25, #(MLK_KECCAK_F1600_ROUNDS-1) // ............................................*............ eor x25, x1, x27, ror #53 // .............................................*........... bic x27, x30, x26, ror #47 // .............................................*........... eor x1, x5, x28 // ..............................................*.......... @@ -419,7 +419,7 @@ keccak_f1600_x1_scalar_initial: ble keccak_f1600_x1_scalar_loop final_rotate - ldr input_addr, [sp, #STACK_LOC_INPUT] + ldr input_addr, [sp, #MLK_STACK_LOC_INPUT] store_state restore_gprs @@ -458,14 +458,14 @@ keccak_f1600_x1_scalar_initial: /* To facilitate single-compilation-unit (SCU) builds, undefine all macros. * Don't modify by hand -- this is auto-generated by scripts/autogen. */ -#undef STACK_LOCS -#undef STACK_SIZE -#undef STACK_BASE_GPRS -#undef STACK_LOC_INPUT -#undef STACK_LOC_CONST -#undef STACK_LOC_COUNT -#undef STACK_LOC_MISC0 -#undef KECCAK_F1600_ROUNDS +#undef MLK_STACK_LOCS +#undef MLK_STACK_SIZE +#undef MLK_STACK_BASE_GPRS +#undef MLK_STACK_LOC_INPUT +#undef MLK_STACK_LOC_CONST +#undef MLK_STACK_LOC_COUNT +#undef MLK_STACK_LOC_MISC0 +#undef MLK_KECCAK_F1600_ROUNDS /* simpasm: footer-start */ #endif /* MLK_FIPS202_AARCH64_NEED_X1_SCALAR && \ diff --git a/dev/fips202/aarch64/src/keccak_f1600_x1_v84a_asm.S b/dev/fips202/aarch64/src/keccak_f1600_x1_v84a_asm.S index 7e5568d169..ddcaf46bd9 100644 --- a/dev/fips202/aarch64/src/keccak_f1600_x1_v84a_asm.S +++ b/dev/fips202/aarch64/src/keccak_f1600_x1_v84a_asm.S @@ -216,15 +216,15 @@ str Asud, [input_addr, #0xC0] .endm -#define STACK_SIZE (16*4) /* VREGS (16*4) */ +#define MLK_STACK_SIZE (16*4) /* VREGS (16*4) */ -#define STACK_BASE_GPRS (16*4) +#define MLK_STACK_BASE_GPRS (16*4) .macro alloc_stack - sub sp, sp, #(STACK_SIZE) + sub sp, sp, #(MLK_STACK_SIZE) .endm .macro free_stack - add sp, sp, #(STACK_SIZE) + add sp, sp, #(MLK_STACK_SIZE) .endm .macro save_vregs @@ -339,7 +339,7 @@ .endm -#define KECCAK_F1600_ROUNDS 24 +#define MLK_KECCAK_F1600_ROUNDS 24 .text .global MLK_ASM_NAMESPACE(keccak_f1600_x1_v84a_asm) @@ -349,7 +349,7 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x1_v84a_asm) save_vregs load_input - mov count, #(KECCAK_F1600_ROUNDS) + mov count, #(MLK_KECCAK_F1600_ROUNDS) keccak_f1600_x1_v84a_loop: keccak_f1600_round sub count, count, #1 @@ -478,9 +478,9 @@ keccak_f1600_x1_v84a_loop: /* To facilitate single-compilation-unit (SCU) builds, undefine all macros. * Don't modify by hand -- this is auto-generated by scripts/autogen. */ -#undef STACK_SIZE -#undef STACK_BASE_GPRS -#undef KECCAK_F1600_ROUNDS +#undef MLK_STACK_SIZE +#undef MLK_STACK_BASE_GPRS +#undef MLK_KECCAK_F1600_ROUNDS /* simpasm: footer-start */ #endif /* __ARM_FEATURE_SHA3 */ diff --git a/dev/fips202/aarch64/src/keccak_f1600_x2_v84a_asm.S b/dev/fips202/aarch64/src/keccak_f1600_x2_v84a_asm.S index be3608108c..f15173b4a9 100644 --- a/dev/fips202/aarch64/src/keccak_f1600_x2_v84a_asm.S +++ b/dev/fips202/aarch64/src/keccak_f1600_x2_v84a_asm.S @@ -198,15 +198,15 @@ store_lane_single Asu, 12 .endm -#define STACK_SIZE (16*4) /* VREGS (16*4) */ +#define MLK_STACK_SIZE (16*4) /* VREGS (16*4) */ -#define STACK_BASE_GPRS (16*4) +#define MLK_STACK_BASE_GPRS (16*4) .macro alloc_stack - sub sp, sp, #(STACK_SIZE) + sub sp, sp, #(MLK_STACK_SIZE) .endm .macro free_stack - add sp, sp, #(STACK_SIZE) + add sp, sp, #(MLK_STACK_SIZE) .endm .macro save_vregs @@ -321,7 +321,7 @@ .endm -#define KECCAK_F1600_ROUNDS 24 +#define MLK_KECCAK_F1600_ROUNDS 24 .text .global MLK_ASM_NAMESPACE(keccak_f1600_x2_v84a_asm) @@ -332,7 +332,7 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x2_v84a_asm) save_vregs load_input - mov count, #(KECCAK_F1600_ROUNDS) + mov count, #(MLK_KECCAK_F1600_ROUNDS) keccak_f1600_x2_v84a_loop: keccak_f1600_round sub count, count, #1 @@ -423,9 +423,9 @@ keccak_f1600_x2_v84a_loop: /* To facilitate single-compilation-unit (SCU) builds, undefine all macros. * Don't modify by hand -- this is auto-generated by scripts/autogen. */ -#undef STACK_SIZE -#undef STACK_BASE_GPRS -#undef KECCAK_F1600_ROUNDS +#undef MLK_STACK_SIZE +#undef MLK_STACK_BASE_GPRS +#undef MLK_KECCAK_F1600_ROUNDS /* simpasm: footer-start */ #endif /* __ARM_FEATURE_SHA3 */ diff --git a/dev/fips202/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_asm.S b/dev/fips202/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_asm.S index 104881c0e9..bea8774918 100644 --- a/dev/fips202/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_asm.S +++ b/dev/fips202/aarch64/src/keccak_f1600_x4_v8a_scalar_hybrid_asm.S @@ -35,7 +35,7 @@ !defined(MLK_CONFIG_MULTILEVEL_NO_SHARED) /* simpasm: header-end */ -#define KECCAK_F1600_ROUNDS 24 +#define MLK_KECCAK_F1600_ROUNDS 24 /****************** REGISTER ALLOCATIONS *******************/ @@ -276,60 +276,60 @@ sub input_addr, input_addr, #((2 + \idx)*25*8) .endm -#define STACK_LOCS 2 +#define MLK_STACK_LOCS 2 -#define STACK_SIZE (16*6 + 8*8 + 6*8 + (STACK_LOCS) * 8) -#define STACK_BASE_GPRS (6*8) -#define STACK_BASE_VREGS (6*8 + 16*6) -#define STACK_OFFSET_LOCS (16*6 + 8*8 + 6*8) +#define MLK_STACK_SIZE (16*6 + 8*8 + 6*8 + (MLK_STACK_LOCS) * 8) +#define MLK_STACK_BASE_GPRS (6*8) +#define MLK_STACK_BASE_VREGS (6*8 + 16*6) +#define MLK_STACK_OFFSET_LOCS (16*6 + 8*8 + 6*8) -#define STACK_OFFSET_INPUT (0*8) -#define STACK_OFFSET_CONST_SCALAR (1*8) -#define STACK_OFFSET_CONST_VECTOR (2*8) -#define STACK_OFFSET_COUNT (3*8) -#define STACK_OFFSET_OUTER (4*8) +#define MLK_STACK_OFFSET_INPUT (0*8) +#define MLK_STACK_OFFSET_CONST_SCALAR (1*8) +#define MLK_STACK_OFFSET_CONST_VECTOR (2*8) +#define MLK_STACK_OFFSET_COUNT (3*8) +#define MLK_STACK_OFFSET_OUTER (4*8) -#define STACK_LOC_0 ((STACK_OFFSET_LOCS) + 0*8) -#define STACK_LOC_1 ((STACK_OFFSET_LOCS) + 1*8) +#define MLK_STACK_LOC_0 ((MLK_STACK_OFFSET_LOCS) + 0*8) +#define MLK_STACK_LOC_1 ((MLK_STACK_OFFSET_LOCS) + 1*8) .macro save_gprs - stp x19, x20, [sp, #(STACK_BASE_GPRS + 16*0)] - stp x21, x22, [sp, #(STACK_BASE_GPRS + 16*1)] - stp x23, x24, [sp, #(STACK_BASE_GPRS + 16*2)] - stp x25, x26, [sp, #(STACK_BASE_GPRS + 16*3)] - stp x27, x28, [sp, #(STACK_BASE_GPRS + 16*4)] - stp x29, x30, [sp, #(STACK_BASE_GPRS + 16*5)] + stp x19, x20, [sp, #(MLK_STACK_BASE_GPRS + 16*0)] + stp x21, x22, [sp, #(MLK_STACK_BASE_GPRS + 16*1)] + stp x23, x24, [sp, #(MLK_STACK_BASE_GPRS + 16*2)] + stp x25, x26, [sp, #(MLK_STACK_BASE_GPRS + 16*3)] + stp x27, x28, [sp, #(MLK_STACK_BASE_GPRS + 16*4)] + stp x29, x30, [sp, #(MLK_STACK_BASE_GPRS + 16*5)] .endm .macro restore_gprs - ldp x19, x20, [sp, #(STACK_BASE_GPRS + 16*0)] - ldp x21, x22, [sp, #(STACK_BASE_GPRS + 16*1)] - ldp x23, x24, [sp, #(STACK_BASE_GPRS + 16*2)] - ldp x25, x26, [sp, #(STACK_BASE_GPRS + 16*3)] - ldp x27, x28, [sp, #(STACK_BASE_GPRS + 16*4)] - ldp x29, x30, [sp, #(STACK_BASE_GPRS + 16*5)] + ldp x19, x20, [sp, #(MLK_STACK_BASE_GPRS + 16*0)] + ldp x21, x22, [sp, #(MLK_STACK_BASE_GPRS + 16*1)] + ldp x23, x24, [sp, #(MLK_STACK_BASE_GPRS + 16*2)] + ldp x25, x26, [sp, #(MLK_STACK_BASE_GPRS + 16*3)] + ldp x27, x28, [sp, #(MLK_STACK_BASE_GPRS + 16*4)] + ldp x29, x30, [sp, #(MLK_STACK_BASE_GPRS + 16*5)] .endm .macro save_vregs - stp d8, d9, [sp,#(STACK_BASE_VREGS+0*16)] - stp d10, d11, [sp,#(STACK_BASE_VREGS+1*16)] - stp d12, d13, [sp,#(STACK_BASE_VREGS+2*16)] - stp d14, d15, [sp,#(STACK_BASE_VREGS+3*16)] + stp d8, d9, [sp,#(MLK_STACK_BASE_VREGS+0*16)] + stp d10, d11, [sp,#(MLK_STACK_BASE_VREGS+1*16)] + stp d12, d13, [sp,#(MLK_STACK_BASE_VREGS+2*16)] + stp d14, d15, [sp,#(MLK_STACK_BASE_VREGS+3*16)] .endm .macro restore_vregs - ldp d8, d9, [sp,#(STACK_BASE_VREGS+0*16)] - ldp d10, d11, [sp,#(STACK_BASE_VREGS+1*16)] - ldp d12, d13, [sp,#(STACK_BASE_VREGS+2*16)] - ldp d14, d15, [sp,#(STACK_BASE_VREGS+3*16)] + ldp d8, d9, [sp,#(MLK_STACK_BASE_VREGS+0*16)] + ldp d10, d11, [sp,#(MLK_STACK_BASE_VREGS+1*16)] + ldp d12, d13, [sp,#(MLK_STACK_BASE_VREGS+2*16)] + ldp d14, d15, [sp,#(MLK_STACK_BASE_VREGS+3*16)] .endm .macro alloc_stack - sub sp, sp, #(STACK_SIZE) + sub sp, sp, #(MLK_STACK_SIZE) .endm .macro free_stack - add sp, sp, #(STACK_SIZE) + add sp, sp, #(MLK_STACK_SIZE) .endm .macro eor5 dst, src0, src1, src2, src3, src4 @@ -378,7 +378,7 @@ eor x30, x30, x29, ror #63 // .........*............................................ eor x22, x22, x30 // ..........*........................................... eor x23, x23, x30 // ..........*........................................... - str x23, [sp, #STACK_LOC_0] // ...........*.......................................... // @slothy:writes=stack_0 + str x23, [sp, #MLK_STACK_LOC_0] // ...........*.......................................... // @slothy:writes=stack_0 eor x23, x14, x15 // ...........*.......................................... eor x14, x14, x0 // ............*......................................... eor x23, x23, x11 // ............*......................................... @@ -406,7 +406,7 @@ eor x12, x3, x27 // ........................*............................. bic x3, x13, x17, ror #19 // ........................*............................. eor x5, x5, x27 // .........................*............................ - ldr x27, [sp, #STACK_LOC_0] // .........................*............................ // @slothy:reads=stack_0 + ldr x27, [sp, #MLK_STACK_LOC_0] // .........................*............................ // @slothy:reads=stack_0 bic x25, x17, x2, ror #5 // ..........................*........................... eor x9, x9, x29 // ..........................*........................... eor x23, x25, x5, ror #52 // ...........................*.......................... @@ -436,12 +436,12 @@ eor x12, x15, x12, ror #58 // .......................................*.............. eor x15, x5, x27, ror #27 // .......................................*.............. eor x5, x20, x11, ror #41 // ........................................*............. - ldr x11, [sp, #STACK_OFFSET_CONST_SCALAR] // ........................................*............. + ldr x11, [sp, #MLK_STACK_OFFSET_CONST_SCALAR] // ........................................*............. eor x20, x17, x4, ror #21 // .........................................*............ eor x17, x24, x9, ror #47 // .........................................*............ mov x24, #1 // ..........................................*........... bic x9, x0, x16, ror #9 // ..........................................*........... - str x24, [sp, #STACK_OFFSET_COUNT] // ...........................................*.......... // @slothy:writes=STACK_OFFSET_COUNT + str x24, [sp, #MLK_STACK_OFFSET_COUNT] // ...........................................*.......... // @slothy:writes=MLK_STACK_OFFSET_COUNT bic x24, x29, x1, ror #44 // ...........................................*.......... bic x27, x1, x21, ror #50 // ............................................*......... bic x4, x26, x29, ror #63 // ............................................*......... @@ -502,10 +502,10 @@ // eor X, sAga, X // eor X, sAge, X - // ldr X, [sp, #STACK_OFFSET_CONST_SCALAR] + // ldr X, [sp, #MLK_STACK_OFFSET_CONST_SCALAR] // ldr X, [X] // mov X, #1 - // str X, [sp, #STACK_OFFSET_COUNT] // @slothy:writes=STACK_OFFSET_COUNT + // str X, [sp, #MLK_STACK_OFFSET_COUNT] // @slothy:writes=MLK_STACK_OFFSET_COUNT // chi_step_ror sAga, X, X, X, 47, 39 // chi_step_ror sAge, X, X, X, 42, 25 @@ -577,9 +577,9 @@ xar_m1 vAsu_, vAse, E1, 62 xar_m1 vAme_, vAga, E0, 28 xar_m1 vAbe_, vAge, E1, 20 - ldr tmp, [sp, #STACK_OFFSET_CONST_VECTOR] // @slothy:reads=STACK_OFFSET_CONST_VECTOR + ldr tmp, [sp, #MLK_STACK_OFFSET_CONST_VECTOR] // @slothy:reads=MLK_STACK_OFFSET_CONST_VECTOR ld1r {v28.2d}, [tmp], #8 - str tmp, [sp, #STACK_OFFSET_CONST_VECTOR] // @slothy:writes=STACK_OFFSET_CONST_VECTOR + str tmp, [sp, #MLK_STACK_OFFSET_CONST_VECTOR] // @slothy:writes=MLK_STACK_OFFSET_CONST_VECTOR bcax_m1 vAga, vAga_, vAgi_, vAge_ bcax_m1 vAge, vAge_, vAgo_, vAgi_ bcax_m1 vAgi, vAgi_, vAgu_, vAgo_ @@ -628,7 +628,7 @@ ror x26, x26, #58 // ........*................................................ eor x16, x30, x16 // .........*............................................... eor x28, x30, x28, ror #63 // .........*............................................... - str x28, [sp, #STACK_LOC_0] // ..........*.............................................. // @slothy:writes=stack_0 + str x28, [sp, #MLK_STACK_LOC_0] // ..........*.............................................. // @slothy:writes=stack_0 eor x29, x29, x17, ror #36 // ..........*.............................................. eor x28, x1, x2, ror #61 // ...........*............................................. eor x19, x30, x19, ror #37 // ...........*............................................. @@ -682,9 +682,9 @@ eor x16, x21, x19, ror #43 // ....................................*.................... eor x21, x17, x25, ror #30 // ....................................*.................... bic x19, x25, x19, ror #57 // .....................................*................... - ldr x25, [sp, #STACK_OFFSET_COUNT] // .....................................*................... // @slothy:reads=STACK_OFFSET_COUNT + ldr x25, [sp, #MLK_STACK_OFFSET_COUNT] // .....................................*................... // @slothy:reads=MLK_STACK_OFFSET_COUNT eor x17, x10, x9, ror #47 // ......................................*.................. - ldr x9, [sp, #STACK_OFFSET_CONST_SCALAR] // ......................................*.................. + ldr x9, [sp, #MLK_STACK_OFFSET_CONST_SCALAR] // ......................................*.................. eor x15, x20, x28, ror #27 // .......................................*................. bic x20, x4, x28, ror #2 // .......................................*................. eor x10, x20, x1, ror #50 // ........................................*................ @@ -693,10 +693,10 @@ bic x4, x28, x1, ror #48 // .........................................*............... bic x1, x1, x11, ror #57 // ..........................................*.............. ldr x28, [x9, x25, LSL #3] // ..........................................*.............. - ldr x9, [sp, #STACK_LOC_0] // ...........................................*............. // @slothy:reads=stack_0 + ldr x9, [sp, #MLK_STACK_LOC_0] // ...........................................*............. // @slothy:reads=stack_0 add x25, x25, #1 // ...........................................*............. - str x25, [sp, #STACK_OFFSET_COUNT] // ............................................*............ // @slothy:writes=STACK_OFFSET_COUNT - cmp x25, #(KECCAK_F1600_ROUNDS-1) // ............................................*............ // @slothy:ignore_useless_output + str x25, [sp, #MLK_STACK_OFFSET_COUNT] // ............................................*............ // @slothy:writes=MLK_STACK_OFFSET_COUNT + cmp x25, #(MLK_KECCAK_F1600_ROUNDS-1) // ............................................*............ // @slothy:ignore_useless_output eor x25, x1, x27, ror #53 // .............................................*........... bic x27, x30, x26, ror #47 // .............................................*........... eor x1, x5, x28 // ..............................................*.......... @@ -782,12 +782,12 @@ // eor X, X, sAga, ror #61 // eor X, X, sAge, ror #19 - // ldr X, [sp, #STACK_OFFSET_CONST_SCALAR] - // ldr X, [sp, #STACK_OFFSET_COUNT] // @slothy:reads=STACK_OFFSET_COUNT + // ldr X, [sp, #MLK_STACK_OFFSET_CONST_SCALAR] + // ldr X, [sp, #MLK_STACK_OFFSET_COUNT] // @slothy:reads=MLK_STACK_OFFSET_COUNT // ldr X, [X, X, LSL #3] // add X, X, #1 - // cmp X, #(KECCAK_F1600_ROUNDS-1) // @slothy:ignore_useless_output - // str X, [sp, #STACK_OFFSET_COUNT] // @slothy:writes=STACK_OFFSET_COUNT + // cmp X, #(MLK_KECCAK_F1600_ROUNDS-1) // @slothy:ignore_useless_output + // str X, [sp, #MLK_STACK_OFFSET_COUNT] // @slothy:writes=MLK_STACK_OFFSET_COUNT // chi_step_ror sAga, X, X, X, 47, 39 // chi_step_ror sAge, X, X, X, 42, 25 @@ -855,10 +855,10 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_scalar_hybrid_asm) mov const_addr, input_rc mov outer, #0 - str outer, [sp, #STACK_OFFSET_OUTER] // @slothy:writes=STACK_OFFSET_OUTER - str const_addr, [sp, #STACK_OFFSET_CONST_SCALAR] // @slothy:writes=STACK_OFFSET_CONST_SCALAR - str const_addr, [sp, #STACK_OFFSET_CONST_VECTOR] // @slothy:writes=STACK_OFFSET_CONST_VECTOR - str input_addr, [sp, #STACK_OFFSET_INPUT] // @slothy:writes=STACK_OFFSET_INPUT + str outer, [sp, #MLK_STACK_OFFSET_OUTER] // @slothy:writes=MLK_STACK_OFFSET_OUTER + str const_addr, [sp, #MLK_STACK_OFFSET_CONST_SCALAR] // @slothy:writes=MLK_STACK_OFFSET_CONST_SCALAR + str const_addr, [sp, #MLK_STACK_OFFSET_CONST_VECTOR] // @slothy:writes=MLK_STACK_OFFSET_CONST_VECTOR + str input_addr, [sp, #MLK_STACK_OFFSET_INPUT] // @slothy:writes=MLK_STACK_OFFSET_INPUT load_input_vector // Vector input load_input_scalar 0 // First scalar input @@ -910,7 +910,7 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_scalar_hybrid_asm) eor v27.16B, v27.16B, v23.16B // ..................*........................................................................................................................................................................... // @slothy:interleaving_class=1 eor x23, x23, x30 // ..................*........................................................................................................................................................................... // @slothy:interleaving_class=0 eor v26.16B, v4.16B, v9.16B // ...................*.......................................................................................................................................................................... // @slothy:interleaving_class=1 - str x23, [sp, #STACK_LOC_0] // ...................*.......................................................................................................................................................................... // @slothy:writes=stack_0 // @slothy:interleaving_class=0 + str x23, [sp, #MLK_STACK_LOC_0] // ...................*.......................................................................................................................................................................... // @slothy:writes=stack_0 // @slothy:interleaving_class=0 eor v26.16B, v26.16B, v14.16B // ....................*......................................................................................................................................................................... // @slothy:interleaving_class=1 eor x23, x14, x15 // ....................*......................................................................................................................................................................... // @slothy:interleaving_class=0 eor x14, x14, x0 // .....................*........................................................................................................................................................................ // @slothy:interleaving_class=0 @@ -958,7 +958,7 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_scalar_hybrid_asm) bic x3, x13, x17, ror #19 // ..........................................*................................................................................................................................................... // @slothy:interleaving_class=0 eor v31.16B, v2.16B, v29.16B // ..........................................*................................................................................................................................................... // @slothy:interleaving_class=1 eor x5, x5, x27 // ...........................................*.................................................................................................................................................. // @slothy:interleaving_class=0 - ldr x27, [sp, #STACK_LOC_0] // ...........................................*.................................................................................................................................................. // @slothy:reads=stack_0 // @slothy:interleaving_class=0 + ldr x27, [sp, #MLK_STACK_LOC_0] // ...........................................*.................................................................................................................................................. // @slothy:reads=stack_0 // @slothy:interleaving_class=0 shl v0.2D, v31.2D, #(64-2) // ............................................*................................................................................................................................................. // @slothy:interleaving_class=1 bic x25, x17, x2, ror #5 // ............................................*................................................................................................................................................. // @slothy:interleaving_class=0 sri v0.2D, v31.2D, #(2) // .............................................*................................................................................................................................................ // @slothy:interleaving_class=1 @@ -1011,7 +1011,7 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_scalar_hybrid_asm) shl v1.2D, v31.2D, #(64-9) // ....................................................................*......................................................................................................................... // @slothy:interleaving_class=1 eor x5, x20, x11, ror #41 // .....................................................................*........................................................................................................................ // @slothy:interleaving_class=0 sri v1.2D, v31.2D, #(9) // .....................................................................*........................................................................................................................ // @slothy:interleaving_class=1 - ldr x11, [sp, #STACK_OFFSET_CONST_SCALAR] // ......................................................................*....................................................................................................................... // @slothy:interleaving_class=0 + ldr x11, [sp, #MLK_STACK_OFFSET_CONST_SCALAR] // ......................................................................*....................................................................................................................... // @slothy:interleaving_class=0 eor x20, x17, x4, ror #21 // ......................................................................*....................................................................................................................... // @slothy:interleaving_class=0 eor v31.16B, v16.16B, v25.16B // .......................................................................*...................................................................................................................... // @slothy:interleaving_class=1 eor x17, x24, x9, ror #47 // .......................................................................*...................................................................................................................... // @slothy:interleaving_class=0 @@ -1019,7 +1019,7 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_scalar_hybrid_asm) mov x24, #1 // ........................................................................*..................................................................................................................... // @slothy:interleaving_class=0 sri v8.2D, v31.2D, #(19) // .........................................................................*.................................................................................................................... // @slothy:interleaving_class=1 bic x9, x0, x16, ror #9 // .........................................................................*.................................................................................................................... // @slothy:interleaving_class=0 - str x24, [sp, #STACK_OFFSET_COUNT] // ..........................................................................*................................................................................................................... // @slothy:writes=STACK_OFFSET_COUNT // @slothy:interleaving_class=0 + str x24, [sp, #MLK_STACK_OFFSET_COUNT] // ..........................................................................*................................................................................................................... // @slothy:writes=MLK_STACK_OFFSET_COUNT // @slothy:interleaving_class=0 eor v31.16B, v7.16B, v29.16B // ..........................................................................*................................................................................................................... // @slothy:interleaving_class=1 bic x24, x29, x1, ror #44 // ...........................................................................*.................................................................................................................. // @slothy:interleaving_class=0 shl v16.2D, v31.2D, #(64-58) // ...........................................................................*.................................................................................................................. // @slothy:interleaving_class=1 @@ -1091,7 +1091,7 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_scalar_hybrid_asm) sri v14.2D, v31.2D, #(46) // ............................................................................................................*................................................................................. // @slothy:interleaving_class=1 eor x28, x30, x28, ror #63 // .............................................................................................................*................................................................................ // @slothy:interleaving_class=0 eor v31.16B, v4.16B, v27.16B // .............................................................................................................*................................................................................ // @slothy:interleaving_class=1 - str x28, [sp, #STACK_LOC_0] // ..............................................................................................................*............................................................................... // @slothy:writes=stack_0 // @slothy:interleaving_class=0 + str x28, [sp, #MLK_STACK_LOC_0] // ..............................................................................................................*............................................................................... // @slothy:writes=stack_0 // @slothy:interleaving_class=0 eor x29, x29, x17, ror #36 // ..............................................................................................................*............................................................................... // @slothy:interleaving_class=0 shl v20.2D, v31.2D, #(64-37) // ...............................................................................................................*.............................................................................. // @slothy:interleaving_class=1 eor x28, x1, x2, ror #61 // ...............................................................................................................*.............................................................................. // @slothy:interleaving_class=0 @@ -1185,10 +1185,10 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_scalar_hybrid_asm) bic v31.16B, v19.16B, v18.16B // ...........................................................................................................................................................*.................................. // @slothy:interleaving_class=1 bic x19, x25, x19, ror #57 // ............................................................................................................................................................*................................. // @slothy:interleaving_class=0 eor v17.16B, v31.16B, v17.16B // ............................................................................................................................................................*................................. // @slothy:interleaving_class=1 - ldr x25, [sp, #STACK_OFFSET_COUNT] // .............................................................................................................................................................*................................ // @slothy:reads=STACK_OFFSET_COUNT // @slothy:interleaving_class=0 + ldr x25, [sp, #MLK_STACK_OFFSET_COUNT] // .............................................................................................................................................................*................................ // @slothy:reads=MLK_STACK_OFFSET_COUNT // @slothy:interleaving_class=0 eor x17, x10, x9, ror #47 // .............................................................................................................................................................*................................ // @slothy:interleaving_class=0 bic v31.16B, v20.16B, v19.16B // ..............................................................................................................................................................*............................... // @slothy:interleaving_class=1 - ldr x9, [sp, #STACK_OFFSET_CONST_SCALAR] // ..............................................................................................................................................................*............................... // @slothy:interleaving_class=0 + ldr x9, [sp, #MLK_STACK_OFFSET_CONST_SCALAR] // ..............................................................................................................................................................*............................... // @slothy:interleaving_class=0 eor v18.16B, v31.16B, v18.16B // ...............................................................................................................................................................*.............................. // @slothy:interleaving_class=1 eor x15, x20, x28, ror #27 // ...............................................................................................................................................................*.............................. // @slothy:interleaving_class=0 bic v31.16B, v21.16B, v20.16B // ................................................................................................................................................................*............................. // @slothy:interleaving_class=1 @@ -1204,12 +1204,12 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_scalar_hybrid_asm) bic v31.16B, v23.16B, v22.16B // .....................................................................................................................................................................*........................ // @slothy:interleaving_class=1 ldr x28, [x9, x25, LSL #3] // .....................................................................................................................................................................*........................ // @slothy:interleaving_class=0 eor v21.16B, v31.16B, v1.16B // ......................................................................................................................................................................*....................... // @slothy:interleaving_class=1 - ldr x9, [sp, #STACK_LOC_0] // ......................................................................................................................................................................*....................... // @slothy:reads=stack_0 // @slothy:interleaving_class=0 + ldr x9, [sp, #MLK_STACK_LOC_0] // ......................................................................................................................................................................*....................... // @slothy:reads=stack_0 // @slothy:interleaving_class=0 bic v31.16B, v24.16B, v23.16B // .......................................................................................................................................................................*...................... // @slothy:interleaving_class=1 add x25, x25, #1 // .......................................................................................................................................................................*...................... // @slothy:interleaving_class=0 - str x25, [sp, #STACK_OFFSET_COUNT] // ........................................................................................................................................................................*..................... // @slothy:writes=STACK_OFFSET_COUNT // @slothy:interleaving_class=0 + str x25, [sp, #MLK_STACK_OFFSET_COUNT] // ........................................................................................................................................................................*..................... // @slothy:writes=MLK_STACK_OFFSET_COUNT // @slothy:interleaving_class=0 eor v22.16B, v31.16B, v22.16B // ........................................................................................................................................................................*..................... // @slothy:interleaving_class=1 - cmp x25, #(KECCAK_F1600_ROUNDS-1) // .........................................................................................................................................................................*.................... // @slothy:ignore_useless_output // @slothy:interleaving_class=0 + cmp x25, #(MLK_KECCAK_F1600_ROUNDS-1) // .........................................................................................................................................................................*.................... // @slothy:ignore_useless_output // @slothy:interleaving_class=0 bic v31.16B, v0.16B, v24.16B // .........................................................................................................................................................................*.................... // @slothy:interleaving_class=1 eor x25, x1, x27, ror #53 // ..........................................................................................................................................................................*................... // @slothy:interleaving_class=0 bic x27, x30, x26, ror #47 // ..........................................................................................................................................................................*................... // @slothy:interleaving_class=0 @@ -1248,9 +1248,9 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_scalar_hybrid_asm) bic x3, x0, x30, ror #5 // ...........................................................................................................................................................................................*.. // @slothy:interleaving_class=0 eor x23, x3, x26, ror #52 // ...........................................................................................................................................................................................*.. // @slothy:interleaving_class=0 eor x3, x29, x30, ror #24 // ............................................................................................................................................................................................*. // @slothy:interleaving_class=0 - ldr x30, [sp, #STACK_OFFSET_CONST_VECTOR] // ............................................................................................................................................................................................*. // @slothy:reads=STACK_OFFSET_CONST_VECTOR // @slothy:interleaving_class=1 + ldr x30, [sp, #MLK_STACK_OFFSET_CONST_VECTOR] // ............................................................................................................................................................................................*. // @slothy:reads=MLK_STACK_OFFSET_CONST_VECTOR // @slothy:interleaving_class=1 ld1r {v28.2D}, [x30], #8 // .............................................................................................................................................................................................* // @slothy:interleaving_class=1 - str x30, [sp, #STACK_OFFSET_CONST_VECTOR] // .............................................................................................................................................................................................* // @slothy:writes=STACK_OFFSET_CONST_VECTOR // @slothy:interleaving_class=1 + str x30, [sp, #MLK_STACK_OFFSET_CONST_VECTOR] // .............................................................................................................................................................................................* // @slothy:writes=MLK_STACK_OFFSET_CONST_VECTOR // @slothy:interleaving_class=1 eor v0.16B, v0.16B, v28.16B // ..............................................................................................................................................................................................* // @slothy:interleaving_class=1 // -------------------------------------------------------------------------------------- cycle (expected) --------------------------------------------------------------------------------------> @@ -1278,7 +1278,7 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_scalar_hybrid_asm) // eor x30, x30, x29, ror #63 // .................*............................................................................................................................................................................. // eor x22, x22, x30 // .................*............................................................................................................................................................................. // eor x23, x23, x30 // ..................*............................................................................................................................................................................ - // str x23, [sp, #STACK_LOC_0] // ...................*........................................................................................................................................................................... + // str x23, [sp, #MLK_STACK_LOC_0] // ...................*........................................................................................................................................................................... // eor x23, x14, x15 // ....................*.......................................................................................................................................................................... // eor x14, x14, x0 // .....................*......................................................................................................................................................................... // eor x23, x23, x11 // ......................*........................................................................................................................................................................ @@ -1306,7 +1306,7 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_scalar_hybrid_asm) // eor x12, x3, x27 // .........................................*..................................................................................................................................................... // bic x3, x13, x17, ror #19 // ..........................................*.................................................................................................................................................... // eor x5, x5, x27 // ...........................................*................................................................................................................................................... - // ldr x27, [sp, #STACK_LOC_0] // ...........................................*................................................................................................................................................... + // ldr x27, [sp, #MLK_STACK_LOC_0] // ...........................................*................................................................................................................................................... // bic x25, x17, x2, ror #5 // ............................................*.................................................................................................................................................. // eor x9, x9, x29 // .............................................*................................................................................................................................................. // eor x23, x25, x5, ror #52 // ..............................................*................................................................................................................................................ @@ -1336,12 +1336,12 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_scalar_hybrid_asm) // eor x12, x15, x12, ror #58 // ...................................................................*........................................................................................................................... // eor x15, x5, x27, ror #27 // ....................................................................*.......................................................................................................................... // eor x5, x20, x11, ror #41 // .....................................................................*......................................................................................................................... - // ldr x11, [sp, #STACK_OFFSET_CONST_SCALAR] // ......................................................................*........................................................................................................................ + // ldr x11, [sp, #MLK_STACK_OFFSET_CONST_SCALAR] // ......................................................................*........................................................................................................................ // eor x20, x17, x4, ror #21 // ......................................................................*........................................................................................................................ // eor x17, x24, x9, ror #47 // .......................................................................*....................................................................................................................... // mov x24, #1 // ........................................................................*...................................................................................................................... // bic x9, x0, x16, ror #9 // .........................................................................*..................................................................................................................... - // str x24, [sp, #STACK_OFFSET_COUNT] // ..........................................................................*.................................................................................................................... + // str x24, [sp, #MLK_STACK_OFFSET_COUNT] // ..........................................................................*.................................................................................................................... // bic x24, x29, x1, ror #44 // ...........................................................................*................................................................................................................... // bic x27, x1, x21, ror #50 // ............................................................................*.................................................................................................................. // bic x4, x26, x29, ror #63 // .............................................................................*................................................................................................................. @@ -1382,7 +1382,7 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_scalar_hybrid_asm) // ror x26, x26, #58 // ...........................................................................................................*................................................................................... // eor x16, x30, x16 // ............................................................................................................*.................................................................................. // eor x28, x30, x28, ror #63 // .............................................................................................................*................................................................................. - // str x28, [sp, #STACK_LOC_0] // ..............................................................................................................*................................................................................ + // str x28, [sp, #MLK_STACK_LOC_0] // ..............................................................................................................*................................................................................ // eor x29, x29, x17, ror #36 // ..............................................................................................................*................................................................................ // eor x28, x1, x2, ror #61 // ...............................................................................................................*............................................................................... // eor x19, x30, x19, ror #37 // ................................................................................................................*.............................................................................. @@ -1436,9 +1436,9 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_scalar_hybrid_asm) // eor x16, x21, x19, ror #43 // ..........................................................................................................................................................*.................................... // eor x21, x17, x25, ror #30 // ...........................................................................................................................................................*................................... // bic x19, x25, x19, ror #57 // ............................................................................................................................................................*.................................. - // ldr x25, [sp, #STACK_OFFSET_COUNT] // .............................................................................................................................................................*................................. + // ldr x25, [sp, #MLK_STACK_OFFSET_COUNT] // .............................................................................................................................................................*................................. // eor x17, x10, x9, ror #47 // .............................................................................................................................................................*................................. - // ldr x9, [sp, #STACK_OFFSET_CONST_SCALAR] // ..............................................................................................................................................................*................................ + // ldr x9, [sp, #MLK_STACK_OFFSET_CONST_SCALAR] // ..............................................................................................................................................................*................................ // eor x15, x20, x28, ror #27 // ...............................................................................................................................................................*............................... // bic x20, x4, x28, ror #2 // ................................................................................................................................................................*.............................. // eor x10, x20, x1, ror #50 // .................................................................................................................................................................*............................. @@ -1447,10 +1447,10 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_scalar_hybrid_asm) // bic x4, x28, x1, ror #48 // ....................................................................................................................................................................*.......................... // bic x1, x1, x11, ror #57 // ....................................................................................................................................................................*.......................... // ldr x28, [x9, x25, LSL #3] // .....................................................................................................................................................................*......................... - // ldr x9, [sp, #STACK_LOC_0] // ......................................................................................................................................................................*........................ + // ldr x9, [sp, #MLK_STACK_LOC_0] // ......................................................................................................................................................................*........................ // add x25, x25, #1 // .......................................................................................................................................................................*....................... - // str x25, [sp, #STACK_OFFSET_COUNT] // ........................................................................................................................................................................*...................... - // cmp x25, #(KECCAK_F1600_ROUNDS-1) // .........................................................................................................................................................................*..................... + // str x25, [sp, #MLK_STACK_OFFSET_COUNT] // ........................................................................................................................................................................*...................... + // cmp x25, #(MLK_KECCAK_F1600_ROUNDS-1) // .........................................................................................................................................................................*..................... // eor x25, x1, x27, ror #53 // ..........................................................................................................................................................................*.................... // bic x27, x30, x26, ror #47 // ..........................................................................................................................................................................*.................... // eor x1, x5, x28 // ...........................................................................................................................................................................*................... @@ -1583,9 +1583,9 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_scalar_hybrid_asm) // eor v31.16b, v6.16b, v25.16b // ...........................................................................................................................*................................................................... // shl v27.2d, v31.2d, #(64-20) // .............................................................................................................................*................................................................. // sri v27.2d, v31.2d, #(20) // ..............................................................................................................................*................................................................ - // ldr x30, [sp, #STACK_OFFSET_CONST_VECTOR] // ............................................................................................................................................................................................*.. + // ldr x30, [sp, #MLK_STACK_OFFSET_CONST_VECTOR] // ............................................................................................................................................................................................*.. // ld1r {v28.2d}, [x30], #8 // .............................................................................................................................................................................................*. - // str x30, [sp, #STACK_OFFSET_CONST_VECTOR] // .............................................................................................................................................................................................*. + // str x30, [sp, #MLK_STACK_OFFSET_CONST_VECTOR] // .............................................................................................................................................................................................*. // bic v31.16b, v7.16b, v11.16b // ...............................................................................................................................*............................................................... // eor v5.16b, v31.16b, v10.16b // ................................................................................................................................*.............................................................. // bic v31.16b, v8.16b, v7.16b // .................................................................................................................................*............................................................. @@ -1679,7 +1679,7 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_scalar_hybrid_asm) eor x16, x30, x16 // ...............*................................................................................................................................................................................. // @slothy:interleaving_class=0 eor v27.16B, v27.16B, v13.16B // ...............*................................................................................................................................................................................. // @slothy:interleaving_class=1 eor x28, x30, x28, ror #63 // ................*................................................................................................................................................................................ // @slothy:interleaving_class=0 - str x28, [sp, #STACK_LOC_0] // ................*................................................................................................................................................................................ // @slothy:writes=stack_0 // @slothy:interleaving_class=0 + str x28, [sp, #MLK_STACK_LOC_0] // ................*................................................................................................................................................................................ // @slothy:writes=stack_0 // @slothy:interleaving_class=0 eor v27.16B, v27.16B, v18.16B // .................*............................................................................................................................................................................... // @slothy:interleaving_class=1 eor x29, x29, x17, ror #36 // .................*............................................................................................................................................................................... // @slothy:interleaving_class=0 eor v27.16B, v27.16B, v23.16B // ..................*.............................................................................................................................................................................. // @slothy:interleaving_class=1 @@ -1772,11 +1772,11 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_scalar_hybrid_asm) eor x21, x17, x25, ror #30 // .............................................................*................................................................................................................................... // @slothy:interleaving_class=0 shl v23.2D, v31.2D, #(64-23) // ..............................................................*.................................................................................................................................. // @slothy:interleaving_class=1 bic x19, x25, x19, ror #57 // ..............................................................*.................................................................................................................................. // @slothy:interleaving_class=0 - ldr x25, [sp, #STACK_OFFSET_COUNT] // ...............................................................*................................................................................................................................. // @slothy:reads=STACK_OFFSET_COUNT // @slothy:interleaving_class=0 + ldr x25, [sp, #MLK_STACK_OFFSET_COUNT] // ...............................................................*................................................................................................................................. // @slothy:reads=MLK_STACK_OFFSET_COUNT // @slothy:interleaving_class=0 sri v23.2D, v31.2D, #(23) // ...............................................................*................................................................................................................................. // @slothy:interleaving_class=1 eor x17, x10, x9, ror #47 // ................................................................*................................................................................................................................ // @slothy:interleaving_class=0 eor v31.16B, v1.16B, v25.16B // ................................................................*................................................................................................................................ // @slothy:interleaving_class=1 - ldr x9, [sp, #STACK_OFFSET_CONST_SCALAR] // .................................................................*............................................................................................................................... // @slothy:interleaving_class=0 + ldr x9, [sp, #MLK_STACK_OFFSET_CONST_SCALAR] // .................................................................*............................................................................................................................... // @slothy:interleaving_class=0 eor x15, x20, x28, ror #27 // .................................................................*............................................................................................................................... // @slothy:interleaving_class=0 shl v15.2D, v31.2D, #(64-63) // ..................................................................*.............................................................................................................................. // @slothy:interleaving_class=1 bic x20, x4, x28, ror #2 // ..................................................................*.............................................................................................................................. // @slothy:interleaving_class=0 @@ -1791,12 +1791,12 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_scalar_hybrid_asm) bic x1, x1, x11, ror #57 // .......................................................................*......................................................................................................................... // @slothy:interleaving_class=0 eor v31.16B, v16.16B, v25.16B // .......................................................................*......................................................................................................................... // @slothy:interleaving_class=1 ldr x28, [x9, x25, LSL #3] // ........................................................................*........................................................................................................................ // @slothy:interleaving_class=0 - ldr x9, [sp, #STACK_LOC_0] // ........................................................................*........................................................................................................................ // @slothy:reads=stack_0 // @slothy:interleaving_class=0 + ldr x9, [sp, #MLK_STACK_LOC_0] // ........................................................................*........................................................................................................................ // @slothy:reads=stack_0 // @slothy:interleaving_class=0 shl v8.2D, v31.2D, #(64-19) // .........................................................................*....................................................................................................................... // @slothy:interleaving_class=1 add x25, x25, #1 // .........................................................................*....................................................................................................................... // @slothy:interleaving_class=0 sri v8.2D, v31.2D, #(19) // ..........................................................................*...................................................................................................................... // @slothy:interleaving_class=1 - str x25, [sp, #STACK_OFFSET_COUNT] // ..........................................................................*...................................................................................................................... // @slothy:writes=STACK_OFFSET_COUNT // @slothy:interleaving_class=0 - cmp x25, #(KECCAK_F1600_ROUNDS-1) // ...........................................................................*..................................................................................................................... // @slothy:ignore_useless_output // @slothy:interleaving_class=0 + str x25, [sp, #MLK_STACK_OFFSET_COUNT] // ..........................................................................*...................................................................................................................... // @slothy:writes=MLK_STACK_OFFSET_COUNT // @slothy:interleaving_class=0 + cmp x25, #(MLK_KECCAK_F1600_ROUNDS-1) // ...........................................................................*..................................................................................................................... // @slothy:ignore_useless_output // @slothy:interleaving_class=0 eor v31.16B, v7.16B, v29.16B // ...........................................................................*..................................................................................................................... // @slothy:interleaving_class=1 eor x25, x1, x27, ror #53 // ............................................................................*.................................................................................................................... // @slothy:interleaving_class=0 shl v16.2D, v31.2D, #(64-58) // ............................................................................*.................................................................................................................... // @slothy:interleaving_class=1 @@ -1872,7 +1872,7 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_scalar_hybrid_asm) eor x16, x30, x16 // ...............................................................................................................*................................................................................. // @slothy:interleaving_class=0 shl v20.2D, v31.2D, #(64-37) // ................................................................................................................*................................................................................ // @slothy:interleaving_class=1 eor x28, x30, x28, ror #63 // ................................................................................................................*................................................................................ // @slothy:interleaving_class=0 - str x28, [sp, #STACK_LOC_0] // .................................................................................................................*............................................................................... // @slothy:writes=stack_0 // @slothy:interleaving_class=0 + str x28, [sp, #MLK_STACK_LOC_0] // .................................................................................................................*............................................................................... // @slothy:writes=stack_0 // @slothy:interleaving_class=0 sri v20.2D, v31.2D, #(37) // .................................................................................................................*............................................................................... // @slothy:interleaving_class=1 eor x29, x29, x17, ror #36 // ..................................................................................................................*.............................................................................. // @slothy:interleaving_class=0 eor v31.16B, v24.16B, v27.16B // ..................................................................................................................*.............................................................................. // @slothy:interleaving_class=1 @@ -1965,11 +1965,11 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_scalar_hybrid_asm) eor x21, x17, x25, ror #30 // ..............................................................................................................................................................*.................................. // @slothy:interleaving_class=0 eor v17.16B, v31.16B, v17.16B // ..............................................................................................................................................................*.................................. // @slothy:interleaving_class=1 bic x19, x25, x19, ror #57 // ...............................................................................................................................................................*................................. // @slothy:interleaving_class=0 - ldr x25, [sp, #STACK_OFFSET_COUNT] // ...............................................................................................................................................................*................................. // @slothy:reads=STACK_OFFSET_COUNT // @slothy:interleaving_class=0 + ldr x25, [sp, #MLK_STACK_OFFSET_COUNT] // ...............................................................................................................................................................*................................. // @slothy:reads=MLK_STACK_OFFSET_COUNT // @slothy:interleaving_class=0 bic v31.16B, v20.16B, v19.16B // ................................................................................................................................................................*................................ // @slothy:interleaving_class=1 eor x17, x10, x9, ror #47 // ................................................................................................................................................................*................................ // @slothy:interleaving_class=0 eor v18.16B, v31.16B, v18.16B // .................................................................................................................................................................*............................... // @slothy:interleaving_class=1 - ldr x9, [sp, #STACK_OFFSET_CONST_SCALAR] // .................................................................................................................................................................*............................... // @slothy:interleaving_class=0 + ldr x9, [sp, #MLK_STACK_OFFSET_CONST_SCALAR] // .................................................................................................................................................................*............................... // @slothy:interleaving_class=0 eor x15, x20, x28, ror #27 // ..................................................................................................................................................................*.............................. // @slothy:interleaving_class=0 bic v31.16B, v21.16B, v20.16B // ..................................................................................................................................................................*.............................. // @slothy:interleaving_class=1 bic x20, x4, x28, ror #2 // ...................................................................................................................................................................*............................. // @slothy:interleaving_class=0 @@ -1984,12 +1984,12 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_scalar_hybrid_asm) bic x1, x1, x11, ror #57 // .......................................................................................................................................................................*......................... // @slothy:interleaving_class=0 eor v21.16B, v31.16B, v1.16B // ........................................................................................................................................................................*........................ // @slothy:interleaving_class=1 ldr x28, [x9, x25, LSL #3] // ........................................................................................................................................................................*........................ // @slothy:interleaving_class=0 - ldr x9, [sp, #STACK_LOC_0] // .........................................................................................................................................................................*....................... // @slothy:reads=stack_0 // @slothy:interleaving_class=0 + ldr x9, [sp, #MLK_STACK_LOC_0] // .........................................................................................................................................................................*....................... // @slothy:reads=stack_0 // @slothy:interleaving_class=0 bic v31.16B, v24.16B, v23.16B // .........................................................................................................................................................................*....................... // @slothy:interleaving_class=1 add x25, x25, #1 // ..........................................................................................................................................................................*...................... // @slothy:interleaving_class=0 eor v22.16B, v31.16B, v22.16B // ..........................................................................................................................................................................*...................... // @slothy:interleaving_class=1 - str x25, [sp, #STACK_OFFSET_COUNT] // ...........................................................................................................................................................................*..................... // @slothy:writes=STACK_OFFSET_COUNT // @slothy:interleaving_class=0 - cmp x25, #(KECCAK_F1600_ROUNDS-1) // ...........................................................................................................................................................................*..................... // @slothy:ignore_useless_output // @slothy:interleaving_class=0 + str x25, [sp, #MLK_STACK_OFFSET_COUNT] // ...........................................................................................................................................................................*..................... // @slothy:writes=MLK_STACK_OFFSET_COUNT // @slothy:interleaving_class=0 + cmp x25, #(MLK_KECCAK_F1600_ROUNDS-1) // ...........................................................................................................................................................................*..................... // @slothy:ignore_useless_output // @slothy:interleaving_class=0 bic v31.16B, v0.16B, v24.16B // ............................................................................................................................................................................*.................... // @slothy:interleaving_class=1 eor x25, x1, x27, ror #53 // ............................................................................................................................................................................*.................... // @slothy:interleaving_class=0 eor v23.16B, v31.16B, v23.16B // .............................................................................................................................................................................*................... // @slothy:interleaving_class=1 @@ -2028,9 +2028,9 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_scalar_hybrid_asm) bic x3, x0, x30, ror #5 // .............................................................................................................................................................................................*... // @slothy:interleaving_class=0 eor x23, x3, x26, ror #52 // ..............................................................................................................................................................................................*.. // @slothy:interleaving_class=0 eor x3, x29, x30, ror #24 // ..............................................................................................................................................................................................*.. // @slothy:interleaving_class=0 - ldr x30, [sp, #STACK_OFFSET_CONST_VECTOR] // ...............................................................................................................................................................................................*. // @slothy:reads=STACK_OFFSET_CONST_VECTOR // @slothy:interleaving_class=1 + ldr x30, [sp, #MLK_STACK_OFFSET_CONST_VECTOR] // ...............................................................................................................................................................................................*. // @slothy:reads=MLK_STACK_OFFSET_CONST_VECTOR // @slothy:interleaving_class=1 ld1r {v28.2D}, [x30], #8 // ...............................................................................................................................................................................................*. // @slothy:interleaving_class=1 - str x30, [sp, #STACK_OFFSET_CONST_VECTOR] // ................................................................................................................................................................................................* // @slothy:writes=STACK_OFFSET_CONST_VECTOR // @slothy:interleaving_class=1 + str x30, [sp, #MLK_STACK_OFFSET_CONST_VECTOR] // ................................................................................................................................................................................................* // @slothy:writes=MLK_STACK_OFFSET_CONST_VECTOR // @slothy:interleaving_class=1 eor v0.16B, v0.16B, v28.16B // ................................................................................................................................................................................................* // @slothy:interleaving_class=1 // --------------------------------------------------------------------------------------- cycle (expected) ---------------------------------------------------------------------------------------> @@ -2055,7 +2055,7 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_scalar_hybrid_asm) // ror x26, x26, #58 // ..............*.................................................................................................................................................................................. // eor x16, x30, x16 // ...............*................................................................................................................................................................................. // eor x28, x30, x28, ror #63 // ................*................................................................................................................................................................................ - // str x28, [sp, #STACK_LOC_0] // ................*................................................................................................................................................................................ + // str x28, [sp, #MLK_STACK_LOC_0] // ................*................................................................................................................................................................................ // eor x29, x29, x17, ror #36 // .................*............................................................................................................................................................................... // eor x28, x1, x2, ror #61 // ..................*.............................................................................................................................................................................. // eor x19, x30, x19, ror #37 // ...................*............................................................................................................................................................................. @@ -2109,9 +2109,9 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_scalar_hybrid_asm) // eor x16, x21, x19, ror #43 // ............................................................*.................................................................................................................................... // eor x21, x17, x25, ror #30 // .............................................................*................................................................................................................................... // bic x19, x25, x19, ror #57 // ..............................................................*.................................................................................................................................. - // ldr x25, [sp, #STACK_OFFSET_COUNT] // ...............................................................*................................................................................................................................. + // ldr x25, [sp, #MLK_STACK_OFFSET_COUNT] // ...............................................................*................................................................................................................................. // eor x17, x10, x9, ror #47 // ................................................................*................................................................................................................................ - // ldr x9, [sp, #STACK_OFFSET_CONST_SCALAR] // .................................................................*............................................................................................................................... + // ldr x9, [sp, #MLK_STACK_OFFSET_CONST_SCALAR] // .................................................................*............................................................................................................................... // eor x15, x20, x28, ror #27 // .................................................................*............................................................................................................................... // bic x20, x4, x28, ror #2 // ..................................................................*.............................................................................................................................. // eor x10, x20, x1, ror #50 // ...................................................................*............................................................................................................................. @@ -2120,10 +2120,10 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_scalar_hybrid_asm) // bic x4, x28, x1, ror #48 // ......................................................................*.......................................................................................................................... // bic x1, x1, x11, ror #57 // .......................................................................*......................................................................................................................... // ldr x28, [x9, x25, LSL #3] // ........................................................................*........................................................................................................................ - // ldr x9, [sp, #STACK_LOC_0] // ........................................................................*........................................................................................................................ + // ldr x9, [sp, #MLK_STACK_LOC_0] // ........................................................................*........................................................................................................................ // add x25, x25, #1 // .........................................................................*....................................................................................................................... - // str x25, [sp, #STACK_OFFSET_COUNT] // ..........................................................................*...................................................................................................................... - // cmp x25, #(KECCAK_F1600_ROUNDS-1) // ...........................................................................*..................................................................................................................... + // str x25, [sp, #MLK_STACK_OFFSET_COUNT] // ..........................................................................*...................................................................................................................... + // cmp x25, #(MLK_KECCAK_F1600_ROUNDS-1) // ...........................................................................*..................................................................................................................... // eor x25, x1, x27, ror #53 // ............................................................................*.................................................................................................................... // bic x27, x30, x26, ror #47 // .............................................................................*................................................................................................................... // eor x1, x5, x28 // ..............................................................................*.................................................................................................................. @@ -2167,7 +2167,7 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_scalar_hybrid_asm) // ror x26, x26, #58 // ..............................................................................................................*.................................................................................. // eor x16, x30, x16 // ...............................................................................................................*................................................................................. // eor x28, x30, x28, ror #63 // ................................................................................................................*................................................................................ - // str x28, [sp, #STACK_LOC_0] // .................................................................................................................*............................................................................... + // str x28, [sp, #MLK_STACK_LOC_0] // .................................................................................................................*............................................................................... // eor x29, x29, x17, ror #36 // ..................................................................................................................*.............................................................................. // eor x28, x1, x2, ror #61 // ...................................................................................................................*............................................................................. // eor x19, x30, x19, ror #37 // ...................................................................................................................*............................................................................. @@ -2221,9 +2221,9 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_scalar_hybrid_asm) // eor x16, x21, x19, ror #43 // .............................................................................................................................................................*................................... // eor x21, x17, x25, ror #30 // ..............................................................................................................................................................*.................................. // bic x19, x25, x19, ror #57 // ...............................................................................................................................................................*................................. - // ldr x25, [sp, #STACK_OFFSET_COUNT] // ...............................................................................................................................................................*................................. + // ldr x25, [sp, #MLK_STACK_OFFSET_COUNT] // ...............................................................................................................................................................*................................. // eor x17, x10, x9, ror #47 // ................................................................................................................................................................*................................ - // ldr x9, [sp, #STACK_OFFSET_CONST_SCALAR] // .................................................................................................................................................................*............................... + // ldr x9, [sp, #MLK_STACK_OFFSET_CONST_SCALAR] // .................................................................................................................................................................*............................... // eor x15, x20, x28, ror #27 // ..................................................................................................................................................................*.............................. // bic x20, x4, x28, ror #2 // ...................................................................................................................................................................*............................. // eor x10, x20, x1, ror #50 // ....................................................................................................................................................................*............................ @@ -2232,10 +2232,10 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_scalar_hybrid_asm) // bic x4, x28, x1, ror #48 // ......................................................................................................................................................................*.......................... // bic x1, x1, x11, ror #57 // .......................................................................................................................................................................*......................... // ldr x28, [x9, x25, LSL #3] // ........................................................................................................................................................................*........................ - // ldr x9, [sp, #STACK_LOC_0] // .........................................................................................................................................................................*....................... + // ldr x9, [sp, #MLK_STACK_LOC_0] // .........................................................................................................................................................................*....................... // add x25, x25, #1 // ..........................................................................................................................................................................*...................... - // str x25, [sp, #STACK_OFFSET_COUNT] // ...........................................................................................................................................................................*..................... - // cmp x25, #(KECCAK_F1600_ROUNDS-1) // ...........................................................................................................................................................................*..................... + // str x25, [sp, #MLK_STACK_OFFSET_COUNT] // ...........................................................................................................................................................................*..................... + // cmp x25, #(MLK_KECCAK_F1600_ROUNDS-1) // ...........................................................................................................................................................................*..................... // eor x25, x1, x27, ror #53 // ............................................................................................................................................................................*.................... // bic x27, x30, x26, ror #47 // .............................................................................................................................................................................*................... // eor x1, x5, x28 // ..............................................................................................................................................................................*.................. @@ -2368,9 +2368,9 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_scalar_hybrid_asm) // eor v31.16b, v6.16b, v25.16b // .............................................................................................................................*................................................................... // shl v27.2d, v31.2d, #(64-20) // ..............................................................................................................................*.................................................................. // sri v27.2d, v31.2d, #(20) // ...............................................................................................................................*................................................................. - // ldr x30, [sp, #STACK_OFFSET_CONST_VECTOR] // ...............................................................................................................................................................................................*. + // ldr x30, [sp, #MLK_STACK_OFFSET_CONST_VECTOR] // ...............................................................................................................................................................................................*. // ld1r {v28.2d}, [x30], #8 // ...............................................................................................................................................................................................*. - // str x30, [sp, #STACK_OFFSET_CONST_VECTOR] // ................................................................................................................................................................................................* + // str x30, [sp, #MLK_STACK_OFFSET_CONST_VECTOR] // ................................................................................................................................................................................................* // bic v31.16b, v7.16b, v11.16b // .................................................................................................................................*............................................................... // eor v5.16b, v31.16b, v10.16b // ..................................................................................................................................*.............................................................. // bic v31.16b, v8.16b, v7.16b // ...................................................................................................................................*............................................................. @@ -2429,22 +2429,22 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_scalar_hybrid_asm) final_scalar_rotate // Read outer loop flag: We repeat the above twice - ldr outer, [sp, #STACK_OFFSET_OUTER] // @slothy:reads=STACK_OFFSET_OUTER + ldr outer, [sp, #MLK_STACK_OFFSET_OUTER] // @slothy:reads=MLK_STACK_OFFSET_OUTER cmp outer, #1 beq keccak_f1600_x4_v8a_scalar_hybrid_done // Update outer loop flag mov outer, #1 - str outer, [sp, #STACK_OFFSET_OUTER] // @slothy:writes=STACK_OFFSET_OUTER + str outer, [sp, #MLK_STACK_OFFSET_OUTER] // @slothy:writes=MLK_STACK_OFFSET_OUTER - ldr input_addr, [sp, #STACK_OFFSET_INPUT] // @slothy:reads=STACK_OFFSET_INPUT + ldr input_addr, [sp, #MLK_STACK_OFFSET_INPUT] // @slothy:reads=MLK_STACK_OFFSET_INPUT store_input_scalar 0 // Store first scalar data load_input_scalar 1 // Load second scalar input b keccak_f1600_x4_v8a_scalar_hybrid_initial keccak_f1600_x4_v8a_scalar_hybrid_done: - ldr input_addr, [sp, #STACK_OFFSET_INPUT] // @slothy:reads=STACK_OFFSET_INPUT + ldr input_addr, [sp, #MLK_STACK_OFFSET_INPUT] // @slothy:reads=MLK_STACK_OFFSET_INPUT store_input_scalar 1 store_input_vector @@ -2561,19 +2561,19 @@ keccak_f1600_x4_v8a_scalar_hybrid_done: /* To facilitate single-compilation-unit (SCU) builds, undefine all macros. * Don't modify by hand -- this is auto-generated by scripts/autogen. */ -#undef KECCAK_F1600_ROUNDS -#undef STACK_LOCS -#undef STACK_SIZE -#undef STACK_BASE_GPRS -#undef STACK_BASE_VREGS -#undef STACK_OFFSET_LOCS -#undef STACK_OFFSET_INPUT -#undef STACK_OFFSET_CONST_SCALAR -#undef STACK_OFFSET_CONST_VECTOR -#undef STACK_OFFSET_COUNT -#undef STACK_OFFSET_OUTER -#undef STACK_LOC_0 -#undef STACK_LOC_1 +#undef MLK_KECCAK_F1600_ROUNDS +#undef MLK_STACK_LOCS +#undef MLK_STACK_SIZE +#undef MLK_STACK_BASE_GPRS +#undef MLK_STACK_BASE_VREGS +#undef MLK_STACK_OFFSET_LOCS +#undef MLK_STACK_OFFSET_INPUT +#undef MLK_STACK_OFFSET_CONST_SCALAR +#undef MLK_STACK_OFFSET_CONST_VECTOR +#undef MLK_STACK_OFFSET_COUNT +#undef MLK_STACK_OFFSET_OUTER +#undef MLK_STACK_LOC_0 +#undef MLK_STACK_LOC_1 /* simpasm: footer-start */ #endif /* MLK_FIPS202_AARCH64_NEED_X4_V8A_SCALAR_HYBRID && \ diff --git a/dev/fips202/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S b/dev/fips202/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S index 3b6c4298b4..b0bf6f7e30 100644 --- a/dev/fips202/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S +++ b/dev/fips202/aarch64/src/keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S @@ -37,7 +37,7 @@ #if defined(__ARM_FEATURE_SHA3) /* simpasm: header-end */ -#define KECCAK_F1600_ROUNDS 24 +#define MLK_KECCAK_F1600_ROUNDS 24 /****************** REGISTER ALLOCATIONS *******************/ @@ -294,60 +294,60 @@ sub input_addr, input_addr, #((2 + \idx)*25*8) .endm -#define STACK_LOCS 2 +#define MLK_STACK_LOCS 2 -#define STACK_SIZE (16*6 + 8*8 + 6*8 + (STACK_LOCS) * 8) -#define STACK_BASE_GPRS (6*8) -#define STACK_BASE_VREGS (6*8 + 16*6) -#define STACK_OFFSET_LOCS (16*6 + 8*8 + 6*8) +#define MLK_STACK_SIZE (16*6 + 8*8 + 6*8 + (MLK_STACK_LOCS) * 8) +#define MLK_STACK_BASE_GPRS (6*8) +#define MLK_STACK_BASE_VREGS (6*8 + 16*6) +#define MLK_STACK_OFFSET_LOCS (16*6 + 8*8 + 6*8) -#define STACK_OFFSET_INPUT (0*8) -#define STACK_OFFSET_CONST_SCALAR (1*8) -#define STACK_OFFSET_CONST_VECTOR (2*8) -#define STACK_OFFSET_COUNT (3*8) -#define STACK_OFFSET_OUTER (4*8) +#define MLK_STACK_OFFSET_INPUT (0*8) +#define MLK_STACK_OFFSET_CONST_SCALAR (1*8) +#define MLK_STACK_OFFSET_CONST_VECTOR (2*8) +#define MLK_STACK_OFFSET_COUNT (3*8) +#define MLK_STACK_OFFSET_OUTER (4*8) -#define STACK_LOC_0 ((STACK_OFFSET_LOCS) + 0*8) -#define STACK_LOC_1 ((STACK_OFFSET_LOCS) + 1*8) +#define MLK_STACK_LOC_0 ((MLK_STACK_OFFSET_LOCS) + 0*8) +#define MLK_STACK_LOC_1 ((MLK_STACK_OFFSET_LOCS) + 1*8) .macro save_gprs - stp x19, x20, [sp, #(STACK_BASE_GPRS + 16*0)] - stp x21, x22, [sp, #(STACK_BASE_GPRS + 16*1)] - stp x23, x24, [sp, #(STACK_BASE_GPRS + 16*2)] - stp x25, x26, [sp, #(STACK_BASE_GPRS + 16*3)] - stp x27, x28, [sp, #(STACK_BASE_GPRS + 16*4)] - stp x29, x30, [sp, #(STACK_BASE_GPRS + 16*5)] + stp x19, x20, [sp, #(MLK_STACK_BASE_GPRS + 16*0)] + stp x21, x22, [sp, #(MLK_STACK_BASE_GPRS + 16*1)] + stp x23, x24, [sp, #(MLK_STACK_BASE_GPRS + 16*2)] + stp x25, x26, [sp, #(MLK_STACK_BASE_GPRS + 16*3)] + stp x27, x28, [sp, #(MLK_STACK_BASE_GPRS + 16*4)] + stp x29, x30, [sp, #(MLK_STACK_BASE_GPRS + 16*5)] .endm .macro restore_gprs - ldp x19, x20, [sp, #(STACK_BASE_GPRS + 16*0)] - ldp x21, x22, [sp, #(STACK_BASE_GPRS + 16*1)] - ldp x23, x24, [sp, #(STACK_BASE_GPRS + 16*2)] - ldp x25, x26, [sp, #(STACK_BASE_GPRS + 16*3)] - ldp x27, x28, [sp, #(STACK_BASE_GPRS + 16*4)] - ldp x29, x30, [sp, #(STACK_BASE_GPRS + 16*5)] + ldp x19, x20, [sp, #(MLK_STACK_BASE_GPRS + 16*0)] + ldp x21, x22, [sp, #(MLK_STACK_BASE_GPRS + 16*1)] + ldp x23, x24, [sp, #(MLK_STACK_BASE_GPRS + 16*2)] + ldp x25, x26, [sp, #(MLK_STACK_BASE_GPRS + 16*3)] + ldp x27, x28, [sp, #(MLK_STACK_BASE_GPRS + 16*4)] + ldp x29, x30, [sp, #(MLK_STACK_BASE_GPRS + 16*5)] .endm .macro save_vregs - stp d8, d9, [sp,#(STACK_BASE_VREGS+0*16)] - stp d10, d11, [sp,#(STACK_BASE_VREGS+1*16)] - stp d12, d13, [sp,#(STACK_BASE_VREGS+2*16)] - stp d14, d15, [sp,#(STACK_BASE_VREGS+3*16)] + stp d8, d9, [sp,#(MLK_STACK_BASE_VREGS+0*16)] + stp d10, d11, [sp,#(MLK_STACK_BASE_VREGS+1*16)] + stp d12, d13, [sp,#(MLK_STACK_BASE_VREGS+2*16)] + stp d14, d15, [sp,#(MLK_STACK_BASE_VREGS+3*16)] .endm .macro restore_vregs - ldp d8, d9, [sp,#(STACK_BASE_VREGS+0*16)] - ldp d10, d11, [sp,#(STACK_BASE_VREGS+1*16)] - ldp d12, d13, [sp,#(STACK_BASE_VREGS+2*16)] - ldp d14, d15, [sp,#(STACK_BASE_VREGS+3*16)] + ldp d8, d9, [sp,#(MLK_STACK_BASE_VREGS+0*16)] + ldp d10, d11, [sp,#(MLK_STACK_BASE_VREGS+1*16)] + ldp d12, d13, [sp,#(MLK_STACK_BASE_VREGS+2*16)] + ldp d14, d15, [sp,#(MLK_STACK_BASE_VREGS+3*16)] .endm .macro alloc_stack - sub sp, sp, #(STACK_SIZE) + sub sp, sp, #(MLK_STACK_SIZE) .endm .macro free_stack - add sp, sp, #(STACK_SIZE) + add sp, sp, #(MLK_STACK_SIZE) .endm .macro eor5 dst, src0, src1, src2, src3, src4 @@ -396,7 +396,7 @@ eor x30, x30, x29, ror #63 // .........*............................................ eor x22, x22, x30 // ..........*........................................... eor x23, x23, x30 // ..........*........................................... - str x23, [sp, #STACK_LOC_0] // ...........*.......................................... // @slothy:writes=stack_0 + str x23, [sp, #MLK_STACK_LOC_0] // ...........*.......................................... // @slothy:writes=stack_0 eor x23, x14, x15 // ...........*.......................................... eor x14, x14, x0 // ............*......................................... eor x23, x23, x11 // ............*......................................... @@ -424,7 +424,7 @@ eor x12, x3, x27 // ........................*............................. bic x3, x13, x17, ror #19 // ........................*............................. eor x5, x5, x27 // .........................*............................ - ldr x27, [sp, #STACK_LOC_0] // .........................*............................ // @slothy:reads=stack_0 + ldr x27, [sp, #MLK_STACK_LOC_0] // .........................*............................ // @slothy:reads=stack_0 bic x25, x17, x2, ror #5 // ..........................*........................... eor x9, x9, x29 // ..........................*........................... eor x23, x25, x5, ror #52 // ...........................*.......................... @@ -454,12 +454,12 @@ eor x12, x15, x12, ror #58 // .......................................*.............. eor x15, x5, x27, ror #27 // .......................................*.............. eor x5, x20, x11, ror #41 // ........................................*............. - ldr x11, [sp, #STACK_OFFSET_CONST_SCALAR] // ........................................*............. + ldr x11, [sp, #MLK_STACK_OFFSET_CONST_SCALAR] // ........................................*............. eor x20, x17, x4, ror #21 // .........................................*............ eor x17, x24, x9, ror #47 // .........................................*............ mov x24, #1 // ..........................................*........... bic x9, x0, x16, ror #9 // ..........................................*........... - str x24, [sp, #STACK_OFFSET_COUNT] // ...........................................*.......... // @slothy:writes=STACK_OFFSET_COUNT + str x24, [sp, #MLK_STACK_OFFSET_COUNT] // ...........................................*.......... // @slothy:writes=MLK_STACK_OFFSET_COUNT bic x24, x29, x1, ror #44 // ...........................................*.......... bic x27, x1, x21, ror #50 // ............................................*......... bic x4, x26, x29, ror #63 // ............................................*......... @@ -520,10 +520,10 @@ // eor X, sAga, X // eor X, sAge, X - // ldr X, [sp, #STACK_OFFSET_CONST_SCALAR] + // ldr X, [sp, #MLK_STACK_OFFSET_CONST_SCALAR] // ldr X, [X] // mov X, #1 - // str X, [sp, #STACK_OFFSET_COUNT] // @slothy:writes=STACK_OFFSET_COUNT + // str X, [sp, #MLK_STACK_OFFSET_COUNT] // @slothy:writes=MLK_STACK_OFFSET_COUNT // chi_step_ror sAga, X, X, X, 47, 39 // chi_step_ror sAge, X, X, X, 42, 25 @@ -595,9 +595,9 @@ xar_m0 vAsu_, vAse, E1, 62 xar_m1 vAme_, vAga, E0, 28 xar_m0 vAbe_, vAge, E1, 20 - ldr tmp, [sp, #STACK_OFFSET_CONST_VECTOR] // @slothy:reads=STACK_OFFSET_CONST_VECTOR + ldr tmp, [sp, #MLK_STACK_OFFSET_CONST_VECTOR] // @slothy:reads=MLK_STACK_OFFSET_CONST_VECTOR ld1r {v28.2d}, [tmp], #8 - str tmp, [sp, #STACK_OFFSET_CONST_VECTOR] // @slothy:writes=STACK_OFFSET_CONST_VECTOR + str tmp, [sp, #MLK_STACK_OFFSET_CONST_VECTOR] // @slothy:writes=MLK_STACK_OFFSET_CONST_VECTOR bcax_m1 vAga, vAga_, vAgi_, vAge_ bcax_m0 vAge, vAge_, vAgo_, vAgi_ bcax_m1 vAgi, vAgi_, vAgu_, vAgo_ @@ -646,7 +646,7 @@ ror x26, x26, #58 // ........*................................................ eor x16, x30, x16 // .........*............................................... eor x28, x30, x28, ror #63 // .........*............................................... - str x28, [sp, #STACK_LOC_0] // ..........*.............................................. // @slothy:writes=stack_0 + str x28, [sp, #MLK_STACK_LOC_0] // ..........*.............................................. // @slothy:writes=stack_0 eor x29, x29, x17, ror #36 // ..........*.............................................. eor x28, x1, x2, ror #61 // ...........*............................................. eor x19, x30, x19, ror #37 // ...........*............................................. @@ -700,9 +700,9 @@ eor x16, x21, x19, ror #43 // ....................................*.................... eor x21, x17, x25, ror #30 // ....................................*.................... bic x19, x25, x19, ror #57 // .....................................*................... - ldr x25, [sp, #STACK_OFFSET_COUNT] // .....................................*................... // @slothy:reads=STACK_OFFSET_COUNT + ldr x25, [sp, #MLK_STACK_OFFSET_COUNT] // .....................................*................... // @slothy:reads=MLK_STACK_OFFSET_COUNT eor x17, x10, x9, ror #47 // ......................................*.................. - ldr x9, [sp, #STACK_OFFSET_CONST_SCALAR] // ......................................*.................. + ldr x9, [sp, #MLK_STACK_OFFSET_CONST_SCALAR] // ......................................*.................. eor x15, x20, x28, ror #27 // .......................................*................. bic x20, x4, x28, ror #2 // .......................................*................. eor x10, x20, x1, ror #50 // ........................................*................ @@ -711,10 +711,10 @@ bic x4, x28, x1, ror #48 // .........................................*............... bic x1, x1, x11, ror #57 // ..........................................*.............. ldr x28, [x9, x25, LSL #3] // ..........................................*.............. - ldr x9, [sp, #STACK_LOC_0] // ...........................................*............. // @slothy:reads=stack_0 + ldr x9, [sp, #MLK_STACK_LOC_0] // ...........................................*............. // @slothy:reads=stack_0 add x25, x25, #1 // ...........................................*............. - str x25, [sp, #STACK_OFFSET_COUNT] // ............................................*............ // @slothy:writes=STACK_OFFSET_COUNT - cmp x25, #(KECCAK_F1600_ROUNDS-1) // ............................................*............ // @slothy:ignore_useless_output + str x25, [sp, #MLK_STACK_OFFSET_COUNT] // ............................................*............ // @slothy:writes=MLK_STACK_OFFSET_COUNT + cmp x25, #(MLK_KECCAK_F1600_ROUNDS-1) // ............................................*............ // @slothy:ignore_useless_output eor x25, x1, x27, ror #53 // .............................................*........... bic x27, x30, x26, ror #47 // .............................................*........... eor x1, x5, x28 // ..............................................*.......... @@ -800,12 +800,12 @@ // eor X, X, sAga, ror #61 // eor X, X, sAge, ror #19 - // ldr X, [sp, #STACK_OFFSET_CONST_SCALAR] - // ldr X, [sp, #STACK_OFFSET_COUNT] // @slothy:reads=STACK_OFFSET_COUNT + // ldr X, [sp, #MLK_STACK_OFFSET_CONST_SCALAR] + // ldr X, [sp, #MLK_STACK_OFFSET_COUNT] // @slothy:reads=MLK_STACK_OFFSET_COUNT // ldr X, [X, X, LSL #3] // add X, X, #1 - // cmp X, #(KECCAK_F1600_ROUNDS-1) // @slothy:ignore_useless_output - // str X, [sp, #STACK_OFFSET_COUNT] // @slothy:writes=STACK_OFFSET_COUNT + // cmp X, #(MLK_KECCAK_F1600_ROUNDS-1) // @slothy:ignore_useless_output + // str X, [sp, #MLK_STACK_OFFSET_COUNT] // @slothy:writes=MLK_STACK_OFFSET_COUNT // chi_step_ror sAga, X, X, X, 47, 39 // chi_step_ror sAge, X, X, X, 42, 25 @@ -873,10 +873,10 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm) mov const_addr, input_rc mov outer, #0 - str outer, [sp, #STACK_OFFSET_OUTER] // @slothy:writes=STACK_OFFSET_OUTER - str const_addr, [sp, #STACK_OFFSET_CONST_SCALAR] // @slothy:writes=STACK_OFFSET_CONST_SCALAR - str const_addr, [sp, #STACK_OFFSET_CONST_VECTOR] // @slothy:writes=STACK_OFFSET_CONST_VECTOR - str input_addr, [sp, #STACK_OFFSET_INPUT] // @slothy:writes=STACK_OFFSET_INPUT + str outer, [sp, #MLK_STACK_OFFSET_OUTER] // @slothy:writes=MLK_STACK_OFFSET_OUTER + str const_addr, [sp, #MLK_STACK_OFFSET_CONST_SCALAR] // @slothy:writes=MLK_STACK_OFFSET_CONST_SCALAR + str const_addr, [sp, #MLK_STACK_OFFSET_CONST_VECTOR] // @slothy:writes=MLK_STACK_OFFSET_CONST_VECTOR + str input_addr, [sp, #MLK_STACK_OFFSET_INPUT] // @slothy:writes=MLK_STACK_OFFSET_INPUT load_input_vector // Vector input load_input_scalar 0 // First scalar input @@ -923,7 +923,7 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm) eor x22, x22, x30 // ...............*....................................................................................................................................................... // @slothy:interleaving_class=0 eor v27.16B, v27.16B, v23.16B // ................*...................................................................................................................................................... // @slothy:interleaving_class=1 eor x23, x23, x30 // ................*...................................................................................................................................................... // @slothy:interleaving_class=0 - str x23, [sp, #STACK_LOC_0] // .................*..................................................................................................................................................... // @slothy:writes=stack_0 // @slothy:interleaving_class=0 + str x23, [sp, #MLK_STACK_LOC_0] // .................*..................................................................................................................................................... // @slothy:writes=stack_0 // @slothy:interleaving_class=0 eor3 v26.16B, v4.16B, v9.16B, v14.16B // .................*..................................................................................................................................................... // @slothy:interleaving_class=1 eor x23, x14, x15 // ..................*.................................................................................................................................................... // @slothy:interleaving_class=0 eor x14, x14, x0 // ..................*.................................................................................................................................................... // @slothy:interleaving_class=0 @@ -966,7 +966,7 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm) bic x3, x13, x17, ror #19 // .....................................*................................................................................................................................. // @slothy:interleaving_class=0 eor x5, x5, x27 // .....................................*................................................................................................................................. // @slothy:interleaving_class=0 shl v0.2D, v31.2D, #(64-2) // ......................................*................................................................................................................................ // @slothy:interleaving_class=1 - ldr x27, [sp, #STACK_LOC_0] // ......................................*................................................................................................................................ // @slothy:reads=stack_0 // @slothy:interleaving_class=0 + ldr x27, [sp, #MLK_STACK_LOC_0] // ......................................*................................................................................................................................ // @slothy:reads=stack_0 // @slothy:interleaving_class=0 bic x25, x17, x2, ror #5 // .......................................*............................................................................................................................... // @slothy:interleaving_class=0 sri v0.2D, v31.2D, #(2) // .......................................*............................................................................................................................... // @slothy:interleaving_class=1 eor x9, x9, x29 // ........................................*.............................................................................................................................. // @slothy:interleaving_class=0 @@ -1012,14 +1012,14 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm) eor v31.16B, v16.16B, v25.16B // ............................................................*.......................................................................................................... // @slothy:interleaving_class=1 eor x5, x20, x11, ror #41 // ............................................................*.......................................................................................................... // @slothy:interleaving_class=0 shl v8.2D, v31.2D, #(64-19) // .............................................................*......................................................................................................... // @slothy:interleaving_class=1 - ldr x11, [sp, #STACK_OFFSET_CONST_SCALAR] // .............................................................*......................................................................................................... // @slothy:interleaving_class=0 + ldr x11, [sp, #MLK_STACK_OFFSET_CONST_SCALAR] // .............................................................*......................................................................................................... // @slothy:interleaving_class=0 eor x20, x17, x4, ror #21 // ..............................................................*........................................................................................................ // @slothy:interleaving_class=0 sri v8.2D, v31.2D, #(19) // ..............................................................*........................................................................................................ // @slothy:interleaving_class=1 eor x17, x24, x9, ror #47 // ...............................................................*....................................................................................................... // @slothy:interleaving_class=0 mov x24, #1 // ...............................................................*....................................................................................................... // @slothy:interleaving_class=0 xar v16.2D, v7.2D, v29.2D, #58 // ................................................................*...................................................................................................... // @slothy:interleaving_class=1 bic x9, x0, x16, ror #9 // ................................................................*...................................................................................................... // @slothy:interleaving_class=0 - str x24, [sp, #STACK_OFFSET_COUNT] // .................................................................*..................................................................................................... // @slothy:writes=STACK_OFFSET_COUNT // @slothy:interleaving_class=0 + str x24, [sp, #MLK_STACK_OFFSET_COUNT] // .................................................................*..................................................................................................... // @slothy:writes=MLK_STACK_OFFSET_COUNT // @slothy:interleaving_class=0 eor v31.16B, v10.16B, v26.16B // .................................................................*..................................................................................................... // @slothy:interleaving_class=1 bic x24, x29, x1, ror #44 // ..................................................................*.................................................................................................... // @slothy:interleaving_class=0 bic x27, x1, x21, ror #50 // ..................................................................*.................................................................................................... // @slothy:interleaving_class=0 @@ -1082,7 +1082,7 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm) eor x16, x30, x16 // ...............................................................................................*....................................................................... // @slothy:interleaving_class=0 eor x28, x30, x28, ror #63 // ...............................................................................................*....................................................................... // @slothy:interleaving_class=0 shl v4.2D, v31.2D, #(64-50) // ................................................................................................*...................................................................... // @slothy:interleaving_class=1 - str x28, [sp, #STACK_LOC_0] // ................................................................................................*...................................................................... // @slothy:writes=stack_0 // @slothy:interleaving_class=0 + str x28, [sp, #MLK_STACK_LOC_0] // ................................................................................................*...................................................................... // @slothy:writes=stack_0 // @slothy:interleaving_class=0 eor x29, x29, x17, ror #36 // .................................................................................................*..................................................................... // @slothy:interleaving_class=0 sri v4.2D, v31.2D, #(50) // .................................................................................................*..................................................................... // @slothy:interleaving_class=1 eor x28, x1, x2, ror #61 // ..................................................................................................*.................................................................... // @slothy:interleaving_class=0 @@ -1164,10 +1164,10 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm) eor x21, x17, x25, ror #30 // ........................................................................................................................................*.............................. // @slothy:interleaving_class=0 bic x19, x25, x19, ror #57 // ........................................................................................................................................*.............................. // @slothy:interleaving_class=0 eor v18.16B, v31.16B, v18.16B // .........................................................................................................................................*............................. // @slothy:interleaving_class=1 - ldr x25, [sp, #STACK_OFFSET_COUNT] // .........................................................................................................................................*............................. // @slothy:reads=STACK_OFFSET_COUNT // @slothy:interleaving_class=0 + ldr x25, [sp, #MLK_STACK_OFFSET_COUNT] // .........................................................................................................................................*............................. // @slothy:reads=MLK_STACK_OFFSET_COUNT // @slothy:interleaving_class=0 bcax v19.16B, v19.16B, v21.16B, v20.16B // ..........................................................................................................................................*............................ // @slothy:interleaving_class=1 eor x17, x10, x9, ror #47 // ..........................................................................................................................................*............................ // @slothy:interleaving_class=0 - ldr x9, [sp, #STACK_OFFSET_CONST_SCALAR] // ...........................................................................................................................................*........................... // @slothy:interleaving_class=0 + ldr x9, [sp, #MLK_STACK_OFFSET_CONST_SCALAR] // ...........................................................................................................................................*........................... // @slothy:interleaving_class=0 bic v31.16B, v22.16B, v1.16B // ...........................................................................................................................................*........................... // @slothy:interleaving_class=1 eor x15, x20, x28, ror #27 // ............................................................................................................................................*.......................... // @slothy:interleaving_class=0 bic x20, x4, x28, ror #2 // ............................................................................................................................................*.......................... // @slothy:interleaving_class=0 @@ -1181,11 +1181,11 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm) bic x1, x1, x11, ror #57 // ................................................................................................................................................*...................... // @slothy:interleaving_class=0 ldr x28, [x9, x25, LSL #3] // .................................................................................................................................................*..................... // @slothy:interleaving_class=0 eor v22.16B, v31.16B, v22.16B // .................................................................................................................................................*..................... // @slothy:interleaving_class=1 - ldr x9, [sp, #STACK_LOC_0] // ..................................................................................................................................................*.................... // @slothy:reads=stack_0 // @slothy:interleaving_class=0 + ldr x9, [sp, #MLK_STACK_LOC_0] // ..................................................................................................................................................*.................... // @slothy:reads=stack_0 // @slothy:interleaving_class=0 add x25, x25, #1 // ..................................................................................................................................................*.................... // @slothy:interleaving_class=0 bcax v23.16B, v23.16B, v0.16B, v24.16B // ...................................................................................................................................................*................... // @slothy:interleaving_class=1 - str x25, [sp, #STACK_OFFSET_COUNT] // ...................................................................................................................................................*................... // @slothy:writes=STACK_OFFSET_COUNT // @slothy:interleaving_class=0 - cmp x25, #(KECCAK_F1600_ROUNDS-1) // ....................................................................................................................................................*.................. // @slothy:ignore_useless_output // @slothy:interleaving_class=0 + str x25, [sp, #MLK_STACK_OFFSET_COUNT] // ...................................................................................................................................................*................... // @slothy:writes=MLK_STACK_OFFSET_COUNT // @slothy:interleaving_class=0 + cmp x25, #(MLK_KECCAK_F1600_ROUNDS-1) // ....................................................................................................................................................*.................. // @slothy:ignore_useless_output // @slothy:interleaving_class=0 bic v31.16B, v1.16B, v0.16B // ....................................................................................................................................................*.................. // @slothy:interleaving_class=1 eor x25, x1, x27, ror #53 // .....................................................................................................................................................*................. // @slothy:interleaving_class=0 bic x27, x30, x26, ror #47 // .....................................................................................................................................................*................. // @slothy:interleaving_class=0 @@ -1219,9 +1219,9 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm) bic x3, x0, x30, ror #5 // ...................................................................................................................................................................*... // @slothy:interleaving_class=0 eor x23, x3, x26, ror #52 // ....................................................................................................................................................................*.. // @slothy:interleaving_class=0 eor x3, x29, x30, ror #24 // ....................................................................................................................................................................*.. // @slothy:interleaving_class=0 - ldr x30, [sp, #STACK_OFFSET_CONST_VECTOR] // .....................................................................................................................................................................*. // @slothy:reads=STACK_OFFSET_CONST_VECTOR // @slothy:interleaving_class=1 + ldr x30, [sp, #MLK_STACK_OFFSET_CONST_VECTOR] // .....................................................................................................................................................................*. // @slothy:reads=MLK_STACK_OFFSET_CONST_VECTOR // @slothy:interleaving_class=1 ld1r {v28.2D}, [x30], #8 // .....................................................................................................................................................................*. // @slothy:interleaving_class=1 - str x30, [sp, #STACK_OFFSET_CONST_VECTOR] // ......................................................................................................................................................................* // @slothy:writes=STACK_OFFSET_CONST_VECTOR // @slothy:interleaving_class=1 + str x30, [sp, #MLK_STACK_OFFSET_CONST_VECTOR] // ......................................................................................................................................................................* // @slothy:writes=MLK_STACK_OFFSET_CONST_VECTOR // @slothy:interleaving_class=1 eor v0.16B, v0.16B, v28.16B // ......................................................................................................................................................................* // @slothy:interleaving_class=1 // -------------------------------------------------------------------------- cycle (expected) --------------------------------------------------------------------------> @@ -1249,7 +1249,7 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm) // eor x30, x30, x29, ror #63 // ..............*........................................................................................................................................................ // eor x22, x22, x30 // ...............*....................................................................................................................................................... // eor x23, x23, x30 // ................*...................................................................................................................................................... - // str x23, [sp, #STACK_LOC_0] // .................*..................................................................................................................................................... + // str x23, [sp, #MLK_STACK_LOC_0] // .................*..................................................................................................................................................... // eor x23, x14, x15 // ..................*.................................................................................................................................................... // eor x14, x14, x0 // ..................*.................................................................................................................................................... // eor x23, x23, x11 // ...................*................................................................................................................................................... @@ -1277,7 +1277,7 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm) // eor x12, x3, x27 // ....................................*.................................................................................................................................. // bic x3, x13, x17, ror #19 // .....................................*................................................................................................................................. // eor x5, x5, x27 // .....................................*................................................................................................................................. - // ldr x27, [sp, #STACK_LOC_0] // ......................................*................................................................................................................................ + // ldr x27, [sp, #MLK_STACK_LOC_0] // ......................................*................................................................................................................................ // bic x25, x17, x2, ror #5 // .......................................*............................................................................................................................... // eor x9, x9, x29 // ........................................*.............................................................................................................................. // eor x23, x25, x5, ror #52 // ........................................*.............................................................................................................................. @@ -1307,12 +1307,12 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm) // eor x12, x15, x12, ror #58 // ...........................................................*........................................................................................................... // eor x15, x5, x27, ror #27 // ...........................................................*........................................................................................................... // eor x5, x20, x11, ror #41 // ............................................................*.......................................................................................................... - // ldr x11, [sp, #STACK_OFFSET_CONST_SCALAR] // .............................................................*......................................................................................................... + // ldr x11, [sp, #MLK_STACK_OFFSET_CONST_SCALAR] // .............................................................*......................................................................................................... // eor x20, x17, x4, ror #21 // ..............................................................*........................................................................................................ // eor x17, x24, x9, ror #47 // ...............................................................*....................................................................................................... // mov x24, #1 // ...............................................................*....................................................................................................... // bic x9, x0, x16, ror #9 // ................................................................*...................................................................................................... - // str x24, [sp, #STACK_OFFSET_COUNT] // .................................................................*..................................................................................................... + // str x24, [sp, #MLK_STACK_OFFSET_COUNT] // .................................................................*..................................................................................................... // bic x24, x29, x1, ror #44 // ..................................................................*.................................................................................................... // bic x27, x1, x21, ror #50 // ..................................................................*.................................................................................................... // bic x4, x26, x29, ror #63 // ...................................................................*................................................................................................... @@ -1353,7 +1353,7 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm) // ror x26, x26, #58 // ..............................................................................................*........................................................................ // eor x16, x30, x16 // ...............................................................................................*....................................................................... // eor x28, x30, x28, ror #63 // ...............................................................................................*....................................................................... - // str x28, [sp, #STACK_LOC_0] // ................................................................................................*...................................................................... + // str x28, [sp, #MLK_STACK_LOC_0] // ................................................................................................*...................................................................... // eor x29, x29, x17, ror #36 // .................................................................................................*..................................................................... // eor x28, x1, x2, ror #61 // ..................................................................................................*.................................................................... // eor x19, x30, x19, ror #37 // ..................................................................................................*.................................................................... @@ -1407,9 +1407,9 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm) // eor x16, x21, x19, ror #43 // .......................................................................................................................................*............................... // eor x21, x17, x25, ror #30 // ........................................................................................................................................*.............................. // bic x19, x25, x19, ror #57 // ........................................................................................................................................*.............................. - // ldr x25, [sp, #STACK_OFFSET_COUNT] // .........................................................................................................................................*............................. + // ldr x25, [sp, #MLK_STACK_OFFSET_COUNT] // .........................................................................................................................................*............................. // eor x17, x10, x9, ror #47 // ..........................................................................................................................................*............................ - // ldr x9, [sp, #STACK_OFFSET_CONST_SCALAR] // ...........................................................................................................................................*........................... + // ldr x9, [sp, #MLK_STACK_OFFSET_CONST_SCALAR] // ...........................................................................................................................................*........................... // eor x15, x20, x28, ror #27 // ............................................................................................................................................*.......................... // bic x20, x4, x28, ror #2 // ............................................................................................................................................*.......................... // eor x10, x20, x1, ror #50 // .............................................................................................................................................*......................... @@ -1418,10 +1418,10 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm) // bic x4, x28, x1, ror #48 // ...............................................................................................................................................*....................... // bic x1, x1, x11, ror #57 // ................................................................................................................................................*...................... // ldr x28, [x9, x25, LSL #3] // .................................................................................................................................................*..................... - // ldr x9, [sp, #STACK_LOC_0] // ..................................................................................................................................................*.................... + // ldr x9, [sp, #MLK_STACK_LOC_0] // ..................................................................................................................................................*.................... // add x25, x25, #1 // ..................................................................................................................................................*.................... - // str x25, [sp, #STACK_OFFSET_COUNT] // ...................................................................................................................................................*................... - // cmp x25, #(KECCAK_F1600_ROUNDS-1) // ....................................................................................................................................................*.................. + // str x25, [sp, #MLK_STACK_OFFSET_COUNT] // ...................................................................................................................................................*................... + // cmp x25, #(MLK_KECCAK_F1600_ROUNDS-1) // ....................................................................................................................................................*.................. // eor x25, x1, x27, ror #53 // .....................................................................................................................................................*................. // bic x27, x30, x26, ror #47 // .....................................................................................................................................................*................. // eor x1, x5, x28 // ......................................................................................................................................................*................ @@ -1519,9 +1519,9 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm) // shl v21.2d, v31.2d, #(64-28) // ......................................................................................................*................................................................ // sri v21.2d, v31.2d, #(28) // .......................................................................................................*............................................................... // xar v27.2d, v6.2d, v25.2d, #20 // .........................................................................................................*............................................................. - // ldr x30, [sp, #STACK_OFFSET_CONST_VECTOR] // .....................................................................................................................................................................*. + // ldr x30, [sp, #MLK_STACK_OFFSET_CONST_VECTOR] // .....................................................................................................................................................................*. // ld1r {v28.2d}, [x30], #8 // .....................................................................................................................................................................*. - // str x30, [sp, #STACK_OFFSET_CONST_VECTOR] // ......................................................................................................................................................................* + // str x30, [sp, #MLK_STACK_OFFSET_CONST_VECTOR] // ......................................................................................................................................................................* // bic v31.16b, v7.16b, v11.16b // ..........................................................................................................*............................................................ // eor v5.16b, v31.16b, v10.16b // ...........................................................................................................*........................................................... // bcax v6.16b, v11.16b, v8.16b, v7.16b // .............................................................................................................*......................................................... @@ -1599,7 +1599,7 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm) eor x16, x30, x16 // .............*........................................................................................................................................................... // @slothy:interleaving_class=0 eor3 v27.16B, v3.16B, v8.16B, v13.16B // .............*........................................................................................................................................................... // @slothy:interleaving_class=1 eor x28, x30, x28, ror #63 // ..............*.......................................................................................................................................................... // @slothy:interleaving_class=0 - str x28, [sp, #STACK_LOC_0] // ..............*.......................................................................................................................................................... // @slothy:writes=stack_0 // @slothy:interleaving_class=0 + str x28, [sp, #MLK_STACK_LOC_0] // ..............*.......................................................................................................................................................... // @slothy:writes=stack_0 // @slothy:interleaving_class=0 eor v27.16B, v27.16B, v18.16B // ...............*......................................................................................................................................................... // @slothy:interleaving_class=1 eor x29, x29, x17, ror #36 // ...............*......................................................................................................................................................... // @slothy:interleaving_class=0 eor x28, x1, x2, ror #61 // ................*........................................................................................................................................................ // @slothy:interleaving_class=0 @@ -1681,10 +1681,10 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm) eor x21, x17, x25, ror #30 // ......................................................*.................................................................................................................. // @slothy:interleaving_class=0 bic x19, x25, x19, ror #57 // ......................................................*.................................................................................................................. // @slothy:interleaving_class=0 eor v31.16B, v1.16B, v25.16B // .......................................................*................................................................................................................. // @slothy:interleaving_class=1 - ldr x25, [sp, #STACK_OFFSET_COUNT] // .......................................................*................................................................................................................. // @slothy:reads=STACK_OFFSET_COUNT // @slothy:interleaving_class=0 + ldr x25, [sp, #MLK_STACK_OFFSET_COUNT] // .......................................................*................................................................................................................. // @slothy:reads=MLK_STACK_OFFSET_COUNT // @slothy:interleaving_class=0 eor x17, x10, x9, ror #47 // ........................................................*................................................................................................................ // @slothy:interleaving_class=0 shl v15.2D, v31.2D, #(64-63) // ........................................................*................................................................................................................ // @slothy:interleaving_class=1 - ldr x9, [sp, #STACK_OFFSET_CONST_SCALAR] // .........................................................*............................................................................................................... // @slothy:interleaving_class=0 + ldr x9, [sp, #MLK_STACK_OFFSET_CONST_SCALAR] // .........................................................*............................................................................................................... // @slothy:interleaving_class=0 sri v15.2D, v31.2D, #(63) // .........................................................*............................................................................................................... // @slothy:interleaving_class=1 eor x15, x20, x28, ror #27 // ..........................................................*.............................................................................................................. // @slothy:interleaving_class=0 bic x20, x4, x28, ror #2 // ..........................................................*.............................................................................................................. // @slothy:interleaving_class=0 @@ -1698,11 +1698,11 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm) bic x1, x1, x11, ror #57 // ..............................................................*.......................................................................................................... // @slothy:interleaving_class=0 ldr x28, [x9, x25, LSL #3] // ...............................................................*......................................................................................................... // @slothy:interleaving_class=0 sri v8.2D, v31.2D, #(19) // ...............................................................*......................................................................................................... // @slothy:interleaving_class=1 - ldr x9, [sp, #STACK_LOC_0] // ................................................................*........................................................................................................ // @slothy:reads=stack_0 // @slothy:interleaving_class=0 + ldr x9, [sp, #MLK_STACK_LOC_0] // ................................................................*........................................................................................................ // @slothy:reads=stack_0 // @slothy:interleaving_class=0 add x25, x25, #1 // ................................................................*........................................................................................................ // @slothy:interleaving_class=0 xar v16.2D, v7.2D, v29.2D, #58 // .................................................................*....................................................................................................... // @slothy:interleaving_class=1 - str x25, [sp, #STACK_OFFSET_COUNT] // .................................................................*....................................................................................................... // @slothy:writes=STACK_OFFSET_COUNT // @slothy:interleaving_class=0 - cmp x25, #(KECCAK_F1600_ROUNDS-1) // ..................................................................*...................................................................................................... // @slothy:ignore_useless_output // @slothy:interleaving_class=0 + str x25, [sp, #MLK_STACK_OFFSET_COUNT] // .................................................................*....................................................................................................... // @slothy:writes=MLK_STACK_OFFSET_COUNT // @slothy:interleaving_class=0 + cmp x25, #(MLK_KECCAK_F1600_ROUNDS-1) // ..................................................................*...................................................................................................... // @slothy:ignore_useless_output // @slothy:interleaving_class=0 eor v31.16B, v10.16B, v26.16B // ..................................................................*...................................................................................................... // @slothy:interleaving_class=1 eor x25, x1, x27, ror #53 // ...................................................................*..................................................................................................... // @slothy:interleaving_class=0 bic x27, x30, x26, ror #47 // ...................................................................*..................................................................................................... // @slothy:interleaving_class=0 @@ -1769,7 +1769,7 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm) eor x16, x30, x16 // ..................................................................................................*...................................................................... // @slothy:interleaving_class=0 eor x28, x30, x28, ror #63 // ..................................................................................................*...................................................................... // @slothy:interleaving_class=0 sri v4.2D, v31.2D, #(50) // ...................................................................................................*..................................................................... // @slothy:interleaving_class=1 - str x28, [sp, #STACK_LOC_0] // ...................................................................................................*..................................................................... // @slothy:writes=stack_0 // @slothy:interleaving_class=0 + str x28, [sp, #MLK_STACK_LOC_0] // ...................................................................................................*..................................................................... // @slothy:writes=stack_0 // @slothy:interleaving_class=0 eor x29, x29, x17, ror #36 // ....................................................................................................*.................................................................... // @slothy:interleaving_class=0 xar v24.2D, v21.2D, v25.2D, #62 // ....................................................................................................*.................................................................... // @slothy:interleaving_class=1 eor x28, x1, x2, ror #61 // .....................................................................................................*................................................................... // @slothy:interleaving_class=0 @@ -1850,11 +1850,11 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm) eor x21, x17, x25, ror #30 // ..........................................................................................................................................*.............................. // @slothy:interleaving_class=0 eor v18.16B, v31.16B, v18.16B // ...........................................................................................................................................*............................. // @slothy:interleaving_class=1 bic x19, x25, x19, ror #57 // ...........................................................................................................................................*............................. // @slothy:interleaving_class=0 - ldr x25, [sp, #STACK_OFFSET_COUNT] // ............................................................................................................................................*............................ // @slothy:reads=STACK_OFFSET_COUNT // @slothy:interleaving_class=0 + ldr x25, [sp, #MLK_STACK_OFFSET_COUNT] // ............................................................................................................................................*............................ // @slothy:reads=MLK_STACK_OFFSET_COUNT // @slothy:interleaving_class=0 bcax v19.16B, v19.16B, v21.16B, v20.16B // ............................................................................................................................................*............................ // @slothy:interleaving_class=1 eor x17, x10, x9, ror #47 // .............................................................................................................................................*........................... // @slothy:interleaving_class=0 bic v31.16B, v22.16B, v1.16B // .............................................................................................................................................*........................... // @slothy:interleaving_class=1 - ldr x9, [sp, #STACK_OFFSET_CONST_SCALAR] // ..............................................................................................................................................*.......................... // @slothy:interleaving_class=0 + ldr x9, [sp, #MLK_STACK_OFFSET_CONST_SCALAR] // ..............................................................................................................................................*.......................... // @slothy:interleaving_class=0 eor x15, x20, x28, ror #27 // ..............................................................................................................................................*.......................... // @slothy:interleaving_class=0 eor v20.16B, v31.16B, v0.16B // ...............................................................................................................................................*......................... // @slothy:interleaving_class=1 bic x20, x4, x28, ror #2 // ...............................................................................................................................................*......................... // @slothy:interleaving_class=0 @@ -1867,12 +1867,12 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm) bic x1, x1, x11, ror #57 // ...................................................................................................................................................*..................... // @slothy:interleaving_class=0 eor v22.16B, v31.16B, v22.16B // ...................................................................................................................................................*..................... // @slothy:interleaving_class=1 ldr x28, [x9, x25, LSL #3] // ....................................................................................................................................................*.................... // @slothy:interleaving_class=0 - ldr x9, [sp, #STACK_LOC_0] // ....................................................................................................................................................*.................... // @slothy:reads=stack_0 // @slothy:interleaving_class=0 + ldr x9, [sp, #MLK_STACK_LOC_0] // ....................................................................................................................................................*.................... // @slothy:reads=stack_0 // @slothy:interleaving_class=0 bcax v23.16B, v23.16B, v0.16B, v24.16B // .....................................................................................................................................................*................... // @slothy:interleaving_class=1 add x25, x25, #1 // .....................................................................................................................................................*................... // @slothy:interleaving_class=0 - str x25, [sp, #STACK_OFFSET_COUNT] // ......................................................................................................................................................*.................. // @slothy:writes=STACK_OFFSET_COUNT // @slothy:interleaving_class=0 + str x25, [sp, #MLK_STACK_OFFSET_COUNT] // ......................................................................................................................................................*.................. // @slothy:writes=MLK_STACK_OFFSET_COUNT // @slothy:interleaving_class=0 bic v31.16B, v1.16B, v0.16B // ......................................................................................................................................................*.................. // @slothy:interleaving_class=1 - cmp x25, #(KECCAK_F1600_ROUNDS-1) // .......................................................................................................................................................*................. // @slothy:ignore_useless_output // @slothy:interleaving_class=0 + cmp x25, #(MLK_KECCAK_F1600_ROUNDS-1) // .......................................................................................................................................................*................. // @slothy:ignore_useless_output // @slothy:interleaving_class=0 eor x25, x1, x27, ror #53 // .......................................................................................................................................................*................. // @slothy:interleaving_class=0 eor v24.16B, v31.16B, v24.16B // ........................................................................................................................................................*................ // @slothy:interleaving_class=1 bic x27, x30, x26, ror #47 // ........................................................................................................................................................*................ // @slothy:interleaving_class=0 @@ -1905,9 +1905,9 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm) bic x3, x0, x30, ror #5 // ......................................................................................................................................................................*.. // @slothy:interleaving_class=0 eor x23, x3, x26, ror #52 // ......................................................................................................................................................................*.. // @slothy:interleaving_class=0 eor x3, x29, x30, ror #24 // .......................................................................................................................................................................*. // @slothy:interleaving_class=0 - ldr x30, [sp, #STACK_OFFSET_CONST_VECTOR] // .......................................................................................................................................................................*. // @slothy:reads=STACK_OFFSET_CONST_VECTOR // @slothy:interleaving_class=1 + ldr x30, [sp, #MLK_STACK_OFFSET_CONST_VECTOR] // .......................................................................................................................................................................*. // @slothy:reads=MLK_STACK_OFFSET_CONST_VECTOR // @slothy:interleaving_class=1 ld1r {v28.2D}, [x30], #8 // ........................................................................................................................................................................* // @slothy:interleaving_class=1 - str x30, [sp, #STACK_OFFSET_CONST_VECTOR] // ........................................................................................................................................................................* // @slothy:writes=STACK_OFFSET_CONST_VECTOR // @slothy:interleaving_class=1 + str x30, [sp, #MLK_STACK_OFFSET_CONST_VECTOR] // ........................................................................................................................................................................* // @slothy:writes=MLK_STACK_OFFSET_CONST_VECTOR // @slothy:interleaving_class=1 eor v0.16B, v0.16B, v28.16B // .........................................................................................................................................................................* // @slothy:interleaving_class=1 // --------------------------------------------------------------------------- cycle (expected) ----------------------------------------------------------------------------> @@ -1932,7 +1932,7 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm) // ror x26, x26, #58 // ............*............................................................................................................................................................. // eor x16, x30, x16 // .............*............................................................................................................................................................ // eor x28, x30, x28, ror #63 // ..............*........................................................................................................................................................... - // str x28, [sp, #STACK_LOC_0] // ..............*........................................................................................................................................................... + // str x28, [sp, #MLK_STACK_LOC_0] // ..............*........................................................................................................................................................... // eor x29, x29, x17, ror #36 // ...............*.......................................................................................................................................................... // eor x28, x1, x2, ror #61 // ................*......................................................................................................................................................... // eor x19, x30, x19, ror #37 // .................*........................................................................................................................................................ @@ -1986,9 +1986,9 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm) // eor x16, x21, x19, ror #43 // .....................................................*.................................................................................................................... // eor x21, x17, x25, ror #30 // ......................................................*................................................................................................................... // bic x19, x25, x19, ror #57 // ......................................................*................................................................................................................... - // ldr x25, [sp, #STACK_OFFSET_COUNT] // .......................................................*.................................................................................................................. + // ldr x25, [sp, #MLK_STACK_OFFSET_COUNT] // .......................................................*.................................................................................................................. // eor x17, x10, x9, ror #47 // ........................................................*................................................................................................................. - // ldr x9, [sp, #STACK_OFFSET_CONST_SCALAR] // .........................................................*................................................................................................................ + // ldr x9, [sp, #MLK_STACK_OFFSET_CONST_SCALAR] // .........................................................*................................................................................................................ // eor x15, x20, x28, ror #27 // ..........................................................*............................................................................................................... // bic x20, x4, x28, ror #2 // ..........................................................*............................................................................................................... // eor x10, x20, x1, ror #50 // ...........................................................*.............................................................................................................. @@ -1997,10 +1997,10 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm) // bic x4, x28, x1, ror #48 // .............................................................*............................................................................................................ // bic x1, x1, x11, ror #57 // ..............................................................*........................................................................................................... // ldr x28, [x9, x25, LSL #3] // ...............................................................*.......................................................................................................... - // ldr x9, [sp, #STACK_LOC_0] // ................................................................*......................................................................................................... + // ldr x9, [sp, #MLK_STACK_LOC_0] // ................................................................*......................................................................................................... // add x25, x25, #1 // ................................................................*......................................................................................................... - // str x25, [sp, #STACK_OFFSET_COUNT] // .................................................................*........................................................................................................ - // cmp x25, #(KECCAK_F1600_ROUNDS-1) // ..................................................................*....................................................................................................... + // str x25, [sp, #MLK_STACK_OFFSET_COUNT] // .................................................................*........................................................................................................ + // cmp x25, #(MLK_KECCAK_F1600_ROUNDS-1) // ..................................................................*....................................................................................................... // eor x25, x1, x27, ror #53 // ...................................................................*...................................................................................................... // bic x27, x30, x26, ror #47 // ...................................................................*...................................................................................................... // eor x1, x5, x28 // ....................................................................*..................................................................................................... @@ -2044,7 +2044,7 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm) // ror x26, x26, #58 // .................................................................................................*........................................................................ // eor x16, x30, x16 // ..................................................................................................*....................................................................... // eor x28, x30, x28, ror #63 // ..................................................................................................*....................................................................... - // str x28, [sp, #STACK_LOC_0] // ...................................................................................................*...................................................................... + // str x28, [sp, #MLK_STACK_LOC_0] // ...................................................................................................*...................................................................... // eor x29, x29, x17, ror #36 // ....................................................................................................*..................................................................... // eor x28, x1, x2, ror #61 // .....................................................................................................*.................................................................... // eor x19, x30, x19, ror #37 // .....................................................................................................*.................................................................... @@ -2098,9 +2098,9 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm) // eor x16, x21, x19, ror #43 // ..........................................................................................................................................*............................... // eor x21, x17, x25, ror #30 // ..........................................................................................................................................*............................... // bic x19, x25, x19, ror #57 // ...........................................................................................................................................*.............................. - // ldr x25, [sp, #STACK_OFFSET_COUNT] // ............................................................................................................................................*............................. + // ldr x25, [sp, #MLK_STACK_OFFSET_COUNT] // ............................................................................................................................................*............................. // eor x17, x10, x9, ror #47 // .............................................................................................................................................*............................ - // ldr x9, [sp, #STACK_OFFSET_CONST_SCALAR] // ..............................................................................................................................................*........................... + // ldr x9, [sp, #MLK_STACK_OFFSET_CONST_SCALAR] // ..............................................................................................................................................*........................... // eor x15, x20, x28, ror #27 // ..............................................................................................................................................*........................... // bic x20, x4, x28, ror #2 // ...............................................................................................................................................*.......................... // eor x10, x20, x1, ror #50 // ................................................................................................................................................*......................... @@ -2109,10 +2109,10 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm) // bic x4, x28, x1, ror #48 // ..................................................................................................................................................*....................... // bic x1, x1, x11, ror #57 // ...................................................................................................................................................*...................... // ldr x28, [x9, x25, LSL #3] // ....................................................................................................................................................*..................... - // ldr x9, [sp, #STACK_LOC_0] // ....................................................................................................................................................*..................... + // ldr x9, [sp, #MLK_STACK_LOC_0] // ....................................................................................................................................................*..................... // add x25, x25, #1 // .....................................................................................................................................................*.................... - // str x25, [sp, #STACK_OFFSET_COUNT] // ......................................................................................................................................................*................... - // cmp x25, #(KECCAK_F1600_ROUNDS-1) // .......................................................................................................................................................*.................. + // str x25, [sp, #MLK_STACK_OFFSET_COUNT] // ......................................................................................................................................................*................... + // cmp x25, #(MLK_KECCAK_F1600_ROUNDS-1) // .......................................................................................................................................................*.................. // eor x25, x1, x27, ror #53 // .......................................................................................................................................................*.................. // bic x27, x30, x26, ror #47 // ........................................................................................................................................................*................. // eor x1, x5, x28 // .........................................................................................................................................................*................ @@ -2210,9 +2210,9 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm) // shl v21.2d, v31.2d, #(64-28) // .......................................................................................................*.................................................................. // sri v21.2d, v31.2d, #(28) // .........................................................................................................*................................................................ // xar v27.2d, v6.2d, v25.2d, #20 // ..........................................................................................................*............................................................... - // ldr x30, [sp, #STACK_OFFSET_CONST_VECTOR] // .......................................................................................................................................................................*.. + // ldr x30, [sp, #MLK_STACK_OFFSET_CONST_VECTOR] // .......................................................................................................................................................................*.. // ld1r {v28.2d}, [x30], #8 // ........................................................................................................................................................................*. - // str x30, [sp, #STACK_OFFSET_CONST_VECTOR] // ........................................................................................................................................................................*. + // str x30, [sp, #MLK_STACK_OFFSET_CONST_VECTOR] // ........................................................................................................................................................................*. // bic v31.16b, v7.16b, v11.16b // ............................................................................................................*............................................................. // eor v5.16b, v31.16b, v10.16b // .............................................................................................................*............................................................ // bcax v6.16b, v11.16b, v8.16b, v7.16b // ...............................................................................................................*.......................................................... @@ -2259,22 +2259,22 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm) final_scalar_rotate // Read outer loop flag: We repeat the above twice - ldr outer, [sp, #STACK_OFFSET_OUTER] // @slothy:reads=STACK_OFFSET_OUTER + ldr outer, [sp, #MLK_STACK_OFFSET_OUTER] // @slothy:reads=MLK_STACK_OFFSET_OUTER cmp outer, #1 beq keccak_f1600_x4_v8a_v84a_scalar_hybrid_done // Update outer loop flag mov outer, #1 - str outer, [sp, #STACK_OFFSET_OUTER] // @slothy:writes=STACK_OFFSET_OUTER + str outer, [sp, #MLK_STACK_OFFSET_OUTER] // @slothy:writes=MLK_STACK_OFFSET_OUTER - ldr input_addr, [sp, #STACK_OFFSET_INPUT] // @slothy:reads=STACK_OFFSET_INPUT + ldr input_addr, [sp, #MLK_STACK_OFFSET_INPUT] // @slothy:reads=MLK_STACK_OFFSET_INPUT store_input_scalar 0 // Store first scalar data load_input_scalar 1 // Load second scalar input b keccak_f1600_x4_v8a_v84a_scalar_hybrid_initial keccak_f1600_x4_v8a_v84a_scalar_hybrid_done: - ldr input_addr, [sp, #STACK_OFFSET_INPUT] // @slothy:reads=STACK_OFFSET_INPUT + ldr input_addr, [sp, #MLK_STACK_OFFSET_INPUT] // @slothy:reads=MLK_STACK_OFFSET_INPUT store_input_scalar 1 store_input_vector @@ -2391,19 +2391,19 @@ keccak_f1600_x4_v8a_v84a_scalar_hybrid_done: /* To facilitate single-compilation-unit (SCU) builds, undefine all macros. * Don't modify by hand -- this is auto-generated by scripts/autogen. */ -#undef KECCAK_F1600_ROUNDS -#undef STACK_LOCS -#undef STACK_SIZE -#undef STACK_BASE_GPRS -#undef STACK_BASE_VREGS -#undef STACK_OFFSET_LOCS -#undef STACK_OFFSET_INPUT -#undef STACK_OFFSET_CONST_SCALAR -#undef STACK_OFFSET_CONST_VECTOR -#undef STACK_OFFSET_COUNT -#undef STACK_OFFSET_OUTER -#undef STACK_LOC_0 -#undef STACK_LOC_1 +#undef MLK_KECCAK_F1600_ROUNDS +#undef MLK_STACK_LOCS +#undef MLK_STACK_SIZE +#undef MLK_STACK_BASE_GPRS +#undef MLK_STACK_BASE_VREGS +#undef MLK_STACK_OFFSET_LOCS +#undef MLK_STACK_OFFSET_INPUT +#undef MLK_STACK_OFFSET_CONST_SCALAR +#undef MLK_STACK_OFFSET_CONST_VECTOR +#undef MLK_STACK_OFFSET_COUNT +#undef MLK_STACK_OFFSET_OUTER +#undef MLK_STACK_LOC_0 +#undef MLK_STACK_LOC_1 /* simpasm: footer-start */ #endif /* __ARM_FEATURE_SHA3 */ diff --git a/dev/fips202/aarch64_symbolic/keccak_f1600_x1_scalar_symbolic.S b/dev/fips202/aarch64_symbolic/keccak_f1600_x1_scalar_symbolic.S index 946013a94c..7405c72ff7 100644 --- a/dev/fips202/aarch64_symbolic/keccak_f1600_x1_scalar_symbolic.S +++ b/dev/fips202/aarch64_symbolic/keccak_f1600_x1_scalar_symbolic.S @@ -81,81 +81,81 @@ * This should to be manually trimmed to the number of stack locations * actually used after running SLOTHY. */ -#define STACK_LOCS 40 +#define MLK_STACK_LOCS 40 // GPRs (16*6), count (8), const (8), input (8), padding (8) -#define STACK_SIZE (16*6 + 3*8 + 8 + (STACK_LOCS) * 8) -#define STACK_BASE_GPRS (3*8+8) -#define STACK_LOC_INPUT (0*8) -#define STACK_LOC_CONST (1*8) -#define STACK_LOC_COUNT (2*8) -#define STACK_LOC_MISC (16*6 + 4*8) - -#define STACK_LOC_0 ((STACK_LOC_MISC) + 0*8) -#define STACK_LOC_1 ((STACK_LOC_MISC) + 1*8) -#define STACK_LOC_2 ((STACK_LOC_MISC) + 2*8) -#define STACK_LOC_3 ((STACK_LOC_MISC) + 3*8) -#define STACK_LOC_4 ((STACK_LOC_MISC) + 4*8) -#define STACK_LOC_5 ((STACK_LOC_MISC) + 5*8) -#define STACK_LOC_6 ((STACK_LOC_MISC) + 6*8) -#define STACK_LOC_7 ((STACK_LOC_MISC) + 7*8) -#define STACK_LOC_8 ((STACK_LOC_MISC) + 8*8) -#define STACK_LOC_9 ((STACK_LOC_MISC) + 9*8) -#define STACK_LOC_10 ((STACK_LOC_MISC) + 10*8) -#define STACK_LOC_11 ((STACK_LOC_MISC) + 11*8) -#define STACK_LOC_12 ((STACK_LOC_MISC) + 12*8) -#define STACK_LOC_13 ((STACK_LOC_MISC) + 13*8) -#define STACK_LOC_14 ((STACK_LOC_MISC) + 14*8) -#define STACK_LOC_15 ((STACK_LOC_MISC) + 15*8) -#define STACK_LOC_16 ((STACK_LOC_MISC) + 16*8) -#define STACK_LOC_17 ((STACK_LOC_MISC) + 17*8) -#define STACK_LOC_18 ((STACK_LOC_MISC) + 18*8) -#define STACK_LOC_19 ((STACK_LOC_MISC) + 19*8) -#define STACK_LOC_20 ((STACK_LOC_MISC) + 20*8) -#define STACK_LOC_21 ((STACK_LOC_MISC) + 21*8) -#define STACK_LOC_22 ((STACK_LOC_MISC) + 22*8) -#define STACK_LOC_23 ((STACK_LOC_MISC) + 23*8) -#define STACK_LOC_24 ((STACK_LOC_MISC) + 24*8) -#define STACK_LOC_25 ((STACK_LOC_MISC) + 25*8) -#define STACK_LOC_26 ((STACK_LOC_MISC) + 26*8) -#define STACK_LOC_27 ((STACK_LOC_MISC) + 27*8) -#define STACK_LOC_28 ((STACK_LOC_MISC) + 28*8) -#define STACK_LOC_29 ((STACK_LOC_MISC) + 29*8) -#define STACK_LOC_30 ((STACK_LOC_MISC) + 30*8) -#define STACK_LOC_31 ((STACK_LOC_MISC) + 31*8) -#define STACK_LOC_32 ((STACK_LOC_MISC) + 32*8) -#define STACK_LOC_33 ((STACK_LOC_MISC) + 33*8) -#define STACK_LOC_34 ((STACK_LOC_MISC) + 34*8) -#define STACK_LOC_35 ((STACK_LOC_MISC) + 35*8) -#define STACK_LOC_36 ((STACK_LOC_MISC) + 36*8) -#define STACK_LOC_37 ((STACK_LOC_MISC) + 37*8) -#define STACK_LOC_38 ((STACK_LOC_MISC) + 38*8) -#define STACK_LOC_39 ((STACK_LOC_MISC) + 39*8) +#define MLK_STACK_SIZE (16*6 + 3*8 + 8 + (MLK_STACK_LOCS) * 8) +#define MLK_STACK_BASE_GPRS (3*8+8) +#define MLK_STACK_LOC_INPUT (0*8) +#define MLK_STACK_LOC_CONST (1*8) +#define MLK_STACK_LOC_COUNT (2*8) +#define MLK_STACK_LOC_MISC (16*6 + 4*8) + +#define MLK_STACK_LOC_0 ((MLK_STACK_LOC_MISC) + 0*8) +#define MLK_STACK_LOC_1 ((MLK_STACK_LOC_MISC) + 1*8) +#define MLK_STACK_LOC_2 ((MLK_STACK_LOC_MISC) + 2*8) +#define MLK_STACK_LOC_3 ((MLK_STACK_LOC_MISC) + 3*8) +#define MLK_STACK_LOC_4 ((MLK_STACK_LOC_MISC) + 4*8) +#define MLK_STACK_LOC_5 ((MLK_STACK_LOC_MISC) + 5*8) +#define MLK_STACK_LOC_6 ((MLK_STACK_LOC_MISC) + 6*8) +#define MLK_STACK_LOC_7 ((MLK_STACK_LOC_MISC) + 7*8) +#define MLK_STACK_LOC_8 ((MLK_STACK_LOC_MISC) + 8*8) +#define MLK_STACK_LOC_9 ((MLK_STACK_LOC_MISC) + 9*8) +#define MLK_STACK_LOC_10 ((MLK_STACK_LOC_MISC) + 10*8) +#define MLK_STACK_LOC_11 ((MLK_STACK_LOC_MISC) + 11*8) +#define MLK_STACK_LOC_12 ((MLK_STACK_LOC_MISC) + 12*8) +#define MLK_STACK_LOC_13 ((MLK_STACK_LOC_MISC) + 13*8) +#define MLK_STACK_LOC_14 ((MLK_STACK_LOC_MISC) + 14*8) +#define MLK_STACK_LOC_15 ((MLK_STACK_LOC_MISC) + 15*8) +#define MLK_STACK_LOC_16 ((MLK_STACK_LOC_MISC) + 16*8) +#define MLK_STACK_LOC_17 ((MLK_STACK_LOC_MISC) + 17*8) +#define MLK_STACK_LOC_18 ((MLK_STACK_LOC_MISC) + 18*8) +#define MLK_STACK_LOC_19 ((MLK_STACK_LOC_MISC) + 19*8) +#define MLK_STACK_LOC_20 ((MLK_STACK_LOC_MISC) + 20*8) +#define MLK_STACK_LOC_21 ((MLK_STACK_LOC_MISC) + 21*8) +#define MLK_STACK_LOC_22 ((MLK_STACK_LOC_MISC) + 22*8) +#define MLK_STACK_LOC_23 ((MLK_STACK_LOC_MISC) + 23*8) +#define MLK_STACK_LOC_24 ((MLK_STACK_LOC_MISC) + 24*8) +#define MLK_STACK_LOC_25 ((MLK_STACK_LOC_MISC) + 25*8) +#define MLK_STACK_LOC_26 ((MLK_STACK_LOC_MISC) + 26*8) +#define MLK_STACK_LOC_27 ((MLK_STACK_LOC_MISC) + 27*8) +#define MLK_STACK_LOC_28 ((MLK_STACK_LOC_MISC) + 28*8) +#define MLK_STACK_LOC_29 ((MLK_STACK_LOC_MISC) + 29*8) +#define MLK_STACK_LOC_30 ((MLK_STACK_LOC_MISC) + 30*8) +#define MLK_STACK_LOC_31 ((MLK_STACK_LOC_MISC) + 31*8) +#define MLK_STACK_LOC_32 ((MLK_STACK_LOC_MISC) + 32*8) +#define MLK_STACK_LOC_33 ((MLK_STACK_LOC_MISC) + 33*8) +#define MLK_STACK_LOC_34 ((MLK_STACK_LOC_MISC) + 34*8) +#define MLK_STACK_LOC_35 ((MLK_STACK_LOC_MISC) + 35*8) +#define MLK_STACK_LOC_36 ((MLK_STACK_LOC_MISC) + 36*8) +#define MLK_STACK_LOC_37 ((MLK_STACK_LOC_MISC) + 37*8) +#define MLK_STACK_LOC_38 ((MLK_STACK_LOC_MISC) + 38*8) +#define MLK_STACK_LOC_39 ((MLK_STACK_LOC_MISC) + 39*8) .macro alloc_stack - sub sp, sp, #(STACK_SIZE) + sub sp, sp, #(MLK_STACK_SIZE) .endm .macro free_stack - add sp, sp, #(STACK_SIZE) + add sp, sp, #(MLK_STACK_SIZE) .endm .macro save_gprs - stp x19, x20, [sp, #(STACK_BASE_GPRS + 16*0)] - stp x21, x22, [sp, #(STACK_BASE_GPRS + 16*1)] - stp x23, x24, [sp, #(STACK_BASE_GPRS + 16*2)] - stp x25, x26, [sp, #(STACK_BASE_GPRS + 16*3)] - stp x27, x28, [sp, #(STACK_BASE_GPRS + 16*4)] - stp x29, x30, [sp, #(STACK_BASE_GPRS + 16*5)] + stp x19, x20, [sp, #(MLK_STACK_BASE_GPRS + 16*0)] + stp x21, x22, [sp, #(MLK_STACK_BASE_GPRS + 16*1)] + stp x23, x24, [sp, #(MLK_STACK_BASE_GPRS + 16*2)] + stp x25, x26, [sp, #(MLK_STACK_BASE_GPRS + 16*3)] + stp x27, x28, [sp, #(MLK_STACK_BASE_GPRS + 16*4)] + stp x29, x30, [sp, #(MLK_STACK_BASE_GPRS + 16*5)] .endm .macro restore_gprs - ldp x19, x20, [sp, #(STACK_BASE_GPRS + 16*0)] - ldp x21, x22, [sp, #(STACK_BASE_GPRS + 16*1)] - ldp x23, x24, [sp, #(STACK_BASE_GPRS + 16*2)] - ldp x25, x26, [sp, #(STACK_BASE_GPRS + 16*3)] - ldp x27, x28, [sp, #(STACK_BASE_GPRS + 16*4)] - ldp x29, x30, [sp, #(STACK_BASE_GPRS + 16*5)] + ldp x19, x20, [sp, #(MLK_STACK_BASE_GPRS + 16*0)] + ldp x21, x22, [sp, #(MLK_STACK_BASE_GPRS + 16*1)] + ldp x23, x24, [sp, #(MLK_STACK_BASE_GPRS + 16*2)] + ldp x25, x26, [sp, #(MLK_STACK_BASE_GPRS + 16*3)] + ldp x27, x28, [sp, #(MLK_STACK_BASE_GPRS + 16*4)] + ldp x29, x30, [sp, #(MLK_STACK_BASE_GPRS + 16*5)] .endm .macro eor5 dst, src0, src1, src2, src3, src4 @@ -214,10 +214,10 @@ eor X, Aga, X eor X, Age, X - ldr X, [sp, #STACK_LOC_CONST] + ldr X, [sp, #MLK_STACK_LOC_CONST] ldr X, [X] mov X, #1 - str X, [sp, #STACK_LOC_COUNT] // @slothy:writes=STACK_LOC_COUNT + str X, [sp, #MLK_STACK_LOC_COUNT] // @slothy:writes=MLK_STACK_LOC_COUNT chi_step_ror Aga, X, X, X, 47, 39 chi_step_ror Age, X, X, X, 42, 25 @@ -311,12 +311,12 @@ eor X, X, Aga, ror #61 eor X, X, Age, ror #19 - ldr X, [sp, #STACK_LOC_CONST] - ldr X, [sp, #STACK_LOC_COUNT] // @slothy:reads=STACK_LOC_COUNT + ldr X, [sp, #MLK_STACK_LOC_CONST] + ldr X, [sp, #MLK_STACK_LOC_COUNT] // @slothy:reads=MLK_STACK_LOC_COUNT ldr X, [X, X, LSL #3] add X, X, #1 - cmp X, #(KECCAK_F1600_ROUNDS-1) // @slothy:ignore_useless_output - str X, [sp, #STACK_LOC_COUNT] // @slothy:writes=STACK_LOC_COUNT + cmp X, #(MLK_KECCAK_F1600_ROUNDS-1) // @slothy:ignore_useless_output + str X, [sp, #MLK_STACK_LOC_COUNT] // @slothy:writes=MLK_STACK_LOC_COUNT chi_step_ror Aga, X, X, X, 47, 39 chi_step_ror Age, X, X, X, 42, 25 @@ -405,7 +405,7 @@ ror Asu, Asu,#(64-55) .endm -#define KECCAK_F1600_ROUNDS 24 +#define MLK_KECCAK_F1600_ROUNDS 24 .text .global MLK_ASM_NAMESPACE(keccak_f1600_x1_scalar_asm) @@ -416,9 +416,9 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x1_scalar_asm) keccak_f1600_x1_scalar_initial: mov const_addr, input_rc - str input_rc, [sp, #STACK_LOC_CONST] + str input_rc, [sp, #MLK_STACK_LOC_CONST] load_state - str input_addr, [sp, #STACK_LOC_INPUT] // @slothy:writes=STACK_LOC_INPUT + str input_addr, [sp, #MLK_STACK_LOC_INPUT] // @slothy:writes=MLK_STACK_LOC_INPUT keccak_f1600_x1_scalar_initial_start: keccak_f1600_round_initial @@ -430,7 +430,7 @@ keccak_f1600_x1_scalar_end_loop: keccak_f1600_x1_scalar_final: final_rotate - ldr input_addr, [sp, #STACK_LOC_INPUT] // @slothy:reads=STACK_LOC_INPUT + ldr input_addr, [sp, #MLK_STACK_LOC_INPUT] // @slothy:reads=MLK_STACK_LOC_INPUT store_state keccak_f1600_x1_scalar_end_final: @@ -470,54 +470,54 @@ keccak_f1600_x1_scalar_end_final: /* To facilitate single-compilation-unit (SCU) builds, undefine all macros. * Don't modify by hand -- this is auto-generated by scripts/autogen. */ -#undef STACK_LOCS -#undef STACK_SIZE -#undef STACK_BASE_GPRS -#undef STACK_LOC_INPUT -#undef STACK_LOC_CONST -#undef STACK_LOC_COUNT -#undef STACK_LOC_MISC -#undef STACK_LOC_0 -#undef STACK_LOC_1 -#undef STACK_LOC_2 -#undef STACK_LOC_3 -#undef STACK_LOC_4 -#undef STACK_LOC_5 -#undef STACK_LOC_6 -#undef STACK_LOC_7 -#undef STACK_LOC_8 -#undef STACK_LOC_9 -#undef STACK_LOC_10 -#undef STACK_LOC_11 -#undef STACK_LOC_12 -#undef STACK_LOC_13 -#undef STACK_LOC_14 -#undef STACK_LOC_15 -#undef STACK_LOC_16 -#undef STACK_LOC_17 -#undef STACK_LOC_18 -#undef STACK_LOC_19 -#undef STACK_LOC_20 -#undef STACK_LOC_21 -#undef STACK_LOC_22 -#undef STACK_LOC_23 -#undef STACK_LOC_24 -#undef STACK_LOC_25 -#undef STACK_LOC_26 -#undef STACK_LOC_27 -#undef STACK_LOC_28 -#undef STACK_LOC_29 -#undef STACK_LOC_30 -#undef STACK_LOC_31 -#undef STACK_LOC_32 -#undef STACK_LOC_33 -#undef STACK_LOC_34 -#undef STACK_LOC_35 -#undef STACK_LOC_36 -#undef STACK_LOC_37 -#undef STACK_LOC_38 -#undef STACK_LOC_39 -#undef KECCAK_F1600_ROUNDS +#undef MLK_STACK_LOCS +#undef MLK_STACK_SIZE +#undef MLK_STACK_BASE_GPRS +#undef MLK_STACK_LOC_INPUT +#undef MLK_STACK_LOC_CONST +#undef MLK_STACK_LOC_COUNT +#undef MLK_STACK_LOC_MISC +#undef MLK_STACK_LOC_0 +#undef MLK_STACK_LOC_1 +#undef MLK_STACK_LOC_2 +#undef MLK_STACK_LOC_3 +#undef MLK_STACK_LOC_4 +#undef MLK_STACK_LOC_5 +#undef MLK_STACK_LOC_6 +#undef MLK_STACK_LOC_7 +#undef MLK_STACK_LOC_8 +#undef MLK_STACK_LOC_9 +#undef MLK_STACK_LOC_10 +#undef MLK_STACK_LOC_11 +#undef MLK_STACK_LOC_12 +#undef MLK_STACK_LOC_13 +#undef MLK_STACK_LOC_14 +#undef MLK_STACK_LOC_15 +#undef MLK_STACK_LOC_16 +#undef MLK_STACK_LOC_17 +#undef MLK_STACK_LOC_18 +#undef MLK_STACK_LOC_19 +#undef MLK_STACK_LOC_20 +#undef MLK_STACK_LOC_21 +#undef MLK_STACK_LOC_22 +#undef MLK_STACK_LOC_23 +#undef MLK_STACK_LOC_24 +#undef MLK_STACK_LOC_25 +#undef MLK_STACK_LOC_26 +#undef MLK_STACK_LOC_27 +#undef MLK_STACK_LOC_28 +#undef MLK_STACK_LOC_29 +#undef MLK_STACK_LOC_30 +#undef MLK_STACK_LOC_31 +#undef MLK_STACK_LOC_32 +#undef MLK_STACK_LOC_33 +#undef MLK_STACK_LOC_34 +#undef MLK_STACK_LOC_35 +#undef MLK_STACK_LOC_36 +#undef MLK_STACK_LOC_37 +#undef MLK_STACK_LOC_38 +#undef MLK_STACK_LOC_39 +#undef MLK_KECCAK_F1600_ROUNDS /* simpasm: footer-start */ #endif /* MLK_FIPS202_AARCH64_NEED_X1_SCALAR && \ diff --git a/dev/fips202/aarch64_symbolic/keccak_f1600_x4_v8a_scalar_hybrid_clean.S b/dev/fips202/aarch64_symbolic/keccak_f1600_x4_v8a_scalar_hybrid_clean.S index 4ebc76ad18..32dd8f94c4 100644 --- a/dev/fips202/aarch64_symbolic/keccak_f1600_x4_v8a_scalar_hybrid_clean.S +++ b/dev/fips202/aarch64_symbolic/keccak_f1600_x4_v8a_scalar_hybrid_clean.S @@ -35,7 +35,7 @@ !defined(MLK_CONFIG_MULTILEVEL_NO_SHARED) /* simpasm: header-end */ -#define KECCAK_F1600_ROUNDS 24 +#define MLK_KECCAK_F1600_ROUNDS 24 /****************** REGISTER ALLOCATIONS *******************/ @@ -276,60 +276,60 @@ sub input_addr, input_addr, #((2 + \idx)*25*8) .endm -#define STACK_LOCS 2 +#define MLK_STACK_LOCS 2 -#define STACK_SIZE (16*6 + 8*8 + 6*8 + (STACK_LOCS) * 8) -#define STACK_BASE_GPRS (6*8) -#define STACK_BASE_VREGS (6*8 + 16*6) -#define STACK_OFFSET_LOCS (16*6 + 8*8 + 6*8) +#define MLK_STACK_SIZE (16*6 + 8*8 + 6*8 + (MLK_STACK_LOCS) * 8) +#define MLK_STACK_BASE_GPRS (6*8) +#define MLK_STACK_BASE_VREGS (6*8 + 16*6) +#define MLK_STACK_OFFSET_LOCS (16*6 + 8*8 + 6*8) -#define STACK_OFFSET_INPUT (0*8) -#define STACK_OFFSET_CONST_SCALAR (1*8) -#define STACK_OFFSET_CONST_VECTOR (2*8) -#define STACK_OFFSET_COUNT (3*8) -#define STACK_OFFSET_OUTER (4*8) +#define MLK_STACK_OFFSET_INPUT (0*8) +#define MLK_STACK_OFFSET_CONST_SCALAR (1*8) +#define MLK_STACK_OFFSET_CONST_VECTOR (2*8) +#define MLK_STACK_OFFSET_COUNT (3*8) +#define MLK_STACK_OFFSET_OUTER (4*8) -#define STACK_LOC_0 ((STACK_OFFSET_LOCS) + 0*8) -#define STACK_LOC_1 ((STACK_OFFSET_LOCS) + 1*8) +#define MLK_STACK_LOC_0 ((MLK_STACK_OFFSET_LOCS) + 0*8) +#define MLK_STACK_LOC_1 ((MLK_STACK_OFFSET_LOCS) + 1*8) .macro save_gprs - stp x19, x20, [sp, #(STACK_BASE_GPRS + 16*0)] - stp x21, x22, [sp, #(STACK_BASE_GPRS + 16*1)] - stp x23, x24, [sp, #(STACK_BASE_GPRS + 16*2)] - stp x25, x26, [sp, #(STACK_BASE_GPRS + 16*3)] - stp x27, x28, [sp, #(STACK_BASE_GPRS + 16*4)] - stp x29, x30, [sp, #(STACK_BASE_GPRS + 16*5)] + stp x19, x20, [sp, #(MLK_STACK_BASE_GPRS + 16*0)] + stp x21, x22, [sp, #(MLK_STACK_BASE_GPRS + 16*1)] + stp x23, x24, [sp, #(MLK_STACK_BASE_GPRS + 16*2)] + stp x25, x26, [sp, #(MLK_STACK_BASE_GPRS + 16*3)] + stp x27, x28, [sp, #(MLK_STACK_BASE_GPRS + 16*4)] + stp x29, x30, [sp, #(MLK_STACK_BASE_GPRS + 16*5)] .endm .macro restore_gprs - ldp x19, x20, [sp, #(STACK_BASE_GPRS + 16*0)] - ldp x21, x22, [sp, #(STACK_BASE_GPRS + 16*1)] - ldp x23, x24, [sp, #(STACK_BASE_GPRS + 16*2)] - ldp x25, x26, [sp, #(STACK_BASE_GPRS + 16*3)] - ldp x27, x28, [sp, #(STACK_BASE_GPRS + 16*4)] - ldp x29, x30, [sp, #(STACK_BASE_GPRS + 16*5)] + ldp x19, x20, [sp, #(MLK_STACK_BASE_GPRS + 16*0)] + ldp x21, x22, [sp, #(MLK_STACK_BASE_GPRS + 16*1)] + ldp x23, x24, [sp, #(MLK_STACK_BASE_GPRS + 16*2)] + ldp x25, x26, [sp, #(MLK_STACK_BASE_GPRS + 16*3)] + ldp x27, x28, [sp, #(MLK_STACK_BASE_GPRS + 16*4)] + ldp x29, x30, [sp, #(MLK_STACK_BASE_GPRS + 16*5)] .endm .macro save_vregs - stp d8, d9, [sp,#(STACK_BASE_VREGS+0*16)] - stp d10, d11, [sp,#(STACK_BASE_VREGS+1*16)] - stp d12, d13, [sp,#(STACK_BASE_VREGS+2*16)] - stp d14, d15, [sp,#(STACK_BASE_VREGS+3*16)] + stp d8, d9, [sp,#(MLK_STACK_BASE_VREGS+0*16)] + stp d10, d11, [sp,#(MLK_STACK_BASE_VREGS+1*16)] + stp d12, d13, [sp,#(MLK_STACK_BASE_VREGS+2*16)] + stp d14, d15, [sp,#(MLK_STACK_BASE_VREGS+3*16)] .endm .macro restore_vregs - ldp d8, d9, [sp,#(STACK_BASE_VREGS+0*16)] - ldp d10, d11, [sp,#(STACK_BASE_VREGS+1*16)] - ldp d12, d13, [sp,#(STACK_BASE_VREGS+2*16)] - ldp d14, d15, [sp,#(STACK_BASE_VREGS+3*16)] + ldp d8, d9, [sp,#(MLK_STACK_BASE_VREGS+0*16)] + ldp d10, d11, [sp,#(MLK_STACK_BASE_VREGS+1*16)] + ldp d12, d13, [sp,#(MLK_STACK_BASE_VREGS+2*16)] + ldp d14, d15, [sp,#(MLK_STACK_BASE_VREGS+3*16)] .endm .macro alloc_stack - sub sp, sp, #(STACK_SIZE) + sub sp, sp, #(MLK_STACK_SIZE) .endm .macro free_stack - add sp, sp, #(STACK_SIZE) + add sp, sp, #(MLK_STACK_SIZE) .endm .macro eor5 dst, src0, src1, src2, src3, src4 @@ -378,7 +378,7 @@ eor x30, x30, x29, ror #63 // .........*............................................ eor x22, x22, x30 // ..........*........................................... eor x23, x23, x30 // ..........*........................................... - str x23, [sp, #STACK_LOC_0] // ...........*.......................................... // @slothy:writes=stack_0 + str x23, [sp, #MLK_STACK_LOC_0] // ...........*.......................................... // @slothy:writes=stack_0 eor x23, x14, x15 // ...........*.......................................... eor x14, x14, x0 // ............*......................................... eor x23, x23, x11 // ............*......................................... @@ -406,7 +406,7 @@ eor x12, x3, x27 // ........................*............................. bic x3, x13, x17, ror #19 // ........................*............................. eor x5, x5, x27 // .........................*............................ - ldr x27, [sp, #STACK_LOC_0] // .........................*............................ // @slothy:reads=stack_0 + ldr x27, [sp, #MLK_STACK_LOC_0] // .........................*............................ // @slothy:reads=stack_0 bic x25, x17, x2, ror #5 // ..........................*........................... eor x9, x9, x29 // ..........................*........................... eor x23, x25, x5, ror #52 // ...........................*.......................... @@ -436,12 +436,12 @@ eor x12, x15, x12, ror #58 // .......................................*.............. eor x15, x5, x27, ror #27 // .......................................*.............. eor x5, x20, x11, ror #41 // ........................................*............. - ldr x11, [sp, #STACK_OFFSET_CONST_SCALAR] // ........................................*............. + ldr x11, [sp, #MLK_STACK_OFFSET_CONST_SCALAR] // ........................................*............. eor x20, x17, x4, ror #21 // .........................................*............ eor x17, x24, x9, ror #47 // .........................................*............ mov x24, #1 // ..........................................*........... bic x9, x0, x16, ror #9 // ..........................................*........... - str x24, [sp, #STACK_OFFSET_COUNT] // ...........................................*.......... // @slothy:writes=STACK_OFFSET_COUNT + str x24, [sp, #MLK_STACK_OFFSET_COUNT] // ...........................................*.......... // @slothy:writes=MLK_STACK_OFFSET_COUNT bic x24, x29, x1, ror #44 // ...........................................*.......... bic x27, x1, x21, ror #50 // ............................................*......... bic x4, x26, x29, ror #63 // ............................................*......... @@ -502,10 +502,10 @@ // eor X, sAga, X // eor X, sAge, X - // ldr X, [sp, #STACK_OFFSET_CONST_SCALAR] + // ldr X, [sp, #MLK_STACK_OFFSET_CONST_SCALAR] // ldr X, [X] // mov X, #1 - // str X, [sp, #STACK_OFFSET_COUNT] // @slothy:writes=STACK_OFFSET_COUNT + // str X, [sp, #MLK_STACK_OFFSET_COUNT] // @slothy:writes=MLK_STACK_OFFSET_COUNT // chi_step_ror sAga, X, X, X, 47, 39 // chi_step_ror sAge, X, X, X, 42, 25 @@ -577,9 +577,9 @@ xar_m1 vAsu_, vAse, E1, 62 xar_m1 vAme_, vAga, E0, 28 xar_m1 vAbe_, vAge, E1, 20 - ldr tmp, [sp, #STACK_OFFSET_CONST_VECTOR] // @slothy:reads=STACK_OFFSET_CONST_VECTOR + ldr tmp, [sp, #MLK_STACK_OFFSET_CONST_VECTOR] // @slothy:reads=MLK_STACK_OFFSET_CONST_VECTOR ld1r {v28.2d}, [tmp], #8 - str tmp, [sp, #STACK_OFFSET_CONST_VECTOR] // @slothy:writes=STACK_OFFSET_CONST_VECTOR + str tmp, [sp, #MLK_STACK_OFFSET_CONST_VECTOR] // @slothy:writes=MLK_STACK_OFFSET_CONST_VECTOR bcax_m1 vAga, vAga_, vAgi_, vAge_ bcax_m1 vAge, vAge_, vAgo_, vAgi_ bcax_m1 vAgi, vAgi_, vAgu_, vAgo_ @@ -628,7 +628,7 @@ ror x26, x26, #58 // ........*................................................ eor x16, x30, x16 // .........*............................................... eor x28, x30, x28, ror #63 // .........*............................................... - str x28, [sp, #STACK_LOC_0] // ..........*.............................................. // @slothy:writes=stack_0 + str x28, [sp, #MLK_STACK_LOC_0] // ..........*.............................................. // @slothy:writes=stack_0 eor x29, x29, x17, ror #36 // ..........*.............................................. eor x28, x1, x2, ror #61 // ...........*............................................. eor x19, x30, x19, ror #37 // ...........*............................................. @@ -682,9 +682,9 @@ eor x16, x21, x19, ror #43 // ....................................*.................... eor x21, x17, x25, ror #30 // ....................................*.................... bic x19, x25, x19, ror #57 // .....................................*................... - ldr x25, [sp, #STACK_OFFSET_COUNT] // .....................................*................... // @slothy:reads=STACK_OFFSET_COUNT + ldr x25, [sp, #MLK_STACK_OFFSET_COUNT] // .....................................*................... // @slothy:reads=MLK_STACK_OFFSET_COUNT eor x17, x10, x9, ror #47 // ......................................*.................. - ldr x9, [sp, #STACK_OFFSET_CONST_SCALAR] // ......................................*.................. + ldr x9, [sp, #MLK_STACK_OFFSET_CONST_SCALAR] // ......................................*.................. eor x15, x20, x28, ror #27 // .......................................*................. bic x20, x4, x28, ror #2 // .......................................*................. eor x10, x20, x1, ror #50 // ........................................*................ @@ -693,10 +693,10 @@ bic x4, x28, x1, ror #48 // .........................................*............... bic x1, x1, x11, ror #57 // ..........................................*.............. ldr x28, [x9, x25, LSL #3] // ..........................................*.............. - ldr x9, [sp, #STACK_LOC_0] // ...........................................*............. // @slothy:reads=stack_0 + ldr x9, [sp, #MLK_STACK_LOC_0] // ...........................................*............. // @slothy:reads=stack_0 add x25, x25, #1 // ...........................................*............. - str x25, [sp, #STACK_OFFSET_COUNT] // ............................................*............ // @slothy:writes=STACK_OFFSET_COUNT - cmp x25, #(KECCAK_F1600_ROUNDS-1) // ............................................*............ // @slothy:ignore_useless_output + str x25, [sp, #MLK_STACK_OFFSET_COUNT] // ............................................*............ // @slothy:writes=MLK_STACK_OFFSET_COUNT + cmp x25, #(MLK_KECCAK_F1600_ROUNDS-1) // ............................................*............ // @slothy:ignore_useless_output eor x25, x1, x27, ror #53 // .............................................*........... bic x27, x30, x26, ror #47 // .............................................*........... eor x1, x5, x28 // ..............................................*.......... @@ -782,12 +782,12 @@ // eor X, X, sAga, ror #61 // eor X, X, sAge, ror #19 - // ldr X, [sp, #STACK_OFFSET_CONST_SCALAR] - // ldr X, [sp, #STACK_OFFSET_COUNT] // @slothy:reads=STACK_OFFSET_COUNT + // ldr X, [sp, #MLK_STACK_OFFSET_CONST_SCALAR] + // ldr X, [sp, #MLK_STACK_OFFSET_COUNT] // @slothy:reads=MLK_STACK_OFFSET_COUNT // ldr X, [X, X, LSL #3] // add X, X, #1 - // cmp X, #(KECCAK_F1600_ROUNDS-1) // @slothy:ignore_useless_output - // str X, [sp, #STACK_OFFSET_COUNT] // @slothy:writes=STACK_OFFSET_COUNT + // cmp X, #(MLK_KECCAK_F1600_ROUNDS-1) // @slothy:ignore_useless_output + // str X, [sp, #MLK_STACK_OFFSET_COUNT] // @slothy:writes=MLK_STACK_OFFSET_COUNT // chi_step_ror sAga, X, X, X, 47, 39 // chi_step_ror sAge, X, X, X, 42, 25 @@ -855,10 +855,10 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_scalar_hybrid_asm) mov const_addr, input_rc mov outer, #0 - str outer, [sp, #STACK_OFFSET_OUTER] // @slothy:writes=STACK_OFFSET_OUTER - str const_addr, [sp, #STACK_OFFSET_CONST_SCALAR] // @slothy:writes=STACK_OFFSET_CONST_SCALAR - str const_addr, [sp, #STACK_OFFSET_CONST_VECTOR] // @slothy:writes=STACK_OFFSET_CONST_VECTOR - str input_addr, [sp, #STACK_OFFSET_INPUT] // @slothy:writes=STACK_OFFSET_INPUT + str outer, [sp, #MLK_STACK_OFFSET_OUTER] // @slothy:writes=MLK_STACK_OFFSET_OUTER + str const_addr, [sp, #MLK_STACK_OFFSET_CONST_SCALAR] // @slothy:writes=MLK_STACK_OFFSET_CONST_SCALAR + str const_addr, [sp, #MLK_STACK_OFFSET_CONST_VECTOR] // @slothy:writes=MLK_STACK_OFFSET_CONST_VECTOR + str input_addr, [sp, #MLK_STACK_OFFSET_INPUT] // @slothy:writes=MLK_STACK_OFFSET_INPUT load_input_vector // Vector input load_input_scalar 0 // First scalar input @@ -876,22 +876,22 @@ keccak_f1600_x4_v8a_scalar_hybrid_loop_end: final_scalar_rotate // Read outer loop flag: We repeat the above twice - ldr outer, [sp, #STACK_OFFSET_OUTER] // @slothy:reads=STACK_OFFSET_OUTER + ldr outer, [sp, #MLK_STACK_OFFSET_OUTER] // @slothy:reads=MLK_STACK_OFFSET_OUTER cmp outer, #1 beq keccak_f1600_x4_v8a_scalar_hybrid_done // Update outer loop flag mov outer, #1 - str outer, [sp, #STACK_OFFSET_OUTER] // @slothy:writes=STACK_OFFSET_OUTER + str outer, [sp, #MLK_STACK_OFFSET_OUTER] // @slothy:writes=MLK_STACK_OFFSET_OUTER - ldr input_addr, [sp, #STACK_OFFSET_INPUT] // @slothy:reads=STACK_OFFSET_INPUT + ldr input_addr, [sp, #MLK_STACK_OFFSET_INPUT] // @slothy:reads=MLK_STACK_OFFSET_INPUT store_input_scalar 0 // Store first scalar data load_input_scalar 1 // Load second scalar input b keccak_f1600_x4_v8a_scalar_hybrid_initial keccak_f1600_x4_v8a_scalar_hybrid_done: - ldr input_addr, [sp, #STACK_OFFSET_INPUT] // @slothy:reads=STACK_OFFSET_INPUT + ldr input_addr, [sp, #MLK_STACK_OFFSET_INPUT] // @slothy:reads=MLK_STACK_OFFSET_INPUT store_input_scalar 1 store_input_vector @@ -1008,19 +1008,19 @@ keccak_f1600_x4_v8a_scalar_hybrid_done: /* To facilitate single-compilation-unit (SCU) builds, undefine all macros. * Don't modify by hand -- this is auto-generated by scripts/autogen. */ -#undef KECCAK_F1600_ROUNDS -#undef STACK_LOCS -#undef STACK_SIZE -#undef STACK_BASE_GPRS -#undef STACK_BASE_VREGS -#undef STACK_OFFSET_LOCS -#undef STACK_OFFSET_INPUT -#undef STACK_OFFSET_CONST_SCALAR -#undef STACK_OFFSET_CONST_VECTOR -#undef STACK_OFFSET_COUNT -#undef STACK_OFFSET_OUTER -#undef STACK_LOC_0 -#undef STACK_LOC_1 +#undef MLK_KECCAK_F1600_ROUNDS +#undef MLK_STACK_LOCS +#undef MLK_STACK_SIZE +#undef MLK_STACK_BASE_GPRS +#undef MLK_STACK_BASE_VREGS +#undef MLK_STACK_OFFSET_LOCS +#undef MLK_STACK_OFFSET_INPUT +#undef MLK_STACK_OFFSET_CONST_SCALAR +#undef MLK_STACK_OFFSET_CONST_VECTOR +#undef MLK_STACK_OFFSET_COUNT +#undef MLK_STACK_OFFSET_OUTER +#undef MLK_STACK_LOC_0 +#undef MLK_STACK_LOC_1 /* simpasm: footer-start */ #endif /* MLK_FIPS202_AARCH64_NEED_X4_V8A_SCALAR_HYBRID && \ diff --git a/dev/fips202/aarch64_symbolic/keccak_f1600_x4_v8a_v84a_scalar_hybrid_clean.S b/dev/fips202/aarch64_symbolic/keccak_f1600_x4_v8a_v84a_scalar_hybrid_clean.S index 61c7a16a45..ad73e4578d 100644 --- a/dev/fips202/aarch64_symbolic/keccak_f1600_x4_v8a_v84a_scalar_hybrid_clean.S +++ b/dev/fips202/aarch64_symbolic/keccak_f1600_x4_v8a_v84a_scalar_hybrid_clean.S @@ -37,7 +37,7 @@ #if defined(__ARM_FEATURE_SHA3) /* simpasm: header-end */ -#define KECCAK_F1600_ROUNDS 24 +#define MLK_KECCAK_F1600_ROUNDS 24 /****************** REGISTER ALLOCATIONS *******************/ @@ -294,60 +294,60 @@ sub input_addr, input_addr, #((2 + \idx)*25*8) .endm -#define STACK_LOCS 2 +#define MLK_STACK_LOCS 2 -#define STACK_SIZE (16*6 + 8*8 + 6*8 + (STACK_LOCS) * 8) -#define STACK_BASE_GPRS (6*8) -#define STACK_BASE_VREGS (6*8 + 16*6) -#define STACK_OFFSET_LOCS (16*6 + 8*8 + 6*8) +#define MLK_STACK_SIZE (16*6 + 8*8 + 6*8 + (MLK_STACK_LOCS) * 8) +#define MLK_STACK_BASE_GPRS (6*8) +#define MLK_STACK_BASE_VREGS (6*8 + 16*6) +#define MLK_STACK_OFFSET_LOCS (16*6 + 8*8 + 6*8) -#define STACK_OFFSET_INPUT (0*8) -#define STACK_OFFSET_CONST_SCALAR (1*8) -#define STACK_OFFSET_CONST_VECTOR (2*8) -#define STACK_OFFSET_COUNT (3*8) -#define STACK_OFFSET_OUTER (4*8) +#define MLK_STACK_OFFSET_INPUT (0*8) +#define MLK_STACK_OFFSET_CONST_SCALAR (1*8) +#define MLK_STACK_OFFSET_CONST_VECTOR (2*8) +#define MLK_STACK_OFFSET_COUNT (3*8) +#define MLK_STACK_OFFSET_OUTER (4*8) -#define STACK_LOC_0 ((STACK_OFFSET_LOCS) + 0*8) -#define STACK_LOC_1 ((STACK_OFFSET_LOCS) + 1*8) +#define MLK_STACK_LOC_0 ((MLK_STACK_OFFSET_LOCS) + 0*8) +#define MLK_STACK_LOC_1 ((MLK_STACK_OFFSET_LOCS) + 1*8) .macro save_gprs - stp x19, x20, [sp, #(STACK_BASE_GPRS + 16*0)] - stp x21, x22, [sp, #(STACK_BASE_GPRS + 16*1)] - stp x23, x24, [sp, #(STACK_BASE_GPRS + 16*2)] - stp x25, x26, [sp, #(STACK_BASE_GPRS + 16*3)] - stp x27, x28, [sp, #(STACK_BASE_GPRS + 16*4)] - stp x29, x30, [sp, #(STACK_BASE_GPRS + 16*5)] + stp x19, x20, [sp, #(MLK_STACK_BASE_GPRS + 16*0)] + stp x21, x22, [sp, #(MLK_STACK_BASE_GPRS + 16*1)] + stp x23, x24, [sp, #(MLK_STACK_BASE_GPRS + 16*2)] + stp x25, x26, [sp, #(MLK_STACK_BASE_GPRS + 16*3)] + stp x27, x28, [sp, #(MLK_STACK_BASE_GPRS + 16*4)] + stp x29, x30, [sp, #(MLK_STACK_BASE_GPRS + 16*5)] .endm .macro restore_gprs - ldp x19, x20, [sp, #(STACK_BASE_GPRS + 16*0)] - ldp x21, x22, [sp, #(STACK_BASE_GPRS + 16*1)] - ldp x23, x24, [sp, #(STACK_BASE_GPRS + 16*2)] - ldp x25, x26, [sp, #(STACK_BASE_GPRS + 16*3)] - ldp x27, x28, [sp, #(STACK_BASE_GPRS + 16*4)] - ldp x29, x30, [sp, #(STACK_BASE_GPRS + 16*5)] + ldp x19, x20, [sp, #(MLK_STACK_BASE_GPRS + 16*0)] + ldp x21, x22, [sp, #(MLK_STACK_BASE_GPRS + 16*1)] + ldp x23, x24, [sp, #(MLK_STACK_BASE_GPRS + 16*2)] + ldp x25, x26, [sp, #(MLK_STACK_BASE_GPRS + 16*3)] + ldp x27, x28, [sp, #(MLK_STACK_BASE_GPRS + 16*4)] + ldp x29, x30, [sp, #(MLK_STACK_BASE_GPRS + 16*5)] .endm .macro save_vregs - stp d8, d9, [sp,#(STACK_BASE_VREGS+0*16)] - stp d10, d11, [sp,#(STACK_BASE_VREGS+1*16)] - stp d12, d13, [sp,#(STACK_BASE_VREGS+2*16)] - stp d14, d15, [sp,#(STACK_BASE_VREGS+3*16)] + stp d8, d9, [sp,#(MLK_STACK_BASE_VREGS+0*16)] + stp d10, d11, [sp,#(MLK_STACK_BASE_VREGS+1*16)] + stp d12, d13, [sp,#(MLK_STACK_BASE_VREGS+2*16)] + stp d14, d15, [sp,#(MLK_STACK_BASE_VREGS+3*16)] .endm .macro restore_vregs - ldp d8, d9, [sp,#(STACK_BASE_VREGS+0*16)] - ldp d10, d11, [sp,#(STACK_BASE_VREGS+1*16)] - ldp d12, d13, [sp,#(STACK_BASE_VREGS+2*16)] - ldp d14, d15, [sp,#(STACK_BASE_VREGS+3*16)] + ldp d8, d9, [sp,#(MLK_STACK_BASE_VREGS+0*16)] + ldp d10, d11, [sp,#(MLK_STACK_BASE_VREGS+1*16)] + ldp d12, d13, [sp,#(MLK_STACK_BASE_VREGS+2*16)] + ldp d14, d15, [sp,#(MLK_STACK_BASE_VREGS+3*16)] .endm .macro alloc_stack - sub sp, sp, #(STACK_SIZE) + sub sp, sp, #(MLK_STACK_SIZE) .endm .macro free_stack - add sp, sp, #(STACK_SIZE) + add sp, sp, #(MLK_STACK_SIZE) .endm .macro eor5 dst, src0, src1, src2, src3, src4 @@ -396,7 +396,7 @@ eor x30, x30, x29, ror #63 // .........*............................................ eor x22, x22, x30 // ..........*........................................... eor x23, x23, x30 // ..........*........................................... - str x23, [sp, #STACK_LOC_0] // ...........*.......................................... // @slothy:writes=stack_0 + str x23, [sp, #MLK_STACK_LOC_0] // ...........*.......................................... // @slothy:writes=stack_0 eor x23, x14, x15 // ...........*.......................................... eor x14, x14, x0 // ............*......................................... eor x23, x23, x11 // ............*......................................... @@ -424,7 +424,7 @@ eor x12, x3, x27 // ........................*............................. bic x3, x13, x17, ror #19 // ........................*............................. eor x5, x5, x27 // .........................*............................ - ldr x27, [sp, #STACK_LOC_0] // .........................*............................ // @slothy:reads=stack_0 + ldr x27, [sp, #MLK_STACK_LOC_0] // .........................*............................ // @slothy:reads=stack_0 bic x25, x17, x2, ror #5 // ..........................*........................... eor x9, x9, x29 // ..........................*........................... eor x23, x25, x5, ror #52 // ...........................*.......................... @@ -454,12 +454,12 @@ eor x12, x15, x12, ror #58 // .......................................*.............. eor x15, x5, x27, ror #27 // .......................................*.............. eor x5, x20, x11, ror #41 // ........................................*............. - ldr x11, [sp, #STACK_OFFSET_CONST_SCALAR] // ........................................*............. + ldr x11, [sp, #MLK_STACK_OFFSET_CONST_SCALAR] // ........................................*............. eor x20, x17, x4, ror #21 // .........................................*............ eor x17, x24, x9, ror #47 // .........................................*............ mov x24, #1 // ..........................................*........... bic x9, x0, x16, ror #9 // ..........................................*........... - str x24, [sp, #STACK_OFFSET_COUNT] // ...........................................*.......... // @slothy:writes=STACK_OFFSET_COUNT + str x24, [sp, #MLK_STACK_OFFSET_COUNT] // ...........................................*.......... // @slothy:writes=MLK_STACK_OFFSET_COUNT bic x24, x29, x1, ror #44 // ...........................................*.......... bic x27, x1, x21, ror #50 // ............................................*......... bic x4, x26, x29, ror #63 // ............................................*......... @@ -520,10 +520,10 @@ // eor X, sAga, X // eor X, sAge, X - // ldr X, [sp, #STACK_OFFSET_CONST_SCALAR] + // ldr X, [sp, #MLK_STACK_OFFSET_CONST_SCALAR] // ldr X, [X] // mov X, #1 - // str X, [sp, #STACK_OFFSET_COUNT] // @slothy:writes=STACK_OFFSET_COUNT + // str X, [sp, #MLK_STACK_OFFSET_COUNT] // @slothy:writes=MLK_STACK_OFFSET_COUNT // chi_step_ror sAga, X, X, X, 47, 39 // chi_step_ror sAge, X, X, X, 42, 25 @@ -595,9 +595,9 @@ xar_m0 vAsu_, vAse, E1, 62 xar_m1 vAme_, vAga, E0, 28 xar_m0 vAbe_, vAge, E1, 20 - ldr tmp, [sp, #STACK_OFFSET_CONST_VECTOR] // @slothy:reads=STACK_OFFSET_CONST_VECTOR + ldr tmp, [sp, #MLK_STACK_OFFSET_CONST_VECTOR] // @slothy:reads=MLK_STACK_OFFSET_CONST_VECTOR ld1r {v28.2d}, [tmp], #8 - str tmp, [sp, #STACK_OFFSET_CONST_VECTOR] // @slothy:writes=STACK_OFFSET_CONST_VECTOR + str tmp, [sp, #MLK_STACK_OFFSET_CONST_VECTOR] // @slothy:writes=MLK_STACK_OFFSET_CONST_VECTOR bcax_m1 vAga, vAga_, vAgi_, vAge_ bcax_m0 vAge, vAge_, vAgo_, vAgi_ bcax_m1 vAgi, vAgi_, vAgu_, vAgo_ @@ -646,7 +646,7 @@ ror x26, x26, #58 // ........*................................................ eor x16, x30, x16 // .........*............................................... eor x28, x30, x28, ror #63 // .........*............................................... - str x28, [sp, #STACK_LOC_0] // ..........*.............................................. // @slothy:writes=stack_0 + str x28, [sp, #MLK_STACK_LOC_0] // ..........*.............................................. // @slothy:writes=stack_0 eor x29, x29, x17, ror #36 // ..........*.............................................. eor x28, x1, x2, ror #61 // ...........*............................................. eor x19, x30, x19, ror #37 // ...........*............................................. @@ -700,9 +700,9 @@ eor x16, x21, x19, ror #43 // ....................................*.................... eor x21, x17, x25, ror #30 // ....................................*.................... bic x19, x25, x19, ror #57 // .....................................*................... - ldr x25, [sp, #STACK_OFFSET_COUNT] // .....................................*................... // @slothy:reads=STACK_OFFSET_COUNT + ldr x25, [sp, #MLK_STACK_OFFSET_COUNT] // .....................................*................... // @slothy:reads=MLK_STACK_OFFSET_COUNT eor x17, x10, x9, ror #47 // ......................................*.................. - ldr x9, [sp, #STACK_OFFSET_CONST_SCALAR] // ......................................*.................. + ldr x9, [sp, #MLK_STACK_OFFSET_CONST_SCALAR] // ......................................*.................. eor x15, x20, x28, ror #27 // .......................................*................. bic x20, x4, x28, ror #2 // .......................................*................. eor x10, x20, x1, ror #50 // ........................................*................ @@ -711,10 +711,10 @@ bic x4, x28, x1, ror #48 // .........................................*............... bic x1, x1, x11, ror #57 // ..........................................*.............. ldr x28, [x9, x25, LSL #3] // ..........................................*.............. - ldr x9, [sp, #STACK_LOC_0] // ...........................................*............. // @slothy:reads=stack_0 + ldr x9, [sp, #MLK_STACK_LOC_0] // ...........................................*............. // @slothy:reads=stack_0 add x25, x25, #1 // ...........................................*............. - str x25, [sp, #STACK_OFFSET_COUNT] // ............................................*............ // @slothy:writes=STACK_OFFSET_COUNT - cmp x25, #(KECCAK_F1600_ROUNDS-1) // ............................................*............ // @slothy:ignore_useless_output + str x25, [sp, #MLK_STACK_OFFSET_COUNT] // ............................................*............ // @slothy:writes=MLK_STACK_OFFSET_COUNT + cmp x25, #(MLK_KECCAK_F1600_ROUNDS-1) // ............................................*............ // @slothy:ignore_useless_output eor x25, x1, x27, ror #53 // .............................................*........... bic x27, x30, x26, ror #47 // .............................................*........... eor x1, x5, x28 // ..............................................*.......... @@ -800,12 +800,12 @@ // eor X, X, sAga, ror #61 // eor X, X, sAge, ror #19 - // ldr X, [sp, #STACK_OFFSET_CONST_SCALAR] - // ldr X, [sp, #STACK_OFFSET_COUNT] // @slothy:reads=STACK_OFFSET_COUNT + // ldr X, [sp, #MLK_STACK_OFFSET_CONST_SCALAR] + // ldr X, [sp, #MLK_STACK_OFFSET_COUNT] // @slothy:reads=MLK_STACK_OFFSET_COUNT // ldr X, [X, X, LSL #3] // add X, X, #1 - // cmp X, #(KECCAK_F1600_ROUNDS-1) // @slothy:ignore_useless_output - // str X, [sp, #STACK_OFFSET_COUNT] // @slothy:writes=STACK_OFFSET_COUNT + // cmp X, #(MLK_KECCAK_F1600_ROUNDS-1) // @slothy:ignore_useless_output + // str X, [sp, #MLK_STACK_OFFSET_COUNT] // @slothy:writes=MLK_STACK_OFFSET_COUNT // chi_step_ror sAga, X, X, X, 47, 39 // chi_step_ror sAge, X, X, X, 42, 25 @@ -873,10 +873,10 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm) mov const_addr, input_rc mov outer, #0 - str outer, [sp, #STACK_OFFSET_OUTER] // @slothy:writes=STACK_OFFSET_OUTER - str const_addr, [sp, #STACK_OFFSET_CONST_SCALAR] // @slothy:writes=STACK_OFFSET_CONST_SCALAR - str const_addr, [sp, #STACK_OFFSET_CONST_VECTOR] // @slothy:writes=STACK_OFFSET_CONST_VECTOR - str input_addr, [sp, #STACK_OFFSET_INPUT] // @slothy:writes=STACK_OFFSET_INPUT + str outer, [sp, #MLK_STACK_OFFSET_OUTER] // @slothy:writes=MLK_STACK_OFFSET_OUTER + str const_addr, [sp, #MLK_STACK_OFFSET_CONST_SCALAR] // @slothy:writes=MLK_STACK_OFFSET_CONST_SCALAR + str const_addr, [sp, #MLK_STACK_OFFSET_CONST_VECTOR] // @slothy:writes=MLK_STACK_OFFSET_CONST_VECTOR + str input_addr, [sp, #MLK_STACK_OFFSET_INPUT] // @slothy:writes=MLK_STACK_OFFSET_INPUT load_input_vector // Vector input load_input_scalar 0 // First scalar input @@ -894,22 +894,22 @@ keccak_f1600_x4_v8a_v84a_scalar_hybrid_loop_end: final_scalar_rotate // Read outer loop flag: We repeat the above twice - ldr outer, [sp, #STACK_OFFSET_OUTER] // @slothy:reads=STACK_OFFSET_OUTER + ldr outer, [sp, #MLK_STACK_OFFSET_OUTER] // @slothy:reads=MLK_STACK_OFFSET_OUTER cmp outer, #1 beq keccak_f1600_x4_v8a_v84a_scalar_hybrid_done // Update outer loop flag mov outer, #1 - str outer, [sp, #STACK_OFFSET_OUTER] // @slothy:writes=STACK_OFFSET_OUTER + str outer, [sp, #MLK_STACK_OFFSET_OUTER] // @slothy:writes=MLK_STACK_OFFSET_OUTER - ldr input_addr, [sp, #STACK_OFFSET_INPUT] // @slothy:reads=STACK_OFFSET_INPUT + ldr input_addr, [sp, #MLK_STACK_OFFSET_INPUT] // @slothy:reads=MLK_STACK_OFFSET_INPUT store_input_scalar 0 // Store first scalar data load_input_scalar 1 // Load second scalar input b keccak_f1600_x4_v8a_v84a_scalar_hybrid_initial keccak_f1600_x4_v8a_v84a_scalar_hybrid_done: - ldr input_addr, [sp, #STACK_OFFSET_INPUT] // @slothy:reads=STACK_OFFSET_INPUT + ldr input_addr, [sp, #MLK_STACK_OFFSET_INPUT] // @slothy:reads=MLK_STACK_OFFSET_INPUT store_input_scalar 1 store_input_vector @@ -1026,19 +1026,19 @@ keccak_f1600_x4_v8a_v84a_scalar_hybrid_done: /* To facilitate single-compilation-unit (SCU) builds, undefine all macros. * Don't modify by hand -- this is auto-generated by scripts/autogen. */ -#undef KECCAK_F1600_ROUNDS -#undef STACK_LOCS -#undef STACK_SIZE -#undef STACK_BASE_GPRS -#undef STACK_BASE_VREGS -#undef STACK_OFFSET_LOCS -#undef STACK_OFFSET_INPUT -#undef STACK_OFFSET_CONST_SCALAR -#undef STACK_OFFSET_CONST_VECTOR -#undef STACK_OFFSET_COUNT -#undef STACK_OFFSET_OUTER -#undef STACK_LOC_0 -#undef STACK_LOC_1 +#undef MLK_KECCAK_F1600_ROUNDS +#undef MLK_STACK_LOCS +#undef MLK_STACK_SIZE +#undef MLK_STACK_BASE_GPRS +#undef MLK_STACK_BASE_VREGS +#undef MLK_STACK_OFFSET_LOCS +#undef MLK_STACK_OFFSET_INPUT +#undef MLK_STACK_OFFSET_CONST_SCALAR +#undef MLK_STACK_OFFSET_CONST_VECTOR +#undef MLK_STACK_OFFSET_COUNT +#undef MLK_STACK_OFFSET_OUTER +#undef MLK_STACK_LOC_0 +#undef MLK_STACK_LOC_1 /* simpasm: footer-start */ #endif /* __ARM_FEATURE_SHA3 */ diff --git a/dev/x86_64/src/rej_uniform_asm.S b/dev/x86_64/src/rej_uniform_asm.S index 4a0b7ff672..ee89f85cc9 100644 --- a/dev/x86_64/src/rej_uniform_asm.S +++ b/dev/x86_64/src/rej_uniform_asm.S @@ -24,155 +24,155 @@ !defined(MLK_CONFIG_MULTILEVEL_NO_SHARED) /* simpasm: header-end */ -#define in %rsi -#define out %rdi -#define len %rdx -#define tab %rcx - -#define cnt %rax -#define ecnt %eax -#define pos %r8 - -#define good %r11 -#define pext_mask %r9 -#define table_idx %r10 - -#define bound %xmm0 -#define temp0 %xmm1 -#define temp1 %xmm3 -#define vals %xmm2 -#define shuffle_out_mask %xmm3 -#define shuffle_in_mask %xmm4 -#define and_mask %xmm5 +#define MLK_IN %rsi +#define MLK_OUT %rdi +#define MLK_LEN %rdx +#define MLK_TAB %rcx + +#define MLK_CNT %rax +#define MLK_ECNT %eax +#define MLK_POS %r8 + +#define MLK_GOOD %r11 +#define MLK_PEXT_MASK %r9 +#define MLK_TABLE_IDX %r10 + +#define MLK_BOUND %xmm0 +#define MLK_TEMP0 %xmm1 +#define MLK_TEMP1 %xmm3 +#define MLK_VALS %xmm2 +#define MLK_SHUFFLE_OUT_MASK %xmm3 +#define MLK_SHUFFLE_IN_MASK %xmm4 +#define MLK_AND_MASK %xmm5 // High level overview of the algorithm: // For every 96 bits (12 bytes) of the input: // 1. Split 96 bits into eight 12-bit integers where each integer -// occupies a corresponding 16-bit element of `vals` xmm register, -// 2. Compute an 8-bit value `good` such that -// good[i] = vals[i] < MLKEM_Q ? 1 : 0, for i in [0, 7], -// 3. Shuffle the elements in `vals` such that all good elements +// occupies a corresponding 16-bit element of `MLK_VALS` xmm register, +// 2. Compute an 8-bit value `MLK_GOOD` such that +// MLK_GOOD[i] = MLK_VALS[i] < MLKEM_Q ? 1 : 0, for i in [0, 7], +// 3. Shuffle the elements in `MLK_VALS` such that all MLK_GOOD elements // are ordered consecutivelly, and store them. // // Notes: -// - We exit early if we find the required number of good values, +// - We exit early if we find the required number of MLK_GOOD values, // - We use the stack as a temporary storage and copy to the actual // output buffer only in the end. This is because the algorithm // can overwrite up to 14 bytes (we use 16B for alignment), // - The implementation uses x86 SSE and BMI2 extensions. -#define STACK_SIZE (2*MLKEM_N + 16) +#define MLK_STACK_SIZE (2*MLKEM_N + 16) .text .global MLK_ASM_NAMESPACE(rej_uniform_asm) .balign 4 MLK_ASM_FN_SYMBOL(rej_uniform_asm) - subq $STACK_SIZE, %rsp + subq $MLK_STACK_SIZE, %rsp // Return if input length is 0 - xorl ecnt, ecnt - testq len, len + xorl MLK_ECNT, MLK_ECNT + testq MLK_LEN, MLK_LEN jz rej_uniform_asm_end // Broadcast MLKEM_Q (3329) to all 16-bit elements of bound reg. movq $0x0D010D010D010D01, %rax - movq %rax, bound - pinsrq $1, %rax, bound + movq %rax, MLK_BOUND + pinsrq $1, %rax, MLK_BOUND - // Broadcast 12-bit mask 0xFFF to all 16-bit elements of bound reg. + // Broadcast 12-bit mask 0xFFF to all 16-bit elements of MLK_BOUND reg. movq $0x0FFF0FFF0FFF0FFF, %rax - movq %rax, and_mask - pinsrq $1, %rax, and_mask + movq %rax, MLK_AND_MASK + pinsrq $1, %rax, MLK_AND_MASK // Load shuffle mask: // 0, 1, 1, 2, 3, 4, 4, 5, 6, 7, 7, 8, 9, 10, 10, 11. movq $0x0504040302010100, %rax - movq %rax, shuffle_in_mask + movq %rax, MLK_SHUFFLE_IN_MASK movq $0x0B0A0A0908070706, %rax - pinsrq $1, %rax, shuffle_in_mask + pinsrq $1, %rax, MLK_SHUFFLE_IN_MASK - movq $0, cnt // cnt counts the number of good values we've found. - movq $0, pos // pos is the current position in the input buffer. - movq $0x5555, pext_mask // 0x5555 mask to extract every second bit. + movq $0, MLK_CNT // MLK_CNT counts the number of MLK_GOOD values we've found. + movq $0, MLK_POS // MLK_POS is the current position in the input buffer. + movq $0x5555, MLK_PEXT_MASK // 0x5555 mask to extract every second bit. rej_uniform_asm_loop_start: // 1. Split 96 bits into eight 12-bit integers where each integer. - // We explain the algorithm by considering the lowest 64 bits of vals. - movdqu (in, pos), vals - // vals: [ 63..48 | 47..32 | 31..16 | 15..0 ] - pshufb shuffle_in_mask, vals - // vals: [ 47..32 | 39..24 | 23..8 | 15..0 ] - movdqa vals, temp1 + // We explain the algorithm by considering the lowest 64 bits of MLK_VALS. + movdqu (MLK_IN, MLK_POS), MLK_VALS + // MLK_VALS: [ 63..48 | 47..32 | 31..16 | 15..0 ] + pshufb MLK_SHUFFLE_IN_MASK, MLK_VALS + // MLK_VALS: [ 47..32 | 39..24 | 23..8 | 15..0 ] + movdqa MLK_VALS, MLK_TEMP1 // temp: [ 47..32 | 39..24 | 23..8 | 15..0 ] - psrlw $4, temp1 + psrlw $4, MLK_TEMP1 // temp: [ 47..36 | 39..28 | 23..12 | 15..4 ] - pblendw $0xAA, temp1, vals - // vals: [ 47..36 | 39..24 | 23..12 | 15..0] - pand and_mask, vals - // vals: [ 47..36 | 35..24 | 23..12 | 12..0] - - // 2. Compute an 8-bit value `good` such that - // good[i] = vals[i] < MLKEM_Q ? 1 : 0, for i in [0, 7], - movdqa bound, temp0 - pcmpgtw vals, temp0 - pmovmskb temp0, good - pext pext_mask, good, good - - // 3. Shuffle the elements in `vals` such that all good elements + pblendw $0xAA, MLK_TEMP1, MLK_VALS + // MLK_VALS: [ 47..36 | 39..24 | 23..12 | 15..0] + pand MLK_AND_MASK, MLK_VALS + // MLK_VALS: [ 47..36 | 35..24 | 23..12 | 12..0] + + // 2. Compute an 8-bit value `MLK_GOOD` such that + // MLK_GOOD[i] = MLK_VALS[i] < MLKEM_Q ? 1 : 0, for i in [0, 7], + movdqa MLK_BOUND, MLK_TEMP0 + pcmpgtw MLK_VALS, MLK_TEMP0 + pmovmskb MLK_TEMP0, MLK_GOOD + pext MLK_PEXT_MASK, MLK_GOOD, MLK_GOOD + + // 3. Shuffle the elements in `MLK_VALS` such that all MLK_GOOD elements // are ordered consecutivelly, and store them. - movq good, table_idx - shl $4, table_idx - movdqu (tab, table_idx), shuffle_out_mask - pshufb shuffle_out_mask, vals - movdqu vals, (%rsp, cnt, 2) + movq MLK_GOOD, MLK_TABLE_IDX + shl $4, MLK_TABLE_IDX + movdqu (MLK_TAB, MLK_TABLE_IDX), MLK_SHUFFLE_OUT_MASK + pshufb MLK_SHUFFLE_OUT_MASK, MLK_VALS + movdqu MLK_VALS, (%rsp, MLK_CNT, 2) // Update the counter and check if we are done. - popcnt good, good - addq good, cnt + popcnt MLK_GOOD, MLK_GOOD + addq MLK_GOOD, MLK_CNT - cmpq $256, cnt + cmpq $256, MLK_CNT jnb rej_uniform_asm_final_copy - addq $12, pos - cmpq pos, len + addq $12, MLK_POS + cmpq MLK_POS, MLK_LEN ja rej_uniform_asm_loop_start rej_uniform_asm_final_copy: - // Copy up to 256 values to the output: min(cnt, 256). + // Copy up to 256 values to the output: min(MLK_CNT, 256). mov $256, %rcx - cmp $256, cnt - cmova %rcx, cnt + cmp $256, MLK_CNT + cmova %rcx, MLK_CNT movq %rsp, %rsi - movq cnt, %rcx + movq MLK_CNT, %rcx shlq $1, %rcx rep movsb rej_uniform_asm_end: - addq $STACK_SIZE, %rsp + addq $MLK_STACK_SIZE, %rsp ret /* To facilitate single-compilation-unit (SCU) builds, undefine all macros. * Don't modify by hand -- this is auto-generated by scripts/autogen. */ -#undef in -#undef out -#undef len -#undef tab -#undef cnt -#undef ecnt -#undef pos -#undef good -#undef pext_mask -#undef table_idx -#undef bound -#undef temp0 -#undef temp1 -#undef vals -#undef shuffle_out_mask -#undef shuffle_in_mask -#undef and_mask -#undef STACK_SIZE +#undef MLK_IN +#undef MLK_OUT +#undef MLK_LEN +#undef MLK_TAB +#undef MLK_CNT +#undef MLK_ECNT +#undef MLK_POS +#undef MLK_GOOD +#undef MLK_PEXT_MASK +#undef MLK_TABLE_IDX +#undef MLK_BOUND +#undef MLK_TEMP0 +#undef MLK_TEMP1 +#undef MLK_VALS +#undef MLK_SHUFFLE_OUT_MASK +#undef MLK_SHUFFLE_IN_MASK +#undef MLK_AND_MASK +#undef MLK_STACK_SIZE /* simpasm: footer-start */ #endif /* MLK_ARITH_BACKEND_X86_64_DEFAULT && !MLK_CONFIG_MULTILEVEL_NO_SHARED \ diff --git a/nix/slothy/default.nix b/nix/slothy/default.nix index d657a3d7ea..bc1ef45091 100644 --- a/nix/slothy/default.nix +++ b/nix/slothy/default.nix @@ -17,12 +17,12 @@ let in stdenvNoCC.mkDerivation rec { pname = "slothy-cli"; - version = "915c224166207ce07b31152194305c3b6687d09b"; + version = "88374fb31f9ed0fc4022eb277ab180dbfeb9fd62"; src = fetchFromGitHub { owner = "slothy-optimizer"; repo = "slothy"; rev = version; - sha256 = "sha256-ebZjm+nhmML/+DZF78eN1ezoxbv5Rrc5kWsh0Ycww4U="; + sha256 = "sha256-tbaOQNCJKeKE9uzyX9drsBOP2g50ehYNXWPGAepCqlA="; }; nativeBuildInputs = [ pkgs.makeWrapper ]; diff --git a/scripts/autogen b/scripts/autogen index 2c6b173643..82a1e3b541 100755 --- a/scripts/autogen +++ b/scripts/autogen @@ -2031,7 +2031,11 @@ def check_macro_typos(): # 1. Makefiles use MLK_SOURCE_XXX to list source files if is_autogen or filename.endswith("/Makefile"): - if m.startswith("MLK_SOURCE") or m.startswith("MLK_OBJ"): + if ( + m.startswith("MLK_SOURCE") + or m.startswith("MLK_OBJ") + or "STACK_LOC" in m + ): return True # 2. libOQS specific identifier