Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions dev/aarch64_clean/src/rej_uniform_asm.S
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,14 @@
// We save the output on the stack first, and copy it to the actual
// output buffer only at the end. This is because the main loop can write up
// to 62 bytes past the end of the output, which we account for here (we
// reserve 64 bytes to preserve alignment).
#define STACK_SIZE (2*MLKEM_N + 64)
#define MLK_STACK_SIZE (2*MLKEM_N + 64)

.macro push_stack
sub sp, sp, #STACK_SIZE
sub sp, sp, #MLK_STACK_SIZE
.endm

.macro pop_stack
add sp, sp, #STACK_SIZE
add sp, sp, #MLK_STACK_SIZE
.endm

/* Parameters */
Expand Down Expand Up @@ -458,7 +458,7 @@ rej_uniform_return:

/* To facilitate single-compilation-unit (SCU) builds, undefine all macros.
* Don't modify by hand -- this is auto-generated by scripts/autogen. */
#undef STACK_SIZE
#undef MLK_STACK_SIZE

/* simpasm: footer-start */
#endif /* MLK_ARITH_BACKEND_AARCH64 && !MLK_CONFIG_MULTILEVEL_NO_SHARED */
8 changes: 4 additions & 4 deletions dev/aarch64_opt/src/rej_uniform_asm.S
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,14 @@
// We save the output on the stack first, and copy it to the actual
// output buffer only at the end. This is because the main loop can write up
// to 62 bytes past the end of the output, which we account for here (we
// reserve 64 bytes to preserve alignment).
#define STACK_SIZE (2*MLKEM_N + 64)
#define MLK_STACK_SIZE (2*MLKEM_N + 64)

.macro push_stack
sub sp, sp, #STACK_SIZE
sub sp, sp, #MLK_STACK_SIZE
.endm

.macro pop_stack
add sp, sp, #STACK_SIZE
add sp, sp, #MLK_STACK_SIZE
.endm

/* Parameters */
Expand Down Expand Up @@ -458,7 +458,7 @@ rej_uniform_return:

/* To facilitate single-compilation-unit (SCU) builds, undefine all macros.
* Don't modify by hand -- this is auto-generated by scripts/autogen. */
#undef STACK_SIZE
#undef MLK_STACK_SIZE

/* simpasm: footer-start */
#endif /* MLK_ARITH_BACKEND_AARCH64 && !MLK_CONFIG_MULTILEVEL_NO_SHARED */
16 changes: 11 additions & 5 deletions dev/fips202/aarch64/src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ keccak_f1600_x1_scalar_asm.S: ../../aarch64_symbolic/keccak_f1600_x1_scalar_symb

slothy-cli Arm_AArch64 Arm_Cortex_A55 \
$^ -o $@ \
-c constraints.spill_stack_loc_prefix=MLK_STACK_LOC \
-c reserved_regs="[x18,sp]" \
-c inputs_are_outputs \
-c variable_size \
Expand All @@ -23,10 +24,11 @@ keccak_f1600_x1_scalar_asm.S: ../../aarch64_symbolic/keccak_f1600_x1_scalar_symb

slothy-cli Arm_AArch64 Arm_Cortex_A55 \
$@ -o $@ \
-c constraints.spill_stack_loc_prefix=MLK_STACK_LOC \
-c reserved_regs="[x18,sp]" \
-c variable_size \
-c inputs_are_outputs \
-c outputs="[hint_STACK_LOC_COUNT]" \
-c outputs="[hint_MLK_STACK_LOC_COUNT]" \
-c constraints.stalls_first_attempt=64 \
-c constraints.allow_spills \
-c constraints.minimize_spills \
Expand All @@ -38,6 +40,7 @@ keccak_f1600_x1_scalar_asm.S: ../../aarch64_symbolic/keccak_f1600_x1_scalar_symb
keccak_f1600_x4_v8a_scalar_hybrid_asm.S: ../../aarch64_symbolic/keccak_f1600_x4_v8a_scalar_hybrid_clean.S
slothy-cli Arm_AArch64 Arm_Cortex_A55 \
$^ -o $@ \
-c constraints.spill_stack_loc_prefix=MLK_STACK_LOC \
-c reserved_regs="[x18,sp]" \
-c inputs_are_outputs \
-c variable_size \
Expand All @@ -48,12 +51,13 @@ keccak_f1600_x4_v8a_scalar_hybrid_asm.S: ../../aarch64_symbolic/keccak_f1600_x4_
-c split_heuristic_preprocess_naive_interleaving_strategy="alternate" \
-c split_heuristic_estimate_performance=False \
-c absorb_spills=False \
-c outputs="[hint_STACK_OFFSET_COUNT]" \
-c outputs="[hint_MLK_STACK_OFFSET_COUNT]" \
-s keccak_f1600_x4_v8a_scalar_hybrid_initial \
-e keccak_f1600_x4_v8a_scalar_hybrid_loop

slothy-cli Arm_AArch64 Arm_Cortex_A55 \
$@ -o $@ \
-c constraints.spill_stack_loc_prefix=MLK_STACK_LOC \
-c reserved_regs="[x18,sp]" \
-c inputs_are_outputs \
-c variable_size \
Expand All @@ -63,14 +67,15 @@ keccak_f1600_x4_v8a_scalar_hybrid_asm.S: ../../aarch64_symbolic/keccak_f1600_x4_
-c split_heuristic_preprocess_naive_interleaving \
-c split_heuristic_preprocess_naive_interleaving_strategy="alternate" \
-c split_heuristic_estimate_performance=False \
-c outputs="[hint_STACK_OFFSET_COUNT]" \
-c outputs="[hint_MLK_STACK_OFFSET_COUNT]" \
-c absorb_spills=False \
-s keccak_f1600_x4_v8a_scalar_hybrid_loop \
-e keccak_f1600_x4_v8a_scalar_hybrid_loop_end

keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S: ../../aarch64_symbolic/keccak_f1600_x4_v8a_v84a_scalar_hybrid_clean.S
slothy-cli Arm_AArch64 Arm_Cortex_A55 \
$^ -o $@ \
-c constraints.spill_stack_loc_prefix=MLK_STACK_LOC \
-c reserved_regs="[x18,sp]" \
-c inputs_are_outputs \
-c variable_size \
Expand All @@ -81,12 +86,13 @@ keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S: ../../aarch64_symbolic/keccak_f160
-c split_heuristic_preprocess_naive_interleaving_strategy="alternate" \
-c split_heuristic_estimate_performance=False \
-c absorb_spills=False \
-c outputs="[hint_STACK_OFFSET_COUNT]" \
-c outputs="[hint_MLK_STACK_OFFSET_COUNT]" \
-s keccak_f1600_x4_v8a_v84a_scalar_hybrid_initial \
-e keccak_f1600_x4_v8a_v84a_scalar_hybrid_loop

slothy-cli Arm_AArch64 Arm_Cortex_A55 \
$@ -o $@ \
-c constraints.spill_stack_loc_prefix=MLK_STACK_LOC \
-c reserved_regs="[x18,sp]" \
-c inputs_are_outputs \
-c variable_size \
Expand All @@ -96,7 +102,7 @@ keccak_f1600_x4_v8a_v84a_scalar_hybrid_asm.S: ../../aarch64_symbolic/keccak_f160
-c split_heuristic_preprocess_naive_interleaving \
-c split_heuristic_preprocess_naive_interleaving_strategy="alternate" \
-c split_heuristic_estimate_performance=False \
-c outputs="[hint_STACK_OFFSET_COUNT]" \
-c outputs="[hint_MLK_STACK_OFFSET_COUNT]" \
-c absorb_spills=False \
-s keccak_f1600_x4_v8a_v84a_scalar_hybrid_loop \
-e keccak_f1600_x4_v8a_v84a_scalar_hybrid_loop_end
Expand Down
86 changes: 43 additions & 43 deletions dev/fips202/aarch64/src/keccak_f1600_x1_scalar_asm.S
Original file line number Diff line number Diff line change
Expand Up @@ -71,39 +71,39 @@

/************************ MACROS ****************************/

#define STACK_LOCS 4
#define MLK_STACK_LOCS 4

#define STACK_SIZE (16*6 + (STACK_LOCS) * 8)
#define STACK_BASE_GPRS (3*8+8)
#define STACK_LOC_INPUT (0*8)
#define STACK_LOC_CONST (1*8)
#define STACK_LOC_COUNT (2*8)
#define STACK_LOC_MISC0 (3*8)
#define MLK_STACK_SIZE (16*6 + (MLK_STACK_LOCS) * 8)
#define MLK_STACK_BASE_GPRS (3*8+8)
#define MLK_STACK_LOC_INPUT (0*8)
#define MLK_STACK_LOC_CONST (1*8)
#define MLK_STACK_LOC_COUNT (2*8)
#define MLK_STACK_LOC_MISC0 (3*8)

.macro alloc_stack
sub sp, sp, #(STACK_SIZE)
sub sp, sp, #(MLK_STACK_SIZE)
.endm

.macro free_stack
add sp, sp, #(STACK_SIZE)
add sp, sp, #(MLK_STACK_SIZE)
.endm

.macro save_gprs
stp x19, x20, [sp, #(STACK_BASE_GPRS + 16*0)]
stp x21, x22, [sp, #(STACK_BASE_GPRS + 16*1)]
stp x23, x24, [sp, #(STACK_BASE_GPRS + 16*2)]
stp x25, x26, [sp, #(STACK_BASE_GPRS + 16*3)]
stp x27, x28, [sp, #(STACK_BASE_GPRS + 16*4)]
stp x29, x30, [sp, #(STACK_BASE_GPRS + 16*5)]
stp x19, x20, [sp, #(MLK_STACK_BASE_GPRS + 16*0)]
stp x21, x22, [sp, #(MLK_STACK_BASE_GPRS + 16*1)]
stp x23, x24, [sp, #(MLK_STACK_BASE_GPRS + 16*2)]
stp x25, x26, [sp, #(MLK_STACK_BASE_GPRS + 16*3)]
stp x27, x28, [sp, #(MLK_STACK_BASE_GPRS + 16*4)]
stp x29, x30, [sp, #(MLK_STACK_BASE_GPRS + 16*5)]
.endm

.macro restore_gprs
ldp x19, x20, [sp, #(STACK_BASE_GPRS + 16*0)]
ldp x21, x22, [sp, #(STACK_BASE_GPRS + 16*1)]
ldp x23, x24, [sp, #(STACK_BASE_GPRS + 16*2)]
ldp x25, x26, [sp, #(STACK_BASE_GPRS + 16*3)]
ldp x27, x28, [sp, #(STACK_BASE_GPRS + 16*4)]
ldp x29, x30, [sp, #(STACK_BASE_GPRS + 16*5)]
ldp x19, x20, [sp, #(MLK_STACK_BASE_GPRS + 16*0)]
ldp x21, x22, [sp, #(MLK_STACK_BASE_GPRS + 16*1)]
ldp x23, x24, [sp, #(MLK_STACK_BASE_GPRS + 16*2)]
ldp x25, x26, [sp, #(MLK_STACK_BASE_GPRS + 16*3)]
ldp x27, x28, [sp, #(MLK_STACK_BASE_GPRS + 16*4)]
ldp x29, x30, [sp, #(MLK_STACK_BASE_GPRS + 16*5)]
.endm

.macro load_state
Expand Down Expand Up @@ -164,7 +164,7 @@
ror Asu, Asu,#(64-55)
.endm

#define KECCAK_F1600_ROUNDS 24
#define MLK_KECCAK_F1600_ROUNDS 24

.text
.global MLK_ASM_NAMESPACE(keccak_f1600_x1_scalar_asm)
Expand All @@ -175,9 +175,9 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x1_scalar_asm)

keccak_f1600_x1_scalar_initial:
mov const_addr, input_rc
str input_rc, [sp, #STACK_LOC_CONST]
str input_rc, [sp, #MLK_STACK_LOC_CONST]
load_state
str input_addr, [sp, #STACK_LOC_INPUT] // @slothy:writes=STACK_LOC_INPUT
str input_addr, [sp, #MLK_STACK_LOC_INPUT] // @slothy:writes=MLK_STACK_LOC_INPUT

// (Optimized for Cortex-A55)
// Instructions: 107
Expand Down Expand Up @@ -209,7 +209,7 @@ keccak_f1600_x1_scalar_initial:
eor x30, x30, x29, ror #63 // .........*............................................
eor x22, x22, x30 // ..........*...........................................
eor x23, x23, x30 // ..........*...........................................
str x23, [sp, #STACK_LOC_MISC0] // ...........*..........................................
str x23, [sp, #MLK_STACK_LOC_MISC0] // ...........*..........................................
eor x23, x14, x15 // ...........*..........................................
eor x14, x14, x0 // ............*.........................................
eor x23, x23, x11 // ............*.........................................
Expand Down Expand Up @@ -237,7 +237,7 @@ keccak_f1600_x1_scalar_initial:
eor x12, x3, x27 // ........................*.............................
bic x3, x13, x17, ror #19 // ........................*.............................
eor x5, x5, x27 // .........................*............................
ldr x27, [sp, #STACK_LOC_MISC0] // .........................*............................
ldr x27, [sp, #MLK_STACK_LOC_MISC0] // .........................*............................
bic x25, x17, x2, ror #5 // ..........................*...........................
eor x9, x9, x29 // ..........................*...........................
eor x23, x25, x5, ror #52 // ...........................*..........................
Expand Down Expand Up @@ -267,12 +267,12 @@ keccak_f1600_x1_scalar_initial:
eor x12, x15, x12, ror #58 // .......................................*..............
eor x15, x5, x27, ror #27 // .......................................*..............
eor x5, x20, x11, ror #41 // ........................................*.............
ldr x11, [sp, #STACK_LOC_CONST] // ........................................*.............
ldr x11, [sp, #MLK_STACK_LOC_CONST] // ........................................*.............
eor x20, x17, x4, ror #21 // .........................................*............
eor x17, x24, x9, ror #47 // .........................................*............
mov x24, #1 // ..........................................*...........
bic x9, x0, x16, ror #9 // ..........................................*...........
str x24, [sp, #STACK_LOC_COUNT] // ...........................................*..........
str x24, [sp, #MLK_STACK_LOC_COUNT] // ...........................................*..........
bic x24, x29, x1, ror #44 // ...........................................*..........
bic x27, x1, x21, ror #50 // ............................................*.........
bic x4, x26, x29, ror #63 // ............................................*.........
Expand Down Expand Up @@ -323,7 +323,7 @@ keccak_f1600_x1_scalar_initial:
ror x26, x26, #58 // ........*................................................
eor x16, x30, x16 // .........*...............................................
eor x28, x30, x28, ror #63 // .........*...............................................
str x28, [sp, #STACK_LOC_MISC0] // ..........*..............................................
str x28, [sp, #MLK_STACK_LOC_MISC0] // ..........*..............................................
eor x29, x29, x17, ror #36 // ..........*..............................................
eor x28, x1, x2, ror #61 // ...........*.............................................
eor x19, x30, x19, ror #37 // ...........*.............................................
Expand Down Expand Up @@ -377,9 +377,9 @@ keccak_f1600_x1_scalar_initial:
eor x16, x21, x19, ror #43 // ....................................*....................
eor x21, x17, x25, ror #30 // ....................................*....................
bic x19, x25, x19, ror #57 // .....................................*...................
ldr x25, [sp, #STACK_LOC_COUNT] // .....................................*...................
ldr x25, [sp, #MLK_STACK_LOC_COUNT] // .....................................*...................
eor x17, x10, x9, ror #47 // ......................................*..................
ldr x9, [sp, #STACK_LOC_CONST] // ......................................*..................
ldr x9, [sp, #MLK_STACK_LOC_CONST] // ......................................*..................
eor x15, x20, x28, ror #27 // .......................................*.................
bic x20, x4, x28, ror #2 // .......................................*.................
eor x10, x20, x1, ror #50 // ........................................*................
Expand All @@ -388,10 +388,10 @@ keccak_f1600_x1_scalar_initial:
bic x4, x28, x1, ror #48 // .........................................*...............
bic x1, x1, x11, ror #57 // ..........................................*..............
ldr x28, [x9, x25, LSL #3] // ..........................................*..............
ldr x9, [sp, #STACK_LOC_MISC0] // ...........................................*.............
ldr x9, [sp, #MLK_STACK_LOC_MISC0] // ...........................................*.............
add x25, x25, #1 // ...........................................*.............
str x25, [sp, #STACK_LOC_COUNT] // ............................................*............
cmp x25, #(KECCAK_F1600_ROUNDS-1) // ............................................*............
str x25, [sp, #MLK_STACK_LOC_COUNT] // ............................................*............
cmp x25, #(MLK_KECCAK_F1600_ROUNDS-1) // ............................................*............
eor x25, x1, x27, ror #53 // .............................................*...........
bic x27, x30, x26, ror #47 // .............................................*...........
eor x1, x5, x28 // ..............................................*..........
Expand Down Expand Up @@ -419,7 +419,7 @@ keccak_f1600_x1_scalar_initial:
ble keccak_f1600_x1_scalar_loop

final_rotate
ldr input_addr, [sp, #STACK_LOC_INPUT]
ldr input_addr, [sp, #MLK_STACK_LOC_INPUT]
store_state

restore_gprs
Expand Down Expand Up @@ -458,14 +458,14 @@ keccak_f1600_x1_scalar_initial:

/* To facilitate single-compilation-unit (SCU) builds, undefine all macros.
* Don't modify by hand -- this is auto-generated by scripts/autogen. */
#undef STACK_LOCS
#undef STACK_SIZE
#undef STACK_BASE_GPRS
#undef STACK_LOC_INPUT
#undef STACK_LOC_CONST
#undef STACK_LOC_COUNT
#undef STACK_LOC_MISC0
#undef KECCAK_F1600_ROUNDS
#undef MLK_STACK_LOCS
#undef MLK_STACK_SIZE
#undef MLK_STACK_BASE_GPRS
#undef MLK_STACK_LOC_INPUT
#undef MLK_STACK_LOC_CONST
#undef MLK_STACK_LOC_COUNT
#undef MLK_STACK_LOC_MISC0
#undef MLK_KECCAK_F1600_ROUNDS

/* simpasm: footer-start */
#endif /* MLK_FIPS202_AARCH64_NEED_X1_SCALAR && \
Expand Down
18 changes: 9 additions & 9 deletions dev/fips202/aarch64/src/keccak_f1600_x1_v84a_asm.S
Original file line number Diff line number Diff line change
Expand Up @@ -216,15 +216,15 @@
str Asud, [input_addr, #0xC0]
.endm

#define STACK_SIZE (16*4) /* VREGS (16*4) */
#define MLK_STACK_SIZE (16*4) /* VREGS (16*4) */

#define STACK_BASE_GPRS (16*4)
#define MLK_STACK_BASE_GPRS (16*4)
.macro alloc_stack
sub sp, sp, #(STACK_SIZE)
sub sp, sp, #(MLK_STACK_SIZE)
.endm

.macro free_stack
add sp, sp, #(STACK_SIZE)
add sp, sp, #(MLK_STACK_SIZE)
.endm

.macro save_vregs
Expand Down Expand Up @@ -339,7 +339,7 @@

.endm

#define KECCAK_F1600_ROUNDS 24
#define MLK_KECCAK_F1600_ROUNDS 24

.text
.global MLK_ASM_NAMESPACE(keccak_f1600_x1_v84a_asm)
Expand All @@ -349,7 +349,7 @@ MLK_ASM_FN_SYMBOL(keccak_f1600_x1_v84a_asm)
save_vregs
load_input

mov count, #(KECCAK_F1600_ROUNDS)
mov count, #(MLK_KECCAK_F1600_ROUNDS)
keccak_f1600_x1_v84a_loop:
keccak_f1600_round
sub count, count, #1
Expand Down Expand Up @@ -478,9 +478,9 @@ keccak_f1600_x1_v84a_loop:

/* To facilitate single-compilation-unit (SCU) builds, undefine all macros.
* Don't modify by hand -- this is auto-generated by scripts/autogen. */
#undef STACK_SIZE
#undef STACK_BASE_GPRS
#undef KECCAK_F1600_ROUNDS
#undef MLK_STACK_SIZE
#undef MLK_STACK_BASE_GPRS
#undef MLK_KECCAK_F1600_ROUNDS

/* simpasm: footer-start */
#endif /* __ARM_FEATURE_SHA3 */
Expand Down
Loading