From d5188f862bd366c1bae7dcb2ab917812b751e77c Mon Sep 17 00:00:00 2001
From: Sam Elliott
Date: Thu, 13 Jun 2024 04:39:48 -0700
Subject: [PATCH] [compiler-rt][RISC-V] ILP32E/LP64E Save/Restore Grouping

This changes the save/restore procedures to save/restore registers one
by one, to match the stack alignment for the ILP32E/LP64E ABIs, rather
than in the larger batches used by the conventional ABIs.

The implementations of the save routines are not tail-shared, to reduce
the number of instructions executed. I believe this also helps code
size, but I still need to verify that.

I would expect (but have not measured) that the majority of functions
compiled for the ILP32E/LP64E ABIs will in fact use both callee-saved
registers, so there are still savings to be had here. Those can come
later, with more data, especially if those changes only affect the
instruction sequences used to save the registers, rather than how many
are saved at each entry point and how they are aligned.

This is a potential break for everything using the ILP32E/LP64E ABIs.
We may instead have to teach the compiler to emit the CFI information
correctly for the grouping we already have implemented (because that
grouping matches GCC). It depends on how intentional we think the
grouping in the original ILP32E/LP64E save/restore implementation was,
and whether we think we can fix that now.
---
 compiler-rt/lib/builtins/riscv/restore.S | 45 ++++++++++++++++--------
 compiler-rt/lib/builtins/riscv/save.S    | 44 ++++++++++++++++-------
 2 files changed, 61 insertions(+), 28 deletions(-)

diff --git a/compiler-rt/lib/builtins/riscv/restore.S b/compiler-rt/lib/builtins/riscv/restore.S
index 6f43842c8ca68..bcda71cd32677 100644
--- a/compiler-rt/lib/builtins/riscv/restore.S
+++ b/compiler-rt/lib/builtins/riscv/restore.S
@@ -14,9 +14,12 @@
 // them falling through into each other and don't want the linker to
 // accidentally split them up, garbage collect, or reorder them.
 //
-// The entry points are grouped up into 2s for rv64 and 4s for rv32 since this
-// is the minimum grouping which will maintain the required 16-byte stack
-// alignment.
+// For the conventional ABIs, entry points are grouped up into 2s for rv64 and
+// 4s for rv32 since this is the minimum grouping which will maintain the
+// required 16-byte stack alignment.
+//
+// For the ilp32e/lp64e ABIs, entry points are grouped into 1s, since this
+// maintains the required 4-byte (ilp32e) / 8-byte (lp64e) stack alignment.
 
   .text
 
@@ -92,17 +95,23 @@ __riscv_restore_0:
 
   .globl  __riscv_restore_2
   .type   __riscv_restore_2,@function
+__riscv_restore_2:
+  lw     s1, 0(sp)
+  addi   sp, sp, 4
+  // fallthrough into __riscv_restore_1/0
+
   .globl  __riscv_restore_1
   .type   __riscv_restore_1,@function
+__riscv_restore_1:
+  lw     s0, 0(sp)
+  addi   sp, sp, 4
+  // fallthrough into __riscv_restore_0
+
   .globl  __riscv_restore_0
   .type   __riscv_restore_0,@function
-__riscv_restore_2:
-__riscv_restore_1:
 __riscv_restore_0:
-  lw     s1, 0(sp)
-  lw     s0, 4(sp)
-  lw     ra, 8(sp)
-  addi   sp, sp, 12
+  lw     ra, 0(sp)
+  addi   sp, sp, 4
   ret
 
 #endif
@@ -188,17 +197,23 @@ __riscv_restore_0:
 
   .globl  __riscv_restore_2
   .type   __riscv_restore_2,@function
+__riscv_restore_2:
+  ld     s1, 0(sp)
+  addi   sp, sp, 8
+  // fallthrough into __riscv_restore_1/0
+
   .globl  __riscv_restore_1
   .type   __riscv_restore_1,@function
+__riscv_restore_1:
+  ld     s0, 0(sp)
+  addi   sp, sp, 8
+  // fallthrough into __riscv_restore_0
+
   .globl  __riscv_restore_0
   .type   __riscv_restore_0,@function
-__riscv_restore_2:
-__riscv_restore_1:
 __riscv_restore_0:
-  ld     s1, 0(sp)
-  ld     s0, 8(sp)
-  ld     ra, 16(sp)
-  addi   sp, sp, 24
+  ld     ra, 0(sp)
+  addi   sp, sp, 8
   ret
 
 #endif
diff --git a/compiler-rt/lib/builtins/riscv/save.S b/compiler-rt/lib/builtins/riscv/save.S
index 3e044179ff7f1..334d5e3d016cd 100644
--- a/compiler-rt/lib/builtins/riscv/save.S
+++ b/compiler-rt/lib/builtins/riscv/save.S
@@ -98,19 +98,28 @@ __riscv_save_0:
 
   .globl  __riscv_save_2
   .type   __riscv_save_2,@function
-  .globl  __riscv_save_1
-  .type   __riscv_save_1,@function
-  .globl  __riscv_save_0
-  .type   __riscv_save_0,@function
 __riscv_save_2:
-__riscv_save_1:
-__riscv_save_0:
   addi   sp, sp, -12
   sw     s1, 0(sp)
   sw     s0, 4(sp)
   sw     ra, 8(sp)
   jr     t0
 
+  .globl  __riscv_save_1
+  .type   __riscv_save_1,@function
+__riscv_save_1:
+  addi   sp, sp, -8
+  sw     s0, 0(sp)
+  sw     ra, 4(sp)
+  jr     t0
+
+  .globl  __riscv_save_0
+  .type   __riscv_save_0,@function
+__riscv_save_0:
+  addi   sp, sp, -4
+  sw     ra, 0(sp)
+  jr     t0
+
 #endif
 
 #elif __riscv_xlen == 64
@@ -208,18 +217,27 @@ __riscv_save_0:
 
   .globl  __riscv_save_2
   .type   __riscv_save_2,@function
+__riscv_save_2:
+  addi   sp, sp, -24
+  sd     s1, 0(sp)
+  sd     s0, 8(sp)
+  sd     ra, 16(sp)
+  jr     t0
+
   .globl  __riscv_save_1
   .type   __riscv_save_1,@function
+__riscv_save_1:
+  addi   sp, sp, -16
+  sd     s0, 0(sp)
+  sd     ra, 8(sp)
+  jr     t0
+
   .globl  __riscv_save_0
   .type   __riscv_save_0,@function
-__riscv_save_2:
-__riscv_save_1:
 __riscv_save_0:
-  addi   sp, sp, -24
-  sd     s1, 0(sp)
-  sd     s0, 8(sp)
-  sd     ra, 16(sp)
-  jr     t0
+  addi   sp, sp, -8
+  sd     ra, 0(sp)
+  jr     t0
 
 #endif
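
For context, a sketch of how these millicode routines are used, assuming
the usual -msave-restore lowering; the function name `example` is
hypothetical. A function needing one callee-saved register is compiled
to the equivalent of:

  example:
    call   t0, __riscv_save_1    // save s0 and ra; return address in t0
    ...                          // function body; may clobber s0
    tail   __riscv_restore_1     // restore s0 and ra, then ret to caller

Because ra itself must be saved, the prologue call passes its return
address in t0 (`call t0, sym` expands to auipc/jalr writing t0, which is
why the save routines end in `jr t0`). With this patch, __riscv_save_1
on ilp32e allocates 8 bytes instead of sharing __riscv_save_2's 12-byte
batch, so no slot is wasted and sp keeps the 4-byte alignment the ABI
requires.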