From da334d363abb1cc4f8592bb69b62c5987168f4f7 Mon Sep 17 00:00:00 2001 From: Oliver Stannard Date: Thu, 21 Nov 2024 11:36:54 +0000 Subject: [PATCH 1/3] Add test showing bug --- .../AArch64/stack-tagging-epilogue-fold.mir | 71 +++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/stack-tagging-epilogue-fold.mir diff --git a/llvm/test/CodeGen/AArch64/stack-tagging-epilogue-fold.mir b/llvm/test/CodeGen/AArch64/stack-tagging-epilogue-fold.mir new file mode 100644 index 0000000000000..00c0bdaabaa4a --- /dev/null +++ b/llvm/test/CodeGen/AArch64/stack-tagging-epilogue-fold.mir @@ -0,0 +1,71 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=aarch64 -mattr=+mte -run-pass=prologepilog %s -o - | FileCheck %s + +--- | + target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32" + target triple = "aarch64-arm-none-eabi" + + define void @F76(i32 %0, ...) #0 { + ret void + } + +... +--- +name: F76 +fixedStack: + - { id: 0, type: default, offset: 0, size: 4, alignment: 16, stack-id: default, + isImmutable: true, isAliased: false, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +stack: + - { id: 0, name: '', type: default, offset: 0, size: 4080, alignment: 16, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + local-offset: -4080, debug-info-variable: '', debug-info-expression: '', + debug-info-location: '' } + - { id: 1, name: '', type: default, offset: 0, size: 32, alignment: 16, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + local-offset: -4112, debug-info-variable: '', debug-info-expression: '', + debug-info-location: '' } + - { id: 2, name: '', type: default, offset: 0, size: 3888, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + local-offset: -8000, debug-info-variable: '', debug-info-expression: '', + debug-info-location: '' } + - { id: 3, name: '', type: default, offset: 0, size: 56, alignment: 8, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + local-offset: -8056, debug-info-variable: '', debug-info-expression: '', + debug-info-location: '' } + - { id: 4, name: '', type: default, offset: 0, size: 128, alignment: 16, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + local-offset: -8192, debug-info-variable: '', debug-info-expression: '', + debug-info-location: '' } +body: | + bb.0 (%ir-block.1): + liveins: $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7, $x1, $x2, $x3, $x4, $x5, $x6, $x7 + + ; CHECK-LABEL: name: F76 + ; CHECK: liveins: $q0, $q1, $q2, $q3, $q4, $q5, $q6, $q7, $x1, $x2, $x3, $x4, $x5, $x6, $x7, $fp + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.5) + ; CHECK-NEXT: $sp = frame-setup SUBXri $sp, 2, 12 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8208 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16 + ; CHECK-NEXT: renamable $x8 = IRGstack $sp, $xzr + ; CHECK-NEXT: renamable $x9 = TAGPstack $x8, 2, renamable $x8, 1 + ; CHECK-NEXT: renamable $x10 = COPY renamable $x9 + ; CHECK-NEXT: dead early-clobber renamable $x11, dead early-clobber renamable $x10 = STGloop_wback 4080, killed renamable $x10, implicit-def dead $nzcv + ; CHECK-NEXT: ST2Gi renamable $x8, renamable $x8, 0 + ; CHECK-NEXT: dead early-clobber $x8, early-clobber $sp = frame-destroy STGloop_wback 4096, $sp, implicit-def $nzcv + ; CHECK-NEXT: $sp = frame-destroy STGPostIndex $sp, $sp, 256 + ; CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.5) + ; CHECK-NEXT: RET_ReallyLR + /* Prologue */ + renamable $x8 = IRGstack $sp, $xzr + renamable $x9 = TAGPstack %stack.0, 0, renamable $x8, 1 + renamable $x10 = COPY renamable $x9 + dead early-clobber renamable $x11, dead early-clobber renamable $x10 = STGloop_wback 4080, killed renamable $x10, implicit-def dead $nzcv + ST2Gi renamable $x8, renamable $x8, 0 + + /* Epilogue */ + dead early-clobber renamable $x8, dead early-clobber renamable $x9 = STGloop 4080, %stack.0, implicit-def dead $nzcv + ST2Gi $sp, %stack.1, 0 + RET_ReallyLR +... From 7c959b33f1f8f0ccf5d33730f0bcef5fc3a9f45d Mon Sep 17 00:00:00 2001 From: Oliver Stannard Date: Thu, 21 Nov 2024 11:38:27 +0000 Subject: [PATCH 2/3] [AArch64] Fix range check for STGPostIndex When generating function epilogues using AArch64 stack tagging, we can fold an SP update into the tag-setting loop. The loop tags 32 bytes at a time using ST2G, so the final SP update might be done either by a post indexed STG which tags the final 16 bytes of the tagged region, or by an ADD/SUB instruction after the loop. However, we were only considering the range of the ADD/SUB instructions when deciding whether to do this, and the valid immediate range for STG is slightly lower when the offset is positive. --- .../Target/AArch64/AArch64FrameLowering.cpp | 18 ++++++++++++++---- .../AArch64/stack-tagging-epilogue-fold.mir | 5 +++-- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 216244950ba9e..18acf62596c04 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -4393,10 +4393,20 @@ bool canMergeRegUpdate(MachineBasicBlock::iterator II, unsigned Reg, int64_t Offset = MI.getOperand(2).getImm() << Shift; if (MI.getOpcode() == AArch64::SUBXri) Offset = -Offset; - int64_t AbsPostOffset = std::abs(Offset - Size); - const int64_t kMaxOffset = - 0xFFF; // Max encoding for unshifted ADDXri / SUBXri - if (AbsPostOffset <= kMaxOffset && AbsPostOffset % 16 == 0) { + int64_t PostOffset = Offset - Size; + // TagStoreEdit::emitLoop might emit either an ADD/SUB after the loop, or + // an STGPostIndex which does the last 16 bytes of tag write. Which one is + // chosen depends on the alignment of the loop size, but the difference + // between the valid ranges for the two instructions is small, so we + // conservatively assume that it could be either case here. + // + // Max offset of STGPostIndex, minus the 16 byte tag write folded into that + // instruction. + const int64_t kMaxOffset = 4080 - 16; + // Max offset of SUBXri. + const int64_t kMinOffset = -4095; + if (PostOffset <= kMaxOffset && PostOffset >= kMinOffset && + PostOffset % 16 == 0) { *TotalOffset = Offset; return true; } diff --git a/llvm/test/CodeGen/AArch64/stack-tagging-epilogue-fold.mir b/llvm/test/CodeGen/AArch64/stack-tagging-epilogue-fold.mir index 00c0bdaabaa4a..bcd716c5c8d6c 100644 --- a/llvm/test/CodeGen/AArch64/stack-tagging-epilogue-fold.mir +++ b/llvm/test/CodeGen/AArch64/stack-tagging-epilogue-fold.mir @@ -53,8 +53,9 @@ body: | ; CHECK-NEXT: renamable $x10 = COPY renamable $x9 ; CHECK-NEXT: dead early-clobber renamable $x11, dead early-clobber renamable $x10 = STGloop_wback 4080, killed renamable $x10, implicit-def dead $nzcv ; CHECK-NEXT: ST2Gi renamable $x8, renamable $x8, 0 - ; CHECK-NEXT: dead early-clobber $x8, early-clobber $sp = frame-destroy STGloop_wback 4096, $sp, implicit-def $nzcv - ; CHECK-NEXT: $sp = frame-destroy STGPostIndex $sp, $sp, 256 + ; CHECK-NEXT: $x9 = ADDXri $sp, 0, 0 + ; CHECK-NEXT: dead early-clobber $x8, dead early-clobber $x9 = STGloop_wback 4112, $x9, implicit-def $nzcv + ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 2, 12 ; CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.5) ; CHECK-NEXT: RET_ReallyLR /* Prologue */ From 9b95f600e56b283f7238fa8048d92a4cb80450db Mon Sep 17 00:00:00 2001 From: Oliver Stannard Date: Thu, 21 Nov 2024 11:01:38 +0000 Subject: [PATCH 3/3] [AArch64] Add assertions and debug trace (NFC) --- llvm/lib/Target/AArch64/AArch64FrameLowering.cpp | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 18acf62596c04..e6057d8a02884 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -4355,26 +4355,37 @@ void TagStoreEdit::emitLoop(MachineBasicBlock::iterator InsertI) { int64_t ExtraBaseRegUpdate = FrameRegUpdate ? (*FrameRegUpdate - FrameRegOffset.getFixed() - Size) : 0; + LLVM_DEBUG(dbgs() << "TagStoreEdit::emitLoop: LoopSize=" << LoopSize + << ", Size=" << Size + << ", ExtraBaseRegUpdate=" << ExtraBaseRegUpdate + << ", FrameRegUpdate=" << FrameRegUpdate + << ", FrameRegOffset.getFixed()=" + << FrameRegOffset.getFixed() << "\n"); if (LoopSize < Size) { assert(FrameRegUpdate); assert(Size - LoopSize == 16); // Tag 16 more bytes at BaseReg and update BaseReg. + int64_t STGOffset = ExtraBaseRegUpdate + 16; + assert(STGOffset % 16 == 0 && STGOffset >= -4096 && STGOffset <= 4080 && + "STG immediate out of range"); BuildMI(*MBB, InsertI, DL, TII->get(ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex)) .addDef(BaseReg) .addReg(BaseReg) .addReg(BaseReg) - .addImm(1 + ExtraBaseRegUpdate / 16) + .addImm(STGOffset / 16) .setMemRefs(CombinedMemRefs) .setMIFlags(FrameRegUpdateFlags); } else if (ExtraBaseRegUpdate) { // Update BaseReg. + int64_t AddSubOffset = std::abs(ExtraBaseRegUpdate); + assert(AddSubOffset <= 4095 && "ADD/SUB immediate out of range"); BuildMI( *MBB, InsertI, DL, TII->get(ExtraBaseRegUpdate > 0 ? AArch64::ADDXri : AArch64::SUBXri)) .addDef(BaseReg) .addReg(BaseReg) - .addImm(std::abs(ExtraBaseRegUpdate)) + .addImm(AddSubOffset) .addImm(0) .setMIFlags(FrameRegUpdateFlags); }