From 27c6b10a95f3056e24ae5381f21233447c1816f3 Mon Sep 17 00:00:00 2001 From: XChy Date: Mon, 15 Sep 2025 18:16:50 +0800 Subject: [PATCH 1/9] [AArch64] Avoid applying S-form to frame index in peephole --- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 5 + llvm/test/CodeGen/AArch64/pr157252.ll | 96 ++++++++++++++++++++ 2 files changed, 101 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/pr157252.ll diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index e56fe90259d5c..2c09710831808 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -1920,6 +1920,11 @@ static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr, CmpInstr.getOperand(2).getImm() == 0) && "Caller guarantees that CmpInstr compares with constant 0"); + // NZCV is not supported if the stack offset is scalable. + auto &ST = MI.getParent()->getParent()->getSubtarget(); + if ((ST.hasSVE() || ST.isStreaming()) && MI.getOperand(1).isFI()) + return false; + std::optional NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI); if (!NZVCUsed || NZVCUsed->C) return false; diff --git a/llvm/test/CodeGen/AArch64/pr157252.ll b/llvm/test/CodeGen/AArch64/pr157252.ll new file mode 100644 index 0000000000000..c3b296a795157 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/pr157252.ll @@ -0,0 +1,96 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=aarch64 < %s | FileCheck %s + +define void @i(ptr %ad, ptr %0) #0 { +; CHECK-LABEL: i: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str d11, [sp, #-48]! 
// 8-byte Folded Spill +; CHECK-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: add x29, sp, #16 +; CHECK-NEXT: stp x28, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: .cfi_def_cfa w29, 32 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w28, -16 +; CHECK-NEXT: .cfi_offset w30, -24 +; CHECK-NEXT: .cfi_offset w29, -32 +; CHECK-NEXT: .cfi_offset b11, -48 +; CHECK-NEXT: //APP +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: // %bb.1: // %asm.fallthrough +; CHECK-NEXT: .LBB0_2: // Inline asm indirect target +; CHECK-NEXT: // %ah.preheader.preheader +; CHECK-NEXT: // Label of block must be emitted +; CHECK-NEXT: mov x8, #-35417 // =0xffffffffffff75a7 +; CHECK-NEXT: mov x9, #35417 // =0x8a59 +; CHECK-NEXT: mov w19, #1 // =0x1 +; CHECK-NEXT: movk x8, #29436, lsl #16 +; CHECK-NEXT: movk x9, #36099, lsl #16 +; CHECK-NEXT: stp x1, x0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: movk x8, #64591, lsl #32 +; CHECK-NEXT: movk x9, #944, lsl #32 +; CHECK-NEXT: index z0.d, x9, x8 +; CHECK-NEXT: sub x8, x29, #16 +; CHECK-NEXT: str z0, [x8, #-1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: .LBB0_3: // Inline asm indirect target +; CHECK-NEXT: // %ah +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: // Label of block must be emitted +; CHECK-NEXT: sub x9, x29, #16 +; CHECK-NEXT: ldr x8, [sp, #8] // 8-byte Folded Reload +; CHECK-NEXT: ldr z0, [x9, #-1, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: str d0, [x8] +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: sub x8, x29, #16 +; CHECK-NEXT: str z0, [x8, #-1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: //APP +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: //APP +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: // %bb.4: // %asm.fallthrough2 +; CHECK-NEXT: // in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: bl g +; CHECK-NEXT: add x8, sp, #28 +; CHECK-NEXT: addvl x8, x8, #1 +; CHECK-NEXT: cmp x8, #0 +; CHECK-NEXT: ldp x10, x8, [sp] // 
16-byte Folded Reload +; CHECK-NEXT: cset w9, ne +; CHECK-NEXT: strb w19, [x10] +; CHECK-NEXT: str w9, [x8] +; CHECK-NEXT: //APP +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: b .LBB0_3 +entry: + %aj = alloca i32, align 4 + callbr void asm sideeffect "", "!i,!i"() + to label %asm.fallthrough [label %ah.preheader.preheader, label %ah.preheader.preheader] + +ah.preheader.preheader: ; preds = %entry, %entry + %conv = xor i8 0, 1 + br label %ah + +asm.fallthrough: ; preds = %entry + unreachable + +ah: ; preds = %asm.fallthrough2, %asm.fallthrough2, %ah, %ah.preheader.preheader + %af.2 = phi <8 x i64> [ zeroinitializer, %asm.fallthrough2 ], [ , %ah.preheader.preheader ], [ zeroinitializer, %asm.fallthrough2 ], [ zeroinitializer, %ah ] + %vecext = extractelement <8 x i64> %af.2, i64 0 + store i64 %vecext, ptr %ad, align 8 + call void asm sideeffect "", "~{v11}"() + callbr void asm sideeffect "", "!i"() + to label %asm.fallthrough2 [label %ah] + +asm.fallthrough2: ; preds = %ah + %call = call i32 @g() + store i8 %conv, ptr %0, align 1 + %cmp = icmp ne ptr %aj, null + %conv3 = zext i1 %cmp to i32 + store i32 %conv3, ptr %ad, align 4 + callbr void asm sideeffect "", "!i"() + to label %ah [label %ah] +} + +declare i32 @g(...) 
+ +attributes #0 = { "frame-pointer"="non-leaf" "target-features"="+sve" } From a30b328fbbe1c055c97bfcb40aed7d1fb88417ba Mon Sep 17 00:00:00 2001 From: XChy Date: Tue, 16 Sep 2025 01:48:50 +0800 Subject: [PATCH 2/9] Unfold adds --- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 57 +++++-- llvm/test/CodeGen/AArch64/pr157252.ll | 96 ------------ llvm/test/CodeGen/AArch64/pr157252.mir | 154 +++++++++++++++++++ 3 files changed, 197 insertions(+), 110 deletions(-) delete mode 100644 llvm/test/CodeGen/AArch64/pr157252.ll create mode 100644 llvm/test/CodeGen/AArch64/pr157252.mir diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 2c09710831808..17a9999495602 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -1920,11 +1920,6 @@ static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr, CmpInstr.getOperand(2).getImm() == 0) && "Caller guarantees that CmpInstr compares with constant 0"); - // NZCV is not supported if the stack offset is scalable. - auto &ST = MI.getParent()->getParent()->getSubtarget(); - if ((ST.hasSVE() || ST.isStreaming()) && MI.getOperand(1).isFI()) - return false; - std::optional NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI); if (!NZVCUsed || NZVCUsed->C) return false; @@ -6569,18 +6564,52 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, (SOffset ? 
0 : AArch64FrameOffsetIsLegal); } +// Unfold ADDSXri: +// adds %dest, %stack, c +// --> +// add %dest, %stack, 0 +// adds %dest, %dest, c +static MachineInstr *unfoldAddXri(MachineInstr &MI, unsigned FrameReg, + const AArch64InstrInfo *TII) { + auto *MBB = MI.getParent(); + Register DestReg = MI.getOperand(0).getReg(); + + auto *Unfolded = + BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(AArch64::ADDXri), DestReg) + .addReg(FrameReg) + .addImm(0) + .addImm(0) + .getInstr(); + + BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(AArch64::ADDSXri), DestReg) + .addReg(DestReg) + .addImm(MI.getOperand(2).getImm()) + .addImm(MI.getOperand(3).getImm()); + + MI.eraseFromParent(); + Unfolded->getParent()->dump(); + return Unfolded; +} + bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, StackOffset &Offset, const AArch64InstrInfo *TII) { unsigned Opcode = MI.getOpcode(); unsigned ImmIdx = FrameRegIdx + 1; + MachineInstr *NewMI = &MI; + if (Opcode == AArch64::ADDSXri && Offset.getScalable()) { + NewMI = unfoldAddXri(MI, FrameReg, TII); + Opcode = AArch64::ADDXri; + } + if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) { - Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm()); - emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(), - MI.getOperand(0).getReg(), FrameReg, Offset, TII, - MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri)); - MI.eraseFromParent(); + Offset += StackOffset::getFixed(NewMI->getOperand(ImmIdx).getImm()); + emitFrameOffset(*NewMI->getParent(), *NewMI, + NewMI->getDebugLoc(), NewMI->getOperand(0).getReg(), + FrameReg, Offset, TII, MachineInstr::NoFlags, + (Opcode == AArch64::ADDSXri)); + NewMI->eraseFromParent(); Offset = StackOffset(); return true; } @@ -6588,16 +6617,16 @@ bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, int64_t NewOffset; unsigned UnscaledOp; bool UseUnscaledOp; - int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp, + int Status = 
isAArch64FrameOffsetLegal(*NewMI, Offset, &UseUnscaledOp, &UnscaledOp, &NewOffset); if (Status & AArch64FrameOffsetCanUpdate) { if (Status & AArch64FrameOffsetIsLegal) // Replace the FrameIndex with FrameReg. - MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); + NewMI->getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); if (UseUnscaledOp) - MI.setDesc(TII->get(UnscaledOp)); + NewMI->setDesc(TII->get(UnscaledOp)); - MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset); + NewMI->getOperand(ImmIdx).ChangeToImmediate(NewOffset); return !Offset; } diff --git a/llvm/test/CodeGen/AArch64/pr157252.ll b/llvm/test/CodeGen/AArch64/pr157252.ll deleted file mode 100644 index c3b296a795157..0000000000000 --- a/llvm/test/CodeGen/AArch64/pr157252.ll +++ /dev/null @@ -1,96 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=aarch64 < %s | FileCheck %s - -define void @i(ptr %ad, ptr %0) #0 { -; CHECK-LABEL: i: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str d11, [sp, #-48]! 
// 8-byte Folded Spill -; CHECK-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: add x29, sp, #16 -; CHECK-NEXT: stp x28, x19, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: .cfi_def_cfa w29, 32 -; CHECK-NEXT: .cfi_offset w19, -8 -; CHECK-NEXT: .cfi_offset w28, -16 -; CHECK-NEXT: .cfi_offset w30, -24 -; CHECK-NEXT: .cfi_offset w29, -32 -; CHECK-NEXT: .cfi_offset b11, -48 -; CHECK-NEXT: //APP -; CHECK-NEXT: //NO_APP -; CHECK-NEXT: // %bb.1: // %asm.fallthrough -; CHECK-NEXT: .LBB0_2: // Inline asm indirect target -; CHECK-NEXT: // %ah.preheader.preheader -; CHECK-NEXT: // Label of block must be emitted -; CHECK-NEXT: mov x8, #-35417 // =0xffffffffffff75a7 -; CHECK-NEXT: mov x9, #35417 // =0x8a59 -; CHECK-NEXT: mov w19, #1 // =0x1 -; CHECK-NEXT: movk x8, #29436, lsl #16 -; CHECK-NEXT: movk x9, #36099, lsl #16 -; CHECK-NEXT: stp x1, x0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: movk x8, #64591, lsl #32 -; CHECK-NEXT: movk x9, #944, lsl #32 -; CHECK-NEXT: index z0.d, x9, x8 -; CHECK-NEXT: sub x8, x29, #16 -; CHECK-NEXT: str z0, [x8, #-1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: .LBB0_3: // Inline asm indirect target -; CHECK-NEXT: // %ah -; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: // Label of block must be emitted -; CHECK-NEXT: sub x9, x29, #16 -; CHECK-NEXT: ldr x8, [sp, #8] // 8-byte Folded Reload -; CHECK-NEXT: ldr z0, [x9, #-1, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: str d0, [x8] -; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: sub x8, x29, #16 -; CHECK-NEXT: str z0, [x8, #-1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: //APP -; CHECK-NEXT: //NO_APP -; CHECK-NEXT: //APP -; CHECK-NEXT: //NO_APP -; CHECK-NEXT: // %bb.4: // %asm.fallthrough2 -; CHECK-NEXT: // in Loop: Header=BB0_3 Depth=1 -; CHECK-NEXT: bl g -; CHECK-NEXT: add x8, sp, #28 -; CHECK-NEXT: addvl x8, x8, #1 -; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: ldp x10, x8, [sp] // 
16-byte Folded Reload -; CHECK-NEXT: cset w9, ne -; CHECK-NEXT: strb w19, [x10] -; CHECK-NEXT: str w9, [x8] -; CHECK-NEXT: //APP -; CHECK-NEXT: //NO_APP -; CHECK-NEXT: b .LBB0_3 -entry: - %aj = alloca i32, align 4 - callbr void asm sideeffect "", "!i,!i"() - to label %asm.fallthrough [label %ah.preheader.preheader, label %ah.preheader.preheader] - -ah.preheader.preheader: ; preds = %entry, %entry - %conv = xor i8 0, 1 - br label %ah - -asm.fallthrough: ; preds = %entry - unreachable - -ah: ; preds = %asm.fallthrough2, %asm.fallthrough2, %ah, %ah.preheader.preheader - %af.2 = phi <8 x i64> [ zeroinitializer, %asm.fallthrough2 ], [ , %ah.preheader.preheader ], [ zeroinitializer, %asm.fallthrough2 ], [ zeroinitializer, %ah ] - %vecext = extractelement <8 x i64> %af.2, i64 0 - store i64 %vecext, ptr %ad, align 8 - call void asm sideeffect "", "~{v11}"() - callbr void asm sideeffect "", "!i"() - to label %asm.fallthrough2 [label %ah] - -asm.fallthrough2: ; preds = %ah - %call = call i32 @g() - store i8 %conv, ptr %0, align 1 - %cmp = icmp ne ptr %aj, null - %conv3 = zext i1 %cmp to i32 - store i32 %conv3, ptr %ad, align 4 - callbr void asm sideeffect "", "!i"() - to label %ah [label %ah] -} - -declare i32 @g(...) - -attributes #0 = { "frame-pointer"="non-leaf" "target-features"="+sve" } diff --git a/llvm/test/CodeGen/AArch64/pr157252.mir b/llvm/test/CodeGen/AArch64/pr157252.mir new file mode 100644 index 0000000000000..6e7938709c3e1 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/pr157252.mir @@ -0,0 +1,154 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=aarch64 -run-pass prologepilog -o - %s | FileCheck %s +--- | + define void @i(ptr %ad, ptr %0) #0 { + entry: + ret void + } + declare i32 @g(...) + attributes #0 = { "frame-pointer"="non-leaf" "target-features"="+sve" } +... 
+--- +name: i +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +noPhis: true +isSSA: false +noVRegs: true +hasFakeUses: false +callsEHReturn: false +callsUnwindInit: false +hasEHContTarget: false +hasEHScopes: false +hasEHFunclets: false +isOutlined: false +debugInstrRef: false +failsVerification: false +tracksDebugUserValues: true +registers: [] +liveins: [] +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 16 + adjustsStack: true + hasCalls: true + stackProtector: '' + functionContext: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + isCalleeSavedInfoValid: false + localFrameSize: 0 +fixedStack: [] +stack: + - { id: 0, name: '', type: default, offset: 0, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + local-offset: -4, debug-info-variable: '', debug-info-expression: '', + debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: 0, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 2, name: '', type: spill-slot, offset: 0, size: 8, alignment: 8, + stack-id: default, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 3, name: '', type: spill-slot, offset: 0, size: 16, alignment: 16, + stack-id: scalable-vector, callee-saved-register: '', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +entry_values: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] 
+machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: i + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x80000000), %bb.1(0x00000000) + ; CHECK-NEXT: liveins: $x0, $x1, $d11, $lr, $x19, $x28 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber $sp = frame-setup STRDpre killed $d11, $sp, -48 :: (store (s64) into %stack.8) + ; CHECK-NEXT: frame-setup STPXi killed $fp, killed $lr, $sp, 2 :: (store (s64) into %stack.7), (store (s64) into %stack.6) + ; CHECK-NEXT: frame-setup STPXi killed $x28, killed $x19, $sp, 4 :: (store (s64) into %stack.5), (store (s64) into %stack.4) + ; CHECK-NEXT: $fp = frame-setup ADDXri $sp, 16, 0 + ; CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 + ; CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1, implicit $vg + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $w29, 32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w19, -8 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w28, -16 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -24 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $b11, -48 + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1 (inlineasm-br-indirect-target): + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $w19 = MOVi32imm 1 + ; CHECK-NEXT: B %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3 (inlineasm-br-indirect-target): + ; CHECK-NEXT: successors: %bb.4(0x80000000), %bb.3(0x00000000) + ; CHECK-NEXT: liveins: $w19 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $q11 + ; CHECK-NEXT: B %bb.4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: liveins: $w19 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: BL @g, csr_aarch64_aapcs, implicit-def 
dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + ; CHECK-NEXT: $x8 = ADDXri $sp, 28, 0 + ; CHECK-NEXT: $x8 = ADDVL_XXI $x8, 1, implicit $vg + ; CHECK-NEXT: $x8 = ADDSXri $x8, 0, 0, implicit-def $nzcv + ; CHECK-NEXT: B %bb.3 + bb.0: + successors: %bb.2(0x80000000), %bb.1(0x00000000) + liveins: $x0, $x1 + + B %bb.2 + + bb.1 (inlineasm-br-indirect-target): + successors: %bb.3(0x80000000) + liveins: $x0, $x1 + + renamable $w19 = MOVi32imm 1 + B %bb.3 + + bb.2: + successors: + + bb.3 (inlineasm-br-indirect-target): + successors: %bb.4(0x80000000), %bb.3(0x00000000) + liveins: $w19 + + INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $q11 + B %bb.4 + + bb.4: + successors: %bb.3(0x80000000) + liveins: $w19 + + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + BL @g, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + dead renamable $x8 = ADDSXri %stack.0, 0, 0, implicit-def $nzcv + B %bb.3 +... 
From 18c3c7b7e0190773007efc9b9a9482df1df62cb6 Mon Sep 17 00:00:00 2001 From: XChy Date: Tue, 16 Sep 2025 01:53:03 +0800 Subject: [PATCH 3/9] format --- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 17a9999495602..26773fbb48e41 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -6605,10 +6605,9 @@ bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) { Offset += StackOffset::getFixed(NewMI->getOperand(ImmIdx).getImm()); - emitFrameOffset(*NewMI->getParent(), *NewMI, - NewMI->getDebugLoc(), NewMI->getOperand(0).getReg(), - FrameReg, Offset, TII, MachineInstr::NoFlags, - (Opcode == AArch64::ADDSXri)); + emitFrameOffset(*NewMI->getParent(), *NewMI, NewMI->getDebugLoc(), + NewMI->getOperand(0).getReg(), FrameReg, Offset, TII, + MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri)); NewMI->eraseFromParent(); Offset = StackOffset(); return true; From a4b824a101d310b36cc62992ee802514623df1a6 Mon Sep 17 00:00:00 2001 From: XChy Date: Tue, 16 Sep 2025 01:53:28 +0800 Subject: [PATCH 4/9] Remove debugging --- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 26773fbb48e41..3f468734a9c2f 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -6587,7 +6587,6 @@ static MachineInstr *unfoldAddXri(MachineInstr &MI, unsigned FrameReg, .addImm(MI.getOperand(3).getImm()); MI.eraseFromParent(); - Unfolded->getParent()->dump(); return Unfolded; } From f2fa5b4de87e1c43fd172a42ca7af1c9706c73ba Mon Sep 17 00:00:00 2001 From: XChy Date: Tue, 16 Sep 2025 13:16:09 +0800 Subject: [PATCH 5/9] 
rename --- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 3f468734a9c2f..0ee6dd665df57 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -6569,7 +6569,7 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, // --> // add %dest, %stack, 0 // adds %dest, %dest, c -static MachineInstr *unfoldAddXri(MachineInstr &MI, unsigned FrameReg, +static MachineInstr *unfoldAddSXri(MachineInstr &MI, unsigned FrameReg, const AArch64InstrInfo *TII) { auto *MBB = MI.getParent(); Register DestReg = MI.getOperand(0).getReg(); @@ -6598,7 +6598,7 @@ bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, MachineInstr *NewMI = &MI; if (Opcode == AArch64::ADDSXri && Offset.getScalable()) { - NewMI = unfoldAddXri(MI, FrameReg, TII); + NewMI = unfoldAddSXri(MI, FrameReg, TII); Opcode = AArch64::ADDXri; } From 66b32561e8bba3408fd7eb9a73a3bd68c672edd5 Mon Sep 17 00:00:00 2001 From: XChy Date: Tue, 16 Sep 2025 15:52:49 +0800 Subject: [PATCH 6/9] format --- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 0ee6dd665df57..3305f187976e9 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -6570,7 +6570,7 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, // add %dest, %stack, 0 // adds %dest, %dest, c static MachineInstr *unfoldAddSXri(MachineInstr &MI, unsigned FrameReg, - const AArch64InstrInfo *TII) { + const AArch64InstrInfo *TII) { auto *MBB = MI.getParent(); Register DestReg = MI.getOperand(0).getReg(); From ebbd0ce7a1a804e4e4a23f59bd96fef6236e36e3 Mon Sep 17 00:00:00 2001 From: XChy Date: Tue, 16 Sep 2025 
21:06:12 +0800 Subject: [PATCH 7/9] resolve comments --- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 61 ++++------ llvm/test/CodeGen/AArch64/pr157252.mir | 111 +++++-------------- 2 files changed, 49 insertions(+), 123 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 3305f187976e9..f06c2030f20b7 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -6263,6 +6263,11 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB, AArch64InstrInfo::decomposeStackOffsetForFrameOffsets( Offset, Bytes, NumPredicateVectors, NumDataVectors); + // Insert ADDSXri for scalable offset at the end. + bool NeedInsertADDS = SetNZCV && (NumPredicateVectors || NumDataVectors); + if (NeedInsertADDS) + SetNZCV = false; + // First emit non-scalable frame offsets, or a simple 'mov'. if (Bytes || (!Offset && SrcReg != DestReg)) { assert((DestReg != AArch64::SP || Bytes % 8 == 0) && @@ -6282,8 +6287,6 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB, FrameReg = DestReg; } - assert(!(SetNZCV && (NumPredicateVectors || NumDataVectors)) && - "SetNZCV not supported with SVE vectors"); assert(!(NeedsWinCFI && NumPredicateVectors) && "WinCFI can't allocate fractions of an SVE data vector"); @@ -6303,6 +6306,12 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB, Flag, NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset, FrameReg); } + + if (NeedInsertADDS) + BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDSXri), DestReg) + .addReg(DestReg) + .addImm(0) + .addImm(0); } MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( @@ -6564,50 +6573,18 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, (SOffset ? 
0 : AArch64FrameOffsetIsLegal); } -// Unfold ADDSXri: -// adds %dest, %stack, c -// --> -// add %dest, %stack, 0 -// adds %dest, %dest, c -static MachineInstr *unfoldAddSXri(MachineInstr &MI, unsigned FrameReg, - const AArch64InstrInfo *TII) { - auto *MBB = MI.getParent(); - Register DestReg = MI.getOperand(0).getReg(); - - auto *Unfolded = - BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(AArch64::ADDXri), DestReg) - .addReg(FrameReg) - .addImm(0) - .addImm(0) - .getInstr(); - - BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(AArch64::ADDSXri), DestReg) - .addReg(DestReg) - .addImm(MI.getOperand(2).getImm()) - .addImm(MI.getOperand(3).getImm()); - - MI.eraseFromParent(); - return Unfolded; -} - bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, StackOffset &Offset, const AArch64InstrInfo *TII) { unsigned Opcode = MI.getOpcode(); unsigned ImmIdx = FrameRegIdx + 1; - MachineInstr *NewMI = &MI; - if (Opcode == AArch64::ADDSXri && Offset.getScalable()) { - NewMI = unfoldAddSXri(MI, FrameReg, TII); - Opcode = AArch64::ADDXri; - } - if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) { - Offset += StackOffset::getFixed(NewMI->getOperand(ImmIdx).getImm()); - emitFrameOffset(*NewMI->getParent(), *NewMI, NewMI->getDebugLoc(), - NewMI->getOperand(0).getReg(), FrameReg, Offset, TII, + Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm()); + emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(), + MI.getOperand(0).getReg(), FrameReg, Offset, TII, MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri)); - NewMI->eraseFromParent(); + MI.eraseFromParent(); Offset = StackOffset(); return true; } @@ -6615,16 +6592,16 @@ bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, int64_t NewOffset; unsigned UnscaledOp; bool UseUnscaledOp; - int Status = isAArch64FrameOffsetLegal(*NewMI, Offset, &UseUnscaledOp, + int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp, &UnscaledOp, &NewOffset); if 
(Status & AArch64FrameOffsetCanUpdate) { if (Status & AArch64FrameOffsetIsLegal) // Replace the FrameIndex with FrameReg. - NewMI->getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); + MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); if (UseUnscaledOp) - NewMI->setDesc(TII->get(UnscaledOp)); + MI.setDesc(TII->get(UnscaledOp)); - NewMI->getOperand(ImmIdx).ChangeToImmediate(NewOffset); + MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset); return !Offset; } diff --git a/llvm/test/CodeGen/AArch64/pr157252.mir b/llvm/test/CodeGen/AArch64/pr157252.mir index 6e7938709c3e1..b1956cc83a434 100644 --- a/llvm/test/CodeGen/AArch64/pr157252.mir +++ b/llvm/test/CodeGen/AArch64/pr157252.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 -# RUN: llc -mtriple=aarch64 -run-pass prologepilog -o - %s | FileCheck %s +# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass prologepilog -o - %s | FileCheck %s --- | define void @i(ptr %ad, ptr %0) #0 { entry: @@ -59,12 +59,6 @@ stack: stack-id: default, callee-saved-register: '', callee-saved-restored: true, local-offset: -4, debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 1, name: '', type: spill-slot, offset: 0, size: 8, alignment: 8, - stack-id: default, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - - { id: 2, name: '', type: spill-slot, offset: 0, size: 8, alignment: 8, - stack-id: default, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } - { id: 3, name: '', type: spill-slot, offset: 0, size: 16, alignment: 16, stack-id: scalable-vector, callee-saved-register: '', callee-saved-restored: true, debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } @@ -74,81 +68,36 @@ debugValueSubstitutions: [] constants: [] 
machineFunctionInfo: {} body: | - ; CHECK-LABEL: name: i - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.2(0x80000000), %bb.1(0x00000000) - ; CHECK-NEXT: liveins: $x0, $x1, $d11, $lr, $x19, $x28 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: early-clobber $sp = frame-setup STRDpre killed $d11, $sp, -48 :: (store (s64) into %stack.8) - ; CHECK-NEXT: frame-setup STPXi killed $fp, killed $lr, $sp, 2 :: (store (s64) into %stack.7), (store (s64) into %stack.6) - ; CHECK-NEXT: frame-setup STPXi killed $x28, killed $x19, $sp, 4 :: (store (s64) into %stack.5), (store (s64) into %stack.4) - ; CHECK-NEXT: $fp = frame-setup ADDXri $sp, 16, 0 - ; CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0 - ; CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1, implicit $vg - ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $w29, 32 - ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w19, -8 - ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w28, -16 - ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -24 - ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -32 - ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $b11, -48 - ; CHECK-NEXT: B %bb.2 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1 (inlineasm-br-indirect-target): - ; CHECK-NEXT: successors: %bb.3(0x80000000) - ; CHECK-NEXT: liveins: $x0, $x1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w19 = MOVi32imm 1 - ; CHECK-NEXT: B %bb.3 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: successors: - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.3 (inlineasm-br-indirect-target): - ; CHECK-NEXT: successors: %bb.4(0x80000000), %bb.3(0x00000000) - ; CHECK-NEXT: liveins: $w19 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $q11 - ; CHECK-NEXT: B %bb.4 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.4: - ; CHECK-NEXT: successors: %bb.3(0x80000000) - ; CHECK-NEXT: liveins: $w19 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: BL @g, csr_aarch64_aapcs, implicit-def dead 
$lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 - ; CHECK-NEXT: $x8 = ADDXri $sp, 28, 0 - ; CHECK-NEXT: $x8 = ADDVL_XXI $x8, 1, implicit $vg - ; CHECK-NEXT: $x8 = ADDSXri $x8, 0, 0, implicit-def $nzcv - ; CHECK-NEXT: B %bb.3 bb.0: - successors: %bb.2(0x80000000), %bb.1(0x00000000) - liveins: $x0, $x1 - - B %bb.2 - - bb.1 (inlineasm-br-indirect-target): - successors: %bb.3(0x80000000) - liveins: $x0, $x1 - - renamable $w19 = MOVi32imm 1 - B %bb.3 - - bb.2: - successors: - - bb.3 (inlineasm-br-indirect-target): - successors: %bb.4(0x80000000), %bb.3(0x00000000) - liveins: $w19 - + ; CHECK-LABEL: name: i + ; CHECK: liveins: $d11, $lr, $x20, $x28 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber $sp = frame-setup STRDpre killed $d11, $sp, -48 :: (store (s64) into %stack.6) + ; CHECK-NEXT: frame-setup STPXi killed $fp, killed $lr, $sp, 2 :: (store (s64) into %stack.5), (store (s64) into %stack.4) + ; CHECK-NEXT: frame-setup STPXi killed $x28, killed $x20, $sp, 4 :: (store (s64) into %stack.3), (store (s64) into %stack.2) + ; CHECK-NEXT: $fp = frame-setup ADDXri $sp, 16, 0 + ; CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1, implicit $vg + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $w29, 32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w20, -8 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w28, -16 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -24 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $b11, -48 + ; CHECK-NEXT: $w20 = MOVi32imm 1 + ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $q11 + ; CHECK-NEXT: BL @g, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + ; CHECK-NEXT: $x8 = ADDXri $sp, 12, 0 + ; CHECK-NEXT: $x8 = ADDVL_XXI $x8, 1, implicit $vg + ; CHECK-NEXT: $x8 = ADDSXri $x8, 0, 0, implicit-def $nzcv + ; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI 
$sp, 1, implicit $vg + ; CHECK-NEXT: $x28, $x20 = frame-destroy LDPXi $sp, 4 :: (load (s64) from %stack.3), (load (s64) from %stack.2) + ; CHECK-NEXT: $fp, $lr = frame-destroy LDPXi $sp, 2 :: (load (s64) from %stack.5), (load (s64) from %stack.4) + ; CHECK-NEXT: early-clobber $sp, $d11 = frame-destroy LDRDpost $sp, 48 :: (load (s64) from %stack.6) + ; CHECK-NEXT: RET_ReallyLR + $w20 = MOVi32imm 1 INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $q11 - B %bb.4 - - bb.4: - successors: %bb.3(0x80000000) - liveins: $w19 - - ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp BL @g, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 - ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp - dead renamable $x8 = ADDSXri %stack.0, 0, 0, implicit-def $nzcv - B %bb.3 + $x8 = ADDSXri %stack.0, 0, 0, implicit-def $nzcv + RET_ReallyLR ... + From b5ac53a02050b36f268d21e9240966f9d69ac904 Mon Sep 17 00:00:00 2001 From: XChy Date: Tue, 16 Sep 2025 21:40:09 +0800 Subject: [PATCH 8/9] reduce test --- llvm/test/CodeGen/AArch64/pr157252.mir | 104 ++++--------------------- 1 file changed, 13 insertions(+), 91 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/pr157252.mir b/llvm/test/CodeGen/AArch64/pr157252.mir index b1956cc83a434..319e54f0fa7e9 100644 --- a/llvm/test/CodeGen/AArch64/pr157252.mir +++ b/llvm/test/CodeGen/AArch64/pr157252.mir @@ -1,103 +1,25 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 -# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass prologepilog -o - %s | FileCheck %s ---- | - define void @i(ptr %ad, ptr %0) #0 { - entry: - ret void - } - declare i32 @g(...) - attributes #0 = { "frame-pointer"="non-leaf" "target-features"="+sve" } -... 
+# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass prologepilog -frame-pointer=none -o - %s | FileCheck %s --- -name: i -alignment: 4 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -hasWinCFI: false -noPhis: true -isSSA: false -noVRegs: true -hasFakeUses: false -callsEHReturn: false -callsUnwindInit: false -hasEHContTarget: false -hasEHScopes: false -hasEHFunclets: false -isOutlined: false -debugInstrRef: false -failsVerification: false -tracksDebugUserValues: true -registers: [] -liveins: [] -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 16 - adjustsStack: true - hasCalls: true - stackProtector: '' - functionContext: '' - maxCallFrameSize: 0 - cvBytesOfCalleeSavedRegisters: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - hasTailCall: false - isCalleeSavedInfoValid: false - localFrameSize: 0 -fixedStack: [] +name: test_addsxri_scalable_offset stack: - - { id: 0, name: '', type: default, offset: 0, size: 4, alignment: 4, - stack-id: default, callee-saved-register: '', callee-saved-restored: true, - local-offset: -4, debug-info-variable: '', debug-info-expression: '', - debug-info-location: '' } - - { id: 3, name: '', type: spill-slot, offset: 0, size: 16, alignment: 16, - stack-id: scalable-vector, callee-saved-register: '', callee-saved-restored: true, - debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } -entry_values: [] -callSites: [] -debugValueSubstitutions: [] -constants: [] -machineFunctionInfo: {} -body: | + - { id: 0, type: default, size: 4, alignment: 4, stack-id: default } + - { id: 1, type: default, size: 16, alignment: 16, stack-id: scalable-vector } +body: | bb.0: - ; CHECK-LABEL: name: i - ; CHECK: liveins: $d11, $lr, $x20, $x28 + ; CHECK-LABEL: name: test_addsxri_scalable_offset + 
; CHECK: liveins: $fp ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: early-clobber $sp = frame-setup STRDpre killed $d11, $sp, -48 :: (store (s64) into %stack.6) - ; CHECK-NEXT: frame-setup STPXi killed $fp, killed $lr, $sp, 2 :: (store (s64) into %stack.5), (store (s64) into %stack.4) - ; CHECK-NEXT: frame-setup STPXi killed $x28, killed $x20, $sp, 4 :: (store (s64) into %stack.3), (store (s64) into %stack.2) - ; CHECK-NEXT: $fp = frame-setup ADDXri $sp, 16, 0 + ; CHECK-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.2) ; CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1, implicit $vg - ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $w29, 32 - ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w20, -8 - ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w28, -16 - ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -24 - ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -32 - ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $b11, -48 - ; CHECK-NEXT: $w20 = MOVi32imm 1 - ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $q11 - ; CHECK-NEXT: BL @g, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16 ; CHECK-NEXT: $x8 = ADDXri $sp, 12, 0 ; CHECK-NEXT: $x8 = ADDVL_XXI $x8, 1, implicit $vg ; CHECK-NEXT: $x8 = ADDSXri $x8, 0, 0, implicit-def $nzcv ; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 1, implicit $vg - ; CHECK-NEXT: $x28, $x20 = frame-destroy LDPXi $sp, 4 :: (load (s64) from %stack.3), (load (s64) from %stack.2) - ; CHECK-NEXT: $fp, $lr = frame-destroy LDPXi $sp, 2 :: (load (s64) from %stack.5), (load (s64) from %stack.4) - ; CHECK-NEXT: early-clobber $sp, $d11 = frame-destroy LDRDpost $sp, 48 :: (load (s64) from %stack.6) - ; CHECK-NEXT: 
RET_ReallyLR - $w20 = MOVi32imm 1 - INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $q11 - BL @g, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 + ; CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.2) + ; CHECK-NEXT: RET_ReallyLR implicit $x8 $x8 = ADDSXri %stack.0, 0, 0, implicit-def $nzcv - RET_ReallyLR + RET_ReallyLR implicit $x8 ... - From 36616d7492f508c2f64da3d2d257394aabced194 Mon Sep 17 00:00:00 2001 From: XChy Date: Tue, 16 Sep 2025 21:59:36 +0800 Subject: [PATCH 9/9] rename variable --- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index f06c2030f20b7..f45e89b4e2cfc 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -6264,8 +6264,8 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB, Offset, Bytes, NumPredicateVectors, NumDataVectors); // Insert ADDSXri for scalable offset at the end. - bool NeedInsertADDS = SetNZCV && (NumPredicateVectors || NumDataVectors); - if (NeedInsertADDS) + bool NeedsFinalDefNZCV = SetNZCV && (NumPredicateVectors || NumDataVectors); + if (NeedsFinalDefNZCV) SetNZCV = false; // First emit non-scalable frame offsets, or a simple 'mov'. @@ -6307,7 +6307,7 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB, FrameReg); } - if (NeedInsertADDS) + if (NeedsFinalDefNZCV) BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDSXri), DestReg) .addReg(DestReg) .addImm(0)