-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[AMDGPU] Update machine frame info during inlining #169477
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.
Already on GitHub? Sign in to your account
base: users/rovka/machine-inlining
Are you sure you want to change the base?
[AMDGPU] Update machine frame info during inlining #169477
Conversation
Update some of the machine frame info while inlining functions. The stack of the caller will now contain an additional object representing the stacks of its callees that have been inlined. Also update some other info such as HasCalls and a few other pieces of info that are trivial to update (this isn't very thorough or exhaustive, and notably doesn't handle tail calls).
|
Warning This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite.
This stack of pull requests is managed by Graphite. Learn more about stacking. |
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
|
@llvm/pr-subscribers-backend-amdgpu Author: Diana Picus (rovka)

Changes: Update some of the machine frame info while inlining functions. The stack of the caller will now contain an additional object representing the stacks of its callees that have been inlined. Also update some other info such as HasCalls and a few other pieces of info that are trivial to update (this isn't very thorough or exhaustive, and notably doesn't handle tail calls).

Patch is 62.08 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/169477.diff — 8 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineLevelInliner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineLevelInliner.cpp
index 8a586ddbfdfa5..18960bab86cda 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineLevelInliner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineLevelInliner.cpp
@@ -10,6 +10,7 @@
#include "AMDGPU.h"
#include "AMDGPUMachineModuleInfo.h"
#include "AMDGPUSubtarget.h"
+#include "GCNSubtarget.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/DenseMap.h"
@@ -128,15 +129,20 @@ bool AMDGPUMachineLevelInliner::runOnMachineFunction(MachineFunction &MF) {
if (!MFI.hasCalls() && !MFI.hasTailCall())
return false;
+ MaxInlinedCalleeStackSize = 0;
+ HasInlinedVarSizedStack = false;
+
// Collect calls to inline.
SmallVector<MachineInstr *, 4> CallsToInline;
const SIInstrInfo *TII = MF.getSubtarget<GCNSubtarget>().getInstrInfo();
+ size_t CallsFound = 0;
for (auto &MBB : MF) {
for (auto &MI : MBB) {
if (!MI.isCall())
continue;
+ CallsFound++;
const MachineOperand *CalleeOp =
TII->getNamedOperand(MI, AMDGPU::OpName::callee);
if (CalleeOp && CalleeOp->isGlobal()) {
@@ -156,6 +162,11 @@ bool AMDGPUMachineLevelInliner::runOnMachineFunction(MachineFunction &MF) {
}
}
+ // Reset HasCalls if we're about to inline all of them. This will be updated
+ // further during inlining if any of the callees introduces its own calls.
+ // FIXME: HasTailCall!
+ MFI.setHasCalls(CallsFound != CallsToInline.size());
+
// Perform the actual inlining.
for (MachineInstr *CallMI : CallsToInline) {
const MachineOperand *CalleeOp =
@@ -176,6 +187,14 @@ bool AMDGPUMachineLevelInliner::runOnMachineFunction(MachineFunction &MF) {
Changed = true;
}
+ if (Changed) {
+ if (MaxInlinedCalleeStackSize != 0)
+ createCalleeStackObject(MFI);
+
+ if (HasInlinedVarSizedStack)
+ MFI.CreateVariableSizedObject(Align(1), /*Alloca=*/nullptr);
+ }
+
return Changed;
}
@@ -184,6 +203,9 @@ void AMDGPUMachineLevelInliner::inlineMachineFunction(MachineFunction *CallerMF,
MachineFunction *CalleeMF,
const SIInstrInfo *TII) {
+ // TODO: update SIMachineFunctionInfo (e.g. Occupancy)
+ updateCallerFrameInfo(CallerMF->getFrameInfo(), *CalleeMF);
+
MachineBasicBlock *CallMBB = CallMI->getParent();
MachineBasicBlock *ContinuationMBB =
CallMBB->splitAt(*CallMI, /*UpdateLiveIns=*/true);
@@ -287,6 +309,55 @@ void AMDGPUMachineLevelInliner::cleanupAfterInlining(
MI->eraseFromParent();
}
+void AMDGPUMachineLevelInliner::updateCallerFrameInfo(
+ MachineFrameInfo &CallerMFI, const MachineFunction &CalleeMF) {
+ const MachineFrameInfo &CalleeMFI = CalleeMF.getFrameInfo();
+ const GCNSubtarget &ST = CalleeMF.getSubtarget<GCNSubtarget>();
+ const TargetRegisterInfo &TRI = *ST.getRegisterInfo();
+
+ // Follow the prologue logic.
+ uint64_t CalleeStackSize = CalleeMFI.getStackSize();
+ if (TRI.hasStackRealignment(CalleeMF))
+ CalleeStackSize += CalleeMFI.getMaxAlign().value();
+ uint64_t TrueCalleeStackSize = CalleeStackSize * ST.getScratchScaleFactor();
+
+ // Only one of the stacks of the callees will
+ // be active at any given time, so we only need to make sure the largest one
+ // fits.
+ MaxInlinedCalleeStackSize =
+ std::max(MaxInlinedCalleeStackSize, TrueCalleeStackSize);
+
+ // Track if any callee has variable-sized stack objects.
+ if (CalleeMFI.hasVarSizedObjects())
+ HasInlinedVarSizedStack = true;
+
+#define SET_IF_ANY(SETTER, GETTER) \
+ CallerMFI.SETTER(CallerMFI.GETTER() || CalleeMFI.GETTER())
+
+ SET_IF_ANY(setHasCalls, hasCalls);
+ SET_IF_ANY(setHasTailCall, hasTailCall);
+ SET_IF_ANY(setAdjustsStack, adjustsStack);
+ SET_IF_ANY(setFrameAddressIsTaken, isFrameAddressTaken);
+ SET_IF_ANY(setReturnAddressIsTaken, isReturnAddressTaken);
+ SET_IF_ANY(setHasVAStart, hasVAStart);
+ SET_IF_ANY(setHasMustTailInVarArgFunc, hasMustTailInVarArgFunc);
+ SET_IF_ANY(setHasOpaqueSPAdjustment, hasOpaqueSPAdjustment);
+ SET_IF_ANY(setHasCopyImplyingStackAdjustment, hasCopyImplyingStackAdjustment);
+
+#undef SET_IF_ANY
+}
+
+void AMDGPUMachineLevelInliner::createCalleeStackObject(
+ MachineFrameInfo &CallerMFI) {
+ // Create a stack object representing the maximum callee stack space
+ uint64_t CallerStackSize = CallerMFI.getStackSize();
+ int CalleeStackIdx =
+ CallerMFI.CreateStackObject(MaxInlinedCalleeStackSize, Align(1),
+ /*isSpillSlot=*/false);
+ CallerMFI.setObjectOffset(CalleeStackIdx, CallerStackSize);
+ CallerMFI.setStackSize(CallerStackSize + MaxInlinedCalleeStackSize);
+}
+
FunctionPass *llvm::createAMDGPUMachineLevelInlinerPass() {
return new AMDGPUMachineLevelInliner();
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineLevelInliner.h b/llvm/lib/Target/AMDGPU/AMDGPUMachineLevelInliner.h
index ab5ecdc5dbd41..51a2e494247a6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineLevelInliner.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineLevelInliner.h
@@ -27,6 +27,7 @@
namespace llvm {
+class GCNSubtarget;
class SIInstrInfo;
class AMDGPUMachineLevelInliner : public MachineFunctionPass {
@@ -52,6 +53,21 @@ class AMDGPUMachineLevelInliner : public MachineFunctionPass {
void cleanupAfterInlining(MachineFunction *CallerMF, MachineInstr *CallMI,
const SIInstrInfo *TII) const;
+
+ void updateCallerFrameInfo(MachineFrameInfo &CallerMFI,
+ const MachineFunction &CalleeMF);
+
+ /// Create a stack object representing the stacks of all the inlined callees.
+ /// Its size will be large enough to accomodate the callee with the largest
+ /// stack.
+ void createCalleeStackObject(MachineFrameInfo &CallerMFI);
+
+ /// The maximum stack size among all inlined callees (including any padding
+ /// required to ensure proper alignment).
+ uint64_t MaxInlinedCalleeStackSize = 0;
+
+ /// Whether any inlined callee has variable-sized stack objects.
+ bool HasInlinedVarSizedStack = false;
};
} // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index cb27f474d78f3..f9f8c196aeb33 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1057,6 +1057,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
// dynamic realignment in common cases.
Align getStackAlignment() const { return Align(16); }
+ unsigned getScratchScaleFactor() const {
+ return enableFlatScratch() ? 1 : getWavefrontSize();
+ }
+
bool enableMachineScheduler() const override {
return true;
}
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index ffbb111d42221..9f42a3e8ae922 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -590,10 +590,6 @@ Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
return ScratchRsrcReg;
}
-static unsigned getScratchScaleFactor(const GCNSubtarget &ST) {
- return ST.enableFlatScratch() ? 1 : ST.getWavefrontSize();
-}
-
void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
@@ -693,7 +689,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
}
assert(ScratchWaveOffsetReg || !PreloadedScratchWaveOffsetReg);
- unsigned Offset = FrameInfo.getStackSize() * getScratchScaleFactor(ST);
+ unsigned Offset = FrameInfo.getStackSize() * ST.getScratchScaleFactor();
if (!mayReserveScratchForCWSR(MF)) {
if (hasFP(MF)) {
Register FPReg = MFI->getFrameOffsetReg();
@@ -1231,7 +1227,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
assert(StackPtrReg != AMDGPU::SP_REG);
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_MOV_B32), StackPtrReg)
- .addImm(MFI.getStackSize() * getScratchScaleFactor(ST));
+ .addImm(MFI.getStackSize() * ST.getScratchScaleFactor());
}
}
@@ -1292,12 +1288,12 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
// s_and_b32 s33, s33, 0b111...0000
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), FramePtrReg)
.addReg(StackPtrReg)
- .addImm((Alignment - 1) * getScratchScaleFactor(ST))
+ .addImm((Alignment - 1) * ST.getScratchScaleFactor())
.setMIFlag(MachineInstr::FrameSetup);
auto And = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
- .addReg(FramePtrReg, RegState::Kill)
- .addImm(-Alignment * getScratchScaleFactor(ST))
- .setMIFlag(MachineInstr::FrameSetup);
+ .addReg(FramePtrReg, RegState::Kill)
+ .addImm(-Alignment * ST.getScratchScaleFactor())
+ .setMIFlag(MachineInstr::FrameSetup);
And->getOperand(3).setIsDead(); // Mark SCC as dead.
FuncInfo->setIsStackRealigned(true);
} else if ((HasFP = hasFP(MF))) {
@@ -1326,9 +1322,9 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
if (HasFP && RoundedSize != 0) {
auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
- .addReg(StackPtrReg)
- .addImm(RoundedSize * getScratchScaleFactor(ST))
- .setMIFlag(MachineInstr::FrameSetup);
+ .addReg(StackPtrReg)
+ .addImm(RoundedSize * ST.getScratchScaleFactor())
+ .setMIFlag(MachineInstr::FrameSetup);
Add->getOperand(3).setIsDead(); // Mark SCC as dead.
}
@@ -2137,7 +2133,7 @@ MachineBasicBlock::iterator SIFrameLowering::eliminateCallFramePseudoInstr(
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
Register SPReg = MFI->getStackPtrOffsetReg();
- Amount *= getScratchScaleFactor(ST);
+ Amount *= ST.getScratchScaleFactor();
if (IsDestroy)
Amount = -Amount;
auto Add = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), SPReg)
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-machine-level-inliner-mfi.mir b/llvm/test/CodeGen/AMDGPU/amdgpu-machine-level-inliner-mfi.mir
new file mode 100644
index 0000000000000..bcda11a2419a1
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-machine-level-inliner-mfi.mir
@@ -0,0 +1,651 @@
+# RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1200 -amdgpu-enable-machine-level-inliner -run-pass=amdgpu-inlining-anchor,amdgpu-machine-level-inliner %s -o - | FileCheck %s
+
+# Test that the inliner correctly updates the MachineFunctionInfo
+
+--- |
+ ; Test that we update the frame info for the caller with info from the callee.
+ ; In particular, hasCalls should be false after inlining.
+ define amdgpu_gfx_whole_wave i32 @wwf_with_local_no_calls(i1 %mask, i32 %x) {
+ %local = alloca i32, addrspace(5)
+ ret i32 0
+ }
+ define amdgpu_cs void @inline_wwf_with_local_no_calls(i32 %y) {
+ %local = alloca i32, addrspace(5)
+ ret void
+ }
+ ; Same as above, but also make sure we reuse stack space between different callees.
+ define amdgpu_cs void @inline_wwf_with_local_twice(i32 %y) {
+ %local = alloca i32, addrspace(5)
+ ret void
+ }
+
+ ; Test callees with different stack sizes and alignments.
+ define amdgpu_gfx_whole_wave i32 @wwf_large_stack_small_align(i1 %mask) {
+ %local = alloca i32, i32 512, align 4, addrspace(5)
+ ret i32 0
+ }
+ define amdgpu_gfx_whole_wave i32 @wwf_small_stack_large_align(i1 %mask) {
+ %local = alloca i32, align 1024, addrspace(5)
+ ret i32 0
+ }
+ define amdgpu_cs void @inline_wwf_different_stack_shapes() {
+ ret void
+ }
+
+ ; Test dynamic stack allocations.
+ define amdgpu_gfx_whole_wave i32 @wwf_dyn_stack(i1 %mask, i32 inreg %size) {
+ %local = alloca i32, i32 %size, addrspace(5)
+ ret i32 0
+ }
+ define amdgpu_cs void @inline_wwf_dyn_stack_callee(i32 inreg %size, ptr addrspace(1) %output) { ret void }
+
+ ; Test that we correctly handle stack arguments.
+ define amdgpu_gfx_whole_wave i32 @wwf_with_stack_args(i1 %active, <33 x i32> %vec) { ret i32 0 }
+ define amdgpu_cs void @inline_wwf_with_stack_args(i32 %x, i32 %y, ptr addrspace(1) %output) { ret void }
+
+ ; Test that we update hasCalls if the callee contains its own calls.
+ define amdgpu_gfx_whole_wave i32 @wwf_with_calls(i1 %mask, i32 %x) { ret i32 0}
+ define amdgpu_cs void @inline_wwf_with_calls(i32 %y) { ret void }
+
+ ; Test that hasCalls is still correct if the caller has other calls.
+ define amdgpu_gfx i32 @wont_inline() { ret i32 0 }
+ define amdgpu_cs void @inline_wwf_without_calls(i32 %y) { ret void }
+...
+---
+name: wwf_with_local_no_calls
+tracksRegLiveness: true
+frameInfo:
+ stackSize: 16
+stack:
+ - { id: 0, name: local, type: default, offset: 0, size: 8, alignment: 4,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ local-offset: 0, debug-info-variable: '', debug-info-expression: '',
+ debug-info-location: '' }
+ - { id: 1, name: '', type: spill-slot, offset: 8, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+ - { id: 2, name: '', type: default, offset: 12, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+machineFunctionInfo:
+ isEntryFunction: false
+ isChainFunction: false
+ scratchRSrcReg: '$private_rsrc_reg'
+ frameOffsetReg: '$fp_reg'
+ stackPtrOffsetReg: '$sgpr32'
+ wwmReservedRegs:
+ - '$vgpr1'
+ scavengeFI: '%stack.2'
+ isWholeWaveFunction: true
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+
+ $sgpr0 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+ SCRATCH_STORE_DWORD_SADDR $vgpr1, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
+ $exec_lo = S_MOV_B32 -1
+ $vgpr1 = V_MUL_LO_U32_e64 $vgpr0, 18, implicit $exec
+ SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store (s32) into %ir.local, addrspace 5)
+ $exec_lo = S_XOR_B32 $sgpr0, -1, implicit-def $scc
+ $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
+ $exec_lo = S_MOV_B32 $sgpr0
+ S_SETPC_B64_return undef $sgpr30_sgpr31, implicit $vgpr0
+...
+---
+name: inline_wwf_with_local_no_calls
+# CHECK-LABEL: name: inline_wwf_with_local_no_calls
+tracksRegLiveness: true
+frameInfo:
+ hasCalls: true
+ stackSize: 8
+# CHECK: frameInfo:
+# CHECK: stackSize: 24
+# CHECK: offsetAdjustment: 0
+# CHECK: maxAlignment: 4
+# CHECK: adjustsStack: false
+# CHECK: hasCalls: false
+# CHECK: hasTailCall: false
+stack:
+ - { id: 0, name: local, type: default, offset: 0, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ local-offset: 0, debug-info-variable: '', debug-info-expression: '',
+ debug-info-location: '' }
+ - { id: 1, name: '', type: default, offset: 4, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+# CHECK: stack:
+# CHECK-NEXT: - { id: 0, name: local, type: default, offset: 0, size: 4, alignment: 4,
+# CHECK-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+# CHECK-NEXT: local-offset: 0, debug-info-variable: '', debug-info-expression: '',
+# CHECK-NEXT: debug-info-location: '' }
+# CHECK-NEXT: - { id: 1, name: '', type: default, offset: 4, size: 4, alignment: 4,
+# CHECK-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+# CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+# CHECK-NEXT: - { id: 2, name: '', type: default, offset: 8, size: 16, alignment: 1,
+# CHECK-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+# CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+machineFunctionInfo:
+ isEntryFunction: true
+ isChainFunction: false
+ scratchRSrcReg: '$private_rsrc_reg'
+ frameOffsetReg: '$fp_reg'
+ stackPtrOffsetReg: '$sgpr32'
+ scavengeFI: '%stack.1'
+ isWholeWaveFunction: false
+# CHECK: machineFunctionInfo:
+# CHECK: isEntryFunction: true
+# CHECK: isChainFunction: false
+# CHECK: scratchRSrcReg: '$private_rsrc_reg'
+# CHECK frameOffsetReg: '$fp_reg'
+# CHECK stackPtrOffsetReg: '$sgpr32'
+# CHECK scavengeFI: '%stack.1'
+# CHECK isWholeWaveFunction: false
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ $sgpr32 = S_MOV_B32 16
+ $sgpr1 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @wwf_with_local_no_calls
+ $sgpr0 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @wwf_with_local_no_calls
+ dead $sgpr30_sgpr31 = SI_CALL killed $sgpr0_sgpr1, @wwf_with_local_no_calls, csr_amdgpu_si_gfx, implicit $vgpr0, implicit-def $vgpr0
+ SCRATCH_STORE_DWORD_ST killed $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store (s32) into %ir.local, addrspace 5)
+ S_ENDPGM 0
+...
+name: inline_wwf_with_local_twice
+# CHECK-LABEL: name: inline_wwf_with_local_twice
+tracksRegLiveness: true
+frameInfo:
+ hasCalls: true
+ stackSize: 8
+# CHECK: frameInfo:
+# CHECK-NEXT: isFrameAddressTaken: false
+# CHECK-NEXT: isReturnAddressTaken: false
+# CHECK-NEXT: hasStackMap: false
+# CHECK-NEXT: hasPatchPoint: false
+# CHECK-NEXT: stackSize: 24
+# CHECK-NEXT: offsetAdjustment: 0
+# CHECK-NEXT: maxAlignment: 4
+# CHECK-NEXT: adjustsStack: false
+# CHECK-NEXT: hasCalls: false
+# CHECK-NEXT: stackProtector: ''
+# CHECK-NEXT: functionContext: ''
+# CHECK-NEXT: maxCallFrameSize: 4294967295
+# CHECK-NEXT: cvBytesOfCalleeSavedRegisters: 0
+# CHECK-NEXT: hasOpaqueSPAdjustment: false
+# CHECK-NEXT: hasVAStart: false
+# CHECK-NEXT: hasMustTailInVarArgFunc: false
+# CHECK-NEXT: hasTailCall: false
+# CHECK-NEXT: isCalleeSavedInfoValid: false
+# CHECK-NEXT: localFrameSize: 0
+stack:
+ - { id: 0, name: local, type: default, offset: 0, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ local-offset: 0, debug-info-variable: '', debug-info-expression: '',
+ debug-info-location: '' }
+ - { id: 1, name: '', type: default, offset: 4, size: 4, alignment: 4,
+ stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+ debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+# CHECK: stack:
+# CHECK-NEXT: - { id: 0, name: local, type: default, offset: 0, size: 4, alignment: 4,
+# CHECK-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+# CHECK-NEXT: local-offset: 0, debug-info-variable: '', debug-info-expression: '',
+# CHECK-NEXT: debug-info-location: '' }
+# CHECK-NEXT: - { id: 1, name: '', type: default, offset: 4, size: 4, alignment: 4,
+# CHECK-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+# CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+# CHECK-NEXT: - { id: 2, name: '', type: default, offset: 8, size: 16, alignment: 1,
+# CHECK-NEXT: stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+# CHECK-NEXT: debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ $sgpr32 = S_MOV_B32 16
+ $sgpr7 = S_MOV_B32 target-flags(amdgpu-abs32-hi) @wwf_with_local_no_calls
+ $sgpr6 = S_MOV_B32 target-flags(amdgpu-abs32-lo) @wwf_with_local_no_calls
+ dead $sgpr30_sgpr31 = SI_CALL $sgpr6_sgpr7, @wwf_with_local_no_calls, csr_amdgpu_si_gfx, implicit $vgpr0, implicit-def $vgpr0
+ SCRATCH_STORE_DWORD_ST $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (volatile store (s32) into %ir.local, addrspace 5)
+ dead $sgpr30_sgpr31 = SI_CALL killed $sgpr6_sgpr7, @wwf_...
[truncated]
|
🐧 Linux x64 Test Results
Failed tests (click on a test name to see its output):
- LLVM :: CodeGen/AArch64/arm64-opt-remarks-lazy-bfi.ll
- LLVM :: tools/UpdateTestChecks/update_givaluetracking_test_checks/knownbits-const.test
If these failures are unrelated to your changes (for example, the tests are broken or flaky at HEAD), please open an issue at https://github.com/llvm/llvm-project/issues and add the appropriate label.

Update some of the machine frame info while inlining functions. The
stack of the caller will now contain an additional object representing
the stacks of its callees that have been inlined.
Also update some other info such as HasCalls and a few other pieces of
info that are trivial to update (this isn't very thorough or exhaustive,
and notably doesn't handle tail calls).