Skip to content

Commit 97e4869

Browse files
committed
[AMDGPU] supporting dynamically sized allocas
1 parent a348f22 commit 97e4869

File tree

2 files changed

+2169
-22
lines changed

2 files changed

+2169
-22
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 28 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -3997,10 +3997,11 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
39973997
InVals, /*IsThisReturn=*/false, SDValue());
39983998
}
39993999

4000-
// This is identical to the default implementation in ExpandDYNAMIC_STACKALLOC,
4001-
// except for applying the wave size scale to the increment amount.
4002-
SDValue SITargetLowering::lowerDYNAMIC_STACKALLOCImpl(SDValue Op,
4003-
SelectionDAG &DAG) const {
4000+
SDValue SITargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
4001+
SelectionDAG &DAG) const {
4002+
// This is identical to the default implementation in ExpandDYNAMIC_STACKALLOC,
4003+
// except for applying the wave size scale to the increment amount and doing a
4004+
// wave reduction for a divergent allocation size.
40044005
const MachineFunction &MF = DAG.getMachineFunction();
40054006
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
40064007

@@ -4018,6 +4019,8 @@ SDValue SITargetLowering::lowerDYNAMIC_STACKALLOCImpl(SDValue Op,
40184019
Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
40194020

40204021
SDValue Size = Tmp2.getOperand(1);
4022+
4023+
// Start address of the dynamically sized stack object (the current stack pointer).
40214024
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
40224025
Chain = SP.getValue(1);
40234026
MaybeAlign Alignment = cast<ConstantSDNode>(Tmp3)->getMaybeAlignValue();
@@ -4027,12 +4030,28 @@ SDValue SITargetLowering::lowerDYNAMIC_STACKALLOCImpl(SDValue Op,
40274030
? ISD::ADD
40284031
: ISD::SUB;
40294032

4030-
SDValue ScaledSize = DAG.getNode(
4031-
ISD::SHL, dl, VT, Size,
4032-
DAG.getConstant(Subtarget->getWavefrontSizeLog2(), dl, MVT::i32));
4033+
if (isa<ConstantSDNode>(Op.getOperand(1))){
4034+
SDValue ScaledSize = DAG.getNode(
4035+
ISD::SHL, dl, VT, Size,
4036+
DAG.getConstant(Subtarget->getWavefrontSizeLog2(), dl, MVT::i32));
4037+
Tmp1 = DAG.getNode(Opc, dl, VT, SP, ScaledSize); // Value
4038+
}
4039+
else{
4040+
SDValue WaveReduction =
4041+
DAG.getTargetConstant(Intrinsic::amdgcn_wave_reduce_umax, dl, MVT::i32);
4042+
Size = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
4043+
WaveReduction, Size, DAG.getConstant(0, dl, MVT::i32));
4044+
SDValue ScaledSize = DAG.getNode(
4045+
ISD::SHL, dl, VT, Size,
4046+
DAG.getConstant(Subtarget->getWavefrontSizeLog2(), dl, MVT::i32));
4047+
Tmp1 = DAG.getNode(Opc, dl, VT, SP, ScaledSize); // Value in vgpr.
4048+
SDValue ReadFirstLaneID =
4049+
DAG.getTargetConstant(Intrinsic::amdgcn_readfirstlane, dl, MVT::i32);
4050+
Tmp1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
4051+
ReadFirstLaneID, Tmp1);
4052+
}
40334053

40344054
Align StackAlign = TFL->getStackAlign();
4035-
Tmp1 = DAG.getNode(Opc, dl, VT, SP, ScaledSize); // Value
40364055
if (Alignment && *Alignment > StackAlign) {
40374056
Tmp1 = DAG.getNode(
40384057
ISD::AND, dl, VT, Tmp1,
@@ -4042,25 +4061,12 @@ SDValue SITargetLowering::lowerDYNAMIC_STACKALLOCImpl(SDValue Op,
40424061
}
40434062

40444063
Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain
4064+
Tmp1 = SP;
40454065
Tmp2 = DAG.getCALLSEQ_END(Chain, 0, 0, SDValue(), dl);
40464066

40474067
return DAG.getMergeValues({Tmp1, Tmp2}, dl);
40484068
}
40494069

4050-
SDValue SITargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
4051-
SelectionDAG &DAG) const {
4052-
// We only handle constant sizes here to allow non-entry block, static sized
4053-
// allocas. A truly dynamic value is more difficult to support because we
4054-
// don't know if the size value is uniform or not. If the size isn't uniform,
4055-
// we would need to do a wave reduction to get the maximum size to know how
4056-
// much to increment the uniform stack pointer.
4057-
SDValue Size = Op.getOperand(1);
4058-
if (isa<ConstantSDNode>(Size))
4059-
return lowerDYNAMIC_STACKALLOCImpl(Op, DAG); // Use "generic" expansion.
4060-
4061-
return AMDGPUTargetLowering::LowerDYNAMIC_STACKALLOC(Op, DAG);
4062-
}
4063-
40644070
SDValue SITargetLowering::LowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const {
40654071
if (Op.getValueType() != MVT::i32)
40664072
return Op; // Defer to cannot select error.

0 commit comments

Comments
 (0)