@@ -3997,10 +3997,11 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
39973997 InVals, /*IsThisReturn=*/false, SDValue());
39983998}
39993999
4000- // This is identical to the default implementation in ExpandDYNAMIC_STACKALLOC,
4001- // except for applying the wave size scale to the increment amount.
4002- SDValue SITargetLowering::lowerDYNAMIC_STACKALLOCImpl(SDValue Op,
4003- SelectionDAG &DAG) const {
4000+ SDValue SITargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
4001+ SelectionDAG &DAG) const {
4002+ // This is identical to the default implementation in ExpandDYNAMIC_STACKALLOC,
4003+ // except that the increment is scaled by the wave size and a non-constant
4004+ // (possibly divergent) size is first reduced to its wave-wide maximum.
40044005 const MachineFunction &MF = DAG.getMachineFunction();
40054006 const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
40064007
@@ -4018,6 +4019,8 @@ SDValue SITargetLowering::lowerDYNAMIC_STACKALLOCImpl(SDValue Op,
40184019 Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
40194020
40204021 SDValue Size = Tmp2.getOperand(1);
4022+
4023+ // Start address of the dynamically sized stack object
40214024 SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
40224025 Chain = SP.getValue(1);
40234026 MaybeAlign Alignment = cast<ConstantSDNode>(Tmp3)->getMaybeAlignValue();
@@ -4027,12 +4030,28 @@ SDValue SITargetLowering::lowerDYNAMIC_STACKALLOCImpl(SDValue Op,
40274030 ? ISD::ADD
40284031 : ISD::SUB;
40294032
4030- SDValue ScaledSize = DAG.getNode(
4031- ISD::SHL, dl, VT, Size,
4032- DAG.getConstant(Subtarget->getWavefrontSizeLog2(), dl, MVT::i32));
4033+ if (isa<ConstantSDNode>(Op.getOperand(1))){
4034+ SDValue ScaledSize = DAG.getNode(
4035+ ISD::SHL, dl, VT, Size,
4036+ DAG.getConstant(Subtarget->getWavefrontSizeLog2(), dl, MVT::i32));
4037+ Tmp1 = DAG.getNode(Opc, dl, VT, SP, ScaledSize); // Value
4038+ }
4039+ else{
4040+ SDValue WaveReduction =
4041+ DAG.getTargetConstant(Intrinsic::amdgcn_wave_reduce_umax, dl, MVT::i32);
4042+ Size = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
4043+ WaveReduction, Size, DAG.getConstant(0, dl, MVT::i32));
4044+ SDValue ScaledSize = DAG.getNode(
4045+ ISD::SHL, dl, VT, Size,
4046+ DAG.getConstant(Subtarget->getWavefrontSizeLog2(), dl, MVT::i32));
4047+ Tmp1 = DAG.getNode(Opc, dl, VT, SP, ScaledSize); // Value in vgpr.
4048+ SDValue ReadFirstLaneID =
4049+ DAG.getTargetConstant(Intrinsic::amdgcn_readfirstlane, dl, MVT::i32);
4050+ Tmp1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
4051+ ReadFirstLaneID, Tmp1);
4052+ }
40334053
40344054 Align StackAlign = TFL->getStackAlign();
4035- Tmp1 = DAG.getNode(Opc, dl, VT, SP, ScaledSize); // Value
40364055 if (Alignment && *Alignment > StackAlign) {
40374056 Tmp1 = DAG.getNode(
40384057 ISD::AND, dl, VT, Tmp1,
@@ -4042,25 +4061,12 @@ SDValue SITargetLowering::lowerDYNAMIC_STACKALLOCImpl(SDValue Op,
40424061 }
40434062
40444063 Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain
4064+ Tmp1 = SP;
40454065 Tmp2 = DAG.getCALLSEQ_END(Chain, 0, 0, SDValue(), dl);
40464066
40474067 return DAG.getMergeValues({Tmp1, Tmp2}, dl);
40484068}
40494069
4050- SDValue SITargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
4051- SelectionDAG &DAG) const {
4052- // We only handle constant sizes here to allow non-entry block, static sized
4053- // allocas. A truly dynamic value is more difficult to support because we
4054- // don't know if the size value is uniform or not. If the size isn't uniform,
4055- // we would need to do a wave reduction to get the maximum size to know how
4056- // much to increment the uniform stack pointer.
4057- SDValue Size = Op.getOperand(1);
4058- if (isa<ConstantSDNode>(Size))
4059- return lowerDYNAMIC_STACKALLOCImpl(Op, DAG); // Use "generic" expansion.
4060-
4061- return AMDGPUTargetLowering::LowerDYNAMIC_STACKALLOC(Op, DAG);
4062- }
4063-
40644070SDValue SITargetLowering::LowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const {
40654071 if (Op.getValueType() != MVT::i32)
40664072 return Op; // Defer to cannot select error.
0 commit comments