Skip to content

Commit 74cebce

Browse files
authored
Revert "[AMDGPU] Add wave reduce intrinsics for float types - 2 (#161815)" (#168845)
This reverts commit dcab4cb.
1 parent 54f69ca commit 74cebce

File tree

6 files changed: 4 additions (+4) and 2046 deletions (-2046).

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2482,7 +2482,7 @@ class AMDGPUWaveReduce<LLVMType data_ty = llvm_any_ty> : Intrinsic<
24822482

24832483
multiclass AMDGPUWaveReduceOps {
24842484
foreach Op =
2485-
["umin", "fmin", "min", "umax", "fmax", "max", "add", "fadd", "sub", "fsub", "and", "or", "xor"] in {
2485+
["umin", "fmin", "min", "umax", "fmax", "max", "add", "sub", "and", "or", "xor"] in {
24862486
def Op : AMDGPUWaveReduce;
24872487
}
24882488
}

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5214,9 +5214,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
52145214
break;
52155215
}
52165216
case Intrinsic::amdgcn_wave_reduce_add:
5217-
case Intrinsic::amdgcn_wave_reduce_fadd:
52185217
case Intrinsic::amdgcn_wave_reduce_sub:
5219-
case Intrinsic::amdgcn_wave_reduce_fsub:
52205218
case Intrinsic::amdgcn_wave_reduce_min:
52215219
case Intrinsic::amdgcn_wave_reduce_umin:
52225220
case Intrinsic::amdgcn_wave_reduce_fmin:

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 3 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -5480,15 +5480,11 @@ static uint32_t getIdentityValueFor32BitWaveReduction(unsigned Opc) {
54805480
return std::numeric_limits<uint32_t>::min();
54815481
case AMDGPU::S_MAX_I32:
54825482
return std::numeric_limits<int32_t>::min();
5483-
case AMDGPU::V_SUB_F32_e64: // +0.0
5484-
return __builtin_bit_cast(uint32_t, +0.0f);
54855483
case AMDGPU::S_ADD_I32:
54865484
case AMDGPU::S_SUB_I32:
54875485
case AMDGPU::S_OR_B32:
54885486
case AMDGPU::S_XOR_B32:
54895487
return std::numeric_limits<uint32_t>::min();
5490-
case AMDGPU::V_ADD_F32_e64: // -0.0
5491-
return __builtin_bit_cast(uint32_t, -0.0f);
54925488
case AMDGPU::S_AND_B32:
54935489
return std::numeric_limits<uint32_t>::max();
54945490
case AMDGPU::V_MIN_F32_e64:
@@ -5529,13 +5525,11 @@ static bool is32bitWaveReduceOperation(unsigned Opc) {
55295525
Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32 ||
55305526
Opc == AMDGPU::S_AND_B32 || Opc == AMDGPU::S_OR_B32 ||
55315527
Opc == AMDGPU::S_XOR_B32 || Opc == AMDGPU::V_MIN_F32_e64 ||
5532-
Opc == AMDGPU::V_MAX_F32_e64 || Opc == AMDGPU::V_ADD_F32_e64 ||
5533-
Opc == AMDGPU::V_SUB_F32_e64;
5528+
Opc == AMDGPU::V_MAX_F32_e64;
55345529
}
55355530

55365531
static bool isFloatingPointWaveReduceOperation(unsigned Opc) {
5537-
return Opc == AMDGPU::V_MIN_F32_e64 || Opc == AMDGPU::V_MAX_F32_e64 ||
5538-
Opc == AMDGPU::V_ADD_F32_e64 || Opc == AMDGPU::V_SUB_F32_e64;
5532+
return Opc == AMDGPU::V_MIN_F32_e64 || Opc == AMDGPU::V_MAX_F32_e64;
55395533
}
55405534

55415535
static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
@@ -5582,10 +5576,8 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
55825576
case AMDGPU::S_XOR_B64:
55835577
case AMDGPU::S_ADD_I32:
55845578
case AMDGPU::S_ADD_U64_PSEUDO:
5585-
case AMDGPU::V_ADD_F32_e64:
55865579
case AMDGPU::S_SUB_I32:
5587-
case AMDGPU::S_SUB_U64_PSEUDO:
5588-
case AMDGPU::V_SUB_F32_e64: {
5580+
case AMDGPU::S_SUB_U64_PSEUDO: {
55895581
const TargetRegisterClass *WaveMaskRegClass = TRI->getWaveMaskRegClass();
55905582
const TargetRegisterClass *DstRegClass = MRI.getRegClass(DstReg);
55915583
Register ExecMask = MRI.createVirtualRegister(WaveMaskRegClass);
@@ -5740,30 +5732,6 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
57405732
.addImm(AMDGPU::sub1);
57415733
break;
57425734
}
5743-
case AMDGPU::V_ADD_F32_e64:
5744-
case AMDGPU::V_SUB_F32_e64: {
5745-
Register ActiveLanesVreg =
5746-
MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
5747-
Register DstVreg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
5748-
// Get number of active lanes as a float val.
5749-
BuildMI(BB, MI, DL, TII->get(AMDGPU::V_CVT_F32_I32_e64),
5750-
ActiveLanesVreg)
5751-
.addReg(NewAccumulator->getOperand(0).getReg())
5752-
.addImm(0) // clamp
5753-
.addImm(0); // output-modifier
5754-
5755-
// Take negation of input for SUB reduction
5756-
unsigned srcMod = Opc == AMDGPU::V_SUB_F32_e64 ? 1 : 0;
5757-
BuildMI(BB, MI, DL, TII->get(AMDGPU::V_MUL_F32_e64), DstVreg)
5758-
.addImm(srcMod) // src0 modifier
5759-
.addReg(SrcReg)
5760-
.addImm(0) // src1 modifier
5761-
.addReg(ActiveLanesVreg)
5762-
.addImm(0) // clamp
5763-
.addImm(0); // output-mod
5764-
BuildMI(BB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
5765-
.addReg(DstVreg);
5766-
}
57675735
}
57685736
RetBB = &BB;
57695737
}
@@ -6011,14 +5979,10 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
60115979
return lowerWaveReduce(MI, *BB, *getSubtarget(), AMDGPU::S_ADD_I32);
60125980
case AMDGPU::WAVE_REDUCE_ADD_PSEUDO_U64:
60135981
return lowerWaveReduce(MI, *BB, *getSubtarget(), AMDGPU::S_ADD_U64_PSEUDO);
6014-
case AMDGPU::WAVE_REDUCE_FADD_PSEUDO_F32:
6015-
return lowerWaveReduce(MI, *BB, *getSubtarget(), AMDGPU::V_ADD_F32_e64);
60165982
case AMDGPU::WAVE_REDUCE_SUB_PSEUDO_I32:
60175983
return lowerWaveReduce(MI, *BB, *getSubtarget(), AMDGPU::S_SUB_I32);
60185984
case AMDGPU::WAVE_REDUCE_SUB_PSEUDO_U64:
60195985
return lowerWaveReduce(MI, *BB, *getSubtarget(), AMDGPU::S_SUB_U64_PSEUDO);
6020-
case AMDGPU::WAVE_REDUCE_FSUB_PSEUDO_F32:
6021-
return lowerWaveReduce(MI, *BB, *getSubtarget(), AMDGPU::V_SUB_F32_e64);
60225986
case AMDGPU::WAVE_REDUCE_AND_PSEUDO_B32:
60235987
return lowerWaveReduce(MI, *BB, *getSubtarget(), AMDGPU::S_AND_B32);
60245988
case AMDGPU::WAVE_REDUCE_AND_PSEUDO_B64:

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -374,8 +374,6 @@ defvar Operations = [
374374

375375
WaveReduceOp<"fmin", "F32", f32, SGPR_32, VSrc_b32>,
376376
WaveReduceOp<"fmax", "F32", f32, SGPR_32, VSrc_b32>,
377-
WaveReduceOp<"fadd", "F32", f32, SGPR_32, VSrc_b32>,
378-
WaveReduceOp<"fsub", "F32", f32, SGPR_32, VSrc_b32>,
379377
];
380378

381379
foreach Op = Operations in {

0 commit comments

Comments
 (0)