@@ -5480,15 +5480,11 @@ static uint32_t getIdentityValueFor32BitWaveReduction(unsigned Opc) {
54805480 return std::numeric_limits<uint32_t>::min();
54815481 case AMDGPU::S_MAX_I32:
54825482 return std::numeric_limits<int32_t>::min();
5483- case AMDGPU::V_SUB_F32_e64: // +0.0
5484- return __builtin_bit_cast(uint32_t, +0.0f);
54855483 case AMDGPU::S_ADD_I32:
54865484 case AMDGPU::S_SUB_I32:
54875485 case AMDGPU::S_OR_B32:
54885486 case AMDGPU::S_XOR_B32:
54895487 return std::numeric_limits<uint32_t>::min();
5490- case AMDGPU::V_ADD_F32_e64: // -0.0
5491- return __builtin_bit_cast(uint32_t, -0.0f);
54925488 case AMDGPU::S_AND_B32:
54935489 return std::numeric_limits<uint32_t>::max();
54945490 case AMDGPU::V_MIN_F32_e64:
@@ -5529,13 +5525,11 @@ static bool is32bitWaveReduceOperation(unsigned Opc) {
55295525 Opc == AMDGPU::S_ADD_I32 || Opc == AMDGPU::S_SUB_I32 ||
55305526 Opc == AMDGPU::S_AND_B32 || Opc == AMDGPU::S_OR_B32 ||
55315527 Opc == AMDGPU::S_XOR_B32 || Opc == AMDGPU::V_MIN_F32_e64 ||
5532- Opc == AMDGPU::V_MAX_F32_e64 || Opc == AMDGPU::V_ADD_F32_e64 ||
5533- Opc == AMDGPU::V_SUB_F32_e64;
5528+ Opc == AMDGPU::V_MAX_F32_e64;
55345529}
55355530
// Returns true when Opc is one of the opcodes this predicate accepts.
// The removed ('-') form of the return accepted V_MIN_F32_e64, V_MAX_F32_e64,
// V_ADD_F32_e64 and V_SUB_F32_e64; the added ('+') form accepts only
// V_MIN_F32_e64 and V_MAX_F32_e64, so after this change V_ADD_F32_e64 and
// V_SUB_F32_e64 yield false.
55365531static bool isFloatingPointWaveReduceOperation(unsigned Opc) {
5537- return Opc == AMDGPU::V_MIN_F32_e64 || Opc == AMDGPU::V_MAX_F32_e64 ||
5538- Opc == AMDGPU::V_ADD_F32_e64 || Opc == AMDGPU::V_SUB_F32_e64;
5532+ return Opc == AMDGPU::V_MIN_F32_e64 || Opc == AMDGPU::V_MAX_F32_e64;
55395533}
55405534
55415535static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
@@ -5582,10 +5576,8 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
55825576 case AMDGPU::S_XOR_B64:
55835577 case AMDGPU::S_ADD_I32:
55845578 case AMDGPU::S_ADD_U64_PSEUDO:
5585- case AMDGPU::V_ADD_F32_e64:
55865579 case AMDGPU::S_SUB_I32:
5587- case AMDGPU::S_SUB_U64_PSEUDO:
5588- case AMDGPU::V_SUB_F32_e64: {
5580+ case AMDGPU::S_SUB_U64_PSEUDO: {
55895581 const TargetRegisterClass *WaveMaskRegClass = TRI->getWaveMaskRegClass();
55905582 const TargetRegisterClass *DstRegClass = MRI.getRegClass(DstReg);
55915583 Register ExecMask = MRI.createVirtualRegister(WaveMaskRegClass);
@@ -5740,30 +5732,6 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
57405732 .addImm(AMDGPU::sub1);
57415733 break;
57425734 }
5743- case AMDGPU::V_ADD_F32_e64:
5744- case AMDGPU::V_SUB_F32_e64: {
5745- Register ActiveLanesVreg =
5746- MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
5747- Register DstVreg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
5748- // Get number of active lanes as a float val.
5749- BuildMI(BB, MI, DL, TII->get(AMDGPU::V_CVT_F32_I32_e64),
5750- ActiveLanesVreg)
5751- .addReg(NewAccumulator->getOperand(0).getReg())
5752- .addImm(0) // clamp
5753- .addImm(0); // output-modifier
5754-
5755- // Take negation of input for SUB reduction
5756- unsigned srcMod = Opc == AMDGPU::V_SUB_F32_e64 ? 1 : 0;
5757- BuildMI(BB, MI, DL, TII->get(AMDGPU::V_MUL_F32_e64), DstVreg)
5758- .addImm(srcMod) // src0 modifier
5759- .addReg(SrcReg)
5760- .addImm(0) // src1 modifier
5761- .addReg(ActiveLanesVreg)
5762- .addImm(0) // clamp
5763- .addImm(0); // output-mod
5764- BuildMI(BB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
5765- .addReg(DstVreg);
5766- }
57675735 }
57685736 RetBB = &BB;
57695737 }
@@ -6011,14 +5979,10 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
60115979 return lowerWaveReduce(MI, *BB, *getSubtarget(), AMDGPU::S_ADD_I32);
60125980 case AMDGPU::WAVE_REDUCE_ADD_PSEUDO_U64:
60135981 return lowerWaveReduce(MI, *BB, *getSubtarget(), AMDGPU::S_ADD_U64_PSEUDO);
6014- case AMDGPU::WAVE_REDUCE_FADD_PSEUDO_F32:
6015- return lowerWaveReduce(MI, *BB, *getSubtarget(), AMDGPU::V_ADD_F32_e64);
60165982 case AMDGPU::WAVE_REDUCE_SUB_PSEUDO_I32:
60175983 return lowerWaveReduce(MI, *BB, *getSubtarget(), AMDGPU::S_SUB_I32);
60185984 case AMDGPU::WAVE_REDUCE_SUB_PSEUDO_U64:
60195985 return lowerWaveReduce(MI, *BB, *getSubtarget(), AMDGPU::S_SUB_U64_PSEUDO);
6020- case AMDGPU::WAVE_REDUCE_FSUB_PSEUDO_F32:
6021- return lowerWaveReduce(MI, *BB, *getSubtarget(), AMDGPU::V_SUB_F32_e64);
60225986 case AMDGPU::WAVE_REDUCE_AND_PSEUDO_B32:
60235987 return lowerWaveReduce(MI, *BB, *getSubtarget(), AMDGPU::S_AND_B32);
60245988 case AMDGPU::WAVE_REDUCE_AND_PSEUDO_B64:
0 commit comments