@@ -5136,18 +5136,23 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
51365136 if (isSGPR) {
51375137 switch (Opc) {
51385138 case AMDGPU::S_MIN_U32:
5139- case AMDGPU::V_CMP_LT_U64_e64: /*umin*/
51405139 case AMDGPU::S_MIN_I32:
5141- case AMDGPU::V_CMP_LT_I64_e64: /*min*/
51425140 case AMDGPU::S_MAX_U32:
5143- case AMDGPU::V_CMP_GT_U64_e64: /*umax*/
51445141 case AMDGPU::S_MAX_I32:
5145- case AMDGPU::V_CMP_GT_I64_e64: /*max*/
51465142 case AMDGPU::S_AND_B32:
51475143 case AMDGPU::S_OR_B32: {
51485144 // Idempotent operations.
5149- unsigned movOpc = is32BitOpc ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
5150- BuildMI(BB, MI, DL, TII->get(movOpc), DstReg).addReg(SrcReg);
5145+ BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MOV_B32), DstReg).addReg(SrcReg);
5146+ RetBB = &BB;
5147+ break;
5148+ }
5149+ case AMDGPU::V_CMP_LT_U64_e64: // umin
5150+ case AMDGPU::V_CMP_LT_I64_e64: // min
5151+ case AMDGPU::V_CMP_GT_U64_e64: // umax
5152+ case AMDGPU::V_CMP_GT_I64_e64: // max
5153+ {
5154+ // Idempotent operations.
5155+ BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MOV_B64), DstReg).addReg(SrcReg);
51515156 RetBB = &BB;
51525157 break;
51535158 }
@@ -5341,9 +5346,7 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
53415346 Register LaneMaskReg = MRI.createVirtualRegister(WaveMaskRegClass);
53425347 Register ComparisonResultReg =
53435348 MRI.createVirtualRegister(WaveMaskRegClass);
5344- const TargetRegisterClass *VregClass =
5345- ST.needsAlignedVGPRs() ? &AMDGPU::VReg_64_Align2RegClass
5346- : &AMDGPU::VReg_64RegClass;
5349+ const TargetRegisterClass *VregClass = TRI->getVGPR64Class();
53475350 const TargetRegisterClass *VSubRegClass =
53485351 TRI->getSubRegisterClass(VregClass, AMDGPU::sub0);
53495352 Register AccumulatorVReg = MRI.createVirtualRegister(VregClass);
0 commit comments