@@ -5236,18 +5236,22 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
52365236 if (isSGPR) {
52375237 switch (Opc) {
52385238 case AMDGPU::S_MIN_U32:
5239- case AMDGPU::V_CMP_LT_U64_e64: /*umin*/
52405239 case AMDGPU::S_MIN_I32:
5241- case AMDGPU::V_CMP_LT_I64_e64: /*min*/
52425240 case AMDGPU::S_MAX_U32:
5243- case AMDGPU::V_CMP_GT_U64_e64: /*umax*/
52445241 case AMDGPU::S_MAX_I32:
5245- case AMDGPU::V_CMP_GT_I64_e64: /*max*/
52465242 case AMDGPU::S_AND_B32:
52475243 case AMDGPU::S_OR_B32: {
52485244 // Idempotent operations.
5249- unsigned movOpc = is32BitOpc ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
5250- BuildMI(BB, MI, DL, TII->get(movOpc), DstReg).addReg(SrcReg);
5245+ BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MOV_B32), DstReg).addReg(SrcReg);
5246+ RetBB = &BB;
5247+ break;
5248+ }
5249+ case AMDGPU::V_CMP_LT_U64_e64: // umin
5250+ case AMDGPU::V_CMP_LT_I64_e64: // min
5251+ case AMDGPU::V_CMP_GT_U64_e64: // umax
5252+ case AMDGPU::V_CMP_GT_I64_e64: { // max
5253+ // Idempotent operations.
5254+ BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MOV_B64), DstReg).addReg(SrcReg);
52515255 RetBB = &BB;
52525256 break;
52535257 }
@@ -5441,9 +5445,7 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
54415445 Register LaneMaskReg = MRI.createVirtualRegister(WaveMaskRegClass);
54425446 Register ComparisonResultReg =
54435447 MRI.createVirtualRegister(WaveMaskRegClass);
5444- const TargetRegisterClass *VregClass =
5445- ST.needsAlignedVGPRs() ? &AMDGPU::VReg_64_Align2RegClass
5446- : &AMDGPU::VReg_64RegClass;
5448+ const TargetRegisterClass *VregClass = TRI->getVGPR64Class();
54475449 const TargetRegisterClass *VSubRegClass =
54485450 TRI->getSubRegisterClass(VregClass, AMDGPU::sub0);
54495451 Register AccumulatorVReg = MRI.createVirtualRegister(VregClass);
0 commit comments