@@ -5314,18 +5314,22 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
53145314 if (isSGPR) {
53155315 switch (Opc) {
53165316 case AMDGPU::S_MIN_U32:
5317- case AMDGPU::V_CMP_LT_U64_e64: /*umin*/
53185317 case AMDGPU::S_MIN_I32:
5319- case AMDGPU::V_CMP_LT_I64_e64: /*min*/
53205318 case AMDGPU::S_MAX_U32:
5321- case AMDGPU::V_CMP_GT_U64_e64: /*umax*/
53225319 case AMDGPU::S_MAX_I32:
5323- case AMDGPU::V_CMP_GT_I64_e64: /*max*/
53245320 case AMDGPU::S_AND_B32:
53255321 case AMDGPU::S_OR_B32: {
53265322 // Idempotent operations.
5327- unsigned movOpc = is32BitOpc ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
5328- BuildMI(BB, MI, DL, TII->get(movOpc), DstReg).addReg(SrcReg);
5323+ BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MOV_B32), DstReg).addReg(SrcReg);
5324+ RetBB = &BB;
5325+ break;
5326+ }
5327+ case AMDGPU::V_CMP_LT_U64_e64: // umin
5328+ case AMDGPU::V_CMP_LT_I64_e64: // min
5329+ case AMDGPU::V_CMP_GT_U64_e64: // umax
5330+ case AMDGPU::V_CMP_GT_I64_e64: { // max
5331+ // Idempotent operations.
5332+ BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MOV_B64), DstReg).addReg(SrcReg);
53295333 RetBB = &BB;
53305334 break;
53315335 }
@@ -5519,9 +5523,7 @@ static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
55195523 Register LaneMaskReg = MRI.createVirtualRegister(WaveMaskRegClass);
55205524 Register ComparisonResultReg =
55215525 MRI.createVirtualRegister(WaveMaskRegClass);
5522- const TargetRegisterClass *VregClass =
5523- ST.needsAlignedVGPRs() ? &AMDGPU::VReg_64_Align2RegClass
5524- : &AMDGPU::VReg_64RegClass;
5526+ const TargetRegisterClass *VregClass = TRI->getVGPR64Class();
55255527 const TargetRegisterClass *VSubRegClass =
55265528 TRI->getSubRegisterClass(VregClass, AMDGPU::sub0);
55275529 Register AccumulatorVReg = MRI.createVirtualRegister(VregClass);
0 commit comments