diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 041b9b4d66f63..1e654b260cbfa 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -3782,13 +3782,7 @@ bool AMDGPUInstructionSelector::selectBITOP3(MachineInstr &MI) const { if (NumOpcodes < 2 || Src.empty()) return false; - // For a uniform case threshold should be higher to account for moves between - // VGPRs and SGPRs. It needs one operand in a VGPR, rest two can be in SGPRs - // and a readtfirstlane after. - if (NumOpcodes < 4) - return false; - - bool IsB32 = MRI->getType(DstReg) == LLT::scalar(32); + const bool IsB32 = MRI->getType(DstReg) == LLT::scalar(32); if (NumOpcodes == 2 && IsB32) { // Avoid using BITOP3 for OR3, XOR3, AND_OR. This is not faster but makes // asm more readable. This cannot be modeled with AddedComplexity because @@ -3797,6 +3791,11 @@ bool AMDGPUInstructionSelector::selectBITOP3(MachineInstr &MI) const { mi_match(MI, *MRI, m_GOr(m_GOr(m_Reg(), m_Reg()), m_Reg())) || mi_match(MI, *MRI, m_GOr(m_GAnd(m_Reg(), m_Reg()), m_Reg()))) return false; + } else if (NumOpcodes < 4) { + // For a uniform case threshold should be higher to account for moves + // between VGPRs and SGPRs. It needs one operand in a VGPR, rest two can be + // in SGPRs and a readtfirstlane after. + return false; } unsigned Opc = IsB32 ? AMDGPU::V_BITOP3_B32_e64 : AMDGPU::V_BITOP3_B16_e64; diff --git a/llvm/test/CodeGen/AMDGPU/bitop3.ll b/llvm/test/CodeGen/AMDGPU/bitop3.ll index b08ab5a2dc422..eb149a93ee328 100644 --- a/llvm/test/CodeGen/AMDGPU/bitop3.ll +++ b/llvm/test/CodeGen/AMDGPU/bitop3.ll @@ -52,8 +52,7 @@ define amdgpu_ps float @not_and_and_and(i32 %a, i32 %b, i32 %c) { ; ; GFX950-GISEL-LABEL: not_and_and_and: ; GFX950-GISEL: ; %bb.0: -; GFX950-GISEL-NEXT: v_not_b32_e32 v0, v0 -; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v2 +; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v2, v0 bitop3:0xc ; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1 ; GFX950-GISEL-NEXT: ; return to shader part epilog %nota = xor i32 %a, -1 @@ -103,8 +102,7 @@ define amdgpu_ps float @and_and_not_and(i32 %a, i32 %b, i32 %c) { ; ; GFX950-GISEL-LABEL: and_and_not_and: ; GFX950-GISEL: ; %bb.0: -; GFX950-GISEL-NEXT: v_not_b32_e32 v2, v2 -; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v2 +; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v2, v0 bitop3:0x30 ; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1 ; GFX950-GISEL-NEXT: ; return to shader part epilog %notc = xor i32 %c, -1 @@ -122,8 +120,7 @@ define amdgpu_ps float @and_and_and(i32 %a, i32 %b, i32 %c) { ; ; GFX950-GISEL-LABEL: and_and_and: ; GFX950-GISEL: ; %bb.0: -; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v2 -; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1 +; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x80 ; GFX950-GISEL-NEXT: ; return to shader part epilog %and1 = and i32 %a, %c %and2 = and i32 %and1, %b @@ -141,8 +138,7 @@ define amdgpu_ps float @test_12(i32 %a, i32 %b) { ; ; GFX950-GISEL-LABEL: test_12: ; GFX950-GISEL: ; %bb.0: -; GFX950-GISEL-NEXT: v_not_b32_e32 v0, v0 -; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1 +; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v1, v0 bitop3:0xc ; GFX950-GISEL-NEXT: ; return to shader part epilog %nota = xor i32 %a, -1 %and1 = and i32 %nota, %b @@ -214,9 +210,11 @@ define amdgpu_ps float @test_12_src_overflow(i32 %a, i32 %b, i32 %c) { ; ; GFX950-GISEL-LABEL: test_12_src_overflow: ; GFX950-GISEL: ; %bb.0: -; GFX950-GISEL-NEXT: v_not_b32_e32 v0, v0 -; GFX950-GISEL-NEXT: v_bfi_b32 v0, v2, v0, v0 -; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1 +; GFX950-GISEL-NEXT: v_not_b32_e32 v3, v0 +; GFX950-GISEL-NEXT: v_not_b32_e32 v4, v2 +; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v2, v0 bitop3:0xc +; GFX950-GISEL-NEXT: v_and_b32_e32 v2, v3, v4 +; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0xc8 ; GFX950-GISEL-NEXT: ; return to shader part epilog %nota = xor i32 %a, -1 %notc = xor i32 %c, -1 @@ -242,11 +240,9 @@ define amdgpu_ps float @test_100_src_overflow(i32 %a, i32 %b, i32 %c) { ; ; GFX950-GISEL-LABEL: test_100_src_overflow: ; GFX950-GISEL: ; %bb.0: -; GFX950-GISEL-NEXT: v_or_b32_e32 v3, v2, v0 -; GFX950-GISEL-NEXT: v_not_b32_e32 v3, v3 -; GFX950-GISEL-NEXT: v_not_b32_e32 v4, v1 +; GFX950-GISEL-NEXT: v_bitop3_b32 v3, v2, v0, v2 bitop3:3 ; GFX950-GISEL-NEXT: v_and_b32_e32 v3, v1, v3 -; GFX950-GISEL-NEXT: v_and_b32_e32 v4, v0, v4 +; GFX950-GISEL-NEXT: v_bitop3_b32 v4, v0, v1, v0 bitop3:0x30 ; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v1, v0 ; GFX950-GISEL-NEXT: v_not_b32_e32 v1, v2 ; GFX950-GISEL-NEXT: v_and_b32_e32 v4, v4, v2