diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 71d23f9fe30c4..3be865f03df1f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -3764,10 +3764,15 @@ bool AMDGPUInstructionSelector::selectBITOP3(MachineInstr &MI) const { if (!Subtarget->hasBitOp3Insts()) return false; + Register DstReg = MI.getOperand(0).getReg(); + const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI); + const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID; + if (!IsVALU) + return false; + SmallVector Src; uint8_t TTbl; unsigned NumOpcodes; - Register DstReg = MI.getOperand(0).getReg(); std::tie(NumOpcodes, TTbl) = BitOp3_Op(DstReg, Src, *MRI); @@ -3776,13 +3781,10 @@ bool AMDGPUInstructionSelector::selectBITOP3(MachineInstr &MI) const { if (NumOpcodes < 2 || Src.empty()) return false; - const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI); - const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID; - // For a uniform case threshold should be higher to account for moves between // VGPRs and SGPRs. It needs one operand in a VGPR, rest two can be in SGPRs // and a readtfirstlane after. - if (NumOpcodes < 4 && !IsVALU) + if (NumOpcodes < 4) return false; bool IsB32 = MRI->getType(DstReg) == LLT::scalar(32); diff --git a/llvm/test/CodeGen/AMDGPU/bitop3.ll b/llvm/test/CodeGen/AMDGPU/bitop3.ll index dd608ef0e5a53..b08ab5a2dc422 100644 --- a/llvm/test/CodeGen/AMDGPU/bitop3.ll +++ b/llvm/test/CodeGen/AMDGPU/bitop3.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -march=amdgcn -mcpu=gfx950 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX950,GFX950-SDAG %s -; RUN: llc -global-isel -march=amdgcn -mcpu=gfx950 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX950,GFX950-GISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn-- -mcpu=gfx950 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX950,GFX950-SDAG %s +; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=gfx950 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX950,GFX950-GISEL %s ; ========= Single bit functions ========= @@ -45,10 +45,17 @@ define amdgpu_ps float @not_and_and_not_and(i32 %a, i32 %b, i32 %c) { } define amdgpu_ps float @not_and_and_and(i32 %a, i32 %b, i32 %c) { -; GCN-LABEL: not_and_and_and: -; GCN: ; %bb.0: -; GCN-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:8 -; GCN-NEXT: ; return to shader part epilog +; GFX950-SDAG-LABEL: not_and_and_and: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:8 +; GFX950-SDAG-NEXT: ; return to shader part epilog +; +; GFX950-GISEL-LABEL: not_and_and_and: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: v_not_b32_e32 v0, v0 +; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v2 +; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1 +; GFX950-GISEL-NEXT: ; return to shader part epilog %nota = xor i32 %a, -1 %and1 = and i32 %nota, %c %and2 = and i32 %and1, %b @@ -70,10 +77,17 @@ define amdgpu_ps float @and_not_and_not_and(i32 %a, i32 %b, i32 %c) { } define amdgpu_ps float @and_not_and_and(i32 %a, i32 %b, i32 %c) { -; GCN-LABEL: and_not_and_and: -; GCN: ; %bb.0: -; GCN-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x20 -; GCN-NEXT: ; return to shader part epilog +; GFX950-SDAG-LABEL: and_not_and_and: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x20 +; GFX950-SDAG-NEXT: ; return to shader part epilog +; +; GFX950-GISEL-LABEL: and_not_and_and: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: v_not_b32_e32 v1, v1 +; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v2 +; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1 +; GFX950-GISEL-NEXT: ; return to shader part epilog %notb = xor i32 %b, -1 %and1 = and i32 %a, %c %and2 = and i32 %and1, %notb @@ -82,10 +96,17 @@ define amdgpu_ps float @and_not_and_and(i32 %a, i32 %b, i32 %c) { } define amdgpu_ps float @and_and_not_and(i32 %a, i32 %b, i32 %c) { -; GCN-LABEL: and_and_not_and: -; GCN: ; %bb.0: -; GCN-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x40 -; GCN-NEXT: ; return to shader part epilog +; GFX950-SDAG-LABEL: and_and_not_and: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x40 +; GFX950-SDAG-NEXT: ; return to shader part epilog +; +; GFX950-GISEL-LABEL: and_and_not_and: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: v_not_b32_e32 v2, v2 +; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v2 +; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1 +; GFX950-GISEL-NEXT: ; return to shader part epilog %notc = xor i32 %c, -1 %and1 = and i32 %a, %notc %and2 = and i32 %and1, %b @@ -94,10 +115,16 @@ define amdgpu_ps float @and_and_not_and(i32 %a, i32 %b, i32 %c) { } define amdgpu_ps float @and_and_and(i32 %a, i32 %b, i32 %c) { -; GCN-LABEL: and_and_and: -; GCN: ; %bb.0: -; GCN-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x80 -; GCN-NEXT: ; return to shader part epilog +; GFX950-SDAG-LABEL: and_and_and: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x80 +; GFX950-SDAG-NEXT: ; return to shader part epilog +; +; GFX950-GISEL-LABEL: and_and_and: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v2 +; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1 +; GFX950-GISEL-NEXT: ; return to shader part epilog %and1 = and i32 %a, %c %and2 = and i32 %and1, %b %ret_cast = bitcast i32 %and2 to float @@ -107,10 +134,16 @@ define amdgpu_ps float @and_and_and(i32 %a, i32 %b, i32 %c) { ; ========= Multi bit functions ========= define amdgpu_ps float @test_12(i32 %a, i32 %b) { -; GCN-LABEL: test_12: -; GCN: ; %bb.0: -; GCN-NEXT: v_bitop3_b32 v0, v0, v1, v0 bitop3:0xc -; GCN-NEXT: ; return to shader part epilog +; GFX950-SDAG-LABEL: test_12: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: v_bitop3_b32 v0, v0, v1, v0 bitop3:0xc +; GFX950-SDAG-NEXT: ; return to shader part epilog +; +; GFX950-GISEL-LABEL: test_12: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: v_not_b32_e32 v0, v0 +; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1 +; GFX950-GISEL-NEXT: ; return to shader part epilog %nota = xor i32 %a, -1 %and1 = and i32 %nota, %b %ret_cast = bitcast i32 %and1 to float @@ -118,10 +151,17 @@ define amdgpu_ps float @test_12(i32 %a, i32 %b) { } define amdgpu_ps float @test_63(i32 %a, i32 %b) { -; GCN-LABEL: test_63: -; GCN: ; %bb.0: -; GCN-NEXT: v_bitop3_b32 v0, v0, v1, v0 bitop3:0x3f -; GCN-NEXT: ; return to shader part epilog +; GFX950-SDAG-LABEL: test_63: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: v_bitop3_b32 v0, v0, v1, v0 bitop3:0x3f +; GFX950-SDAG-NEXT: ; return to shader part epilog +; +; GFX950-GISEL-LABEL: test_63: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: v_not_b32_e32 v0, v0 +; GFX950-GISEL-NEXT: v_not_b32_e32 v1, v1 +; GFX950-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX950-GISEL-NEXT: ; return to shader part epilog %nota = xor i32 %a, -1 %notb = xor i32 %b, -1 %or = or i32 %nota, %notb @@ -143,10 +183,17 @@ define amdgpu_ps float @test_59(i32 %a, i32 %b, i32 %c) { } define amdgpu_ps float @test_126(i32 %a, i32 %b, i32 %c) { -; GCN-LABEL: test_126: -; GCN: ; %bb.0: -; GCN-NEXT: v_bitop3_b32 v0, v0, v2, v1 bitop3:0x7e -; GCN-NEXT: ; return to shader part epilog +; GFX950-SDAG-LABEL: test_126: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: v_bitop3_b32 v0, v0, v2, v1 bitop3:0x7e +; GFX950-SDAG-NEXT: ; return to shader part epilog +; +; GFX950-GISEL-LABEL: test_126: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: v_xor_b32_e32 v1, v0, v1 +; GFX950-GISEL-NEXT: v_xor_b32_e32 v0, v0, v2 +; GFX950-GISEL-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX950-GISEL-NEXT: ; return to shader part epilog %xor1 = xor i32 %a, %b %xor2 = xor i32 %a, %c %or = or i32 %xor1, %xor2 @@ -167,9 +214,9 @@ define amdgpu_ps float @test_12_src_overflow(i32 %a, i32 %b, i32 %c) { ; ; GFX950-GISEL-LABEL: test_12_src_overflow: ; GFX950-GISEL: ; %bb.0: -; GFX950-GISEL-NEXT: v_bitop3_b32 v3, v0, v2, v0 bitop3:0xc -; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v2, v0 bitop3:3 -; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v3, v1, v0 bitop3:0xc8 +; GFX950-GISEL-NEXT: v_not_b32_e32 v0, v0 +; GFX950-GISEL-NEXT: v_bfi_b32 v0, v2, v0, v0 +; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1 ; GFX950-GISEL-NEXT: ; return to shader part epilog %nota = xor i32 %a, -1 %notc = xor i32 %c, -1 @@ -185,13 +232,27 @@ define amdgpu_ps float @test_12_src_overflow(i32 %a, i32 %b, i32 %c) { ; This could be a single LOP3 operation with tbl = 100, but Src vector exhausted during search. define amdgpu_ps float @test_100_src_overflow(i32 %a, i32 %b, i32 %c) { -; GCN-LABEL: test_100_src_overflow: -; GCN: ; %bb.0: -; GCN-NEXT: v_bitop3_b32 v3, v1, v2, v0 bitop3:0x10 -; GCN-NEXT: v_bitop3_b32 v4, v0, v2, v1 bitop3:0x40 -; GCN-NEXT: v_bitop3_b32 v0, v1, v2, v0 bitop3:0x20 -; GCN-NEXT: v_or3_b32 v0, v3, v4, v0 -; GCN-NEXT: ; return to shader part epilog +; GFX950-SDAG-LABEL: test_100_src_overflow: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: v_bitop3_b32 v3, v1, v2, v0 bitop3:0x10 +; GFX950-SDAG-NEXT: v_bitop3_b32 v4, v0, v2, v1 bitop3:0x40 +; GFX950-SDAG-NEXT: v_bitop3_b32 v0, v1, v2, v0 bitop3:0x20 +; GFX950-SDAG-NEXT: v_or3_b32 v0, v3, v4, v0 +; GFX950-SDAG-NEXT: ; return to shader part epilog +; +; GFX950-GISEL-LABEL: test_100_src_overflow: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: v_or_b32_e32 v3, v2, v0 +; GFX950-GISEL-NEXT: v_not_b32_e32 v3, v3 +; GFX950-GISEL-NEXT: v_not_b32_e32 v4, v1 +; GFX950-GISEL-NEXT: v_and_b32_e32 v3, v1, v3 +; GFX950-GISEL-NEXT: v_and_b32_e32 v4, v0, v4 +; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v1, v0 +; GFX950-GISEL-NEXT: v_not_b32_e32 v1, v2 +; GFX950-GISEL-NEXT: v_and_b32_e32 v4, v4, v2 +; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1 +; GFX950-GISEL-NEXT: v_or3_b32 v0, v3, v4, v0 +; GFX950-GISEL-NEXT: ; return to shader part epilog %or1 = or i32 %c, %a %not1 = xor i32 %or1, -1 %and1 = and i32 %b, %not1 @@ -260,12 +321,19 @@ define amdgpu_ps float @uniform_3_op(i32 inreg %a, i32 inreg %b, i32 inreg %c) { } define amdgpu_ps float @uniform_4_op(i32 inreg %a, i32 inreg %b, i32 inreg %c) { -; GCN-LABEL: uniform_4_op: -; GCN: ; %bb.0: -; GCN-NEXT: v_mov_b32_e32 v0, s1 -; GCN-NEXT: v_mov_b32_e32 v1, s2 -; GCN-NEXT: v_bitop3_b32 v0, s0, v0, v1 bitop3:2 -; GCN-NEXT: ; return to shader part epilog +; GFX950-SDAG-LABEL: uniform_4_op: +; GFX950-SDAG: ; %bb.0: +; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, s1 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, s2 +; GFX950-SDAG-NEXT: v_bitop3_b32 v0, s0, v0, v1 bitop3:2 +; GFX950-SDAG-NEXT: ; return to shader part epilog +; +; GFX950-GISEL-LABEL: uniform_4_op: +; GFX950-GISEL: ; %bb.0: +; GFX950-GISEL-NEXT: s_andn2_b32 s0, s2, s0 +; GFX950-GISEL-NEXT: s_andn2_b32 s0, s0, s1 +; GFX950-GISEL-NEXT: v_mov_b32_e32 v0, s0 +; GFX950-GISEL-NEXT: ; return to shader part epilog %nota = xor i32 %a, -1 %notb = xor i32 %b, -1 %and1 = and i32 %nota, %c