AMDGPU: gate the i16 and/or/xor -> V_{AND,OR,XOR}_B32_e64 selection patterns
on True16Predicate (NotHasTrue16BitInsts, UseFakeTrue16Insts); the v2i16
patterns are split out of the former `foreach vt` loop and stay unconditional.
The GFX11-TRUE16 check lines in uaddsat.ll / usubsat.ll are updated to expect
v_and_b16 on 16-bit register halves in place of v_and_b32 + v_mov_b16.

NOTE(review): this patch text was restored from a copy whose line breaks and
runs of spaces had been collapsed. Line structure and hunk line counts are
verified against the @@ headers; indentation and column padding inside lines
follow LLVM / update_llc_test_checks conventions and are assumed, not
confirmed. If context fails to match, apply with `git apply --ignore-whitespace`
or regenerate the patch from the original commit.

diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index ca4a0fa706c30..6bbf19179b7f6 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -1261,23 +1261,39 @@ class ZExt_i16_i1_Pat : GCNPat <
                      $src)
 >;
 
-foreach vt = [i16, v2i16] in {
+foreach p = [NotHasTrue16BitInsts, UseFakeTrue16Insts] in
+let True16Predicate = p in {
 def : GCNPat <
-  (and vt:$src0, vt:$src1),
+  (and i16:$src0, i16:$src1),
   (V_AND_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
 >;
 
 def : GCNPat <
-  (or vt:$src0, vt:$src1),
+  (or i16:$src0, i16:$src1),
   (V_OR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
 >;
 
 def : GCNPat <
-  (xor vt:$src0, vt:$src1),
+  (xor i16:$src0, i16:$src1),
   (V_XOR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
 >;
 }
 
+def : GCNPat <
+  (and v2i16:$src0, v2i16:$src1),
+  (V_AND_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
+>;
+
+def : GCNPat <
+  (or v2i16:$src0, v2i16:$src1),
+  (V_OR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
+>;
+
+def : GCNPat <
+  (xor v2i16:$src0, v2i16:$src1),
+  (V_XOR_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1)
+>;
+
 let Predicates = [Has16BitInsts, isGFX8GFX9] in {
 
 // Undo sub x, c -> add x, -c canonicalization since c is more likely
diff --git a/llvm/test/CodeGen/AMDGPU/uaddsat.ll b/llvm/test/CodeGen/AMDGPU/uaddsat.ll
index 2775de29368fb..572793e1c5d71 100644
--- a/llvm/test/CodeGen/AMDGPU/uaddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/uaddsat.ll
@@ -42,12 +42,10 @@ define i8 @v_uaddsat_i8(i8 %lhs, i8 %rhs) {
 ; GFX11-TRUE16-LABEL: v_uaddsat_i8:
 ; GFX11-TRUE16:       ; %bb.0:
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT:    v_and_b32_e32 v1, 0xff, v1
-; GFX11-TRUE16-NEXT:    v_and_b32_e32 v0, 0xff, v0
-; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT:    v_and_b16 v0.h, 0xff, v1.l
+; GFX11-TRUE16-NEXT:    v_and_b16 v0.l, 0xff, v0.l
+; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-TRUE16-NEXT:    v_add_nc_u16 v0.l, v0.l, v0.h
-; GFX11-TRUE16-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-TRUE16-NEXT:    v_min_u16 v0.l, 0xff, v0.l
 ; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;
diff --git a/llvm/test/CodeGen/AMDGPU/usubsat.ll b/llvm/test/CodeGen/AMDGPU/usubsat.ll
index 775602ab80cde..75866e33da23a 100644
--- a/llvm/test/CodeGen/AMDGPU/usubsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/usubsat.ll
@@ -39,9 +39,8 @@ define i8 @v_usubsat_i8(i8 %lhs, i8 %rhs) {
 ; GFX11-TRUE16-LABEL: v_usubsat_i8:
 ; GFX11-TRUE16:       ; %bb.0:
 ; GFX11-TRUE16-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-TRUE16-NEXT:    v_and_b32_e32 v1, 0xff, v1
-; GFX11-TRUE16-NEXT:    v_and_b32_e32 v0, 0xff, v0
-; GFX11-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v1.l
+; GFX11-TRUE16-NEXT:    v_and_b16 v0.h, 0xff, v1.l
+; GFX11-TRUE16-NEXT:    v_and_b16 v0.l, 0xff, v0.l
 ; GFX11-TRUE16-NEXT:    v_sub_nc_u16 v0.l, v0.l, v0.h clamp
 ; GFX11-TRUE16-NEXT:    s_setpc_b64 s[30:31]
 ;