diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll index 07c94362d0439..756eb2788607b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll @@ -3,7 +3,8 @@ ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx801 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16, -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-TRUE16 %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16, -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-FAKE16 %s ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s define amdgpu_ps i16 @s_mul_i16(i16 inreg %num, i16 inreg %den) { @@ -65,11 +66,23 @@ define i16 @v_mul_i16(i16 %num, i16 %den) { ; GFX9-NEXT: v_mul_lo_u16_e32 v0, v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX10PLUS-LABEL: v_mul_i16: -; GFX10PLUS: ; %bb.0: -; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] +; GFX10-LABEL: v_mul_i16: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_mul_lo_u16 v0, v0, v1 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-TRUE16-LABEL: v_mul_i16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_mul_lo_u16 v0.l, v0.l, v1.l +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: v_mul_i16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_mul_lo_u16 v0, v0, v1 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: v_mul_i16: ; GFX12: ; %bb.0: @@ -147,12 +160,26 @@ define zeroext i16 @v_mul_i16_zeroext(i16 zeroext %num, i16 zeroext %den) { ; GFX9-NEXT: v_mul_lo_u16_e32 v0, v0, v1 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX10PLUS-LABEL: v_mul_i16_zeroext: -; GFX10PLUS: ; %bb.0: -; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX10PLUS-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] +; GFX10-LABEL: v_mul_i16_zeroext: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_mul_lo_u16 v0, v0, v1 +; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-TRUE16-LABEL: v_mul_i16_zeroext: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_mul_lo_u16 v0.l, v0.l, v1.l +; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: v_mul_i16_zeroext: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_mul_lo_u16 v0, v0, v1 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: v_mul_i16_zeroext: ; GFX12: ; %bb.0: @@ -236,12 +263,26 @@ define signext i16 @v_mul_i16_signext(i16 signext %num, i16 signext %den) { ; GFX9-NEXT: v_bfe_i32 v0, v0, 0, 16 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX10PLUS-LABEL: v_mul_i16_signext: -; GFX10PLUS: ; %bb.0: -; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: v_mul_lo_u16 v0, v0, v1 -; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 16 -; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] +; GFX10-LABEL: v_mul_i16_signext: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_mul_lo_u16 v0, v0, v1 +; GFX10-NEXT: v_bfe_i32 v0, v0, 0, 16 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-TRUE16-LABEL: v_mul_i16_signext: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_mul_lo_u16 v0.l, v0.l, v1.l +; GFX11-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 16 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: v_mul_i16_signext: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_mul_lo_u16 v0, v0, v1 +; GFX11-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 16 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: v_mul_i16_signext: ; GFX12: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/mul.i16.ll b/llvm/test/CodeGen/AMDGPU/mul.i16.ll index 98c3867b11f10..2ba719c4138c6 100644 --- a/llvm/test/CodeGen/AMDGPU/mul.i16.ll +++ b/llvm/test/CodeGen/AMDGPU/mul.i16.ll @@ -1,6 +1,8 @@ ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,GFX89 %s ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX89 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,+real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX11-TRUE16 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,-real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX11-FAKE16 %s ; GCN-LABEL: {{^}}v_mul_i16: ; SI: v_and_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}} @@ -8,6 +10,9 @@ ; SI: v_mul_u32_u24 ; GFX89: v_mul_lo_u16_e32 v0, v0, v1 + +; GFX11-TRUE16: v_mul_lo_u16 v0.l, v0.l, v0.h +; GFX11-FAKE16: v_mul_lo_u16 v0, v0, v1 define i16 @v_mul_i16(i16 %a, i16 %b) { %r.val = mul i16 %a, %b ret i16 %r.val