Skip to content

Commit 40fb74a

Browse files
authored
[AMDGPU][True16][CodeGen] V_MUL_LO_U16 true16 test (#118118)
This is an NFC change. Update and enable the V_MUL_LO_U16 codegen tests for the true16/fake16 flows.
1 parent 03b5f8f commit 40fb74a

File tree

2 files changed

+64
-18
lines changed

2 files changed

+64
-18
lines changed

llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll

Lines changed: 59 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx801 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
44
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
55
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
6-
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
6+
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-TRUE16 %s
7+
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-FAKE16 %s
78
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s
89

910
define amdgpu_ps i16 @s_mul_i16(i16 inreg %num, i16 inreg %den) {
@@ -65,11 +66,23 @@ define i16 @v_mul_i16(i16 %num, i16 %den) {
6566
; GFX9-NEXT: v_mul_lo_u16_e32 v0, v0, v1
6667
; GFX9-NEXT: s_setpc_b64 s[30:31]
6768
;
68-
; GFX10PLUS-LABEL: v_mul_i16:
69-
; GFX10PLUS: ; %bb.0:
70-
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
71-
; GFX10PLUS-NEXT: v_mul_lo_u16 v0, v0, v1
72-
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
69+
; GFX10-LABEL: v_mul_i16:
70+
; GFX10: ; %bb.0:
71+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
72+
; GFX10-NEXT: v_mul_lo_u16 v0, v0, v1
73+
; GFX10-NEXT: s_setpc_b64 s[30:31]
74+
;
75+
; GFX11-TRUE16-LABEL: v_mul_i16:
76+
; GFX11-TRUE16: ; %bb.0:
77+
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
78+
; GFX11-TRUE16-NEXT: v_mul_lo_u16 v0.l, v0.l, v1.l
79+
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
80+
;
81+
; GFX11-FAKE16-LABEL: v_mul_i16:
82+
; GFX11-FAKE16: ; %bb.0:
83+
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
84+
; GFX11-FAKE16-NEXT: v_mul_lo_u16 v0, v0, v1
85+
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
7386
;
7487
; GFX12-LABEL: v_mul_i16:
7588
; GFX12: ; %bb.0:
@@ -147,12 +160,26 @@ define zeroext i16 @v_mul_i16_zeroext(i16 zeroext %num, i16 zeroext %den) {
147160
; GFX9-NEXT: v_mul_lo_u16_e32 v0, v0, v1
148161
; GFX9-NEXT: s_setpc_b64 s[30:31]
149162
;
150-
; GFX10PLUS-LABEL: v_mul_i16_zeroext:
151-
; GFX10PLUS: ; %bb.0:
152-
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
153-
; GFX10PLUS-NEXT: v_mul_lo_u16 v0, v0, v1
154-
; GFX10PLUS-NEXT: v_and_b32_e32 v0, 0xffff, v0
155-
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
163+
; GFX10-LABEL: v_mul_i16_zeroext:
164+
; GFX10: ; %bb.0:
165+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
166+
; GFX10-NEXT: v_mul_lo_u16 v0, v0, v1
167+
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
168+
; GFX10-NEXT: s_setpc_b64 s[30:31]
169+
;
170+
; GFX11-TRUE16-LABEL: v_mul_i16_zeroext:
171+
; GFX11-TRUE16: ; %bb.0:
172+
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
173+
; GFX11-TRUE16-NEXT: v_mul_lo_u16 v0.l, v0.l, v1.l
174+
; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
175+
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
176+
;
177+
; GFX11-FAKE16-LABEL: v_mul_i16_zeroext:
178+
; GFX11-FAKE16: ; %bb.0:
179+
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
180+
; GFX11-FAKE16-NEXT: v_mul_lo_u16 v0, v0, v1
181+
; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
182+
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
156183
;
157184
; GFX12-LABEL: v_mul_i16_zeroext:
158185
; GFX12: ; %bb.0:
@@ -236,12 +263,26 @@ define signext i16 @v_mul_i16_signext(i16 signext %num, i16 signext %den) {
236263
; GFX9-NEXT: v_bfe_i32 v0, v0, 0, 16
237264
; GFX9-NEXT: s_setpc_b64 s[30:31]
238265
;
239-
; GFX10PLUS-LABEL: v_mul_i16_signext:
240-
; GFX10PLUS: ; %bb.0:
241-
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
242-
; GFX10PLUS-NEXT: v_mul_lo_u16 v0, v0, v1
243-
; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 16
244-
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
266+
; GFX10-LABEL: v_mul_i16_signext:
267+
; GFX10: ; %bb.0:
268+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
269+
; GFX10-NEXT: v_mul_lo_u16 v0, v0, v1
270+
; GFX10-NEXT: v_bfe_i32 v0, v0, 0, 16
271+
; GFX10-NEXT: s_setpc_b64 s[30:31]
272+
;
273+
; GFX11-TRUE16-LABEL: v_mul_i16_signext:
274+
; GFX11-TRUE16: ; %bb.0:
275+
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
276+
; GFX11-TRUE16-NEXT: v_mul_lo_u16 v0.l, v0.l, v1.l
277+
; GFX11-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 16
278+
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
279+
;
280+
; GFX11-FAKE16-LABEL: v_mul_i16_signext:
281+
; GFX11-FAKE16: ; %bb.0:
282+
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
283+
; GFX11-FAKE16-NEXT: v_mul_lo_u16 v0, v0, v1
284+
; GFX11-FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 16
285+
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
245286
;
246287
; GFX12-LABEL: v_mul_i16_signext:
247288
; GFX12: ; %bb.0:

llvm/test/CodeGen/AMDGPU/mul.i16.ll

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,18 @@
11
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
22
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,GFX89 %s
33
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,GFX89 %s
4+
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,+real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX11-TRUE16 %s
5+
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,-real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX11-FAKE16 %s
46

57
; GCN-LABEL: {{^}}v_mul_i16:
68
; SI: v_and_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}}
79
; SI: v_and_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}}
810
; SI: v_mul_u32_u24
911

1012
; GFX89: v_mul_lo_u16_e32 v0, v0, v1
13+
14+
; GFX11-TRUE16: v_mul_lo_u16 v0.l, v0.l, v0.h
15+
; GFX11-FAKE16: v_mul_lo_u16 v0, v0, v1
1116
define i16 @v_mul_i16(i16 %a, i16 %b) {
1217
%r.val = mul i16 %a, %b
1318
ret i16 %r.val

0 commit comments

Comments
 (0)