Skip to content

Commit 47fadcd

Browse files
Update AMDGPU tests
1 parent f1fee8e commit 47fadcd

File tree

7 files changed

+1377
-739
lines changed

7 files changed

+1377
-739
lines changed

llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -70,8 +70,9 @@ define i16 @add1_i16(ptr addrspace(1) nocapture %arg, ptr addrspace(1) nocapture
7070
; GFX9-NEXT: global_load_dword v0, v[0:1], off
7171
; GFX9-NEXT: v_bfe_u32 v1, v31, 10, 10
7272
; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, v2, v1
73+
; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
7374
; GFX9-NEXT: s_waitcnt vmcnt(0)
74-
; GFX9-NEXT: v_addc_co_u32_e32 v0, vcc, 0, v0, vcc
75+
; GFX9-NEXT: v_add_u16_e32 v0, v0, v1
7576
; GFX9-NEXT: s_setpc_b64 s[30:31]
7677
bb:
7778
%x = tail call i32 @llvm.amdgcn.workitem.id.x()

llvm/test/CodeGen/AMDGPU/computeNumSignBits-mul.ll

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -5,9 +5,9 @@ define i16 @num_sign_bits_mul_i48_0(i8 %X, i8 %Y, i8 %Z, i8 %W) {
55
; GFX9-LABEL: num_sign_bits_mul_i48_0:
66
; GFX9: ; %bb.0:
77
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8-
; GFX9-NEXT: v_mul_i32_i24_sdwa v2, sext(v2), sext(v3) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
9-
; GFX9-NEXT: v_mul_i32_i24_sdwa v0, sext(v0), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
10-
; GFX9-NEXT: v_mul_i32_i24_e32 v0, v0, v2
8+
; GFX9-NEXT: v_mul_lo_u16_sdwa v2, sext(v2), sext(v3) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
9+
; GFX9-NEXT: v_mul_lo_u16_sdwa v0, sext(v0), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
10+
; GFX9-NEXT: v_mul_lo_u16_e32 v0, v0, v2
1111
; GFX9-NEXT: s_setpc_b64 s[30:31]
1212
%A = sext i8 %X to i48
1313
%B = sext i8 %Y to i48

llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll

Lines changed: 23 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -4030,7 +4030,7 @@ define half @v_fneg_fp_round_f64_to_f16(double %a) #0 {
40304030
; VI-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
40314031
; VI-NEXT: v_cmp_eq_u32_e32 vcc, s4, v3
40324032
; VI-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
4033-
; VI-NEXT: v_mov_b32_e32 v2, 0x8000
4033+
; VI-NEXT: v_mov_b32_e32 v2, 0xffff8000
40344034
; VI-NEXT: v_and_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
40354035
; VI-NEXT: v_or_b32_e32 v0, v1, v0
40364036
; VI-NEXT: s_setpc_b64 s[30:31]
@@ -4045,19 +4045,22 @@ define half @v_fneg_fp_round_f64_to_f16(double %a) #0 {
40454045
; GFX11-NEXT: v_bfe_u32 v3, v1, 20, 11
40464046
; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v1
40474047
; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
4048-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
4048+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
40494049
; GFX11-NEXT: v_sub_nc_u32_e32 v4, 0x3f1, v3
4050+
; GFX11-NEXT: v_and_b32_e32 v1, 0xffff8000, v1
40504051
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
4052+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_4)
40514053
; GFX11-NEXT: v_and_or_b32 v0, 0xffe, v2, v0
4052-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
40534054
; GFX11-NEXT: v_med3_i32 v2, v4, 0, 13
4055+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
40544056
; GFX11-NEXT: v_or_b32_e32 v4, 0x1000, v0
40554057
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
40564058
; GFX11-NEXT: v_lshrrev_b32_e32 v5, v2, v4
4059+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
40574060
; GFX11-NEXT: v_lshlrev_b32_e32 v2, v2, v5
4058-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
40594061
; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, v2, v4
40604062
; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
4063+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
40614064
; GFX11-NEXT: v_or_b32_e32 v2, v5, v2
40624065
; GFX11-NEXT: v_add_nc_u32_e32 v3, 0xfffffc10, v3
40634066
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
@@ -4078,11 +4081,12 @@ define half @v_fneg_fp_round_f64_to_f16(double %a) #0 {
40784081
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3)
40794082
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x7c00, v5, vcc_lo
40804083
; GFX11-NEXT: v_cmp_gt_i32_e32 vcc_lo, 31, v3
4084+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
40814085
; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7c00, v2, vcc_lo
40824086
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0x40f, v3
4083-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
40844087
; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
4085-
; GFX11-NEXT: v_and_or_b32 v0, 0x8000, v1, v0
4088+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
4089+
; GFX11-NEXT: v_or_b32_e32 v0, v1, v0
40864090
; GFX11-NEXT: s_setpc_b64 s[30:31]
40874091
%fpround = fptrunc double %a to half
40884092
%fneg = fneg half %fpround
@@ -4180,7 +4184,7 @@ define half @v_fneg_fp_round_fneg_f64_to_f16(double %a) #0 {
41804184
; VI-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
41814185
; VI-NEXT: v_cmp_eq_u32_e32 vcc, s4, v3
41824186
; VI-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
4183-
; VI-NEXT: v_mov_b32_e32 v2, 0x8000
4187+
; VI-NEXT: v_mov_b32_e32 v2, 0xffff8000
41844188
; VI-NEXT: v_and_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
41854189
; VI-NEXT: v_or_b32_e32 v0, v1, v0
41864190
; VI-NEXT: s_setpc_b64 s[30:31]
@@ -4195,20 +4199,22 @@ define half @v_fneg_fp_round_fneg_f64_to_f16(double %a) #0 {
41954199
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
41964200
; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
41974201
; GFX11-NEXT: v_sub_nc_u32_e32 v4, 0x3f1, v3
4202+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
4203+
; GFX11-NEXT: v_and_b32_e32 v1, 0xffff8000, v1
41984204
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
4199-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
42004205
; GFX11-NEXT: v_and_or_b32 v0, 0xffe, v2, v0
4206+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
42014207
; GFX11-NEXT: v_med3_i32 v2, v4, 0, 13
42024208
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
42034209
; GFX11-NEXT: v_or_b32_e32 v4, 0x1000, v0
42044210
; GFX11-NEXT: v_lshrrev_b32_e32 v5, v2, v4
4205-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
42064211
; GFX11-NEXT: v_lshlrev_b32_e32 v2, v2, v5
4212+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
42074213
; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, v2, v4
42084214
; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
4209-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
42104215
; GFX11-NEXT: v_or_b32_e32 v2, v5, v2
42114216
; GFX11-NEXT: v_add_nc_u32_e32 v3, 0xfffffc10, v3
4217+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
42124218
; GFX11-NEXT: v_lshl_or_b32 v4, v3, 12, v0
42134219
; GFX11-NEXT: v_cmp_gt_i32_e32 vcc_lo, 1, v3
42144220
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
@@ -4229,9 +4235,9 @@ define half @v_fneg_fp_round_fneg_f64_to_f16(double %a) #0 {
42294235
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
42304236
; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7c00, v2, vcc_lo
42314237
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0x40f, v3
4238+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
42324239
; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
4233-
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
4234-
; GFX11-NEXT: v_and_or_b32 v0, 0x8000, v1, v0
4240+
; GFX11-NEXT: v_or_b32_e32 v0, v1, v0
42354241
; GFX11-NEXT: s_setpc_b64 s[30:31]
42364242
%fneg.a = fneg double %a
42374243
%fpround = fptrunc double %fneg.a to half
@@ -4336,7 +4342,7 @@ define { half, double } @v_fneg_fp_round_store_use_fneg_f64_to_f16(double %a) #0
43364342
; VI-NEXT: v_cndmask_b32_e32 v0, v6, v7, vcc
43374343
; VI-NEXT: v_cmp_eq_u32_e32 vcc, s4, v5
43384344
; VI-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
4339-
; VI-NEXT: v_mov_b32_e32 v4, 0x8000
4345+
; VI-NEXT: v_mov_b32_e32 v4, 0xffff8000
43404346
; VI-NEXT: v_xor_b32_e32 v2, 0x80000000, v1
43414347
; VI-NEXT: v_and_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
43424348
; VI-NEXT: v_or_b32_e32 v0, v1, v0
@@ -4599,7 +4605,7 @@ define { half, double } @v_fneg_fp_round_multi_use_fneg_f64_to_f16(double %a, do
45994605
; VI-NEXT: v_cndmask_b32_e32 v4, v7, v8, vcc
46004606
; VI-NEXT: v_cmp_eq_u32_e32 vcc, s4, v6
46014607
; VI-NEXT: v_cndmask_b32_e32 v0, v5, v4, vcc
4602-
; VI-NEXT: v_mov_b32_e32 v4, 0x8000
4608+
; VI-NEXT: v_mov_b32_e32 v4, 0xffff8000
46034609
; VI-NEXT: v_and_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
46044610
; VI-NEXT: v_or_b32_e32 v0, v1, v0
46054611
; VI-NEXT: v_mov_b32_e32 v1, v2
@@ -4757,7 +4763,7 @@ define { half, half } @v_fneg_multi_use_fp_round_fneg_f64_to_f16(double %a) #0 {
47574763
; VI-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
47584764
; VI-NEXT: v_cmp_eq_u32_e32 vcc, s4, v3
47594765
; VI-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
4760-
; VI-NEXT: v_mov_b32_e32 v2, 0x8000
4766+
; VI-NEXT: v_mov_b32_e32 v2, 0xffff8000
47614767
; VI-NEXT: v_and_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
47624768
; VI-NEXT: v_or_b32_e32 v1, v1, v0
47634769
; VI-NEXT: v_xor_b32_e32 v0, 0x8000, v1
@@ -4807,9 +4813,9 @@ define { half, half } @v_fneg_multi_use_fp_round_fneg_f64_to_f16(double %a) #0 {
48074813
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
48084814
; GFX11-NEXT: v_cndmask_b32_e32 v2, 0x7c00, v2, vcc_lo
48094815
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0x40f, v3
4810-
; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc_lo
4816+
; GFX11-NEXT: v_dual_cndmask_b32 v0, v2, v0 :: v_dual_and_b32 v1, 0xffff8000, v1
48114817
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4812-
; GFX11-NEXT: v_and_or_b32 v1, 0x8000, v1, v0
4818+
; GFX11-NEXT: v_or_b32_e32 v1, v1, v0
48134819
; GFX11-NEXT: v_xor_b32_e32 v0, 0x8000, v1
48144820
; GFX11-NEXT: s_setpc_b64 s[30:31]
48154821
; GFX11-SAFE-TRUE16-LABEL: v_fneg_multi_use_fp_round_fneg_f64_to_f16:

0 commit comments

Comments
 (0)