Skip to content

Commit ab96ec4

Browse files
committed
[AMDGPU] Precommit some test updates for D68338 "Remove dubious logic in bidirectional list scheduler"
1 parent: bcda126 · commit: ab96ec4

14 files changed, with 60 additions and 61 deletions.

llvm/test/CodeGen/AMDGPU/add.v2i16.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -203,8 +203,8 @@ define amdgpu_kernel void @v_test_add_v2i16_zext_to_v2i32(<2 x i32> addrspace(1)
203203
; GFX9: buffer_store_dwordx4
204204

205205
; VI-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
206-
; VI: flat_load_dword v[[A:[0-9]+]]
207-
; VI: flat_load_dword v[[B:[0-9]+]]
206+
; VI-DAG: flat_load_dword v[[A:[0-9]+]]
207+
; VI-DAG: flat_load_dword v[[B:[0-9]+]]
208208

209209
; VI-DAG: v_add_u16_e32
210210
; VI: v_add_u16_sdwa v[[ADD_HI:[0-9]+]], v[[A]], v[[B]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1

llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -344,8 +344,8 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_var_v2f16(<2 x half> addrspa
344344
}
345345

346346
; GCN-LABEL: {{^}}s_test_canonicalize_var_v2f16:
347-
; VI: v_max_f16_sdwa [[REG0:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
348-
; VI: v_max_f16_e64 [[REG1:v[0-9]+]], {{s[0-9]+}}, {{s[0-9]+}}
347+
; VI-DAG: v_max_f16_sdwa [[REG0:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
348+
; VI-DAG: v_max_f16_e64 [[REG1:v[0-9]+]], {{s[0-9]+}}, {{s[0-9]+}}
349349
; VI-NOT: v_and_b32
350350

351351
; GFX9: v_pk_max_f16 [[REG:v[0-9]+]], {{s[0-9]+}}, {{s[0-9]+$}}

llvm/test/CodeGen/AMDGPU/fneg-combines.ll

Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -115,7 +115,7 @@ define amdgpu_kernel void @v_fneg_add_fneg_x_f32(float addrspace(1)* %out, float
115115
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]
116116

117117
; GCN-NSZ: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
118-
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
118+
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
119119
define amdgpu_kernel void @v_fneg_add_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
120120
%tid = call i32 @llvm.amdgcn.workitem.id.x()
121121
%tid.ext = sext i32 %tid to i64
@@ -139,7 +139,7 @@ define amdgpu_kernel void @v_fneg_add_x_fneg_f32(float addrspace(1)* %out, float
139139
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]
140140

141141
; GCN-NSZ: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
142-
; GCN-NSZ-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
142+
; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
143143
define amdgpu_kernel void @v_fneg_add_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
144144
%tid = call i32 @llvm.amdgcn.workitem.id.x()
145145
%tid.ext = sext i32 %tid to i64
@@ -157,9 +157,9 @@ define amdgpu_kernel void @v_fneg_add_fneg_fneg_f32(float addrspace(1)* %out, fl
157157
}
158158

159159
; GCN-LABEL: {{^}}v_fneg_add_store_use_fneg_x_f32:
160-
; GCN-SAFE: s_brev_b32 [[SIGNBIT:s[0-9]+]], 1{{$}}
161-
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
162-
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
160+
; GCN-SAFE-DAG: s_brev_b32 [[SIGNBIT:s[0-9]+]], 1{{$}}
161+
; GCN-DAG: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
162+
; GCN-DAG: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
163163

164164
; GCN-SAFE: v_xor_b32_e32 [[NEG_A:v[0-9]+]], [[SIGNBIT]], [[A]]
165165
; GCN-SAFE: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
@@ -329,7 +329,7 @@ define amdgpu_kernel void @v_fneg_mul_multi_use_mul_f32(float addrspace(1)* %out
329329
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
330330
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
331331
; GCN: v_mul_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
332-
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
332+
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
333333
define amdgpu_kernel void @v_fneg_mul_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
334334
%tid = call i32 @llvm.amdgcn.workitem.id.x()
335335
%tid.ext = sext i32 %tid to i64
@@ -349,7 +349,7 @@ define amdgpu_kernel void @v_fneg_mul_fneg_x_f32(float addrspace(1)* %out, float
349349
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
350350
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
351351
; GCN: v_mul_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
352-
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
352+
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
353353
define amdgpu_kernel void @v_fneg_mul_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
354354
%tid = call i32 @llvm.amdgcn.workitem.id.x()
355355
%tid.ext = sext i32 %tid to i64
@@ -369,7 +369,7 @@ define amdgpu_kernel void @v_fneg_mul_x_fneg_f32(float addrspace(1)* %out, float
369369
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
370370
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
371371
; GCN: v_mul_f32_e64 [[ADD:v[0-9]+]], [[A]], -[[B]]
372-
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
372+
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
373373
define amdgpu_kernel void @v_fneg_mul_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
374374
%tid = call i32 @llvm.amdgcn.workitem.id.x()
375375
%tid.ext = sext i32 %tid to i64
@@ -1902,7 +1902,7 @@ define amdgpu_kernel void @v_fneg_mul_legacy_multi_use_mul_legacy_f32(float addr
19021902
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
19031903
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
19041904
; GCN: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
1905-
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
1905+
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
19061906
define amdgpu_kernel void @v_fneg_mul_legacy_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
19071907
%tid = call i32 @llvm.amdgcn.workitem.id.x()
19081908
%tid.ext = sext i32 %tid to i64
@@ -1922,7 +1922,7 @@ define amdgpu_kernel void @v_fneg_mul_legacy_fneg_x_f32(float addrspace(1)* %out
19221922
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
19231923
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
19241924
; GCN: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
1925-
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
1925+
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
19261926
define amdgpu_kernel void @v_fneg_mul_legacy_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
19271927
%tid = call i32 @llvm.amdgcn.workitem.id.x()
19281928
%tid.ext = sext i32 %tid to i64
@@ -1942,7 +1942,7 @@ define amdgpu_kernel void @v_fneg_mul_legacy_x_fneg_f32(float addrspace(1)* %out
19421942
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
19431943
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
19441944
; GCN: v_mul_legacy_f32_e64 [[ADD:v[0-9]+]], [[A]], -[[B]]
1945-
; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
1945+
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ADD]]
19461946
define amdgpu_kernel void @v_fneg_mul_legacy_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
19471947
%tid = call i32 @llvm.amdgcn.workitem.id.x()
19481948
%tid.ext = sext i32 %tid to i64

llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@ define amdgpu_kernel void @fneg_fabs_fmul_f16(half addrspace(1)* %out, half %x,
4040
; unless isFabsFree returns true
4141

4242
; GCN-LABEL: {{^}}fneg_fabs_free_f16:
43-
; GCN: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x8000
43+
; GCN: {{s_or_b32 s[0-9]+, s[0-9]+, 0x8000|s_bitset1_b32 s[0-9]+, 15}}
4444
define amdgpu_kernel void @fneg_fabs_free_f16(half addrspace(1)* %out, i16 %in) {
4545
%bc = bitcast i16 %in to half
4646
%fabs = call half @llvm.fabs.f16(half %bc)
@@ -50,7 +50,7 @@ define amdgpu_kernel void @fneg_fabs_free_f16(half addrspace(1)* %out, i16 %in)
5050
}
5151

5252
; GCN-LABEL: {{^}}fneg_fabs_f16:
53-
; GCN: s_or_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0x8000
53+
; GCN: {{s_or_b32 s[0-9]+, s[0-9]+, 0x8000|s_bitset1_b32 s[0-9]+, 15}}
5454
define amdgpu_kernel void @fneg_fabs_f16(half addrspace(1)* %out, half %in) {
5555
%fabs = call half @llvm.fabs.f16(half %in)
5656
%fsub = fsub half -0.0, %fabs

llvm/test/CodeGen/AMDGPU/global_smrd.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -81,10 +81,10 @@ define amdgpu_kernel void @memdep(i32 addrspace(1)* %in, [8 x i32], i32 addrspac
8181
; uniform load from global array
8282
; CHECK-LABEL: @global_array
8383
; CHECK: s_getpc_b64 [[GET_PC:s\[[0-9]+:[0-9]+\]]]
84-
; CHECK: s_load_dwordx2 [[A_ADDR:s\[[0-9]+:[0-9]+\]]], [[GET_PC]], 0x0
85-
; CHECK: s_load_dwordx2 [[A_ADDR1:s\[[0-9]+:[0-9]+\]]], [[A_ADDR]], 0x0
86-
; CHECK: s_load_dwordx2 [[OUT:s\[[0-9]+:[0-9]+\]]], s[4:5], 0x0
87-
; CHECK: s_load_dword [[SVAL:s[0-9]+]], [[A_ADDR1]], 0x0
84+
; CHECK-DAG: s_load_dwordx2 [[A_ADDR:s\[[0-9]+:[0-9]+\]]], [[GET_PC]], 0x0
85+
; CHECK-DAG: s_load_dwordx2 [[A_ADDR1:s\[[0-9]+:[0-9]+\]]], [[A_ADDR]], 0x0
86+
; CHECK-DAG: s_load_dwordx2 [[OUT:s\[[0-9]+:[0-9]+\]]], s[4:5], 0x0
87+
; CHECK-DAG: s_load_dword [[SVAL:s[0-9]+]], [[A_ADDR1]], 0x0
8888
; CHECK: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[SVAL]]
8989
; CHECK: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[VVAL]]
9090
@A = common local_unnamed_addr addrspace(1) global i32 addrspace(1)* null, align 4

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -166,7 +166,7 @@ define amdgpu_kernel void @gws_barrier_wait_before(i32 %val, i32 addrspace(1)* %
166166
; NOLOOP: s_mov_b32 m0, 0{{$}}
167167
; NOLOOP: ds_gws_barrier v{{[0-9]+}} offset:7 gds
168168
; NOLOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
169-
; NOLOOP-NEXT: load_dword
169+
; NOLOOP: load_dword
170170
define amdgpu_kernel void @gws_barrier_wait_after(i32 %val, i32 addrspace(1)* %ptr) #0 {
171171
call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 7)
172172
%load = load volatile i32, i32 addrspace(1)* %ptr

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -304,7 +304,7 @@ define amdgpu_kernel void @v_icmp_i16_sle(i64 addrspace(1)* %out, i16 %src) {
304304
; SI-NEXT: s_mov_b32 s{{[0-9]+}}, -1
305305
; GCN-NEXT: v_mov_b32_e32
306306
; GCN-NEXT: v_mov_b32_e32
307-
; GCN-NEXT: {{global|flat|buffer}}_store_dwordx2
307+
; GCN: {{global|flat|buffer}}_store_dwordx2
308308
define amdgpu_kernel void @v_icmp_i1_ne0(i64 addrspace(1)* %out, i32 %a, i32 %b) {
309309
%c0 = icmp ugt i32 %a, 1
310310
%c1 = icmp ugt i32 %b, 2

llvm/test/CodeGen/AMDGPU/load-hi16.ll

Lines changed: 12 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -5,11 +5,11 @@
55
; GCN-LABEL: {{^}}load_local_lo_hi_v2i16_multi_use_lo:
66
; GFX900: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
77
; GFX900-NEXT: ds_read_u16 v2, v0
8-
; GFX900-NEXT: v_mov_b32_e32 v3, 0
9-
; GFX900-NEXT: s_waitcnt lgkmcnt(0)
10-
; GFX900-NEXT: v_mov_b32_e32 v1, v2
11-
; GFX900-NEXT: ds_read_u16_d16_hi v1, v0 offset:16
12-
; GFX900-NEXT: ds_write_b16 v3, v2
8+
; GFX900-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
9+
; GFX900-DAG: s_waitcnt lgkmcnt(0)
10+
; GFX900-DAG: v_mov_b32_e32 v1, v2
11+
; GFX900-DAG: ds_read_u16_d16_hi v1, v0 offset:16
12+
; GFX900: ds_write_b16 [[ZERO]], v2
1313
; GFX900-NEXT: s_waitcnt lgkmcnt(1)
1414
; GFX900-NEXT: v_mov_b32_e32 v0, v1
1515
; GFX900-NEXT: s_waitcnt lgkmcnt(0)
@@ -27,14 +27,13 @@ entry:
2727

2828
; GCN-LABEL: {{^}}load_local_lo_hi_v2i16_multi_use_hi:
2929
; GFX900: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
30-
; GFX900-NEXT: ds_read_u16 v1, v0
31-
; GFX900-NEXT: ds_read_u16 v0, v0 offset:16
32-
; GFX900-NEXT: v_mov_b32_e32 v2, 0
33-
; GFX900-NEXT: s_waitcnt lgkmcnt(1)
34-
; GFX900-NEXT: v_and_b32_e32 v1, 0xffff, v1
35-
; GFX900-NEXT: s_waitcnt lgkmcnt(0)
36-
; GFX900-NEXT: ds_write_b16 v2, v0
37-
; GFX900-NEXT: v_lshl_or_b32 v0, v0, 16, v1
30+
; GFX900-DAG: ds_read_u16 [[LO:v[0-9]+]], v0
31+
; GFX900-DAG: ds_read_u16 [[HI:v[0-9]+]], v0 offset:16
32+
; GFX900-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
33+
; GFX900-DAG: v_and_b32_e32 [[AND:v[0-9]+]], 0xffff, [[LO]]
34+
; GFX900-DAG: s_waitcnt lgkmcnt(0)
35+
; GFX900-DAG: ds_write_b16 [[ZERO]], [[HI]]
36+
; GFX900: v_lshl_or_b32 [[HI]], [[HI]], 16, [[AND]]
3837
; GFX900-NEXT: s_waitcnt lgkmcnt(0)
3938
; GFX900-NEXT: s_setpc_b64 s[30:31]
4039
define <2 x i16> @load_local_lo_hi_v2i16_multi_use_hi(i16 addrspace(3)* noalias %in) #0 {

llvm/test/CodeGen/AMDGPU/mad_64_32.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -101,7 +101,7 @@ define i63 @mad_i64_i32_sextops_i32_i63(i32 %arg0, i32 %arg1, i63 %arg2) #0 {
101101
; CI: v_bfe_i32 v[[B1:[0-9]+]], v1, 0, 31
102102
; CI: v_ashr_i64
103103
; CI: v_bfe_i32 v[[B2:[0-9]+]], v0, 0, 31
104-
; CI: v_mad_i64_i32 v[0:1], s{{\[[0-9]+:[0-9]+\]}}, v[[B2]], v[[B1]], v[1:2]
104+
; CI: v_mad_i64_i32 v[0:1], s{{\[[0-9]+:[0-9]+\]}}, v[[B2]], v[[B1]], v{{\[[0-9]+:[0-9]+\]}}
105105
define i63 @mad_i64_i32_sextops_i31_i63(i31 %arg0, i31 %arg1, i63 %arg2) #0 {
106106
%sext0 = sext i31 %arg0 to i63
107107
%sext1 = sext i31 %arg1 to i63

llvm/test/CodeGen/AMDGPU/sad.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -255,10 +255,10 @@ define amdgpu_kernel void @v_sad_u32_i8_pat2(i8 addrspace(1)* %out) {
255255
; GCN-LABEL: {{^}}s_sad_u32_i8_pat2:
256256
; GCN: s_load_dword
257257
; GCN: s_bfe_u32
258-
; GCN: s_sub_i32
259-
; GCN: s_and_b32
260-
; GCN: s_sub_i32
261-
; GCN: s_lshr_b32
258+
; GCN-DAG: s_sub_i32
259+
; GCN-DAG: s_and_b32
260+
; GCN-DAG: s_sub_i32
261+
; GCN-DAG: s_lshr_b32
262262
; GCN: v_add_i32_e32
263263
define amdgpu_kernel void @s_sad_u32_i8_pat2(i8 addrspace(1)* %out, i8 zeroext %a, i8 zeroext %b, i8 zeroext %c) {
264264
%icmp0 = icmp ugt i8 %a, %b

0 commit comments

Comments
 (0)