Skip to content

Commit 2baeb40

Browse files
Update waterfall tests due to upstream change
Upstream change: llvm#133229
1 parent: ab46c8e; commit: 2baeb40

File tree

2 files changed

+41
-32
lines changed

2 files changed

+41
-32
lines changed

llvm/test/CodeGen/AMDGPU/amdgcn.waterfall.atomic.opt.ll

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,8 @@ define dllexport amdgpu_cs void @atomic_add_in_wf(ptr addrspace(1) %arg, i32 inr
3434
; GFX11: ; %bb.0: ; %bb
3535
; GFX11-NEXT: s_ashr_i32 s3, s0, 31
3636
; GFX11-NEXT: v_add_co_u32 v0, vcc, v0, s0
37-
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc, s3, v1, vcc
37+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
38+
; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, s3, v1, vcc
3839
; GFX11-NEXT: s_mov_b64 s[4:5], exec
3940
; GFX11-NEXT: global_load_b32 v0, v[0:1], off offset:4
4041
; GFX11-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1
@@ -123,7 +124,8 @@ define dllexport amdgpu_cs void @atomic_add_before(ptr addrspace(1) %arg, i32 in
123124
; GFX11: ; %bb.0: ; %bb
124125
; GFX11-NEXT: s_ashr_i32 s6, s0, 31
125126
; GFX11-NEXT: v_add_co_u32 v0, vcc, v0, s0
126-
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc, s6, v1, vcc
127+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
128+
; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, s6, v1, vcc
127129
; GFX11-NEXT: s_mov_b64 s[4:5], exec
128130
; GFX11-NEXT: s_mov_b64 s[6:7], exec
129131
; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off
@@ -235,7 +237,8 @@ define dllexport amdgpu_cs void @atomic_add_after(ptr addrspace(1) %arg, i32 inr
235237
; GFX11: ; %bb.0: ; %bb
236238
; GFX11-NEXT: s_ashr_i32 s4, s0, 31
237239
; GFX11-NEXT: v_add_co_u32 v0, vcc, v0, s0
238-
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc, s4, v1, vcc
240+
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
241+
; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, s4, v1, vcc
239242
; GFX11-NEXT: s_mov_b64 s[4:5], exec
240243
; GFX11-NEXT: global_load_b32 v1, v[0:1], off offset:4
241244
; GFX11-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1
@@ -344,7 +347,7 @@ define dllexport amdgpu_cs void @atomic_add_in_wf_partial(ptr addrspace(1) %arg,
344347
; GFX11-NEXT: s_addc_u32 s5, s2, s5
345348
; GFX11-NEXT: v_add_co_u32 v0, vcc, v0, s0
346349
; GFX11-NEXT: s_load_b128 s[4:7], s[4:5], 0x0
347-
; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc, s8, v1, vcc
350+
; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, s8, v1, vcc
348351
; GFX11-NEXT: v_mov_b32_e32 v2, 1
349352
; GFX11-NEXT: v_mov_b32_e32 v3, 0
350353
; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.waterfall.ll

Lines changed: 34 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -116,8 +116,8 @@ define amdgpu_ps void @test_waterfall_readlane(i32 addrspace(1)* inreg %out, <2
116116
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
117117
; GFX1150-NEXT: v_lshlrev_b64 v[0:1], 3, v[0:1]
118118
; GFX1150-NEXT: v_add_co_u32 v0, vcc, s2, v0
119-
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2)
120-
; GFX1150-NEXT: v_add_co_ci_u32_e32 v1, vcc, s3, v1, vcc
119+
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1)
120+
; GFX1150-NEXT: v_add_co_ci_u32_e64 v1, null, s3, v1, vcc
121121
; GFX1150-NEXT: s_mov_b64 s[2:3], exec
122122
; GFX1150-NEXT: global_load_b64 v[0:1], v[0:1], off
123123
; GFX1150-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1
@@ -486,8 +486,8 @@ define amdgpu_ps <4 x float> @test_waterfall_non_uniform_img_single_read(<8 x i3
486486
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
487487
; GFX1150-NEXT: v_lshlrev_b64 v[0:1], 5, v[4:5]
488488
; GFX1150-NEXT: v_add_co_u32 v0, vcc, s0, v0
489-
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2)
490-
; GFX1150-NEXT: v_add_co_ci_u32_e32 v1, vcc, s1, v1, vcc
489+
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1)
490+
; GFX1150-NEXT: v_add_co_ci_u32_e64 v1, null, s1, v1, vcc
491491
; GFX1150-NEXT: s_mov_b64 s[0:1], exec
492492
; GFX1150-NEXT: s_clause 0x1
493493
; GFX1150-NEXT: global_load_b128 v[11:14], v[0:1], off offset:16
@@ -1283,8 +1283,8 @@ define amdgpu_ps void @test_waterfall_non_uniform_img_single_store(<8 x i32> add
12831283
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
12841284
; GFX1150-NEXT: v_lshlrev_b64 v[7:8], 5, v[0:1]
12851285
; GFX1150-NEXT: v_add_co_u32 v7, vcc, s0, v7
1286-
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2)
1287-
; GFX1150-NEXT: v_add_co_ci_u32_e32 v8, vcc, s1, v8, vcc
1286+
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1)
1287+
; GFX1150-NEXT: v_add_co_ci_u32_e64 v8, null, s1, v8, vcc
12881288
; GFX1150-NEXT: s_mov_b64 s[0:1], exec
12891289
; GFX1150-NEXT: s_clause 0x1
12901290
; GFX1150-NEXT: global_load_b128 v[11:14], v[7:8], off offset:16
@@ -2189,10 +2189,11 @@ define amdgpu_ps <4 x float> @test_waterfall_multi_begin(<8 x i32> addrspace(4)*
21892189
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
21902190
; GFX1150-NEXT: v_lshlrev_b64 v[0:1], 5, v[7:8]
21912191
; GFX1150-NEXT: v_add_co_u32 v0, vcc, s0, v0
2192-
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
2193-
; GFX1150-NEXT: v_add_co_ci_u32_e32 v1, vcc, s1, v1, vcc
2192+
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_4)
2193+
; GFX1150-NEXT: v_add_co_ci_u32_e64 v1, null, s1, v1, vcc
21942194
; GFX1150-NEXT: v_add_co_u32 v2, vcc, s2, v2
2195-
; GFX1150-NEXT: v_add_co_ci_u32_e32 v3, vcc, s3, v3, vcc
2195+
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1)
2196+
; GFX1150-NEXT: v_add_co_ci_u32_e64 v3, null, s3, v3, vcc
21962197
; GFX1150-NEXT: s_clause 0x1
21972198
; GFX1150-NEXT: global_load_b128 v[11:14], v[0:1], off offset:16
21982199
; GFX1150-NEXT: global_load_b128 v[7:10], v[0:1], off
@@ -2549,10 +2550,11 @@ define amdgpu_ps <4 x float> @test_waterfall_full_idx_multi_begin(<8 x i32> addr
25492550
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
25502551
; GFX1150-NEXT: v_lshlrev_b64 v[3:4], 5, v[3:4]
25512552
; GFX1150-NEXT: v_add_co_u32 v2, vcc, s0, v3
2552-
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
2553-
; GFX1150-NEXT: v_add_co_ci_u32_e32 v3, vcc, s1, v4, vcc
2553+
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_4)
2554+
; GFX1150-NEXT: v_add_co_ci_u32_e64 v3, null, s1, v4, vcc
25542555
; GFX1150-NEXT: v_add_co_u32 v0, vcc, s2, v0
2555-
; GFX1150-NEXT: v_add_co_ci_u32_e32 v1, vcc, s3, v1, vcc
2556+
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1)
2557+
; GFX1150-NEXT: v_add_co_ci_u32_e64 v1, null, s3, v1, vcc
25562558
; GFX1150-NEXT: s_clause 0x1
25572559
; GFX1150-NEXT: global_load_b128 v[8:11], v[2:3], off offset:16
25582560
; GFX1150-NEXT: global_load_b128 v[4:7], v[2:3], off
@@ -2803,9 +2805,9 @@ define amdgpu_ps <4 x float> @test_waterfall_multi_begin_uniform_idx_1(<8 x i32>
28032805
; GFX1150-NEXT: s_ashr_i32 s5, s5, 31
28042806
; GFX1150-NEXT: s_mov_b64 s[10:11], exec
28052807
; GFX1150-NEXT: v_lshlrev_b64 v[0:1], 4, v[1:2]
2806-
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
2808+
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
28072809
; GFX1150-NEXT: v_add_co_u32 v0, vcc, s2, v0
2808-
; GFX1150-NEXT: v_add_co_ci_u32_e32 v1, vcc, s3, v1, vcc
2810+
; GFX1150-NEXT: v_add_co_ci_u32_e64 v1, null, s3, v1, vcc
28092811
; GFX1150-NEXT: s_lshl_b64 s[2:3], s[4:5], 5
28102812
; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
28112813
; GFX1150-NEXT: s_add_u32 s0, s0, s2
@@ -3053,8 +3055,8 @@ define amdgpu_ps <4 x float> @test_waterfall_multi_begin_uniform_idx_2(<8 x i32>
30533055
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
30543056
; GFX1150-NEXT: v_lshlrev_b64 v[0:1], 5, v[1:2]
30553057
; GFX1150-NEXT: v_add_co_u32 v0, vcc, s0, v0
3056-
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2)
3057-
; GFX1150-NEXT: v_add_co_ci_u32_e32 v1, vcc, s1, v1, vcc
3058+
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1)
3059+
; GFX1150-NEXT: v_add_co_ci_u32_e64 v1, null, s1, v1, vcc
30583060
; GFX1150-NEXT: s_lshl_b64 s[0:1], s[4:5], 4
30593061
; GFX1150-NEXT: s_mov_b64 s[4:5], exec
30603062
; GFX1150-NEXT: s_add_u32 s0, s2, s0
@@ -3332,10 +3334,11 @@ define amdgpu_ps <4 x float> @test_waterfall_multi_begin_uniform_idx_3(<8 x i32>
33323334
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
33333335
; GFX1150-NEXT: v_lshlrev_b64 v[0:1], 5, v[5:6]
33343336
; GFX1150-NEXT: v_add_co_u32 v0, vcc, s0, v0
3335-
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
3336-
; GFX1150-NEXT: v_add_co_ci_u32_e32 v1, vcc, s1, v1, vcc
3337+
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_4)
3338+
; GFX1150-NEXT: v_add_co_ci_u32_e64 v1, null, s1, v1, vcc
33373339
; GFX1150-NEXT: v_add_co_u32 v2, vcc, s2, v2
3338-
; GFX1150-NEXT: v_add_co_ci_u32_e32 v3, vcc, s3, v3, vcc
3340+
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1)
3341+
; GFX1150-NEXT: v_add_co_ci_u32_e64 v3, null, s3, v3, vcc
33393342
; GFX1150-NEXT: s_clause 0x1
33403343
; GFX1150-NEXT: global_load_b128 v[9:12], v[0:1], off offset:16
33413344
; GFX1150-NEXT: global_load_b128 v[5:8], v[0:1], off
@@ -3615,10 +3618,11 @@ define amdgpu_ps <4 x float> @test_waterfall_multi_begin_uniform_idx_4(<8 x i32>
36153618
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
36163619
; GFX1150-NEXT: v_lshlrev_b64 v[0:1], 5, v[5:6]
36173620
; GFX1150-NEXT: v_add_co_u32 v0, vcc, s0, v0
3618-
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
3619-
; GFX1150-NEXT: v_add_co_ci_u32_e32 v1, vcc, s1, v1, vcc
3621+
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_4)
3622+
; GFX1150-NEXT: v_add_co_ci_u32_e64 v1, null, s1, v1, vcc
36203623
; GFX1150-NEXT: v_add_co_u32 v2, vcc, s2, v2
3621-
; GFX1150-NEXT: v_add_co_ci_u32_e32 v3, vcc, s3, v3, vcc
3624+
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1)
3625+
; GFX1150-NEXT: v_add_co_ci_u32_e64 v3, null, s3, v3, vcc
36223626
; GFX1150-NEXT: s_clause 0x1
36233627
; GFX1150-NEXT: global_load_b128 v[9:12], v[0:1], off offset:16
36243628
; GFX1150-NEXT: global_load_b128 v[5:8], v[0:1], off
@@ -3962,10 +3966,11 @@ define amdgpu_ps {<4 x float>,<4 x float>} @test_waterfall_multi_begin_uniform_i
39623966
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
39633967
; GFX1150-NEXT: v_lshlrev_b64 v[0:1], 5, v[7:8]
39643968
; GFX1150-NEXT: v_add_co_u32 v0, vcc, s0, v0
3965-
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
3966-
; GFX1150-NEXT: v_add_co_ci_u32_e32 v1, vcc, s1, v1, vcc
3969+
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_4)
3970+
; GFX1150-NEXT: v_add_co_ci_u32_e64 v1, null, s1, v1, vcc
39673971
; GFX1150-NEXT: v_add_co_u32 v2, vcc, s2, v2
3968-
; GFX1150-NEXT: v_add_co_ci_u32_e32 v3, vcc, s3, v3, vcc
3972+
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1)
3973+
; GFX1150-NEXT: v_add_co_ci_u32_e64 v3, null, s3, v3, vcc
39693974
; GFX1150-NEXT: s_clause 0x1
39703975
; GFX1150-NEXT: global_load_b128 v[11:14], v[0:1], off offset:16
39713976
; GFX1150-NEXT: global_load_b128 v[15:18], v[0:1], off
@@ -4604,10 +4609,11 @@ define amdgpu_ps {<4 x float>,<4 x float>} @test_waterfall_multi_end_1loop(
46044609
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
46054610
; GFX1150-NEXT: v_lshlrev_b64 v[0:1], 5, v[5:6]
46064611
; GFX1150-NEXT: v_add_co_u32 v0, vcc, s0, v0
4607-
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4)
4608-
; GFX1150-NEXT: v_add_co_ci_u32_e32 v1, vcc, s1, v1, vcc
4612+
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_4)
4613+
; GFX1150-NEXT: v_add_co_ci_u32_e64 v1, null, s1, v1, vcc
46094614
; GFX1150-NEXT: v_add_co_u32 v2, vcc, s2, v2
4610-
; GFX1150-NEXT: v_add_co_ci_u32_e32 v3, vcc, s3, v3, vcc
4615+
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1)
4616+
; GFX1150-NEXT: v_add_co_ci_u32_e64 v3, null, s3, v3, vcc
46114617
; GFX1150-NEXT: s_clause 0x1
46124618
; GFX1150-NEXT: global_load_b128 v[14:17], v[0:1], off offset:16
46134619
; GFX1150-NEXT: global_load_b128 v[10:13], v[0:1], off

0 commit comments

Comments
 (0)