@@ -116,8 +116,8 @@ define amdgpu_ps void @test_waterfall_readlane(i32 addrspace(1)* inreg %out, <2
116
116
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
117
117
; GFX1150-NEXT: v_lshlrev_b64 v[0:1], 3, v[0:1]
118
118
; GFX1150-NEXT: v_add_co_u32 v0, vcc, s2, v0
119
- ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2 )
120
- ; GFX1150-NEXT: v_add_co_ci_u32_e32 v1, vcc , s3, v1, vcc
119
+ ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1 )
120
+ ; GFX1150-NEXT: v_add_co_ci_u32_e64 v1, null , s3, v1, vcc
121
121
; GFX1150-NEXT: s_mov_b64 s[2:3], exec
122
122
; GFX1150-NEXT: global_load_b64 v[0:1], v[0:1], off
123
123
; GFX1150-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1
@@ -486,8 +486,8 @@ define amdgpu_ps <4 x float> @test_waterfall_non_uniform_img_single_read(<8 x i3
486
486
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
487
487
; GFX1150-NEXT: v_lshlrev_b64 v[0:1], 5, v[4:5]
488
488
; GFX1150-NEXT: v_add_co_u32 v0, vcc, s0, v0
489
- ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2 )
490
- ; GFX1150-NEXT: v_add_co_ci_u32_e32 v1, vcc , s1, v1, vcc
489
+ ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1 )
490
+ ; GFX1150-NEXT: v_add_co_ci_u32_e64 v1, null , s1, v1, vcc
491
491
; GFX1150-NEXT: s_mov_b64 s[0:1], exec
492
492
; GFX1150-NEXT: s_clause 0x1
493
493
; GFX1150-NEXT: global_load_b128 v[11:14], v[0:1], off offset:16
@@ -1283,8 +1283,8 @@ define amdgpu_ps void @test_waterfall_non_uniform_img_single_store(<8 x i32> add
1283
1283
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1284
1284
; GFX1150-NEXT: v_lshlrev_b64 v[7:8], 5, v[0:1]
1285
1285
; GFX1150-NEXT: v_add_co_u32 v7, vcc, s0, v7
1286
- ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2 )
1287
- ; GFX1150-NEXT: v_add_co_ci_u32_e32 v8, vcc , s1, v8, vcc
1286
+ ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1 )
1287
+ ; GFX1150-NEXT: v_add_co_ci_u32_e64 v8, null , s1, v8, vcc
1288
1288
; GFX1150-NEXT: s_mov_b64 s[0:1], exec
1289
1289
; GFX1150-NEXT: s_clause 0x1
1290
1290
; GFX1150-NEXT: global_load_b128 v[11:14], v[7:8], off offset:16
@@ -2189,10 +2189,11 @@ define amdgpu_ps <4 x float> @test_waterfall_multi_begin(<8 x i32> addrspace(4)*
2189
2189
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
2190
2190
; GFX1150-NEXT: v_lshlrev_b64 v[0:1], 5, v[7:8]
2191
2191
; GFX1150-NEXT: v_add_co_u32 v0, vcc, s0, v0
2192
- ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2 ) | instskip(NEXT) | instid1(VALU_DEP_4)
2193
- ; GFX1150-NEXT: v_add_co_ci_u32_e32 v1, vcc , s1, v1, vcc
2192
+ ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1 ) | instskip(NEXT) | instid1(VALU_DEP_4)
2193
+ ; GFX1150-NEXT: v_add_co_ci_u32_e64 v1, null , s1, v1, vcc
2194
2194
; GFX1150-NEXT: v_add_co_u32 v2, vcc, s2, v2
2195
- ; GFX1150-NEXT: v_add_co_ci_u32_e32 v3, vcc, s3, v3, vcc
2195
+ ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1)
2196
+ ; GFX1150-NEXT: v_add_co_ci_u32_e64 v3, null, s3, v3, vcc
2196
2197
; GFX1150-NEXT: s_clause 0x1
2197
2198
; GFX1150-NEXT: global_load_b128 v[11:14], v[0:1], off offset:16
2198
2199
; GFX1150-NEXT: global_load_b128 v[7:10], v[0:1], off
@@ -2549,10 +2550,11 @@ define amdgpu_ps <4 x float> @test_waterfall_full_idx_multi_begin(<8 x i32> addr
2549
2550
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
2550
2551
; GFX1150-NEXT: v_lshlrev_b64 v[3:4], 5, v[3:4]
2551
2552
; GFX1150-NEXT: v_add_co_u32 v2, vcc, s0, v3
2552
- ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2 ) | instskip(NEXT) | instid1(VALU_DEP_4)
2553
- ; GFX1150-NEXT: v_add_co_ci_u32_e32 v3, vcc , s1, v4, vcc
2553
+ ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1 ) | instskip(NEXT) | instid1(VALU_DEP_4)
2554
+ ; GFX1150-NEXT: v_add_co_ci_u32_e64 v3, null , s1, v4, vcc
2554
2555
; GFX1150-NEXT: v_add_co_u32 v0, vcc, s2, v0
2555
- ; GFX1150-NEXT: v_add_co_ci_u32_e32 v1, vcc, s3, v1, vcc
2556
+ ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1)
2557
+ ; GFX1150-NEXT: v_add_co_ci_u32_e64 v1, null, s3, v1, vcc
2556
2558
; GFX1150-NEXT: s_clause 0x1
2557
2559
; GFX1150-NEXT: global_load_b128 v[8:11], v[2:3], off offset:16
2558
2560
; GFX1150-NEXT: global_load_b128 v[4:7], v[2:3], off
@@ -2803,9 +2805,9 @@ define amdgpu_ps <4 x float> @test_waterfall_multi_begin_uniform_idx_1(<8 x i32>
2803
2805
; GFX1150-NEXT: s_ashr_i32 s5, s5, 31
2804
2806
; GFX1150-NEXT: s_mov_b64 s[10:11], exec
2805
2807
; GFX1150-NEXT: v_lshlrev_b64 v[0:1], 4, v[1:2]
2806
- ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2 )
2808
+ ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1 )
2807
2809
; GFX1150-NEXT: v_add_co_u32 v0, vcc, s2, v0
2808
- ; GFX1150-NEXT: v_add_co_ci_u32_e32 v1, vcc , s3, v1, vcc
2810
+ ; GFX1150-NEXT: v_add_co_ci_u32_e64 v1, null , s3, v1, vcc
2809
2811
; GFX1150-NEXT: s_lshl_b64 s[2:3], s[4:5], 5
2810
2812
; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
2811
2813
; GFX1150-NEXT: s_add_u32 s0, s0, s2
@@ -3053,8 +3055,8 @@ define amdgpu_ps <4 x float> @test_waterfall_multi_begin_uniform_idx_2(<8 x i32>
3053
3055
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
3054
3056
; GFX1150-NEXT: v_lshlrev_b64 v[0:1], 5, v[1:2]
3055
3057
; GFX1150-NEXT: v_add_co_u32 v0, vcc, s0, v0
3056
- ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2 )
3057
- ; GFX1150-NEXT: v_add_co_ci_u32_e32 v1, vcc , s1, v1, vcc
3058
+ ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1 )
3059
+ ; GFX1150-NEXT: v_add_co_ci_u32_e64 v1, null , s1, v1, vcc
3058
3060
; GFX1150-NEXT: s_lshl_b64 s[0:1], s[4:5], 4
3059
3061
; GFX1150-NEXT: s_mov_b64 s[4:5], exec
3060
3062
; GFX1150-NEXT: s_add_u32 s0, s2, s0
@@ -3332,10 +3334,11 @@ define amdgpu_ps <4 x float> @test_waterfall_multi_begin_uniform_idx_3(<8 x i32>
3332
3334
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
3333
3335
; GFX1150-NEXT: v_lshlrev_b64 v[0:1], 5, v[5:6]
3334
3336
; GFX1150-NEXT: v_add_co_u32 v0, vcc, s0, v0
3335
- ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2 ) | instskip(NEXT) | instid1(VALU_DEP_4)
3336
- ; GFX1150-NEXT: v_add_co_ci_u32_e32 v1, vcc , s1, v1, vcc
3337
+ ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1 ) | instskip(NEXT) | instid1(VALU_DEP_4)
3338
+ ; GFX1150-NEXT: v_add_co_ci_u32_e64 v1, null , s1, v1, vcc
3337
3339
; GFX1150-NEXT: v_add_co_u32 v2, vcc, s2, v2
3338
- ; GFX1150-NEXT: v_add_co_ci_u32_e32 v3, vcc, s3, v3, vcc
3340
+ ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1)
3341
+ ; GFX1150-NEXT: v_add_co_ci_u32_e64 v3, null, s3, v3, vcc
3339
3342
; GFX1150-NEXT: s_clause 0x1
3340
3343
; GFX1150-NEXT: global_load_b128 v[9:12], v[0:1], off offset:16
3341
3344
; GFX1150-NEXT: global_load_b128 v[5:8], v[0:1], off
@@ -3615,10 +3618,11 @@ define amdgpu_ps <4 x float> @test_waterfall_multi_begin_uniform_idx_4(<8 x i32>
3615
3618
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
3616
3619
; GFX1150-NEXT: v_lshlrev_b64 v[0:1], 5, v[5:6]
3617
3620
; GFX1150-NEXT: v_add_co_u32 v0, vcc, s0, v0
3618
- ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2 ) | instskip(NEXT) | instid1(VALU_DEP_4)
3619
- ; GFX1150-NEXT: v_add_co_ci_u32_e32 v1, vcc , s1, v1, vcc
3621
+ ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1 ) | instskip(NEXT) | instid1(VALU_DEP_4)
3622
+ ; GFX1150-NEXT: v_add_co_ci_u32_e64 v1, null , s1, v1, vcc
3620
3623
; GFX1150-NEXT: v_add_co_u32 v2, vcc, s2, v2
3621
- ; GFX1150-NEXT: v_add_co_ci_u32_e32 v3, vcc, s3, v3, vcc
3624
+ ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1)
3625
+ ; GFX1150-NEXT: v_add_co_ci_u32_e64 v3, null, s3, v3, vcc
3622
3626
; GFX1150-NEXT: s_clause 0x1
3623
3627
; GFX1150-NEXT: global_load_b128 v[9:12], v[0:1], off offset:16
3624
3628
; GFX1150-NEXT: global_load_b128 v[5:8], v[0:1], off
@@ -3962,10 +3966,11 @@ define amdgpu_ps {<4 x float>,<4 x float>} @test_waterfall_multi_begin_uniform_i
3962
3966
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
3963
3967
; GFX1150-NEXT: v_lshlrev_b64 v[0:1], 5, v[7:8]
3964
3968
; GFX1150-NEXT: v_add_co_u32 v0, vcc, s0, v0
3965
- ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2 ) | instskip(NEXT) | instid1(VALU_DEP_4)
3966
- ; GFX1150-NEXT: v_add_co_ci_u32_e32 v1, vcc , s1, v1, vcc
3969
+ ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1 ) | instskip(NEXT) | instid1(VALU_DEP_4)
3970
+ ; GFX1150-NEXT: v_add_co_ci_u32_e64 v1, null , s1, v1, vcc
3967
3971
; GFX1150-NEXT: v_add_co_u32 v2, vcc, s2, v2
3968
- ; GFX1150-NEXT: v_add_co_ci_u32_e32 v3, vcc, s3, v3, vcc
3972
+ ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1)
3973
+ ; GFX1150-NEXT: v_add_co_ci_u32_e64 v3, null, s3, v3, vcc
3969
3974
; GFX1150-NEXT: s_clause 0x1
3970
3975
; GFX1150-NEXT: global_load_b128 v[11:14], v[0:1], off offset:16
3971
3976
; GFX1150-NEXT: global_load_b128 v[15:18], v[0:1], off
@@ -4604,10 +4609,11 @@ define amdgpu_ps {<4 x float>,<4 x float>} @test_waterfall_multi_end_1loop(
4604
4609
; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
4605
4610
; GFX1150-NEXT: v_lshlrev_b64 v[0:1], 5, v[5:6]
4606
4611
; GFX1150-NEXT: v_add_co_u32 v0, vcc, s0, v0
4607
- ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_2 ) | instskip(NEXT) | instid1(VALU_DEP_4)
4608
- ; GFX1150-NEXT: v_add_co_ci_u32_e32 v1, vcc , s1, v1, vcc
4612
+ ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1 ) | instskip(NEXT) | instid1(VALU_DEP_4)
4613
+ ; GFX1150-NEXT: v_add_co_ci_u32_e64 v1, null , s1, v1, vcc
4609
4614
; GFX1150-NEXT: v_add_co_u32 v2, vcc, s2, v2
4610
- ; GFX1150-NEXT: v_add_co_ci_u32_e32 v3, vcc, s3, v3, vcc
4615
+ ; GFX1150-NEXT: s_delay_alu instid0(VALU_DEP_1)
4616
+ ; GFX1150-NEXT: v_add_co_ci_u32_e64 v3, null, s3, v3, vcc
4611
4617
; GFX1150-NEXT: s_clause 0x1
4612
4618
; GFX1150-NEXT: global_load_b128 v[14:17], v[0:1], off offset:16
4613
4619
; GFX1150-NEXT: global_load_b128 v[10:13], v[0:1], off
0 commit comments