@@ -147,14 +147,13 @@ define weak_odr amdgpu_kernel void @dpp_test1(ptr %arg) local_unnamed_addr {
147147; GFX8-OPT-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24 
148148; GFX8-OPT-NEXT:    v_mov_b32_e32 v2, 0 
149149; GFX8-OPT-NEXT:    s_waitcnt lgkmcnt(0) 
150- ; GFX8-OPT-NEXT:    s_barrier 
151- ; GFX8-OPT-NEXT:    v_add_u32_e32 v1, vcc, v1, v1 
152- ; GFX8-OPT-NEXT:    s_nop 1 
153- ; GFX8-OPT-NEXT:    v_mov_b32_dpp v2, v1 quad_perm:[1,0,3,2] row_mask:0xf bank_mask:0xf 
154- ; GFX8-OPT-NEXT:    v_add_u32_e32 v2, vcc, v2, v1 
155- ; GFX8-OPT-NEXT:    v_mov_b32_e32 v1, s1 
150+ ; GFX8-OPT-NEXT:    v_add_u32_e32 v4, vcc, v1, v1 
151+ ; GFX8-OPT-NEXT:    v_mov_b32_e32 v3, s1 
156152; GFX8-OPT-NEXT:    v_add_u32_e32 v0, vcc, s0, v0 
157- ; GFX8-OPT-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc 
153+ ; GFX8-OPT-NEXT:    v_mov_b32_dpp v2, v4 quad_perm:[1,0,3,2] row_mask:0xf bank_mask:0xf 
154+ ; GFX8-OPT-NEXT:    v_addc_u32_e32 v1, vcc, 0, v3, vcc 
155+ ; GFX8-OPT-NEXT:    v_add_u32_e32 v2, vcc, v2, v4 
156+ ; GFX8-OPT-NEXT:    s_barrier 
158157; GFX8-OPT-NEXT:    flat_store_dword v[0:1], v2 
159158; GFX8-OPT-NEXT:    s_endpgm 
160159; 
@@ -194,14 +193,14 @@ define weak_odr amdgpu_kernel void @dpp_test1(ptr %arg) local_unnamed_addr {
194193; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24 
195194; GFX10-NEXT:    v_mov_b32_e32 v2, 0 
196195; GFX10-NEXT:    ds_read_b32 v1, v0 
197- ; GFX10-NEXT:    s_barrier 
198- ; GFX10-NEXT:    buffer_gl0_inv 
199196; GFX10-NEXT:    s_waitcnt lgkmcnt(0) 
200197; GFX10-NEXT:    v_add_co_u32 v0, s0, s0, v0 
201- ; GFX10-NEXT:    v_add_nc_u32_e32 v1, v1, v1 
202- ; GFX10-NEXT:    v_mov_b32_dpp v2, v1 quad_perm:[1,0,3,2] row_mask:0xf bank_mask:0xf 
203- ; GFX10-NEXT:    v_add_nc_u32_e32 v2, v2, v1 
198+ ; GFX10-NEXT:    v_add_nc_u32_e32 v3, v1, v1 
204199; GFX10-NEXT:    v_add_co_ci_u32_e64 v1, s0, s1, 0, s0 
200+ ; GFX10-NEXT:    v_mov_b32_dpp v2, v3 quad_perm:[1,0,3,2] row_mask:0xf bank_mask:0xf 
201+ ; GFX10-NEXT:    v_add_nc_u32_e32 v2, v2, v3 
202+ ; GFX10-NEXT:    s_barrier 
203+ ; GFX10-NEXT:    buffer_gl0_inv 
205204; GFX10-NEXT:    flat_store_dword v[0:1], v2 
206205; GFX10-NEXT:    s_endpgm 
207206; 
@@ -213,15 +212,15 @@ define weak_odr amdgpu_kernel void @dpp_test1(ptr %arg) local_unnamed_addr {
213212; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) 
214213; GFX11-NEXT:    v_and_b32_e32 v0, 0xffc, v0 
215214; GFX11-NEXT:    ds_load_b32 v1, v0 
216- ; GFX11-NEXT:    s_barrier 
217- ; GFX11-NEXT:    buffer_gl0_inv 
218215; GFX11-NEXT:    s_waitcnt lgkmcnt(0) 
219216; GFX11-NEXT:    v_add_co_u32 v0, s0, s0, v0 
220- ; GFX11-NEXT:    v_add_nc_u32_e32 v1, v1, v1 
221- ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 
222- ; GFX11-NEXT:    v_mov_b32_dpp v2, v1 quad_perm:[1,0,3,2] row_mask:0xf bank_mask:0xf 
223- ; GFX11-NEXT:    v_add_nc_u32_e32 v2, v2, v1 
217+ ; GFX11-NEXT:    v_add_nc_u32_e32 v3, v1, v1 
224218; GFX11-NEXT:    v_add_co_ci_u32_e64 v1, null, s1, 0, s0 
219+ ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 
220+ ; GFX11-NEXT:    v_mov_b32_dpp v2, v3 quad_perm:[1,0,3,2] row_mask:0xf bank_mask:0xf 
221+ ; GFX11-NEXT:    v_add_nc_u32_e32 v2, v2, v3 
222+ ; GFX11-NEXT:    s_barrier 
223+ ; GFX11-NEXT:    buffer_gl0_inv 
225224; GFX11-NEXT:    flat_store_b32 v[0:1], v2 
226225; GFX11-NEXT:    s_endpgm 
227226bb:
0 commit comments