@@ -52,11 +52,11 @@ define amdgpu_kernel void @update_dppi64_test(ptr addrspace(1) %arg, i64 %in1, i
5252; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
5353; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
5454; GFX8-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
55- ; GFX8-NEXT: v_mov_b32_e32 v5, s3
5655; GFX8-NEXT: v_mov_b32_e32 v4, s2
56+ ; GFX8-NEXT: v_mov_b32_e32 v5, s3
5757; GFX8-NEXT: s_waitcnt vmcnt(0)
58- ; GFX8-NEXT: v_mov_b32_dpp v5, v3 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
5958; GFX8-NEXT: v_mov_b32_dpp v4, v2 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
59+ ; GFX8-NEXT: v_mov_b32_dpp v5, v3 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
6060; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[4:5]
6161; GFX8-NEXT: s_endpgm
6262;
@@ -77,10 +77,10 @@ define amdgpu_kernel void @update_dppi64_test(ptr addrspace(1) %arg, i64 %in1, i
7777; GFX11-LABEL: update_dppi64_test:
7878; GFX11: ; %bb.0:
7979; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
80- ; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
8180; GFX11-NEXT: s_waitcnt lgkmcnt(0)
82- ; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
81+ ; GFX11-NEXT: v_dual_mov_b32 v3, s3 :: v_dual_and_b32 v0, 0x3ff, v0
8382; GFX11-NEXT: v_lshlrev_b32_e32 v4, 3, v0
83+ ; GFX11-NEXT: v_mov_b32_e32 v2, s2
8484; GFX11-NEXT: global_load_b64 v[0:1], v4, s[0:1]
8585; GFX11-NEXT: s_waitcnt vmcnt(0)
8686; GFX11-NEXT: v_mov_b32_dpp v2, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
@@ -106,11 +106,11 @@ define amdgpu_kernel void @update_dppf64_test(ptr addrspace(1) %arg, double %in1
106106; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
107107; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
108108; GFX8-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
109- ; GFX8-NEXT: v_mov_b32_e32 v5, s3
110109; GFX8-NEXT: v_mov_b32_e32 v4, s2
110+ ; GFX8-NEXT: v_mov_b32_e32 v5, s3
111111; GFX8-NEXT: s_waitcnt vmcnt(0)
112- ; GFX8-NEXT: v_mov_b32_dpp v5, v3 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
113112; GFX8-NEXT: v_mov_b32_dpp v4, v2 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
113+ ; GFX8-NEXT: v_mov_b32_dpp v5, v3 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
114114; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[4:5]
115115; GFX8-NEXT: s_endpgm
116116;
@@ -131,10 +131,10 @@ define amdgpu_kernel void @update_dppf64_test(ptr addrspace(1) %arg, double %in1
131131; GFX11-LABEL: update_dppf64_test:
132132; GFX11: ; %bb.0:
133133; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
134- ; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
135134; GFX11-NEXT: s_waitcnt lgkmcnt(0)
136- ; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
135+ ; GFX11-NEXT: v_dual_mov_b32 v3, s3 :: v_dual_and_b32 v0, 0x3ff, v0
137136; GFX11-NEXT: v_lshlrev_b32_e32 v4, 3, v0
137+ ; GFX11-NEXT: v_mov_b32_e32 v2, s2
138138; GFX11-NEXT: global_load_b64 v[0:1], v4, s[0:1]
139139; GFX11-NEXT: s_waitcnt vmcnt(0)
140140; GFX11-NEXT: v_mov_b32_dpp v2, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
@@ -160,11 +160,11 @@ define amdgpu_kernel void @update_dppv2i32_test(ptr addrspace(1) %arg, <2 x i32>
160160; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
161161; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
162162; GFX8-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
163- ; GFX8-NEXT: v_mov_b32_e32 v5, s3
164163; GFX8-NEXT: v_mov_b32_e32 v4, s2
164+ ; GFX8-NEXT: v_mov_b32_e32 v5, s3
165165; GFX8-NEXT: s_waitcnt vmcnt(0)
166- ; GFX8-NEXT: v_mov_b32_dpp v5, v3 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
167166; GFX8-NEXT: v_mov_b32_dpp v4, v2 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
167+ ; GFX8-NEXT: v_mov_b32_dpp v5, v3 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
168168; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[4:5]
169169; GFX8-NEXT: s_endpgm
170170;
@@ -185,10 +185,10 @@ define amdgpu_kernel void @update_dppv2i32_test(ptr addrspace(1) %arg, <2 x i32>
185185; GFX11-LABEL: update_dppv2i32_test:
186186; GFX11: ; %bb.0:
187187; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
188- ; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
189188; GFX11-NEXT: s_waitcnt lgkmcnt(0)
190- ; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
189+ ; GFX11-NEXT: v_dual_mov_b32 v3, s3 :: v_dual_and_b32 v0, 0x3ff, v0
191190; GFX11-NEXT: v_lshlrev_b32_e32 v4, 3, v0
191+ ; GFX11-NEXT: v_mov_b32_e32 v2, s2
192192; GFX11-NEXT: global_load_b64 v[0:1], v4, s[0:1]
193193; GFX11-NEXT: s_waitcnt vmcnt(0)
194194; GFX11-NEXT: v_mov_b32_dpp v2, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
@@ -214,11 +214,11 @@ define amdgpu_kernel void @update_dppv2f32_test(ptr addrspace(1) %arg, <2 x floa
214214; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
215215; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
216216; GFX8-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
217- ; GFX8-NEXT: v_mov_b32_e32 v5, s3
218217; GFX8-NEXT: v_mov_b32_e32 v4, s2
218+ ; GFX8-NEXT: v_mov_b32_e32 v5, s3
219219; GFX8-NEXT: s_waitcnt vmcnt(0)
220- ; GFX8-NEXT: v_mov_b32_dpp v5, v3 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
221220; GFX8-NEXT: v_mov_b32_dpp v4, v2 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
221+ ; GFX8-NEXT: v_mov_b32_dpp v5, v3 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
222222; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[4:5]
223223; GFX8-NEXT: s_endpgm
224224;
@@ -239,10 +239,10 @@ define amdgpu_kernel void @update_dppv2f32_test(ptr addrspace(1) %arg, <2 x floa
239239; GFX11-LABEL: update_dppv2f32_test:
240240; GFX11: ; %bb.0:
241241; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
242- ; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
243242; GFX11-NEXT: s_waitcnt lgkmcnt(0)
244- ; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
243+ ; GFX11-NEXT: v_dual_mov_b32 v3, s3 :: v_dual_and_b32 v0, 0x3ff, v0
245244; GFX11-NEXT: v_lshlrev_b32_e32 v4, 3, v0
245+ ; GFX11-NEXT: v_mov_b32_e32 v2, s2
246246; GFX11-NEXT: global_load_b64 v[0:1], v4, s[0:1]
247247; GFX11-NEXT: s_waitcnt vmcnt(0)
248248; GFX11-NEXT: v_mov_b32_dpp v2, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
@@ -268,11 +268,11 @@ define amdgpu_kernel void @update_dpp_p0_test(ptr addrspace(1) %arg, ptr %in1, p
268268; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2
269269; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
270270; GFX8-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
271- ; GFX8-NEXT: v_mov_b32_e32 v5, s3
272271; GFX8-NEXT: v_mov_b32_e32 v4, s2
272+ ; GFX8-NEXT: v_mov_b32_e32 v5, s3
273273; GFX8-NEXT: s_waitcnt vmcnt(0)
274- ; GFX8-NEXT: v_mov_b32_dpp v5, v3 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
275274; GFX8-NEXT: v_mov_b32_dpp v4, v2 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
275+ ; GFX8-NEXT: v_mov_b32_dpp v5, v3 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
276276; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[4:5]
277277; GFX8-NEXT: s_endpgm
278278;
@@ -293,10 +293,10 @@ define amdgpu_kernel void @update_dpp_p0_test(ptr addrspace(1) %arg, ptr %in1, p
293293; GFX11-LABEL: update_dpp_p0_test:
294294; GFX11: ; %bb.0:
295295; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
296- ; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
297296; GFX11-NEXT: s_waitcnt lgkmcnt(0)
298- ; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
297+ ; GFX11-NEXT: v_dual_mov_b32 v3, s3 :: v_dual_and_b32 v0, 0x3ff, v0
299298; GFX11-NEXT: v_lshlrev_b32_e32 v4, 3, v0
299+ ; GFX11-NEXT: v_mov_b32_e32 v2, s2
300300; GFX11-NEXT: global_load_b64 v[0:1], v4, s[0:1]
301301; GFX11-NEXT: s_waitcnt vmcnt(0)
302302; GFX11-NEXT: v_mov_b32_dpp v2, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
0 commit comments