@@ -18,13 +18,13 @@ define void @test_call_external_void_func_bf16_inreg(bfloat inreg %arg) #0 {
1818; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1919; GFX9-NEXT: s_mov_b64 exec, s[18:19]
2020; GFX9-NEXT: v_writelane_b32 v40, s17, 2
21- ; GFX9-NEXT: s_addk_i32 s32, 0x400
2221; GFX9-NEXT: v_writelane_b32 v40, s30, 0
22+ ; GFX9-NEXT: s_addk_i32 s32, 0x400
23+ ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
2324; GFX9-NEXT: s_getpc_b64 s[18:19]
2425; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_bf16_inreg@rel32@lo+4
2526; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_bf16_inreg@rel32@hi+12
2627; GFX9-NEXT: s_mov_b32 s0, s16
27- ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
2828; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
2929; GFX9-NEXT: v_readlane_b32 s30, v40, 0
3030; GFX9-NEXT: v_readlane_b32 s31, v40, 1
@@ -48,12 +48,12 @@ define void @test_call_external_void_func_bf16_inreg(bfloat inreg %arg) #0 {
4848; GFX11-NEXT: v_writelane_b32 v40, s1, 2
4949; GFX11-NEXT: v_writelane_b32 v40, s30, 0
5050; GFX11-NEXT: s_add_i32 s32, s32, 16
51+ ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
5152; GFX11-NEXT: s_getpc_b64 s[2:3]
5253; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_bf16_inreg@rel32@lo+4
5354; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_bf16_inreg@rel32@hi+12
54- ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
55+ ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
5556; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
56- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
5757; GFX11-NEXT: v_readlane_b32 s30, v40, 0
5858; GFX11-NEXT: v_readlane_b32 s31, v40, 1
5959; GFX11-NEXT: s_mov_b32 s32, s33
@@ -78,13 +78,13 @@ define void @test_call_external_void_func_v2bf16_inreg(<2 x bfloat> inreg %arg)
7878; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
7979; GFX9-NEXT: s_mov_b64 exec, s[18:19]
8080; GFX9-NEXT: v_writelane_b32 v40, s17, 2
81- ; GFX9-NEXT: s_addk_i32 s32, 0x400
8281; GFX9-NEXT: v_writelane_b32 v40, s30, 0
82+ ; GFX9-NEXT: s_addk_i32 s32, 0x400
83+ ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
8384; GFX9-NEXT: s_getpc_b64 s[18:19]
8485; GFX9-NEXT: s_add_u32 s18, s18, external_void_func_v2bf16_inreg@rel32@lo+4
8586; GFX9-NEXT: s_addc_u32 s19, s19, external_void_func_v2bf16_inreg@rel32@hi+12
8687; GFX9-NEXT: s_mov_b32 s0, s16
87- ; GFX9-NEXT: v_writelane_b32 v40, s31, 1
8888; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
8989; GFX9-NEXT: v_readlane_b32 s30, v40, 0
9090; GFX9-NEXT: v_readlane_b32 s31, v40, 1
@@ -108,12 +108,12 @@ define void @test_call_external_void_func_v2bf16_inreg(<2 x bfloat> inreg %arg)
108108; GFX11-NEXT: v_writelane_b32 v40, s1, 2
109109; GFX11-NEXT: v_writelane_b32 v40, s30, 0
110110; GFX11-NEXT: s_add_i32 s32, s32, 16
111+ ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
111112; GFX11-NEXT: s_getpc_b64 s[2:3]
112113; GFX11-NEXT: s_add_u32 s2, s2, external_void_func_v2bf16_inreg@rel32@lo+4
113114; GFX11-NEXT: s_addc_u32 s3, s3, external_void_func_v2bf16_inreg@rel32@hi+12
114- ; GFX11-NEXT: v_writelane_b32 v40, s31, 1
115+ ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
115116; GFX11-NEXT: s_swappc_b64 s[30:31], s[2:3]
116- ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
117117; GFX11-NEXT: v_readlane_b32 s30, v40, 0
118118; GFX11-NEXT: v_readlane_b32 s31, v40, 1
119119; GFX11-NEXT: s_mov_b32 s32, s33
0 commit comments