@@ -16,10 +16,12 @@ define amdgpu_kernel void @vgpr_mfma_pass_av_split_crash(double %arg1, i1 %arg2,
1616; CHECK-NEXT: s_bitcmp1_b32 s0, 8
1717; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0
1818; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[2:3]
19- ; CHECK-NEXT: s_xor_b64 s[20:21], s[2:3], -1
2019; CHECK-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v0
21- ; CHECK-NEXT: s_and_b64 s[2:3], exec, s[2:3]
2220; CHECK-NEXT: v_mov_b32_e32 v0, 0x9037ab78
21+ ; CHECK-NEXT: v_accvgpr_write_b32 a3, v1
22+ ; CHECK-NEXT: s_xor_b64 s[20:21], s[2:3], -1
23+ ; CHECK-NEXT: s_and_b64 s[2:3], exec, s[2:3]
24+ ; CHECK-NEXT: v_accvgpr_write_b32 a2, v0
2325; CHECK-NEXT: v_mov_b32_e32 v3, 0xbe927e4f
2426; CHECK-NEXT: v_mov_b32_e32 v4, 0x19f4ec90
2527; CHECK-NEXT: v_mov_b32_e32 v5, 0x3efa01a0
@@ -34,14 +36,14 @@ define amdgpu_kernel void @vgpr_mfma_pass_av_split_crash(double %arg1, i1 %arg2,
3436; CHECK-NEXT: v_mov_b32_e32 v14, 0x8427b883
3537; CHECK-NEXT: v_mov_b32_e32 v15, 0x3fae1bb4
3638; CHECK-NEXT: s_mov_b64 s[22:23], 0
37- ; CHECK-NEXT: v_mov_b32_e32 v16 , 0x57b87036
38- ; CHECK-NEXT: v_mov_b32_e32 v17 , 0x3fb3b136
39+ ; CHECK-NEXT: v_mov_b32_e32 v0 , 0x57b87036
40+ ; CHECK-NEXT: v_mov_b32_e32 v1 , 0x3fb3b136
3941; CHECK-NEXT: s_and_b64 s[4:5], exec, s[16:17]
4042; CHECK-NEXT: v_mov_b32_e32 v18, 0x55555523
4143; CHECK-NEXT: v_mov_b32_e32 v19, 0xbfd55555
4244; CHECK-NEXT: s_and_b64 s[6:7], exec, s[18:19]
4345; CHECK-NEXT: v_mov_b32_e32 v20, 0
44- ; CHECK-NEXT: ; implicit-def: $agpr0_agpr1
46+ ; CHECK-NEXT: ; implicit-def: $vgpr30_vgpr31
4547; CHECK-NEXT: ; implicit-def: $vgpr22_vgpr23
4648; CHECK-NEXT: s_branch .LBB0_2
4749; CHECK-NEXT: .LBB0_1: ; %Flow9
@@ -61,9 +63,12 @@ define amdgpu_kernel void @vgpr_mfma_pass_av_split_crash(double %arg1, i1 %arg2,
6163; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
6264; CHECK-NEXT: v_mov_b64_e32 v[24:25], s[14:15]
6365; CHECK-NEXT: flat_load_dwordx2 v[24:25], v[24:25]
64- ; CHECK-NEXT: v_mov_b64_e32 v[26:27], v[0:1]
66+ ; CHECK-NEXT: v_accvgpr_read_b32 v27, a3
67+ ; CHECK-NEXT: v_accvgpr_read_b32 v26, a2
6568; CHECK-NEXT: v_mov_b64_e32 v[28:29], v[2:3]
66- ; CHECK-NEXT: v_mov_b64_e32 v[30:31], v[16:17]
69+ ; CHECK-NEXT: v_mov_b64_e32 v[16:17], v[0:1]
70+ ; CHECK-NEXT: v_accvgpr_write_b32 a0, 0
71+ ; CHECK-NEXT: v_accvgpr_write_b32 a1, 0
6772; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
6873; CHECK-NEXT: v_fmac_f64_e32 v[26:27], 0, v[24:25]
6974; CHECK-NEXT: v_fmac_f64_e32 v[28:29], 0, v[26:27]
@@ -79,10 +84,9 @@ define amdgpu_kernel void @vgpr_mfma_pass_av_split_crash(double %arg1, i1 %arg2,
7984; CHECK-NEXT: v_fmac_f64_e32 v[26:27], 0, v[28:29]
8085; CHECK-NEXT: v_mov_b64_e32 v[28:29], v[14:15]
8186; CHECK-NEXT: v_fmac_f64_e32 v[28:29], 0, v[26:27]
82- ; CHECK-NEXT: v_fmac_f64_e32 v[30:31 ], 0, v[28:29]
87+ ; CHECK-NEXT: v_fmac_f64_e32 v[16:17 ], 0, v[28:29]
8388; CHECK-NEXT: v_mov_b64_e32 v[26:27], v[18:19]
84- ; CHECK-NEXT: v_fmac_f64_e32 v[26:27], 0, v[30:31]
85- ; CHECK-NEXT: v_mov_b64_e32 v[30:31], 0
89+ ; CHECK-NEXT: v_fmac_f64_e32 v[26:27], 0, v[16:17]
8690; CHECK-NEXT: s_branch .LBB0_6
8791; CHECK-NEXT: .LBB0_5: ; %Flow
8892; CHECK-NEXT: ; in Loop: Header=BB0_6 Depth=2
@@ -91,30 +95,30 @@ define amdgpu_kernel void @vgpr_mfma_pass_av_split_crash(double %arg1, i1 %arg2,
9195; CHECK-NEXT: .LBB0_6: ; %.preheader1855.i.i.i3329
9296; CHECK-NEXT: ; Parent Loop BB0_2 Depth=1
9397; CHECK-NEXT: ; => This Inner Loop Header: Depth=2
94- ; CHECK-NEXT: v_mov_b64_e32 v[28:29], v[30:31]
98+ ; CHECK-NEXT: v_accvgpr_read_b32 v29, a1
99+ ; CHECK-NEXT: v_accvgpr_read_b32 v28, a0
95100; CHECK-NEXT: s_mov_b64 s[24:25], -1
96101; CHECK-NEXT: s_mov_b64 s[8:9], -1
97102; CHECK-NEXT: s_mov_b64 vcc, s[2:3]
98- ; CHECK-NEXT: ; implicit-def: $vgpr30_vgpr31
103+ ; CHECK-NEXT: ; implicit-def: $agpr0_agpr1
99104; CHECK-NEXT: s_cbranch_vccz .LBB0_5
100105; CHECK-NEXT: ; %bb.7: ; %.lr.ph2070.i.i.i3291
101106; CHECK-NEXT: ; in Loop: Header=BB0_6 Depth=2
102- ; CHECK-NEXT: v_accvgpr_read_b32 v31, a1
103- ; CHECK-NEXT: v_accvgpr_read_b32 v30, a0
107+ ; CHECK-NEXT: v_accvgpr_write_b32 a0, v30
108+ ; CHECK-NEXT: v_accvgpr_write_b32 a1, v31
104109; CHECK-NEXT: s_mov_b64 s[8:9], s[18:19]
105110; CHECK-NEXT: s_mov_b64 vcc, s[6:7]
106111; CHECK-NEXT: s_cbranch_vccz .LBB0_5
107112; CHECK-NEXT: ; %bb.8: ; %.preheader1856.preheader.i.i.i3325
108113; CHECK-NEXT: ; in Loop: Header=BB0_6 Depth=2
114+ ; CHECK-NEXT: v_accvgpr_write_b32 a0, v26
109115; CHECK-NEXT: s_mov_b64 s[24:25], 0
110- ; CHECK-NEXT: v_mov_b64_e32 v[30:31], v[26:27]
116+ ; CHECK-NEXT: v_accvgpr_write_b32 a1, v27
111117; CHECK-NEXT: s_mov_b64 s[8:9], 0
112118; CHECK-NEXT: s_branch .LBB0_5
113119; CHECK-NEXT: .LBB0_9: ; in Loop: Header=BB0_2 Depth=1
114- ; CHECK-NEXT: v_mov_b64_e32 v[24:25], s[10:11]
115- ; CHECK-NEXT: v_accvgpr_write_b32 a0, v24
116120; CHECK-NEXT: s_mov_b64 s[22:23], 0
117- ; CHECK-NEXT: v_accvgpr_write_b32 a1, v25
121+ ; CHECK-NEXT: v_mov_b64_e32 v[30:31], s[10:11]
118122; CHECK-NEXT: s_mov_b64 s[8:9], s[20:21]
119123; CHECK-NEXT: s_branch .LBB0_15
120124; CHECK-NEXT: .LBB0_10: ; in Loop: Header=BB0_2 Depth=1
@@ -128,24 +132,22 @@ define amdgpu_kernel void @vgpr_mfma_pass_av_split_crash(double %arg1, i1 %arg2,
128132; CHECK-NEXT: ; %bb.12: ; %._crit_edge2105.i.i.i2330.loopexit
129133; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
130134; CHECK-NEXT: v_cmp_nlg_f64_e64 s[8:9], 0, v[28:29]
131- ; CHECK-NEXT: v_accvgpr_write_b32 a0, v24
132135; CHECK-NEXT: v_cndmask_b32_e64 v23, v23, 0, s[16:17]
133- ; CHECK-NEXT: v_cndmask_b32_e64 v26, 0, 1, s[8:9]
134- ; CHECK-NEXT: v_mov_b32_e32 v27, v26
135- ; CHECK-NEXT: s_and_b64 s[8:9], exec, s[16:17]
136136; CHECK-NEXT: v_cndmask_b32_e64 v22, v22, 0, s[16:17]
137- ; CHECK-NEXT: global_store_dwordx2 v20, v[26:27], s[12:13]
137+ ; CHECK-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[8:9]
138+ ; CHECK-NEXT: v_mov_b32_e32 v17, v16
139+ ; CHECK-NEXT: s_and_b64 s[8:9], exec, s[16:17]
140+ ; CHECK-NEXT: global_store_dwordx2 v20, v[16:17], s[12:13]
138141; CHECK-NEXT: s_cselect_b32 s23, s23, 0
139142; CHECK-NEXT: s_cselect_b32 s22, s22, 0
140143; CHECK-NEXT: s_mov_b64 s[8:9], -1
141144; CHECK-NEXT: s_branch .LBB0_14
142145; CHECK-NEXT: .LBB0_13: ; in Loop: Header=BB0_2 Depth=1
143- ; CHECK-NEXT: v_accvgpr_write_b32 a0, v24
144146; CHECK-NEXT: s_mov_b64 s[8:9], 0
145147; CHECK-NEXT: v_mov_b64_e32 v[22:23], 0
146148; CHECK-NEXT: .LBB0_14: ; %Flow6
147149; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
148- ; CHECK-NEXT: v_accvgpr_write_b32 a1, v25
150+ ; CHECK-NEXT: v_mov_b64_e32 v[30:31], v[24:25]
149151; CHECK-NEXT: .LBB0_15: ; %Flow6
150152; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
151153; CHECK-NEXT: s_mov_b64 s[24:25], -1
0 commit comments