@@ -95,66 +95,66 @@ define amdgpu_kernel void @test_mfma_f32_32x32x1f32(ptr addrspace(1) %arg) #0 {
95
95
; GREEDY908-NEXT: v_mfma_f32_32x32x1f32 a[32:63], v3, v0, a[0:31]
96
96
; GREEDY908-NEXT: s_nop 15
97
97
; GREEDY908-NEXT: s_nop 1
98
- ; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a32
99
- ; GREEDY908-NEXT: v_accvgpr_read_b32 v5, a61
100
- ; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a60
101
- ; GREEDY908-NEXT: v_accvgpr_write_b32 a2, v1
102
- ; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a33
103
- ; GREEDY908-NEXT: v_accvgpr_read_b32 v7, a59
104
- ; GREEDY908-NEXT: v_accvgpr_read_b32 v8, a58
105
- ; GREEDY908-NEXT: v_accvgpr_write_b32 a3, v1
98
+ ; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a32
99
+ ; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a33
106
100
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a34
107
- ; GREEDY908-NEXT: v_accvgpr_read_b32 v9, a57
108
- ; GREEDY908-NEXT: v_accvgpr_read_b32 v10, a56
101
+ ; GREEDY908-NEXT: v_accvgpr_write_b32 a2, v2
102
+ ; GREEDY908-NEXT: v_accvgpr_write_b32 a3, v6
109
103
; GREEDY908-NEXT: v_accvgpr_write_b32 a4, v1
110
- ; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a35
111
- ; GREEDY908-NEXT: v_accvgpr_read_b32 v11, a55
112
- ; GREEDY908-NEXT: v_accvgpr_read_b32 v12, a54
113
- ; GREEDY908-NEXT: v_accvgpr_write_b32 a5, v1
114
- ; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a36
115
- ; GREEDY908-NEXT: v_accvgpr_read_b32 v13, a53
116
- ; GREEDY908-NEXT: v_accvgpr_read_b32 v14, a52
117
- ; GREEDY908-NEXT: v_accvgpr_write_b32 a6, v1
104
+ ; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a35
105
+ ; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a36
118
106
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a37
119
- ; GREEDY908-NEXT: v_accvgpr_read_b32 v15, a51
120
- ; GREEDY908-NEXT: v_accvgpr_read_b32 v16, a50
107
+ ; GREEDY908-NEXT: v_accvgpr_write_b32 a5, v2
108
+ ; GREEDY908-NEXT: v_accvgpr_write_b32 a6, v6
121
109
; GREEDY908-NEXT: v_accvgpr_write_b32 a7, v1
122
- ; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a38
123
- ; GREEDY908-NEXT: v_accvgpr_read_b32 v17, a49
124
- ; GREEDY908-NEXT: v_accvgpr_read_b32 v18, a48
125
- ; GREEDY908-NEXT: v_accvgpr_write_b32 a8, v1
126
- ; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a39
127
- ; GREEDY908-NEXT: v_accvgpr_read_b32 v19, a47
128
- ; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a46
129
- ; GREEDY908-NEXT: v_accvgpr_write_b32 a9, v1
110
+ ; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a38
111
+ ; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a39
130
112
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a40
131
- ; GREEDY908-NEXT: v_accvgpr_write_b32 a16, v2
132
- ; GREEDY908-NEXT: v_accvgpr_write_b32 a17, v19
113
+ ; GREEDY908-NEXT: v_accvgpr_write_b32 a8, v2
114
+ ; GREEDY908-NEXT: v_accvgpr_write_b32 a9, v6
133
115
; GREEDY908-NEXT: v_accvgpr_write_b32 a10, v1
134
- ; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a41
135
- ; GREEDY908-NEXT: v_accvgpr_write_b32 a18, v18
136
- ; GREEDY908-NEXT: v_accvgpr_write_b32 a19, v17
137
- ; GREEDY908-NEXT: v_accvgpr_write_b32 a11, v1
138
- ; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a42
139
- ; GREEDY908-NEXT: v_accvgpr_write_b32 a20, v16
140
- ; GREEDY908-NEXT: v_accvgpr_write_b32 a21, v15
141
- ; GREEDY908-NEXT: v_accvgpr_write_b32 a12, v1
116
+ ; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a41
117
+ ; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a42
142
118
; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a43
143
- ; GREEDY908-NEXT: v_accvgpr_write_b32 a22, v14
144
- ; GREEDY908-NEXT: v_accvgpr_write_b32 a23, v13
119
+ ; GREEDY908-NEXT: v_accvgpr_write_b32 a11, v2
120
+ ; GREEDY908-NEXT: v_accvgpr_write_b32 a12, v6
145
121
; GREEDY908-NEXT: v_accvgpr_write_b32 a13, v1
146
- ; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a44
147
- ; GREEDY908-NEXT: v_accvgpr_write_b32 a24, v12
148
- ; GREEDY908-NEXT: v_accvgpr_write_b32 a25, v11
149
- ; GREEDY908-NEXT: v_accvgpr_write_b32 a14, v1
150
- ; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a45
151
- ; GREEDY908-NEXT: v_accvgpr_write_b32 a26, v10
152
- ; GREEDY908-NEXT: v_accvgpr_write_b32 a27, v9
153
- ; GREEDY908-NEXT: v_accvgpr_write_b32 a15, v1
154
- ; GREEDY908-NEXT: v_accvgpr_write_b32 a28, v8
155
- ; GREEDY908-NEXT: v_accvgpr_write_b32 a29, v7
122
+ ; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a44
123
+ ; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a45
124
+ ; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a46
125
+ ; GREEDY908-NEXT: v_accvgpr_write_b32 a14, v2
126
+ ; GREEDY908-NEXT: v_accvgpr_write_b32 a15, v6
127
+ ; GREEDY908-NEXT: v_accvgpr_write_b32 a16, v1
128
+ ; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a47
129
+ ; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a48
130
+ ; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a49
131
+ ; GREEDY908-NEXT: v_accvgpr_write_b32 a17, v2
132
+ ; GREEDY908-NEXT: v_accvgpr_write_b32 a18, v6
133
+ ; GREEDY908-NEXT: v_accvgpr_write_b32 a19, v1
134
+ ; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a50
135
+ ; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a51
136
+ ; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a52
137
+ ; GREEDY908-NEXT: v_accvgpr_write_b32 a20, v2
138
+ ; GREEDY908-NEXT: v_accvgpr_write_b32 a21, v6
139
+ ; GREEDY908-NEXT: v_accvgpr_write_b32 a22, v1
140
+ ; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a53
141
+ ; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a54
142
+ ; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a55
143
+ ; GREEDY908-NEXT: v_accvgpr_write_b32 a23, v2
144
+ ; GREEDY908-NEXT: v_accvgpr_write_b32 a24, v6
145
+ ; GREEDY908-NEXT: v_accvgpr_write_b32 a25, v1
146
+ ; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a56
147
+ ; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a57
148
+ ; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a58
149
+ ; GREEDY908-NEXT: v_accvgpr_write_b32 a26, v2
150
+ ; GREEDY908-NEXT: v_accvgpr_write_b32 a27, v6
151
+ ; GREEDY908-NEXT: v_accvgpr_write_b32 a28, v1
152
+ ; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a59
153
+ ; GREEDY908-NEXT: v_accvgpr_read_b32 v6, a60
154
+ ; GREEDY908-NEXT: v_accvgpr_read_b32 v1, a61
155
+ ; GREEDY908-NEXT: v_accvgpr_write_b32 a29, v2
156
156
; GREEDY908-NEXT: v_accvgpr_write_b32 a30, v6
157
- ; GREEDY908-NEXT: v_accvgpr_write_b32 a31, v5
157
+ ; GREEDY908-NEXT: v_accvgpr_write_b32 a31, v1
158
158
; GREEDY908-NEXT: s_nop 0
159
159
; GREEDY908-NEXT: v_mfma_f32_32x32x1f32 a[0:31], v3, v0, a[0:31]
160
160
; GREEDY908-NEXT: s_nop 15
@@ -667,11 +667,11 @@ define amdgpu_kernel void @test_mfma_f32_16x16x1f32(ptr addrspace(1) %arg) #0 {
667
667
; GREEDY908-NEXT: v_mfma_f32_16x16x1f32 a[18:33], v0, v1, a[18:33]
668
668
; GREEDY908-NEXT: v_mfma_f32_16x16x1f32 a[2:17], v0, v1, a[18:33]
669
669
; GREEDY908-NEXT: s_nop 8
670
+ ; GREEDY908-NEXT: v_accvgpr_read_b32 v5, a18
670
671
; GREEDY908-NEXT: v_accvgpr_read_b32 v2, a19
671
- ; GREEDY908-NEXT: v_accvgpr_read_b32 v3, a18
672
672
; GREEDY908-NEXT: s_nop 0
673
+ ; GREEDY908-NEXT: v_accvgpr_write_b32 a0, v5
673
674
; GREEDY908-NEXT: v_accvgpr_write_b32 a1, v2
674
- ; GREEDY908-NEXT: v_accvgpr_write_b32 a0, v3
675
675
; GREEDY908-NEXT: s_nop 0
676
676
; GREEDY908-NEXT: v_mfma_f32_16x16x1f32 a[0:15], v0, v1, a[0:15]
677
677
; GREEDY908-NEXT: s_nop 9
0 commit comments