@@ -134,16 +134,169 @@ bb:
134134 ret void
135135}
136136
; The accumulator passed to the first MFMA is an all-zero <4 x double>
; (zeroinitializer in the updated IR). The checks expect the last operand of
; that first MFMA to be the literal 0, and the second MFMA to consume the
; first result.
137- ; GCN-LABEL: {{^}}test_mfma_f64_16x16x4f64_splat_imm :
137+ ; GCN-LABEL: {{^}}test_mfma_f64_16x16x4f64_splat_imm_0 :
138138; GFX90A: v_mfma_f64_16x16x4f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], 0{{$}}
139139; GFX90A: v_mfma_f64_16x16x4f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 blgp:3
140140; GFX942: v_mfma_f64_16x16x4_f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], 0{{$}}
141141; GFX942: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 neg:[1,1,0]
142142; GCN: global_store_dwordx4
143143; GCN: global_store_dwordx4
144- define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm (ptr addrspace (1 ) %arg , double %a , double %b ) #0 {
144+ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm_0 (ptr addrspace (1 ) %arg , double %a , double %b ) #0 {
145145bb:
146- %mai.1 = tail call <4 x double > @llvm.amdgcn.mfma.f64.16x16x4f64 (double %a , double %b , <4 x double > <double 0 .0 , double 0 .0 , double 0 .0 , double 0 .0 >, i32 0 , i32 0 , i32 0 )
146+ %mai.1 = tail call <4 x double > @llvm.amdgcn.mfma.f64.16x16x4f64 (double %a , double %b , <4 x double > zeroinitializer , i32 0 , i32 0 , i32 0 )
147+ %mai.2 = tail call <4 x double > @llvm.amdgcn.mfma.f64.16x16x4f64 (double %a , double %b , <4 x double > %mai.1 , i32 1 , i32 2 , i32 3 )
148+ store <4 x double > %mai.2 , ptr addrspace (1 ) %arg
149+ ret void
150+ }
151+
; Splat accumulator whose 64-bit pattern is all ones (i64 -1 bitcast to
; double). The checks expect the last operand of the first MFMA to be the
; literal -1, and the second MFMA to consume the first result.
152+ ; GCN-LABEL: {{^}}test_mfma_f64_16x16x4f64_splat_imm_int_neg1:
153+ ; GFX90A: v_mfma_f64_16x16x4f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], -1{{$}}
154+ ; GFX90A: v_mfma_f64_16x16x4f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 blgp:3
155+ ; GFX942: v_mfma_f64_16x16x4_f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], -1{{$}}
156+ ; GFX942: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 neg:[1,1,0]
157+ ; GCN: global_store_dwordx4
158+ ; GCN: global_store_dwordx4
159+ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm_int_neg1 (ptr addrspace (1 ) %arg , double %a , double %b ) #0 {
160+ bb:
161+ %mai.1 = tail call <4 x double > @llvm.amdgcn.mfma.f64.16x16x4f64 (double %a , double %b , <4 x double > splat (double bitcast (i64 -1 to double )), i32 0 , i32 0 , i32 0 )
162+ %mai.2 = tail call <4 x double > @llvm.amdgcn.mfma.f64.16x16x4f64 (double %a , double %b , <4 x double > %mai.1 , i32 1 , i32 2 , i32 3 )
163+ store <4 x double > %mai.2 , ptr addrspace (1 ) %arg
164+ ret void
165+ }
166+
; Splat of double 1.0 as the accumulator of the first MFMA. The checks expect
; the constant to be built in AGPRs instead of being used as an immediate
; operand. The high dword 0x3ff00000 goes through a VGPR move and an
; accvgpr write, the low dword 0 is written directly, and six
; v_accvgpr_mov_b32 copies alternate low and high to fill the rest of the
; register tuple that the first MFMA takes as its last operand.
167+ ; GCN-LABEL: {{^}}test_mfma_f64_16x16x4f64_splat_imm_1:
168+ ; GCN: v_mov_b32_e32 [[HIGH_BITS:v[0-9]+]], 0x3ff00000
169+ ; GCN: v_accvgpr_write_b32 a[[A_HIGH_BITS_0:[0-9]+]], [[HIGH_BITS]]
170+ ; GCN: v_accvgpr_write_b32 a[[A_LOW_BITS_0:[0-9]+]], 0{{$}}
171+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
172+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_HIGH_BITS_0]]
173+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
174+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_HIGH_BITS_0]]
175+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
176+ ; GCN: v_accvgpr_mov_b32 a[[LAST_CONST_REG:[0-9]+]], a[[A_HIGH_BITS_0]]
177+
178+ ; GFX90A: v_mfma_f64_16x16x4f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a{{\[}}[[A_LOW_BITS_0]]:[[LAST_CONST_REG]]{{\]$}}
179+ ; GFX90A: v_mfma_f64_16x16x4f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 blgp:3
180+ ; GFX942: v_mfma_f64_16x16x4_f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a{{\[}}[[A_LOW_BITS_0]]:[[LAST_CONST_REG]]{{\]$}}
181+ ; GFX942: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 neg:[1,1,0]
182+ ; GCN: global_store_dwordx4
183+ ; GCN: global_store_dwordx4
184+ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm_1 (ptr addrspace (1 ) %arg , double %a , double %b ) #0 {
185+ bb:
186+ %mai.1 = tail call <4 x double > @llvm.amdgcn.mfma.f64.16x16x4f64 (double %a , double %b , <4 x double > splat (double 1 .0 ), i32 0 , i32 0 , i32 0 )
187+ %mai.2 = tail call <4 x double > @llvm.amdgcn.mfma.f64.16x16x4f64 (double %a , double %b , <4 x double > %mai.1 , i32 1 , i32 2 , i32 3 )
188+ store <4 x double > %mai.2 , ptr addrspace (1 ) %arg
189+ ret void
190+ }
191+
; Splat of double -1.0 as the accumulator of the first MFMA. Same shape as
; the +1.0 case, but the high dword written through the VGPR is 0xbff00000.
; The checks expect the constant to be built in AGPRs (high dword via VGPR,
; low dword 0, then six alternating v_accvgpr_mov_b32 copies) and the
; resulting register tuple to be the last operand of the first MFMA.
192+ ; GCN-LABEL: {{^}}test_mfma_f64_16x16x4f64_splat_imm_neg1:
193+ ; GCN: v_mov_b32_e32 [[HIGH_BITS:v[0-9]+]], 0xbff00000
194+ ; GCN: v_accvgpr_write_b32 a[[A_HIGH_BITS_0:[0-9]+]], [[HIGH_BITS]]
195+ ; GCN: v_accvgpr_write_b32 a[[A_LOW_BITS_0:[0-9]+]], 0{{$}}
196+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
197+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_HIGH_BITS_0]]
198+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
199+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_HIGH_BITS_0]]
200+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
201+ ; GCN: v_accvgpr_mov_b32 a[[LAST_CONST_REG:[0-9]+]], a[[A_HIGH_BITS_0]]
202+
203+ ; GFX90A: v_mfma_f64_16x16x4f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a{{\[}}[[A_LOW_BITS_0]]:[[LAST_CONST_REG]]{{\]$}}
204+ ; GFX90A: v_mfma_f64_16x16x4f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 blgp:3
205+ ; GFX942: v_mfma_f64_16x16x4_f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a{{\[}}[[A_LOW_BITS_0]]:[[LAST_CONST_REG]]{{\]$}}
206+ ; GFX942: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 neg:[1,1,0]
207+ ; GCN: global_store_dwordx4
208+ ; GCN: global_store_dwordx4
209+ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm_neg1 (ptr addrspace (1 ) %arg , double %a , double %b ) #0 {
210+ bb:
211+ %mai.1 = tail call <4 x double > @llvm.amdgcn.mfma.f64.16x16x4f64 (double %a , double %b , <4 x double > splat (double -1 .0 ), i32 0 , i32 0 , i32 0 )
212+ %mai.2 = tail call <4 x double > @llvm.amdgcn.mfma.f64.16x16x4f64 (double %a , double %b , <4 x double > %mai.1 , i32 1 , i32 2 , i32 3 )
213+ store <4 x double > %mai.2 , ptr addrspace (1 ) %arg
214+ ret void
215+ }
216+
; Splat accumulator whose 64-bit pattern is the integer 64 (low dword 64,
; high dword 0). The checks expect both dwords to be written to AGPRs with
; v_accvgpr_write_b32, replicated with six alternating v_accvgpr_mov_b32
; copies, and the resulting register tuple used as the last operand of the
; first MFMA. Both immediate writes are anchored at end of line so that a
; longer operand that merely starts with the same digits cannot match.
217+ ; GCN-LABEL: {{^}}test_mfma_f64_16x16x4f64_splat_imm_int_64:
218+ ; GCN: v_accvgpr_write_b32 a[[A_LOW_BITS_0:[0-9]+]], 64{{$}}
219+ ; GCN: v_accvgpr_write_b32 a[[A_HIGH_BITS_0:[0-9]+]], 0{{$}}
220+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
221+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_HIGH_BITS_0]]
222+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
223+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_HIGH_BITS_0]]
224+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
225+ ; GCN: v_accvgpr_mov_b32 a[[LAST_CONST_REG:[0-9]+]], a[[A_HIGH_BITS_0]]
226+
227+ ; GFX90A: v_mfma_f64_16x16x4f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a{{\[}}[[A_LOW_BITS_0]]:[[LAST_CONST_REG]]{{\]$}}
228+ ; GFX90A: v_mfma_f64_16x16x4f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 blgp:3
229+ ; GFX942: v_mfma_f64_16x16x4_f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a{{\[}}[[A_LOW_BITS_0]]:[[LAST_CONST_REG]]{{\]$}}
230+ ; GFX942: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 neg:[1,1,0]
231+ ; GCN: global_store_dwordx4
232+ ; GCN: global_store_dwordx4
233+ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm_int_64 (ptr addrspace (1 ) %arg , double %a , double %b ) #0 {
234+ bb:
235+ %mai.1 = tail call <4 x double > @llvm.amdgcn.mfma.f64.16x16x4f64 (double %a , double %b , <4 x double > splat (double bitcast (i64 64 to double )), i32 0 , i32 0 , i32 0 )
236+ %mai.2 = tail call <4 x double > @llvm.amdgcn.mfma.f64.16x16x4f64 (double %a , double %b , <4 x double > %mai.1 , i32 1 , i32 2 , i32 3 )
237+ store <4 x double > %mai.2 , ptr addrspace (1 ) %arg
238+ ret void
239+ }
240+
; Splat accumulator whose 64-bit pattern is 274877906944, i.e. 64 shifted
; left by 32 (low dword 0, high dword 64). The checks expect both dwords to
; be written to AGPRs with v_accvgpr_write_b32, replicated with six
; alternating v_accvgpr_mov_b32 copies, and the resulting register tuple used
; as the last operand of the first MFMA. Both immediate writes are anchored
; at end of line so that a longer operand starting with the same digits
; cannot match.
241+ ; GCN-LABEL: {{^}}test_mfma_f64_16x16x4f64_splat_imm_int_64_in_high_bits:
242+ ; GCN: v_accvgpr_write_b32 a[[A_LOW_BITS_0:[0-9]+]], 0{{$}}
243+ ; GCN: v_accvgpr_write_b32 a[[A_HIGH_BITS_0:[0-9]+]], 64{{$}}
244+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
245+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_HIGH_BITS_0]]
246+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
247+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_HIGH_BITS_0]]
248+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
249+ ; GCN: v_accvgpr_mov_b32 a[[LAST_CONST_REG:[0-9]+]], a[[A_HIGH_BITS_0]]
250+
251+ ; GFX90A: v_mfma_f64_16x16x4f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a{{\[}}[[A_LOW_BITS_0]]:[[LAST_CONST_REG]]{{\]$}}
252+ ; GFX90A: v_mfma_f64_16x16x4f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 blgp:3
253+ ; GFX942: v_mfma_f64_16x16x4_f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a{{\[}}[[A_LOW_BITS_0]]:[[LAST_CONST_REG]]{{\]$}}
254+ ; GFX942: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 neg:[1,1,0]
255+ ; GCN: global_store_dwordx4
256+ ; GCN: global_store_dwordx4
257+ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm_int_64_in_high_bits (ptr addrspace (1 ) %arg , double %a , double %b ) #0 {
258+ bb:
259+ %mai.1 = tail call <4 x double > @llvm.amdgcn.mfma.f64.16x16x4f64 (double %a , double %b , <4 x double > splat (double bitcast (i64 274877906944 to double )), i32 0 , i32 0 , i32 0 )
260+ %mai.2 = tail call <4 x double > @llvm.amdgcn.mfma.f64.16x16x4f64 (double %a , double %b , <4 x double > %mai.1 , i32 1 , i32 2 , i32 3 )
261+ store <4 x double > %mai.2 , ptr addrspace (1 ) %arg
262+ ret void
263+ }
264+
265+ ; FIXME: This should not be foldable as an inline immediate
; The splat element is i64 274877907008, i.e. 64 in the high dword and 64 in
; the low dword. The checks below record the current output, where the first
; MFMA takes the literal 64 as its last operand; per the note above that
; folding is not the desired result.
266+ ; GCN-LABEL: {{^}}test_mfma_f64_16x16x4f64_splat_imm_int_64_in_high_and_low:
267+ ; GFX90A: v_mfma_f64_16x16x4f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], 64{{$}}
268+ ; GFX90A: v_mfma_f64_16x16x4f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 blgp:3
269+ ; GFX942: v_mfma_f64_16x16x4_f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], 64{{$}}
270+ ; GFX942: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 neg:[1,1,0]
271+ ; GCN: global_store_dwordx4
272+ ; GCN: global_store_dwordx4
273+ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm_int_64_in_high_and_low (ptr addrspace (1 ) %arg , double %a , double %b ) #0 {
274+ bb:
275+ %mai.1 = tail call <4 x double > @llvm.amdgcn.mfma.f64.16x16x4f64 (double %a , double %b , <4 x double > splat (double bitcast (i64 274877907008 to double )), i32 0 , i32 0 , i32 0 )
276+ %mai.2 = tail call <4 x double > @llvm.amdgcn.mfma.f64.16x16x4f64 (double %a , double %b , <4 x double > %mai.1 , i32 1 , i32 2 , i32 3 )
277+ store <4 x double > %mai.2 , ptr addrspace (1 ) %arg
278+ ret void
279+ }
280+
281+ ; GCN-LABEL: {{^}}test_mfma_f64_16x16x4f64_splat_imm_f32_1_in_high_and_low:
282+ ; GCN: v_accvgpr_write_b32 a[[A_LOW_BITS_0:[0-9]+]], 1.0
283+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
284+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
285+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
286+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
287+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
288+ ; GCN: v_accvgpr_mov_b32 a{{[0-9]+}}, a[[A_LOW_BITS_0]]
289+ ; GCN: v_accvgpr_mov_b32 a[[LAST_CONST_REG:[0-9]+]], a[[A_LOW_BITS_0]]
290+
291+ ; GFX90A: v_mfma_f64_16x16x4f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a{{\[}}[[A_LOW_BITS_0]]:[[LAST_CONST_REG]]{{\]$}}
292+ ; GFX90A: v_mfma_f64_16x16x4f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 blgp:3
293+ ; GFX942: v_mfma_f64_16x16x4_f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a{{\[}}[[A_LOW_BITS_0]]:[[LAST_CONST_REG]]{{\]$}}
294+ ; GFX942: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 neg:[1,1,0]
295+ ; GCN: global_store_dwordx4
296+ ; GCN: global_store_dwordx4
297+ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm_f32_1_in_high_and_low (ptr addrspace (1 ) %arg , double %a , double %b ) #0 {
298+ bb:
299+ %mai.1 = tail call <4 x double > @llvm.amdgcn.mfma.f64.16x16x4f64 (double %a , double %b , <4 x double > splat (double bitcast (<2 x float > splat (float 1 .0 ) to double )), i32 0 , i32 0 , i32 0 )
147300 %mai.2 = tail call <4 x double > @llvm.amdgcn.mfma.f64.16x16x4f64 (double %a , double %b , <4 x double > %mai.1 , i32 1 , i32 2 , i32 3 )
148301 store <4 x double > %mai.2 , ptr addrspace (1 ) %arg
149302 ret void
0 commit comments