|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
2 | | -; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfbfwma -verify-machineinstrs | FileCheck %s --check-prefix=ZVFBFWMA |
3 | | -; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfbfwma -verify-machineinstrs | FileCheck %s --check-prefix=ZVFBFWMA |
4 | | -; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfbfmin -verify-machineinstrs | FileCheck %s --check-prefix=ZVFBFMIN |
5 | | -; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfbfmin -verify-machineinstrs | FileCheck %s --check-prefix=ZVFBFMIN |
| 2 | +; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfh,+zvfbfwma -verify-machineinstrs | FileCheck %s --check-prefix=ZVFBFWMA |
| 3 | +; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfh,+zvfbfwma -verify-machineinstrs | FileCheck %s --check-prefix=ZVFBFWMA |
| 4 | +; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfh,+zvfbfmin -verify-machineinstrs | FileCheck %s --check-prefix=ZVFBFMIN |
| 5 | +; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfh,+zvfbfmin -verify-machineinstrs | FileCheck %s --check-prefix=ZVFBFMIN |
6 | 6 |
|
7 | 7 | define <1 x float> @vfwmaccbf16_vv_v1f32(<1 x float> %a, <1 x bfloat> %b, <1 x bfloat> %c) { |
8 | 8 | ; ZVFBFWMA-LABEL: vfwmaccbf16_vv_v1f32: |
@@ -295,3 +295,53 @@ define <32 x float> @vfwmaccbf32_vf_v32f32(<32 x float> %a, bfloat %b, <32 x bfl |
295 | 295 | %res = call <32 x float> @llvm.fma.v32f32(<32 x float> %b.ext, <32 x float> %c.ext, <32 x float> %a) |
296 | 296 | ret <32 x float> %res |
297 | 297 | } |
| 298 | + |
| | +; Scalar-operand form: the bfloat scalar %a is fpext'ed to float first and only |
| | +; then splatted (insertelement + zero-mask shufflevector) before feeding the fma. |
| | +; With +zvfbfwma the checks expect this to fold into a single vfwmaccbf16.vf; |
| | +; with only +zvfbfmin the scalar is widened through fmv.x.w/slli/fmv.w.x, the |
| | +; vector through vfwcvtbf16.f.f.v, and a plain vfmacc.vf is used. |
| 299 | +define <4 x float> @vfwmaccbf16_vf_v4f32_scalar_extend(<4 x float> %rd, bfloat %a, <4 x bfloat> %b) local_unnamed_addr #0 { |
| 300 | +; ZVFBFWMA-LABEL: vfwmaccbf16_vf_v4f32_scalar_extend: |
| 301 | +; ZVFBFWMA: # %bb.0: |
| 302 | +; ZVFBFWMA-NEXT: vsetivli zero, 4, e16, mf2, ta, ma |
| 303 | +; ZVFBFWMA-NEXT: vfwmaccbf16.vf v8, fa0, v9 |
| 304 | +; ZVFBFWMA-NEXT: ret |
| 305 | +; |
| 306 | +; ZVFBFMIN-LABEL: vfwmaccbf16_vf_v4f32_scalar_extend: |
| 307 | +; ZVFBFMIN: # %bb.0: |
| 308 | +; ZVFBFMIN-NEXT: fmv.x.w a0, fa0 |
| 309 | +; ZVFBFMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma |
| 310 | +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v10, v9 |
| 311 | +; ZVFBFMIN-NEXT: slli a0, a0, 16 |
| 312 | +; ZVFBFMIN-NEXT: fmv.w.x fa5, a0 |
| 313 | +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma |
| 314 | +; ZVFBFMIN-NEXT: vfmacc.vf v8, fa5, v10 |
| 315 | +; ZVFBFMIN-NEXT: ret |
| 316 | + %b_ext = fpext <4 x bfloat> %b to <4 x float> |
| 317 | + %a_extend = fpext bfloat %a to float |
| 318 | + %a_insert = insertelement <4 x float> poison, float %a_extend, i64 0 |
| 319 | + %a_shuffle = shufflevector <4 x float> %a_insert, <4 x float> poison, <4 x i32> zeroinitializer |
| 320 | + %fma = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a_shuffle, <4 x float> %b_ext, <4 x float> %rd) |
| 321 | + ret <4 x float> %fma |
| 322 | +} |
| 323 | + |
| 324 | +; Negative test: one fma operand is fpext'ed from half and the other from bfloat, so no vfwmaccbf16 is expected under either prefix; each operand is widened separately (vfwcvt.f.f.v / vfwcvtbf16.f.f.v) and a plain vfmacc.vv is used. |
| 325 | +define <4 x float> @mix(<4 x float> %rd, <4 x half> %a, <4 x bfloat> %b) { |
| 326 | +; ZVFBFWMA-LABEL: mix: |
| 327 | +; ZVFBFWMA: # %bb.0: |
| 328 | +; ZVFBFWMA-NEXT: vsetivli zero, 4, e16, mf2, ta, ma |
| 329 | +; ZVFBFWMA-NEXT: vfwcvt.f.f.v v11, v9 |
| 330 | +; ZVFBFWMA-NEXT: vfwcvtbf16.f.f.v v9, v10 |
| 331 | +; ZVFBFWMA-NEXT: vsetvli zero, zero, e32, m1, ta, ma |
| 332 | +; ZVFBFWMA-NEXT: vfmacc.vv v8, v11, v9 |
| 333 | +; ZVFBFWMA-NEXT: ret |
| 334 | +; |
| 335 | +; ZVFBFMIN-LABEL: mix: |
| 336 | +; ZVFBFMIN: # %bb.0: |
| 337 | +; ZVFBFMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma |
| 338 | +; ZVFBFMIN-NEXT: vfwcvt.f.f.v v11, v9 |
| 339 | +; ZVFBFMIN-NEXT: vfwcvtbf16.f.f.v v9, v10 |
| 340 | +; ZVFBFMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma |
| 341 | +; ZVFBFMIN-NEXT: vfmacc.vv v8, v11, v9 |
| 342 | +; ZVFBFMIN-NEXT: ret |
| 343 | + %a_ext = fpext <4 x half> %a to <4 x float> |
| 344 | + %b_ext = fpext <4 x bfloat> %b to <4 x float> |
| 345 | + %fma = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %a_ext, <4 x float> %b_ext, <4 x float> %rd) |
| 346 | + ret <4 x float> %fma |
| 347 | +} |
0 commit comments