@@ -1,32 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve-f16f32mm < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve-f16f32mm < %s | FileCheck %s

-define <vscale x 4 x float> @_Z1tu13__SVFloat32_tu13__SVFloat16_tS0_(<vscale x 4 x float> %acc, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
-; CHECK-LABEL: _Z1tu13__SVFloat32_tu13__SVFloat16_tS0_:
+define <vscale x 4 x float> @fmmla_f32f16(<vscale x 4 x float> %acc, <vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: fmmla_f32f16:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT:    addvl sp, sp, #-3
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG
-; CHECK-NEXT:    .cfi_offset w29, -16
-; CHECK-NEXT:    str z0, [sp, #2, mul vl]
 ; CHECK-NEXT:    fmmla z0.s, z1.h, z2.h
-; CHECK-NEXT:    str z1, [sp, #1, mul vl]
-; CHECK-NEXT:    str z2, [sp]
-; CHECK-NEXT:    addvl sp, sp, #3
-; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
 entry:
-  %acc.addr = alloca <vscale x 4 x float>, align 16
-  %a.addr = alloca <vscale x 8 x half>, align 16
-  %b.addr = alloca <vscale x 8 x half>, align 16
-  store <vscale x 4 x float> %acc, ptr %acc.addr, align 16
-  store <vscale x 8 x half> %a, ptr %a.addr, align 16
-  store <vscale x 8 x half> %b, ptr %b.addr, align 16
-  %0 = load <vscale x 4 x float>, ptr %acc.addr, align 16
-  %1 = load <vscale x 8 x half>, ptr %a.addr, align 16
-  %2 = load <vscale x 8 x half>, ptr %b.addr, align 16
-  %3 = call <vscale x 4 x float> @llvm.aarch64.sve.fmmla.f16f32(<vscale x 4 x float> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2)
-  ret <vscale x 4 x float> %3
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmmla.nxv4f32.nxv8f16(<vscale x 4 x float> %acc, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+  ret <vscale x 4 x float> %out
 }
-
-declare <vscale x 4 x float> @llvm.aarch64.sve.fmmla.f16f32(<vscale x 4 x float>, <vscale x 8 x half>, <vscale x 8 x half>)