Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions clang/include/clang/Basic/arm_sve.td
Original file line number Diff line number Diff line change
Expand Up @@ -2468,4 +2468,11 @@ let SVETargetGuard = "sve2,fp8", SMETargetGuard = "sme2,fp8" in {
// 8-bit floating-point convert to BFloat16/Float16 (top)
def SVF1CVTLT : SInst<"svcvtlt1_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, "aarch64_sve_fp8_cvtlt1", [VerifyRuntimeMode, SetsFPMR]>;
def SVF2CVTLT : SInst<"svcvtlt2_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, "aarch64_sve_fp8_cvtlt2", [VerifyRuntimeMode, SetsFPMR]>;

// BFloat16/Float16 convert, narrow and interleave to 8-bit floating-point
def SVFCVTN : SInst<"svcvtn_mf8[_{d}_x2]_fpm", "~2>", "bh", MergeNone, "aarch64_sve_fp8_cvtn", [VerifyRuntimeMode, SetsFPMR]>;

// Single-precision convert, narrow and interleave to 8-bit floating-point (top and bottom)
def SVFCVTNB : SInst<"svcvtnb_mf8[_f32_x2]_fpm", "~2>", "f", MergeNone, "aarch64_sve_fp8_cvtnb", [VerifyRuntimeMode, SetsFPMR]>;
def SVFCVTNT : SInst<"svcvtnt_mf8[_f32_x2]_fpm", "~~2>", "f", MergeNone, "aarch64_sve_fp8_cvtnt", [VerifyRuntimeMode, SetsFPMR]>;
}
101 changes: 101 additions & 0 deletions clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvtn.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CHECK-CXX

// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -x c++ -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CHECK-CXX

// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +fp8 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s

// REQUIRES: aarch64-registered-target

#ifdef __ARM_FEATURE_SME
#include <arm_sme.h>
#else
#include <arm_sve.h>
#endif

#ifdef SVE_OVERLOADED_FORMS
#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
#else
#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3
#endif

#ifdef __ARM_FEATURE_SME
#define STREAMING __arm_streaming
#else
#define STREAMING
#endif

// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svcvtn_f8_bf16(
// CHECK-SAME: <vscale x 8 x bfloat> [[ZN_ZM_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_ZM_COERCE1:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtn.nxv8bf16(<vscale x 8 x bfloat> [[ZN_ZM_COERCE0]], <vscale x 8 x bfloat> [[ZN_ZM_COERCE1]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
// CHECK-CXX-LABEL: define dso_local <vscale x 16 x i8> @_Z19test_svcvtn_f8_bf1614svbfloat16x2_tm(
// CHECK-CXX-SAME: <vscale x 8 x bfloat> [[ZN_ZM_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_ZM_COERCE1:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0:[0-9]+]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtn.nxv8bf16(<vscale x 8 x bfloat> [[ZN_ZM_COERCE0]], <vscale x 8 x bfloat> [[ZN_ZM_COERCE1]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svmfloat8_t test_svcvtn_f8_bf16(svbfloat16x2_t zn_zm, fpm_t fpm) STREAMING {
return SVE_ACLE_FUNC(svcvtn_mf8,_bf16_x2,_fpm)(zn_zm, fpm);
}

// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svcvtn_f8_f16(
// CHECK-SAME: <vscale x 8 x half> [[ZN_ZM_COERCE0:%.*]], <vscale x 8 x half> [[ZN_ZM_COERCE1:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtn.nxv8f16(<vscale x 8 x half> [[ZN_ZM_COERCE0]], <vscale x 8 x half> [[ZN_ZM_COERCE1]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
// CHECK-CXX-LABEL: define dso_local <vscale x 16 x i8> @_Z18test_svcvtn_f8_f1613svfloat16x2_tm(
// CHECK-CXX-SAME: <vscale x 8 x half> [[ZN_ZM_COERCE0:%.*]], <vscale x 8 x half> [[ZN_ZM_COERCE1:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtn.nxv8f16(<vscale x 8 x half> [[ZN_ZM_COERCE0]], <vscale x 8 x half> [[ZN_ZM_COERCE1]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svmfloat8_t test_svcvtn_f8_f16(svfloat16x2_t zn_zm, fpm_t fpm) STREAMING {
return SVE_ACLE_FUNC(svcvtn_mf8,_f16_x2,_fpm)(zn_zm, fpm);
}

// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svcvtnb_f8_f32(
// CHECK-SAME: <vscale x 4 x float> [[ZN_ZM_COERCE0:%.*]], <vscale x 4 x float> [[ZN_ZM_COERCE1:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtnb.nxv4f32(<vscale x 4 x float> [[ZN_ZM_COERCE0]], <vscale x 4 x float> [[ZN_ZM_COERCE1]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
// CHECK-CXX-LABEL: define dso_local <vscale x 16 x i8> @_Z19test_svcvtnb_f8_f3213svfloat32x2_tm(
// CHECK-CXX-SAME: <vscale x 4 x float> [[ZN_ZM_COERCE0:%.*]], <vscale x 4 x float> [[ZN_ZM_COERCE1:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtnb.nxv4f32(<vscale x 4 x float> [[ZN_ZM_COERCE0]], <vscale x 4 x float> [[ZN_ZM_COERCE1]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svmfloat8_t test_svcvtnb_f8_f32(svfloat32x2_t zn_zm, fpm_t fpm) STREAMING {
return SVE_ACLE_FUNC(svcvtnb_mf8,_f32_x2,_fpm)(zn_zm, fpm);
}

// CHECK-LABEL: define dso_local <vscale x 16 x i8> @test_svcvtnt_f8_f32(
// CHECK-SAME: <vscale x 16 x i8> [[ZD:%.*]], <vscale x 4 x float> [[ZN_ZM_COERCE0:%.*]], <vscale x 4 x float> [[ZN_ZM_COERCE1:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtnt.nxv4f32(<vscale x 16 x i8> [[ZD]], <vscale x 4 x float> [[ZN_ZM_COERCE0]], <vscale x 4 x float> [[ZN_ZM_COERCE1]])
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
// CHECK-CXX-LABEL: define dso_local <vscale x 16 x i8> @_Z19test_svcvtnt_f8_f32u13__SVMfloat8_t13svfloat32x2_tm(
// CHECK-CXX-SAME: <vscale x 16 x i8> [[ZD:%.*]], <vscale x 4 x float> [[ZN_ZM_COERCE0:%.*]], <vscale x 4 x float> [[ZN_ZM_COERCE1:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
// CHECK-CXX-NEXT: [[ENTRY:.*:]]
// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
// CHECK-CXX-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtnt.nxv4f32(<vscale x 16 x i8> [[ZD]], <vscale x 4 x float> [[ZN_ZM_COERCE0]], <vscale x 4 x float> [[ZN_ZM_COERCE1]])
// CHECK-CXX-NEXT: ret <vscale x 16 x i8> [[TMP0]]
//
svmfloat8_t test_svcvtnt_f8_f32(svmfloat8_t zd, svfloat32x2_t zn_zm, fpm_t fpm) STREAMING {
return SVE_ACLE_FUNC(svcvtnt_mf8,_f32_x2,_fpm)(zd, zn_zm, fpm);
}
11 changes: 10 additions & 1 deletion clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_fp8.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// REQUIRES: aarch64-registered-target

// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -verify -emit-llvm -o - %s
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -verify -emit-llvm -o - %s

#include <arm_sve.h>

Expand All @@ -21,4 +21,13 @@ void test_features(svmfloat8_t zn, fpm_t fpm) {
// expected-error@-1 {{'svcvtlt1_f16_mf8_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}}
svcvtlt2_f16_mf8_fpm(zn, fpm);
// expected-error@-1 {{'svcvtlt2_f16_mf8_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}}

svcvtn_mf8_bf16_x2_fpm(svcreate2(svundef_bf16(), svundef_bf16()), fpm);
// expected-error@-1 {{'svcvtn_mf8_bf16_x2_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}}
svcvtn_mf8_f16_x2_fpm(svcreate2(svundef_f16(), svundef_f16()), fpm);
// expected-error@-1 {{'svcvtn_mf8_f16_x2_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}}
svcvtnb_mf8_f32_x2_fpm(svcreate2(svundef_f32(), svundef_f32()), fpm);
// expected-error@-1 {{'svcvtnb_mf8_f32_x2_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}}
svcvtnt_mf8_f32_x2_fpm(zn, svcreate2(svundef_f32(), svundef_f32()), fpm);
// expected-error@-1 {{'svcvtnt_mf8_f32_x2_fpm' needs target feature (sve,sve2,fp8)|(sme,sme2,fp8)}}
}
14 changes: 14 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsAArch64.td
Original file line number Diff line number Diff line change
Expand Up @@ -3872,6 +3872,20 @@ let TargetPrefix = "aarch64" in {
def int_aarch64_sve_fp8_cvtlt1 : SVE2_FP8_Cvt;
def int_aarch64_sve_fp8_cvtlt2 : SVE2_FP8_Cvt;

// SVE Narrowing Conversions
class SVE2_FP8_Narrow_Cvt
: DefaultAttrsIntrinsic<[llvm_nxv16i8_ty],
[llvm_anyvector_ty, LLVMMatchType<0>],
[IntrReadMem, IntrInaccessibleMemOnly]>;

def int_aarch64_sve_fp8_cvtn : SVE2_FP8_Narrow_Cvt;
def int_aarch64_sve_fp8_cvtnb : SVE2_FP8_Narrow_Cvt;

def int_aarch64_sve_fp8_cvtnt
: DefaultAttrsIntrinsic<[llvm_nxv16i8_ty],
[llvm_nxv16i8_ty, llvm_anyvector_ty, LLVMMatchType<0>],
[IntrReadMem, IntrInaccessibleMemOnly]>;

class SME2_FP8_CVT_X2_Single_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
[llvm_nxv16i8_ty],
Expand Down
9 changes: 5 additions & 4 deletions llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -4379,10 +4379,11 @@ defm BF1CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b10, "bf1cvtlt", nxv8bf16, int_aar
defm BF2CVTLT_ZZ : sve2_fp8_cvt_single<0b1, 0b11, "bf2cvtlt", nxv8bf16, int_aarch64_sve_fp8_cvtlt2>;

// FP8 downconvert
defm FCVTN_Z2Z_HtoB : sve2_fp8_down_cvt_single<0b00, "fcvtn", ZZ_h_mul_r>;
defm FCVTNB_Z2Z_StoB : sve2_fp8_down_cvt_single<0b01, "fcvtnb", ZZ_s_mul_r>;
defm BFCVTN_Z2Z_HtoB : sve2_fp8_down_cvt_single<0b10, "bfcvtn", ZZ_h_mul_r>;
defm FCVTNT_Z2Z_StoB : sve2_fp8_down_cvt_single<0b11, "fcvtnt", ZZ_s_mul_r>;
defm FCVTN_Z2Z_HtoB : sve2_fp8_down_cvt_single<0b00, "fcvtn", ZZ_h_mul_r, nxv8f16, int_aarch64_sve_fp8_cvtn>;
defm FCVTNB_Z2Z_StoB : sve2_fp8_down_cvt_single<0b01, "fcvtnb", ZZ_s_mul_r, nxv4f32, int_aarch64_sve_fp8_cvtnb>;
defm BFCVTN_Z2Z_HtoB : sve2_fp8_down_cvt_single<0b10, "bfcvtn", ZZ_h_mul_r, nxv8bf16, int_aarch64_sve_fp8_cvtn>;

defm FCVTNT_Z2Z_StoB : sve2_fp8_down_cvt_single_top<0b11, "fcvtnt", ZZ_s_mul_r, nxv4f32, int_aarch64_sve_fp8_cvtnt>;
} // End HasSVE2orSME2, HasFP8

let Predicates = [HasSVE2orSME2, HasFAMINMAX] in {
Expand Down
37 changes: 36 additions & 1 deletion llvm/lib/Target/AArch64/SVEInstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -10794,10 +10794,45 @@ class sve2_fp8_down_cvt_single<bits<2> opc, string mnemonic,
let Inst{5} = 0b0;
let Inst{4-0} = Zd;
let Uses = [FPMR, FPCR];

let mayLoad = 1;
let mayStore = 0;
}

multiclass sve2_fp8_down_cvt_single<bits<2> opc, string mnemonic, RegisterOperand src> {
multiclass sve2_fp8_down_cvt_single<bits<2> opc, string mnemonic, RegisterOperand src,
ValueType ty, SDPatternOperator op> {
def NAME : sve2_fp8_down_cvt_single<opc, mnemonic, ZPR8, src>;

def : Pat<(nxv16i8 (op ty:$Zn1, ty:$Zn2)),
(!cast<Instruction>(NAME) (REG_SEQUENCE ZPR2Mul2, $Zn1, zsub0, $Zn2, zsub1))>;
}

class sve2_fp8_down_cvt_single_top<bits<2> opc, string mnemonic, RegisterOperand src_ty>
: I<(outs ZPR8:$Zd), (ins ZPR8:$_Zd, src_ty:$Zn), mnemonic, "\t$Zd, $Zn","", []>, Sched<[]> {
bits<5> Zd;
bits<4> Zn;

let Inst{31-12} = 0b01100101000010100011;
let Inst{11-10} = opc;
let Inst{9-6} = Zn;
let Inst{5} = 0b0;
let Inst{4-0} = Zd;

let Constraints = "$Zd = $_Zd";
let DestructiveInstType = DestructiveOther;
let ElementSize = ZPR8.ElementSize;

let Uses = [FPMR, FPCR];
let mayLoad = 1;
let mayStore = 0;
}

multiclass sve2_fp8_down_cvt_single_top<bits<2> opc, string mnemonic, RegisterOperand src_ty,
ValueType ty, SDPatternOperator op> {
def NAME : sve2_fp8_down_cvt_single_top<opc, mnemonic, src_ty>;

def : Pat<(nxv16i8 (op nxv16i8:$Zd, ty:$Zn1, ty:$Zn2)),
(!cast<Instruction>(NAME) $Zd, (REG_SEQUENCE ZPR2Mul2, $Zn1, zsub0, $Zn2, zsub1))>;
}

// FP8 Widening Multiply-Add Long - Indexed Group
Expand Down
49 changes: 49 additions & 0 deletions llvm/test/CodeGen/AArch64/fp8-sve-cvtn.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mattr=+sve2,+fp8 < %s | FileCheck %s
; RUN: llc -mattr=+sme2,+fp8 --force-streaming < %s | FileCheck %s

target triple = "aarch64-linux"

define <vscale x 16 x i8> @cvtn_bf16(<vscale x 8 x bfloat> %s1, <vscale x 8 x bfloat> %s2) {
; CHECK-LABEL: cvtn_bf16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: bfcvtn z0.b, { z0.h, z1.h }
; CHECK-NEXT: ret
%r = call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtn.nxv8bf16(<vscale x 8 x bfloat> %s1, <vscale x 8 x bfloat> %s2)
ret <vscale x 16 x i8> %r
}

define <vscale x 16 x i8> @cvtn_f16(<vscale x 8 x half> %s1, <vscale x 8 x half> %s2) {
; CHECK-LABEL: cvtn_f16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: fcvtn z0.b, { z0.h, z1.h }
; CHECK-NEXT: ret
%r = call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtn.nxv8f16(<vscale x 8 x half> %s1, <vscale x 8 x half> %s2)
ret <vscale x 16 x i8> %r
}

define <vscale x 16 x i8> @cvtnb_f32(<vscale x 4 x float> %s1, <vscale x 4 x float> %s2) {
; CHECK-LABEL: cvtnb_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: fcvtnb z0.b, { z0.s, z1.s }
; CHECK-NEXT: ret
%r = call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtnb.nxv4f32(<vscale x 4 x float> %s1, <vscale x 4 x float> %s2)
ret <vscale x 16 x i8> %r
}

define <vscale x 16 x i8> @cvtnt_f32(<vscale x 16 x i8> %d, <vscale x 4 x float> %s1, <vscale x 4 x float> %s2) {
; CHECK-LABEL: cvtnt_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov z3.d, z2.d
; CHECK-NEXT: mov z2.d, z1.d
; CHECK-NEXT: fcvtnt z0.b, { z2.s, z3.s }
; CHECK-NEXT: ret
%r = call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtnt.nxv4f32(<vscale x 16 x i8> %d, <vscale x 4 x float> %s1, <vscale x 4 x float> %s2)
ret <vscale x 16 x i8> %r
}
Loading