Skip to content

Commit ed401d7

Browse files
[AArch64] Add Neon FP8 conversion intrinsics
1 parent dbfe4c9 commit ed401d7

File tree

8 files changed

+232
-24
lines changed

8 files changed

+232
-24
lines changed

clang/include/clang/Basic/arm_neon.td

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2125,6 +2125,29 @@ let ArchGuard = "defined(__aarch64__)", TargetGuard = "lut" in {
21252125
}
21262126
}
21272127

2128+
let ArchGuard = "defined(__aarch64__)", TargetGuard = "fp8,bf16,neon" in {
2129+
def VBF1CVT_BF16_MF8 : VInst<"vcvt1_bf16_mf8_fpm", "(QB).V", "m">;
2130+
def VBF1CVT_LOW_BF16_MF8 : VInst<"vcvt1_low_bf16_mf8_fpm", "B.V", "Qm">;
2131+
def VBF2CVTL_BF16_MF8 : VInst<"vcvt2_bf16_mf8_fpm", "(QB).V", "m">;
2132+
def VBF2CVTL_LOW_BF16_MF8 : VInst<"vcvt2_low_bf16_mf8_fpm", "B.V", "Qm">;
2133+
def VBF1CVTL2_HIGH_BF16_MF8 : VInst<"vcvt1_high_bf16_mf8_fpm", "B.V", "Qm">;
2134+
def VBF2CVTL2_HIGH_BF16_MF8 : VInst<"vcvt2_high_bf16_mf8_fpm", "B.V", "Qm">;
2135+
}
2136+
2137+
let ArchGuard = "defined(__aarch64__)", TargetGuard = "fp8,neon" in {
2138+
def VF1CVT_F16_MF8 : VInst<"vcvt1_f16_mf8_fpm", "(>QF).V", "m">;
2139+
def VF1CVT_LOW_F16_MF8 : VInst<"vcvt1_low_f16_mf8_fpm", "(>F).V", "Qm">;
2140+
def VF2CVTL_F16_MF8 : VInst<"vcvt2_f16_mf8_fpm", "(>QF).V", "m">;
2141+
def VF2CVTL_LOW_F16_MF8 : VInst<"vcvt2_low_f16_mf8_fpm", "(>F).V", "Qm">;
2142+
def VF1CVTL2_HIGH_F16_MF8 : VInst<"vcvt1_high_f16_mf8_fpm", "(>F).V", "Qm">;
2143+
def VF2CVTL2_HIGH_F16_MF8 : VInst<"vcvt2_high_f16_mf8_fpm", "(>F).V", "Qm">;
2144+
2145+
def VCVTN_LOW_F8_F32 : VInst<"vcvt_mf8_f32_fpm", ".(>>QF)(>>QF)V", "m">;
2146+
def VCVTN_HIGH_F8_F32 : VInst<"vcvt_high_mf8_f32_fpm", ".(q)(>>F)(>>F)V", "Qm">;
2147+
def VCVTN_F8_F16 : VInst<"vcvt_mf8_f16_fpm", ".(>F)(>F)V", "m">;
2148+
def VCVTNQ_F8_F16 : VInst<"vcvtq_mf8_f16_fpm", ".(>F)(>F)V", "Qm">;
2149+
}
2150+
21282151
let ArchGuard = "defined(__aarch64__)", TargetGuard = "neon,faminmax" in {
21292152
def FAMIN : WInst<"vamin", "...", "fhQdQfQh">;
21302153
def FAMAX : WInst<"vamax", "...", "fhQdQfQh">;
@@ -2134,4 +2157,4 @@ let ArchGuard = "defined(__aarch64__)", TargetGuard = "fp8,neon" in {
21342157
// fscale
21352158
def FSCALE_V128 : WInst<"vscale", "..(.S)", "QdQfQh">;
21362159
def FSCALE_V64 : WInst<"vscale", "(.q)(.q)(.qS)", "fh">;
2137-
}
2160+
}

clang/include/clang/Basic/arm_neon_incl.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,7 @@ def OP_UNAVAILABLE : Operation {
243243
// B: change to BFloat16
244244
// P: change to polynomial category.
245245
// p: change polynomial to equivalent integer category. Otherwise nop.
246+
// V: change to fpm_t
246247
//
247248
// >: double element width (vector size unchanged).
248249
// <: half element width (vector size unchanged).
@@ -301,6 +302,7 @@ class Inst <string n, string p, string t, Operation o, list<ImmCheck> ch = []>{
301302
class SInst<string n, string p, string t, list<ImmCheck> ch = []> : Inst<n, p, t, OP_NONE, ch> {}
302303
class IInst<string n, string p, string t, list<ImmCheck> ch = []> : Inst<n, p, t, OP_NONE, ch> {}
303304
class WInst<string n, string p, string t, list<ImmCheck> ch = []> : Inst<n, p, t, OP_NONE, ch> {}
305+
class VInst<string n, string p, string t> : Inst<n, p, t, OP_NONE> {}
304306

305307
// The following instruction classes are implemented via operators
306308
// instead of builtins. As such these declarations are only used for

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 119 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6908,6 +6908,13 @@ Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
69086908
return Builder.CreateCall(F, Ops, name);
69096909
}
69106910

6911+
Value *CodeGenFunction::EmitFP8NeonCall(Function *F,
6912+
SmallVectorImpl<Value *> &Ops,
6913+
Value *FPM, const char *name) {
6914+
Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr), FPM);
6915+
return EmitNeonCall(F, Ops, name);
6916+
}
6917+
69116918
Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
69126919
bool neg) {
69136920
int SV = cast<ConstantInt>(V)->getSExtValue();
@@ -14081,7 +14088,118 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
1408114088
Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2;
1408214089
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq_x2");
1408314090
}
14084-
14091+
case NEON::BI__builtin_neon_vcvt1_bf16_mf8_fpm:
14092+
case NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm:
14093+
case NEON::BI__builtin_neon_vcvt1_high_bf16_mf8_fpm: {
14094+
Int = Intrinsic::aarch64_neon_fp8_cvtl1;
14095+
llvm::Type *Tys[2];
14096+
Tys[0] = llvm::FixedVectorType::get(BFloatTy, 8);
14097+
// Op[1] is mfloat8x16_t, but the intrinsic converts only the lower part of
14098+
// the vector.
14099+
if (BuiltinID == NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm) {
14100+
Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
14101+
/*isQuad*/ false));
14102+
Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], Builder.getInt64(0));
14103+
} else
14104+
Tys[1] = Ops[0]->getType();
14105+
llvm::Value *FPM =
14106+
EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14107+
return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vbfcvt1");
14108+
}
14109+
case NEON::BI__builtin_neon_vcvt2_bf16_mf8_fpm:
14110+
case NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm:
14111+
case NEON::BI__builtin_neon_vcvt2_high_bf16_mf8_fpm: {
14112+
Int = Intrinsic::aarch64_neon_fp8_cvtl2;
14113+
llvm::Type *Tys[2];
14114+
Tys[0] = llvm::FixedVectorType::get(BFloatTy, 8);
14115+
// Op[1] is mfloat8x16_t, but the intrinsic converts only the lower
14116+
// part of the vector.
14117+
if (BuiltinID == NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm) {
14118+
Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
14119+
/*isQuad*/ false));
14120+
Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], Builder.getInt64(0));
14121+
} else
14122+
Tys[1] = Ops[0]->getType();
14123+
llvm::Value *FPM =
14124+
EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14125+
return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vbfcvt2");
14126+
}
14127+
case NEON::BI__builtin_neon_vcvt1_f16_mf8_fpm:
14128+
case NEON::BI__builtin_neon_vcvt1_low_f16_mf8_fpm:
14129+
case NEON::BI__builtin_neon_vcvt1_high_f16_mf8_fpm: {
14130+
Int = Intrinsic::aarch64_neon_fp8_cvtl1;
14131+
llvm::Type *Tys[2];
14132+
Tys[0] = llvm::FixedVectorType::get(HalfTy, 8);
14133+
// Op[1] is mfloat8x16_t, but the intrinsic converts only the lower
14134+
// part of the vector.
14135+
if (BuiltinID == NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm) {
14136+
Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
14137+
/*isQuad*/ false));
14138+
Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], Builder.getInt64(0));
14139+
} else
14140+
Tys[1] = Ops[0]->getType();
14141+
llvm::Value *FPM =
14142+
EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14143+
return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vbfcvt1");
14144+
}
14145+
case NEON::BI__builtin_neon_vcvt2_f16_mf8_fpm:
14146+
case NEON::BI__builtin_neon_vcvt2_low_f16_mf8_fpm:
14147+
case NEON::BI__builtin_neon_vcvt2_high_f16_mf8_fpm: {
14148+
Int = Intrinsic::aarch64_neon_fp8_cvtl2;
14149+
llvm::Type *Tys[2];
14150+
Tys[0] = llvm::FixedVectorType::get(HalfTy, 8);
14151+
// Op[1] is mfloat8x16_t, but the intrinsic converts only the lower
14152+
// part of the vector.
14153+
if (BuiltinID == NEON::BI__builtin_neon_vcvt2_low_bf16_mf8_fpm) {
14154+
Tys[1] = GetNeonType(this, NeonTypeFlags(Type.getEltType(), false,
14155+
/*isQuad*/ false));
14156+
Ops[0] = Builder.CreateExtractVector(Tys[1], Ops[0], Builder.getInt64(0));
14157+
} else
14158+
Tys[1] = Ops[0]->getType();
14159+
llvm::Value *FPM =
14160+
EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14161+
return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vbfcvt2");
14162+
}
14163+
case NEON::BI__builtin_neon_vcvt_mf8_f32_fpm: {
14164+
Int = Intrinsic::aarch64_neon_fp8_fcvtn;
14165+
llvm::Type *Tys[2];
14166+
Tys[0] = llvm::FixedVectorType::get(Int8Ty, 8);
14167+
Tys[1] = Ops[0]->getType();
14168+
llvm::Value *FPM =
14169+
EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14170+
return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vfcvtn");
14171+
}
14172+
case NEON::BI__builtin_neon_vcvt_mf8_f16_fpm: {
14173+
Int = Intrinsic::aarch64_neon_fp8_fcvtn;
14174+
llvm::Type *Tys[2];
14175+
Tys[0] = llvm::FixedVectorType::get(Int8Ty, 8);
14176+
// Gets the expected type, because arm_neon.h casts float16x4_t to int8x8_t
14177+
Tys[1] = llvm::FixedVectorType::get(HalfTy, 4);
14178+
llvm::Value *FPM =
14179+
EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14180+
return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vfcvtn");
14181+
}
14182+
case NEON::BI__builtin_neon_vcvtq_mf8_f16_fpm: {
14183+
Int = Intrinsic::aarch64_neon_fp8_fcvtn;
14184+
llvm::Type *Tys[2];
14185+
Tys[0] = llvm::FixedVectorType::get(Int8Ty, 16);
14186+
// Gets the expected type, because arm_neon.h casts float16x8_t to int8x16_t
14187+
Tys[1] = llvm::FixedVectorType::get(HalfTy, 8);
14188+
llvm::Value *FPM =
14189+
EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14190+
return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vfcvtn");
14191+
}
14192+
case NEON::BI__builtin_neon_vcvt_high_mf8_f32_fpm: {
14193+
Int = Intrinsic::aarch64_neon_fp8_fcvtn2;
14194+
llvm::Type *Tys[2];
14195+
Tys[0] = llvm::FixedVectorType::get(Int8Ty, 16);
14196+
Tys[1] = Ops[1]->getType();
14197+
Ops[0] = Builder.CreateInsertVector(Tys[0], PoisonValue::get(Tys[0]),
14198+
Ops[0], Builder.getInt64(0));
14199+
llvm::Value *FPM =
14200+
EmitScalarOrConstFoldImmArg(ICEArguments, E->getNumArgs() - 1, E);
14201+
return EmitFP8NeonCall(CGM.getIntrinsic(Int, Tys), Ops, FPM, "vfcvtn2");
14202+
}
1408514203
case NEON::BI__builtin_neon_vamin_f16:
1408614204
case NEON::BI__builtin_neon_vaminq_f16:
1408714205
case NEON::BI__builtin_neon_vamin_f32:

clang/lib/CodeGen/CodeGenFunction.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4663,6 +4663,9 @@ class CodeGenFunction : public CodeGenTypeCache {
46634663
SmallVectorImpl<llvm::Value*> &O,
46644664
const char *name,
46654665
unsigned shift = 0, bool rightshift = false);
4666+
llvm::Value *EmitFP8NeonCall(llvm::Function *F,
4667+
SmallVectorImpl<llvm::Value *> &O,
4668+
llvm::Value *FPM, const char *name);
46664669
llvm::Value *EmitNeonSplat(llvm::Value *V, llvm::Constant *Idx,
46674670
const llvm::ElementCount &Count);
46684671
llvm::Value *EmitNeonSplat(llvm::Value *V, llvm::Constant *Idx);

clang/utils/TableGen/NeonEmitter.cpp

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ enum ClassKind {
7474
ClassI, // generic integer instruction, e.g., "i8" suffix
7575
ClassS, // signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix
7676
ClassW, // width-specific instruction, e.g., "8" suffix
77+
ClassV, // void-suffix instruction, no suffix
7778
ClassB, // bitcast arguments with enum argument to specify type
7879
ClassL, // Logical instructions which are op instructions
7980
// but we need to not emit any suffix for in our
@@ -144,7 +145,7 @@ class Type {
144145
private:
145146
TypeSpec TS;
146147

147-
enum TypeKind { Void, Float, SInt, UInt, Poly, BFloat16, MFloat8 };
148+
enum TypeKind { Void, Float, SInt, UInt, Poly, BFloat16, MFloat8, FPM };
148149
TypeKind Kind;
149150
bool Immediate, Constant, Pointer;
150151
// ScalarForMangling and NoManglingQ are really not suited to live here as
@@ -198,6 +199,7 @@ class Type {
198199
bool isVoid() const { return Kind == Void; }
199200
bool isBFloat16() const { return Kind == BFloat16; }
200201
bool isMFloat8() const { return Kind == MFloat8; }
202+
bool isFPM() const { return Kind == FPM; }
201203
unsigned getNumElements() const { return Bitwidth / ElementBitwidth; }
202204
unsigned getSizeInBits() const { return Bitwidth; }
203205
unsigned getElementSizeInBits() const { return ElementBitwidth; }
@@ -600,6 +602,7 @@ class NeonEmitter {
600602
const Record *SI = R.getClass("SInst");
601603
const Record *II = R.getClass("IInst");
602604
const Record *WI = R.getClass("WInst");
605+
const Record *VI = R.getClass("VInst");
603606
const Record *SOpI = R.getClass("SOpInst");
604607
const Record *IOpI = R.getClass("IOpInst");
605608
const Record *WOpI = R.getClass("WOpInst");
@@ -609,6 +612,7 @@ class NeonEmitter {
609612
ClassMap[SI] = ClassS;
610613
ClassMap[II] = ClassI;
611614
ClassMap[WI] = ClassW;
615+
ClassMap[VI] = ClassV;
612616
ClassMap[SOpI] = ClassS;
613617
ClassMap[IOpI] = ClassI;
614618
ClassMap[WOpI] = ClassW;
@@ -641,6 +645,9 @@ class NeonEmitter {
641645
std::string Type::str() const {
642646
if (isVoid())
643647
return "void";
648+
if (isFPM())
649+
return "fpm_t";
650+
644651
std::string S;
645652

646653
if (isInteger() && !isSigned())
@@ -699,6 +706,8 @@ std::string Type::builtin_str() const {
699706
} else if (isMFloat8()) {
700707
assert(ElementBitwidth == 8 && "MFloat8 can only be 8 bits");
701708
S += "m";
709+
} else if (isFPM()) {
710+
S += "UWi";
702711
} else
703712
switch (ElementBitwidth) {
704713
case 16: S += "h"; break;
@@ -888,6 +897,7 @@ void Type::applyTypespec(bool &Quad) {
888897
case 'm':
889898
Kind = MFloat8;
890899
ElementBitwidth = 8;
900+
NoManglingQ = true;
891901
break;
892902
default:
893903
llvm_unreachable("Unhandled type code!");
@@ -925,6 +935,13 @@ void Type::applyModifiers(StringRef Mods) {
925935
case 'P':
926936
Kind = Poly;
927937
break;
938+
case 'V':
939+
Kind = FPM;
940+
Bitwidth = ElementBitwidth = 64;
941+
NumVectors = 0;
942+
Immediate = Constant = Pointer = false;
943+
ScalarForMangling = NoManglingQ = true;
944+
break;
928945
case '>':
929946
assert(ElementBitwidth < 128);
930947
ElementBitwidth *= 2;
@@ -1000,6 +1017,9 @@ std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) const {
10001017
if (CK == ClassB && TargetGuard == "neon")
10011018
return "";
10021019

1020+
if (this->CK == ClassV)
1021+
return "";
1022+
10031023
if (T.isBFloat16())
10041024
return "bf16";
10051025

@@ -1349,7 +1369,7 @@ void Intrinsic::emitBodyAsBuiltinCall() {
13491369
if (!protoHasScalar())
13501370
LocalCK = ClassB;
13511371

1352-
if (!getReturnType().isVoid() && !SRet)
1372+
if (!getReturnType().isVoid() && !SRet && !getReturnType().isMFloat8())
13531373
S += "(" + RetVar.getType().str() + ") ";
13541374

13551375
S += "__builtin_neon_" + mangleName(std::string(N), LocalCK) + "(";

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1004,6 +1004,28 @@ def int_aarch64_st64b: Intrinsic<[], !listconcat([llvm_ptr_ty], data512)>;
10041004
def int_aarch64_st64bv: Intrinsic<[llvm_i64_ty], !listconcat([llvm_ptr_ty], data512)>;
10051005
def int_aarch64_st64bv0: Intrinsic<[llvm_i64_ty], !listconcat([llvm_ptr_ty], data512)>;
10061006

1007+
//
1008+
// Neon FP8 intrinsics
1009+
//
1010+
1011+
// Conversions
1012+
class AdvSIMD_FP8_1VectorArg_Long_Intrinsic
1013+
: DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrReadMem, IntrInaccessibleMemOnly]>;
1014+
1015+
def int_aarch64_neon_fp8_cvtl1 : AdvSIMD_FP8_1VectorArg_Long_Intrinsic;
1016+
def int_aarch64_neon_fp8_cvtl2 : AdvSIMD_FP8_1VectorArg_Long_Intrinsic;
1017+
1018+
def int_aarch64_neon_fp8_fcvtn
1019+
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1020+
[llvm_anyvector_ty,
1021+
LLVMMatchType<1>],
1022+
[IntrReadMem, IntrInaccessibleMemOnly]>;
1023+
def int_aarch64_neon_fp8_fcvtn2
1024+
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
1025+
[LLVMMatchType<0>,
1026+
llvm_anyvector_ty,
1027+
LLVMMatchType<1>],
1028+
[IntrReadMem, IntrInaccessibleMemOnly]>;
10071029
}
10081030

10091031
def llvm_nxv1i1_ty : LLVMType<nxv1i1>;

llvm/lib/Target/AArch64/AArch64InstrFormats.td

Lines changed: 33 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -6551,17 +6551,30 @@ class BaseSIMDThreeVectors<bit Q, bit U, bits<2> size, bits<4> op,
65516551

65526552

65536553
// FCVTN (FP16 to FP8)
6554-
multiclass SIMDThreeSameSizeVectorCvt<string asm> {
6555-
def v8f8 : BaseSIMDThreeVectors<0b0, 0b0, 0b01, 0b1110, V64, V64, asm, ".8b",".4h">;
6556-
def v16f8 : BaseSIMDThreeVectors<0b1, 0b0, 0b01, 0b1110, V128, V128, asm, ".16b", ".8h">;
6554+
multiclass SIMD_FP8_CVTN_F16<string asm, SDPatternOperator Op> {
6555+
let Uses = [FPMR, FPCR], mayLoad = 1 in {
6556+
def v8f8 : BaseSIMDThreeVectors<0b0, 0b0, 0b01, 0b1110, V64, V64, asm, ".8b",".4h">;
6557+
def v16f8 : BaseSIMDThreeVectors<0b1, 0b0, 0b01, 0b1110, V128, V128, asm, ".16b", ".8h">;
6558+
}
6559+
def : Pat<(v8i8 (Op (v4f16 V64:$Rn), (v4f16 V64:$Rm))),
6560+
(!cast<Instruction>(NAME # v8f8) V64:$Rn, V64:$Rm)>;
6561+
def : Pat<(v16i8 (Op (v8f16 V128:$Rn), (v8f16 V128:$Rm))),
6562+
(!cast<Instruction>(NAME # v16f8) V128:$Rn, V128:$Rm)>;
65576563
}
65586564

6559-
// TODO : Create v16f8 value type
65606565
// FCVTN, FCVTN2 (FP32 to FP8)
6561-
multiclass SIMDThreeVectorCvt<string asm> {
6562-
def v8f8 : BaseSIMDThreeVectors<0b0, 0b0, 0b00, 0b1110, V64, V128, asm, ".8b", ".4s">;
6563-
def 2v16f8 : BaseSIMDThreeSameVectorDot<0b1, 0b0, 0b00, 0b1110, asm#2, ".16b", ".4s",
6564-
V128, v16i8, v4f32, null_frag>;
6566+
multiclass SIMD_FP8_CVTN_F32<string asm, SDPatternOperator Op> {
6567+
let Uses = [FPMR, FPCR], mayLoad = 1 in {
6568+
def v8f8 : BaseSIMDThreeVectors<0b0, 0b0, 0b00, 0b1110, V64, V128, asm, ".8b", ".4s">;
6569+
def 2v16f8 : BaseSIMDThreeSameVectorDot<0b1, 0b0, 0b00, 0b1110, asm#2, ".16b", ".4s",
6570+
V128, v16i8, v4f32, null_frag>;
6571+
}
6572+
6573+
def : Pat<(v8i8 (Op (v4f32 V128:$Rn), (v4f32 V128:$Rm))),
6574+
(!cast<Instruction>(NAME # v8f8) V128:$Rn, V128:$Rm)>;
6575+
6576+
def : Pat<(v16i8 (!cast<SDPatternOperator>(Op # 2) (v16i8 V128:$_Rd), (v4f32 V128:$Rn), (v4f32 V128:$Rm))),
6577+
(!cast<Instruction>(NAME # 2v16f8) V128:$_Rd, V128:$Rn, V128:$Rm)>;
65656578
}
65666579

65676580
// TODO: Create a new Value Type v8f8 and v16f8
@@ -7025,11 +7038,18 @@ multiclass SIMDMixedTwoVector<bit U, bits<5> opc, string asm,
70257038
//----------------------------------------------------------------------------
70267039
// FP8 Advanced SIMD two-register miscellaneous
70277040
//----------------------------------------------------------------------------
7028-
multiclass SIMDMixedTwoVectorFP8<bits<2>sz, string asm> {
7029-
def v8f16 : BaseSIMDMixedTwoVector<0b0, 0b1, sz, 0b10111, V64, V128,
7030-
asm, ".8h", ".8b", []>;
7031-
def 2v8f16 : BaseSIMDMixedTwoVector<0b1, 0b1, sz, 0b10111, V128, V128,
7032-
asm#2, ".8h", ".16b", []>;
7041+
multiclass SIMD_FP8_CVTL<bits<2>sz, string asm, ValueType dty, SDPatternOperator Op> {
7042+
let Uses=[FPMR, FPCR], mayLoad = 1 in {
7043+
def NAME : BaseSIMDMixedTwoVector<0b0, 0b1, sz, 0b10111, V64, V128,
7044+
asm, ".8h", ".8b", []>;
7045+
def NAME#2 : BaseSIMDMixedTwoVector<0b1, 0b1, sz, 0b10111, V128, V128,
7046+
asm#2, ".8h", ".16b", []>;
7047+
}
7048+
def : Pat<(dty (Op (v8i8 V64:$Rn))),
7049+
(!cast<Instruction>(NAME) V64:$Rn)>;
7050+
7051+
def : Pat<(dty (Op (v16i8 V128:$Rn))),
7052+
(!cast<Instruction>(NAME#2) V128:$Rn)>;
70337053
}
70347054

70357055
class BaseSIMDCmpTwoVector<bit Q, bit U, bits<2> size, bits<2> size2,

0 commit comments

Comments
 (0)