Commit f41eb54

Update SVE load and store intrinsics to have FP8 variants
1 parent ced23aa commit f41eb54

18 files changed: +816, -49 lines

clang/include/clang/Basic/arm_sve.td

Lines changed: 17 additions & 17 deletions
@@ -19,7 +19,7 @@ include "arm_sve_sme_incl.td"
 // Loads
 
 // Load one vector (scalar base)
-def SVLD1 : MInst<"svld1[_{2}]", "dPc", "csilUcUsUiUlhfd", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ld1">;
+def SVLD1 : MInst<"svld1[_{2}]", "dPc", "csilUcUsUiUlhfdm", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ld1">;
 def SVLD1SB : MInst<"svld1sb_{d}", "dPS", "silUsUiUl", [IsLoad, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_ld1">;
 def SVLD1UB : MInst<"svld1ub_{d}", "dPW", "silUsUiUl", [IsLoad, IsZExtReturn, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_ld1">;
 def SVLD1SH : MInst<"svld1sh_{d}", "dPT", "ilUiUl", [IsLoad, VerifyRuntimeMode], MemEltTyInt16, "aarch64_sve_ld1">;
@@ -33,7 +33,7 @@ let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in {
 }
 
 // Load one vector (scalar base, VL displacement)
-def SVLD1_VNUM : MInst<"svld1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfd", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ld1">;
+def SVLD1_VNUM : MInst<"svld1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfdm", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ld1">;
 def SVLD1SB_VNUM : MInst<"svld1sb_vnum_{d}", "dPSl", "silUsUiUl", [IsLoad, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_ld1">;
 def SVLD1UB_VNUM : MInst<"svld1ub_vnum_{d}", "dPWl", "silUsUiUl", [IsLoad, IsZExtReturn, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_ld1">;
 def SVLD1SH_VNUM : MInst<"svld1sh_vnum_{d}", "dPTl", "ilUiUl", [IsLoad, VerifyRuntimeMode], MemEltTyInt16, "aarch64_sve_ld1">;
@@ -247,10 +247,10 @@ let SVETargetGuard = "sve,bf16", SMETargetGuard = InvalidMode in {
 }
 
 // Load one vector, unextended load, non-temporal (scalar base)
-def SVLDNT1 : MInst<"svldnt1[_{2}]", "dPc", "csilUcUsUiUlhfd", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ldnt1">;
+def SVLDNT1 : MInst<"svldnt1[_{2}]", "dPc", "csilUcUsUiUlhfdm", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ldnt1">;
 
 // Load one vector, unextended load, non-temporal (scalar base, VL displacement)
-def SVLDNT1_VNUM : MInst<"svldnt1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfd", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ldnt1">;
+def SVLDNT1_VNUM : MInst<"svldnt1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfdm", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ldnt1">;
 
 let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in {
   def SVLDNT1_BF : MInst<"svldnt1[_{2}]", "dPc", "b", [IsLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_ldnt1">;
@@ -265,7 +265,7 @@ let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in {
 }
 
 multiclass StructLoad<string name, string proto, string i> {
-  def : SInst<name, proto, "csilUcUsUiUlhfd", MergeNone, i, [IsStructLoad, VerifyRuntimeMode]>;
+  def : SInst<name, proto, "csilUcUsUiUlhfdm", MergeNone, i, [IsStructLoad, VerifyRuntimeMode]>;
   let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in {
     def: SInst<name, proto, "b", MergeNone, i, [IsStructLoad, VerifyRuntimeMode]>;
   }
@@ -314,11 +314,11 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = InvalidMode in {
 def SVLD1UDQ_VNUM : MInst<"svld1udq_vnum[_{d}]", "dPcl", "lUld", [IsLoad], MemEltTyInt64, "aarch64_sve_ld1udq">;
 
 // Load one vector (vector base + scalar offset)
-def SVLD1Q_GATHER_U64BASE_OFFSET : MInst<"svld1q_gather[_{2}base]_offset_{d}", "dPgl", "cUcsUsiUilUlfhdb", [IsGatherLoad, IsByteIndexed], MemEltTyDefault, "aarch64_sve_ld1q_gather_scalar_offset">;
-def SVLD1Q_GATHER_U64BASE : MInst<"svld1q_gather[_{2}base]_{d}", "dPg", "cUcsUsiUilUlfhdb", [IsGatherLoad, IsByteIndexed], MemEltTyDefault, "aarch64_sve_ld1q_gather_scalar_offset">;
+def SVLD1Q_GATHER_U64BASE_OFFSET : MInst<"svld1q_gather[_{2}base]_offset_{d}", "dPgl", "cUcsUsiUilUlfhdbm", [IsGatherLoad, IsByteIndexed], MemEltTyDefault, "aarch64_sve_ld1q_gather_scalar_offset">;
+def SVLD1Q_GATHER_U64BASE : MInst<"svld1q_gather[_{2}base]_{d}", "dPg", "cUcsUsiUilUlfhdbm", [IsGatherLoad, IsByteIndexed], MemEltTyDefault, "aarch64_sve_ld1q_gather_scalar_offset">;
 
 // Load one vector (scalar base + vector offset)
-def SVLD1Q_GATHER_U64OFFSET : MInst<"svld1q_gather_[{3}]offset[_{d}]", "dPcg", "cUcsUsiUilUlfhdb", [IsGatherLoad, IsByteIndexed], MemEltTyDefault, "aarch64_sve_ld1q_gather_vector_offset">;
+def SVLD1Q_GATHER_U64OFFSET : MInst<"svld1q_gather_[{3}]offset[_{d}]", "dPcg", "cUcsUsiUilUlfhdbm", [IsGatherLoad, IsByteIndexed], MemEltTyDefault, "aarch64_sve_ld1q_gather_vector_offset">;
 
 // Load N-element structure into N vectors (scalar base)
 defm SVLD2Q : StructLoad<"svld2q[_{2}]", "2Pc", "aarch64_sve_ld2q_sret">;
@@ -341,7 +341,7 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = InvalidMode in {
 // Stores
 
 // Store one vector (scalar base)
-def SVST1 : MInst<"svst1[_{d}]", "vPpd", "csilUcUsUiUlhfd", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_st1">;
+def SVST1 : MInst<"svst1[_{d}]", "vPpd", "csilUcUsUiUlhfdm", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_st1">;
 def SVST1B_S : MInst<"svst1b[_{d}]", "vPAd", "sil", [IsStore, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_st1">;
 def SVST1B_U : MInst<"svst1b[_{d}]", "vPEd", "UsUiUl", [IsStore, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_st1">;
 def SVST1H_S : MInst<"svst1h[_{d}]", "vPBd", "il", [IsStore, VerifyRuntimeMode], MemEltTyInt16, "aarch64_sve_st1">;
@@ -350,7 +350,7 @@ def SVST1W_S : MInst<"svst1w[_{d}]", "vPCd", "l", [IsStore, Verify
 def SVST1W_U : MInst<"svst1w[_{d}]", "vPGd", "Ul", [IsStore, VerifyRuntimeMode], MemEltTyInt32, "aarch64_sve_st1">;
 
 // Store one vector (scalar base, VL displacement)
-def SVST1_VNUM : MInst<"svst1_vnum[_{d}]", "vPpld", "csilUcUsUiUlhfd", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_st1">;
+def SVST1_VNUM : MInst<"svst1_vnum[_{d}]", "vPpld", "csilUcUsUiUlhfdm", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_st1">;
 def SVST1B_VNUM_S : MInst<"svst1b_vnum[_{d}]", "vPAld", "sil", [IsStore, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_st1">;
 def SVST1B_VNUM_U : MInst<"svst1b_vnum[_{d}]", "vPEld", "UsUiUl", [IsStore, VerifyRuntimeMode], MemEltTyInt8, "aarch64_sve_st1">;
 def SVST1H_VNUM_S : MInst<"svst1h_vnum[_{d}]", "vPBld", "il", [IsStore, VerifyRuntimeMode], MemEltTyInt16, "aarch64_sve_st1">;
@@ -435,7 +435,7 @@ def SVST1W_SCATTER_INDEX_S : MInst<"svst1w_scatter[_{2}base]_index[_{d}]", "v
 } // let SVETargetGuard = "sve"
 
 multiclass StructStore<string name, string proto, string i> {
-  def : SInst<name, proto, "csilUcUsUiUlhfd", MergeNone, i, [IsStructStore, VerifyRuntimeMode]>;
+  def : SInst<name, proto, "csilUcUsUiUlhfdm", MergeNone, i, [IsStructStore, VerifyRuntimeMode]>;
   let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in {
     def: SInst<name, proto, "b", MergeNone, i, [IsStructStore, VerifyRuntimeMode]>;
   }
@@ -451,10 +451,10 @@ defm SVST3_VNUM : StructStore<"svst3_vnum[_{d}]", "vPpl3", "aarch64_sve_st3">;
 defm SVST4_VNUM : StructStore<"svst4_vnum[_{d}]", "vPpl4", "aarch64_sve_st4">;
 
 // Store one vector, with no truncation, non-temporal (scalar base)
-def SVSTNT1 : MInst<"svstnt1[_{d}]", "vPpd", "csilUcUsUiUlhfd", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_stnt1">;
+def SVSTNT1 : MInst<"svstnt1[_{d}]", "vPpd", "csilUcUsUiUlhfdm", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_stnt1">;
 
 // Store one vector, with no truncation, non-temporal (scalar base, VL displacement)
-def SVSTNT1_VNUM : MInst<"svstnt1_vnum[_{d}]", "vPpld", "csilUcUsUiUlhfd", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_stnt1">;
+def SVSTNT1_VNUM : MInst<"svstnt1_vnum[_{d}]", "vPpld", "csilUcUsUiUlhfdm", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_stnt1">;
 
 let SVETargetGuard = "sve,bf16", SMETargetGuard = "sme,bf16" in {
   def SVSTNT1_BF : MInst<"svstnt1[_{d}]", "vPpd", "b", [IsStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_stnt1">;
@@ -470,12 +470,12 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = InvalidMode in {
 def SVST1UDQ_VNUM : MInst<"svst1dq_vnum[_{d}]", "vPpld", "lUld", [IsStore], MemEltTyInt64, "aarch64_sve_st1dq">;
 
 // Store one vector (vector base + scalar offset)
-def SVST1Q_SCATTER_U64BASE_OFFSET : MInst<"svst1q_scatter[_{2}base]_offset[_{d}]", "vPgld", "cUcsUsiUilUlfhdb", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_scalar_offset">;
-def SVST1Q_SCATTER_U64BASE : MInst<"svst1q_scatter[_{2}base][_{d}]", "vPgd", "cUcsUsiUilUlfhdb", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_scalar_offset">;
+def SVST1Q_SCATTER_U64BASE_OFFSET : MInst<"svst1q_scatter[_{2}base]_offset[_{d}]", "vPgld", "cUcsUsiUilUlfhdbm", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_scalar_offset">;
+def SVST1Q_SCATTER_U64BASE : MInst<"svst1q_scatter[_{2}base][_{d}]", "vPgd", "cUcsUsiUilUlfhdbm", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_scalar_offset">;
 
 // Store one vector (scalar base + vector offset)
-def SVST1Q_SCATTER_OFFSETS_U : MInst<"svst1q_scatter_[{3}]offset[_{d}]", "vPpgd", "cUcsUsiUilUlfhdb", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_vector_offset">;
-def SVST1Q_SCATTER_OFFSETS_S : MInst<"svst1q_scatter_[{3}]offset[_{d}]", "vPp#d", "cUcsUsiUilUlfhdb", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_vector_offset">;
+def SVST1Q_SCATTER_OFFSETS_U : MInst<"svst1q_scatter_[{3}]offset[_{d}]", "vPpgd", "cUcsUsiUilUlfhdbm", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_vector_offset">;
+def SVST1Q_SCATTER_OFFSETS_S : MInst<"svst1q_scatter_[{3}]offset[_{d}]", "vPp#d", "cUcsUsiUilUlfhdbm", [IsScatterStore, IsByteIndexed], MemEltTyDefault, "aarch64_sve_st1q_scatter_vector_offset">;
 
 // Store N vectors into N-element structure (scalar base)
 defm SVST2Q : StructStore<"svst2q[_{d}]", "vPc2", "aarch64_sve_st2q">;
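For context, the extra "m" appended to each type string asks the emitter to generate an FP8 (mfloat8) variant of the intrinsic alongside the existing integer and floating-point ones. A minimal usage sketch follows, assuming the usual SVE naming scheme (an _mf8 suffix) and the ACLE mfloat8_t/svmfloat8_t types; the intrinsic names, loop shape, and required target features are assumptions based on the existing intrinsics, not taken from this commit:

#include <arm_sve.h>

// Copy a buffer of FP8 values with the new predicated contiguous load/store
// variants. svld1_mf8 / svstnt1_mf8 are the names the "m" suffix is expected
// to produce; treat them as assumptions until checked against arm_sve.h.
void copy_fp8(const mfloat8_t *src, mfloat8_t *dst, int64_t n) {
  int64_t i = 0;
  svbool_t pg = svwhilelt_b8(i, n);          // one predicate lane per byte-sized element
  while (svptest_any(svptrue_b8(), pg)) {
    svmfloat8_t v = svld1_mf8(pg, src + i);  // contiguous FP8 load (new variant)
    svstnt1_mf8(pg, dst + i, v);             // non-temporal FP8 store (new variant)
    i += (int64_t)svcntb();                  // advance by the number of 8-bit lanes
    pg = svwhilelt_b8(i, n);
  }
}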

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 3 additions & 2 deletions
@@ -10199,6 +10199,7 @@ llvm::Type *CodeGenFunction::getEltType(const SVETypeFlags &TypeFlags) {
   default:
     llvm_unreachable("Invalid SVETypeFlag!");
 
+  case SVETypeFlags::EltTyMFloat8:
   case SVETypeFlags::EltTyInt8:
     return Builder.getInt8Ty();
   case SVETypeFlags::EltTyInt16:
@@ -10627,7 +10628,7 @@ Value *CodeGenFunction::EmitSVEMaskedLoad(const CallExpr *E,
                                           unsigned IntrinsicID,
                                           bool IsZExtReturn) {
   QualType LangPTy = E->getArg(1)->getType();
-  llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
+  llvm::Type *MemEltTy = CGM.getTypes().ConvertTypeForMem(
       LangPTy->castAs<PointerType>()->getPointeeType());
 
   // The vector type that is returned may be different from the
@@ -10674,7 +10675,7 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E,
                                            SmallVectorImpl<Value *> &Ops,
                                            unsigned IntrinsicID) {
   QualType LangPTy = E->getArg(1)->getType();
-  llvm::Type *MemEltTy = CGM.getTypes().ConvertType(
+  llvm::Type *MemEltTy = CGM.getTypes().ConvertTypeForMem(
      LangPTy->castAs<PointerType>()->getPointeeType());
 
   // The vector type that is stored may be different from the
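The switch from ConvertType to ConvertTypeForMem matters for the new FP8 variants: the builtin's pointer argument names the language element type, and for __mfp8 the value lowering seen in the test checks below is <1 x i8>, which is not usable as a scalable-vector element type, while the in-memory lowering added in CodeGenTypes.cpp below is a plain i8. A hedged sketch of the intended effect; the intrinsic name and the IR shape are expectations based on how the existing svld1 forms lower, not copied from a test in this commit:

#include <arm_sve.h>

// With ConvertTypeForMem, the memory element type of an FP8 ld1/st1 resolves
// to i8, so the masked load is emitted on <vscale x 16 x i8>, matching the
// svmfloat8_t return type. (svld1_mf8 and the IR below are assumptions.)
svmfloat8_t load_fp8(svbool_t pg, const mfloat8_t *base) {
  return svld1_mf8(pg, base);
  // Expected shape, roughly:
  //   %v = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(
  //            ptr %base, i32 1, <vscale x 16 x i1> %pg,
  //            <vscale x 16 x i8> zeroinitializer)
}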

clang/lib/CodeGen/CodeGenTypes.cpp

Lines changed: 3 additions & 0 deletions
@@ -107,6 +107,9 @@ llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T) {
                                 MT->getNumRows() * MT->getNumColumns());
   }
 
+  if (T->isMFloat8Type())
+    return llvm::Type::getIntNTy(getLLVMContext(), 8);
+
   llvm::Type *R = ConvertType(T);
 
   // Check for the boolean vector case.
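This hunk gives __mfp8 a one-byte integer in-memory representation, distinct from its value representation. A small illustration, assuming the ACLE mfloat8_t typedef is available from arm_sve.h; the struct is hypothetical:

#include <arm_sve.h>

// mfloat8_t objects occupy one byte and are laid out as i8 in the LLVM
// struct/array types produced by ConvertTypeForMem.
struct fp8_pixel {
  unsigned char tag;
  mfloat8_t value;   // stored as an i8 field in memory
};

_Static_assert(sizeof(mfloat8_t) == 1, "FP8 scalars are one byte");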

clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_fdot.c

Lines changed: 4 additions & 4 deletions
@@ -49,8 +49,8 @@ svfloat32_t test_svdot_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t zm,
 // CHECK-LABEL: define dso_local <vscale x 4 x float> @test_svdot_n_f32_mf8(
 // CHECK-SAME: <vscale x 4 x float> [[ZDA:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
 // CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0
+// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
 // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[TMP0]], i64 0
 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
 // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fp8.fdot.nxv4f32(<vscale x 4 x float> [[ZDA]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[DOTSPLAT]])
@@ -59,8 +59,8 @@ svfloat32_t test_svdot_f32_mf8(svfloat32_t zda, svmfloat8_t zn, svmfloat8_t zm,
 // CHECK-CXX-LABEL: define dso_local <vscale x 4 x float> @_Z20test_svdot_n_f32_mf8u13__SVFloat32_tu13__SVMfloat8_tu6__mfp8m(
 // CHECK-CXX-SAME: <vscale x 4 x float> [[ZDA:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
 // CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
 // CHECK-CXX-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
 // CHECK-CXX-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[TMP0]], i64 0
 // CHECK-CXX-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
 // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fp8.fdot.nxv4f32(<vscale x 4 x float> [[ZDA]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[DOTSPLAT]])
@@ -91,8 +91,8 @@ svfloat16_t test_svdot_f16_mf8(svfloat16_t zda, svmfloat8_t zn, svmfloat8_t zm,
 // CHECK-LABEL: define dso_local <vscale x 8 x half> @test_svdot_n_f16_mf8(
 // CHECK-SAME: <vscale x 8 x half> [[ZDA:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
 // CHECK-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0
+// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
 // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[TMP0]], i64 0
 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
 // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fp8.fdot.nxv8f16(<vscale x 8 x half> [[ZDA]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[DOTSPLAT]])
@@ -101,8 +101,8 @@ svfloat16_t test_svdot_f16_mf8(svfloat16_t zda, svmfloat8_t zn, svmfloat8_t zm,
 // CHECK-CXX-LABEL: define dso_local <vscale x 8 x half> @_Z20test_svdot_n_f16_mf8u13__SVFloat16_tu13__SVMfloat8_tu6__mfp8m(
 // CHECK-CXX-SAME: <vscale x 8 x half> [[ZDA:%.*]], <vscale x 16 x i8> [[ZN:%.*]], <1 x i8> [[ZM:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0]] {
 // CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
 // CHECK-CXX-NEXT: [[TMP0:%.*]] = extractelement <1 x i8> [[ZM]], i64 0
+// CHECK-CXX-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
 // CHECK-CXX-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[TMP0]], i64 0
 // CHECK-CXX-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
 // CHECK-CXX-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.fp8.fdot.nxv8f16(<vscale x 8 x half> [[ZDA]], <vscale x 16 x i8> [[ZN]], <vscale x 16 x i8> [[DOTSPLAT]])
