Skip to content

Commit ec2a2ac

Browse files
Add multi loads and stores
1 parent f41eb54 commit ec2a2ac

File tree

6 files changed

+310
-9
lines changed

6 files changed

+310
-9
lines changed

clang/include/clang/Basic/arm_sve.td

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2042,20 +2042,20 @@ def SVWHILEHS_COUNT : SInst<"svwhilege_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNo
20422042
}
20432043

20442044
multiclass MultiVecLoad<string i> {
2045-
def SV # NAME # B_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "cUc", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
2045+
def SV # NAME # B_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "cUcm", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
20462046
def SV # NAME # H_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "sUshb", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
20472047
def SV # NAME # W_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "iUif", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
20482048
def SV # NAME # D_X2 : MInst<"sv" # i # "[_{2}]_x2", "2}c", "lUld", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
2049-
def SV # NAME # B_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "cUc", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
2049+
def SV # NAME # B_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "cUcm", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
20502050
def SV # NAME # H_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "sUshb", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
20512051
def SV # NAME # W_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "iUif", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
20522052
def SV # NAME # D_X4 : MInst<"sv" # i # "[_{2}]_x4", "4}c", "lUld", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
20532053

2054-
def SV # NAME # B_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "cUc", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
2054+
def SV # NAME # B_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "cUcm", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
20552055
def SV # NAME # H_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "sUshb", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
20562056
def SV # NAME # W_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "iUif", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
20572057
def SV # NAME # D_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}]_x2", "2}cl", "lUld", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
2058-
def SV # NAME # B_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "cUc", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
2058+
def SV # NAME # B_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "cUcm", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
20592059
def SV # NAME # H_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "sUshb", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
20602060
def SV # NAME # W_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "iUif", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
20612061
def SV # NAME # D_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}]_x4", "4}cl", "lUld", [IsStructLoad, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
@@ -2067,20 +2067,20 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
20672067
}
20682068

20692069
multiclass MultiVecStore<string i> {
2070-
def SV # NAME # B_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "cUc", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
2070+
def SV # NAME # B_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "cUcm", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
20712071
def SV # NAME # H_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "sUshb", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
20722072
def SV # NAME # W_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "iUif", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
20732073
def SV # NAME # D_X2 : MInst<"sv" # i # "[_{2}_x2]", "v}p2", "lUld", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
2074-
def SV # NAME # B_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "cUc", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
2074+
def SV # NAME # B_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "cUcm", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
20752075
def SV # NAME # H_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "sUshb", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
20762076
def SV # NAME # W_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "iUif", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
20772077
def SV # NAME # D_X4 : MInst<"sv" # i # "[_{2}_x4]", "v}p4", "lUld", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
20782078

2079-
def SV # NAME # B_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "cUc", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
2079+
def SV # NAME # B_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "cUcm", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
20802080
def SV # NAME # H_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "sUshb", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
20812081
def SV # NAME # W_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "iUif", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
20822082
def SV # NAME # D_VNUM_X2 : MInst<"sv" # i # "_vnum" # "[_{2}_x2]", "v}pl2", "lUld", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x2">;
2083-
def SV # NAME # B_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "cUc", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
2083+
def SV # NAME # B_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "cUcm", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
20842084
def SV # NAME # H_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "sUshb", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
20852085
def SV # NAME # W_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "iUif", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;
20862086
def SV # NAME # D_VNUM_X4 : MInst<"sv" # i # "_vnum" # "[_{2}_x4]", "v}pl4", "lUld", [IsStructStore, VerifyRuntimeMode], MemEltTyDefault, "aarch64_sve_" # i # "_pn_x4">;

clang/lib/CodeGen/CodeGenTypes.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T) {
108108
}
109109

110110
if (T->isMFloat8Type())
111-
return llvm::Type::getIntNTy(getLLVMContext(), 8);
111+
return llvm::Type::getInt8Ty(getLLVMContext());
112112

113113
llvm::Type *R = ConvertType(T);
114114

clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ld1.c

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,21 @@ svfloat64x2_t test_svld1_f64_x2(svcount_t pn, const float64_t *base) ATTR
309309
return SVE_ACLE_FUNC(svld1,_f64,_x2,)(pn, base);
310310
}
311311

312+
// CHECK-LABEL: @test_svld1_mf8_x2(
313+
// CHECK-NEXT: entry:
314+
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
315+
// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
316+
//
317+
// CPP-CHECK-LABEL: @_Z17test_svld1_mf8_x2u11__SVCount_tPKu6__mfp8(
318+
// CPP-CHECK-NEXT: entry:
319+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
320+
// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
321+
//
322+
svmfloat8x2_t test_svld1_mf8_x2(svcount_t pn, const mfloat8_t *base) ATTR
323+
{
324+
return SVE_ACLE_FUNC(svld1,_mf8,_x2,)(pn, base);
325+
}
326+
312327
// CHECK-LABEL: @test_svld1_f16_x4(
313328
// CHECK-NEXT: entry:
314329
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ld1.pn.x4.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
@@ -354,6 +369,20 @@ svfloat64x4_t test_svld1_f64_x4(svcount_t pn, const float64_t *base) ATTR
354369
return SVE_ACLE_FUNC(svld1,_f64,_x4,)(pn, base);
355370
}
356371

372+
// CHECK-LABEL: @test_svld1_mf8_x4(
373+
// CHECK-NEXT: entry:
374+
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
375+
// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
376+
//
377+
// CPP-CHECK-LABEL: @_Z17test_svld1_mf8_x4u11__SVCount_tPKu6__mfp8(
378+
// CPP-CHECK-NEXT: entry:
379+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
380+
// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
381+
//
382+
svmfloat8x4_t test_svld1_mf8_x4(svcount_t pn, const mfloat8_t *base) ATTR
383+
{
384+
return SVE_ACLE_FUNC(svld1,_mf8,_x4,)(pn, base);
385+
}
357386

358387
// == VNUM variants ==
359388

@@ -795,6 +824,29 @@ svfloat64x2_t test_svld1_vnum_f64_x2(svcount_t pn, const float64_t *base, int64_
795824
return SVE_ACLE_FUNC(svld1_vnum,_f64,_x2,)(pn, base, vnum);
796825
}
797826

827+
// CHECK-LABEL: @test_svld1_vnum_mf8_x2(
828+
// CHECK-NEXT: entry:
829+
// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
830+
// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
831+
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
832+
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
833+
// CHECK-NEXT: [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
834+
// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
835+
//
836+
// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_mf8_x2u11__SVCount_tPKu6__mfp8l(
837+
// CPP-CHECK-NEXT: entry:
838+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
839+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
840+
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
841+
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
842+
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
843+
// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
844+
//
845+
svmfloat8x2_t test_svld1_vnum_mf8_x2(svcount_t pn, const mfloat8_t *base, int64_t vnum) ATTR
846+
{
847+
return SVE_ACLE_FUNC(svld1_vnum,_mf8,_x2,)(pn, base, vnum);
848+
}
849+
798850
// CHECK-LABEL: @test_svld1_vnum_f16_x4(
799851
// CHECK-NEXT: entry:
800852
// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
@@ -863,3 +915,26 @@ svfloat64x4_t test_svld1_vnum_f64_x4(svcount_t pn, const float64_t *base, int64_
863915
{
864916
return SVE_ACLE_FUNC(svld1_vnum,_f64,_x4,)(pn, base, vnum);
865917
}
918+
919+
// CHECK-LABEL: @test_svld1_vnum_mf8_x4(
920+
// CHECK-NEXT: entry:
921+
// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
922+
// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
923+
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
924+
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
925+
// CHECK-NEXT: [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
926+
// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
927+
//
928+
// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_mf8_x4u11__SVCount_tPKu6__mfp8l(
929+
// CPP-CHECK-NEXT: entry:
930+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
931+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
932+
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
933+
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
934+
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
935+
// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
936+
//
937+
svmfloat8x4_t test_svld1_vnum_mf8_x4(svcount_t pn, const mfloat8_t *base, int64_t vnum) ATTR
938+
{
939+
return SVE_ACLE_FUNC(svld1_vnum,_mf8,_x4,)(pn, base, vnum);
940+
}

clang/test/CodeGen/AArch64/sve2p1-intrinsics/acle_sve2p1_ldnt1.c

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,21 @@ svfloat64x2_t test_svldnt1_f64_x2(svcount_t pn, const float64_t *base) ATTR
307307
return SVE_ACLE_FUNC(svldnt1,_f64,_x2,)(pn, base);
308308
}
309309

310+
// CHECK-LABEL: @test_svldnt1_mf8_x2(
311+
// CHECK-NEXT: entry:
312+
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
313+
// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
314+
//
315+
// CPP-CHECK-LABEL: @_Z19test_svldnt1_mf8_x2u11__SVCount_tPKu6__mfp8(
316+
// CPP-CHECK-NEXT: entry:
317+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
318+
// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
319+
//
320+
svmfloat8x2_t test_svldnt1_mf8_x2(svcount_t pn, const mfloat8_t *base) ATTR
321+
{
322+
return SVE_ACLE_FUNC(svldnt1,_mf8,_x2,)(pn, base);
323+
}
324+
310325
// CHECK-LABEL: @test_svldnt1_f16_x4(
311326
// CHECK-NEXT: entry:
312327
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv8f16(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
@@ -352,6 +367,20 @@ svfloat64x4_t test_svldnt1_f64_x4(svcount_t pn, const float64_t *base) ATTR
352367
return SVE_ACLE_FUNC(svldnt1,_f64,_x4,)(pn, base);
353368
}
354369

370+
// CHECK-LABEL: @test_svldnt1_mf8_x4(
371+
// CHECK-NEXT: entry:
372+
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
373+
// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
374+
//
375+
// CPP-CHECK-LABEL: @_Z19test_svldnt1_mf8_x4u11__SVCount_tPKu6__mfp8(
376+
// CPP-CHECK-NEXT: entry:
377+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[BASE:%.*]])
378+
// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP0]]
379+
//
380+
svmfloat8x4_t test_svldnt1_mf8_x4(svcount_t pn, const mfloat8_t *base) ATTR
381+
{
382+
return SVE_ACLE_FUNC(svldnt1,_mf8,_x4,)(pn, base);
383+
}
355384

356385
// == VNUM variants ==
357386

@@ -793,6 +822,29 @@ svfloat64x2_t test_svldnt1_vnum_f64_x2(svcount_t pn, const float64_t *base, int6
793822
return SVE_ACLE_FUNC(svldnt1_vnum,_f64,_x2,)(pn, base, vnum);
794823
}
795824

825+
// CHECK-LABEL: @test_svldnt1_vnum_mf8_x2(
826+
// CHECK-NEXT: entry:
827+
// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
828+
// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
829+
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
830+
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
831+
// CHECK-NEXT: [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
832+
// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
833+
//
834+
// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_mf8_x2u11__SVCount_tPKu6__mfp8l(
835+
// CPP-CHECK-NEXT: entry:
836+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
837+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
838+
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
839+
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
840+
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ldnt1.pn.x2.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
841+
// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
842+
//
843+
svmfloat8x2_t test_svldnt1_vnum_mf8_x2(svcount_t pn, const mfloat8_t *base, int64_t vnum) ATTR
844+
{
845+
return SVE_ACLE_FUNC(svldnt1_vnum,_mf8,_x2,)(pn, base, vnum);
846+
}
847+
796848
// CHECK-LABEL: @test_svldnt1_vnum_f16_x4(
797849
// CHECK-NEXT: entry:
798850
// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
@@ -861,3 +913,26 @@ svfloat64x4_t test_svldnt1_vnum_f64_x4(svcount_t pn, const float64_t *base, int6
861913
{
862914
return SVE_ACLE_FUNC(svldnt1_vnum,_f64,_x4,)(pn, base, vnum);
863915
}
916+
917+
// CHECK-LABEL: @test_svldnt1_vnum_mf8_x4(
918+
// CHECK-NEXT: entry:
919+
// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
920+
// CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
921+
// CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
922+
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
923+
// CHECK-NEXT: [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
924+
// CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
925+
//
926+
// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_mf8_x4u11__SVCount_tPKu6__mfp8l(
927+
// CPP-CHECK-NEXT: entry:
928+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
929+
// CPP-CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[VNUM:%.*]], 4
930+
// CPP-CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[TMP1]], [[TMP0]]
931+
// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[DOTIDX]]
932+
// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ldnt1.pn.x4.nxv16i8(target("aarch64.svcount") [[PN:%.*]], ptr [[TMP2]])
933+
// CPP-CHECK-NEXT: ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[TMP3]]
934+
//
935+
svmfloat8x4_t test_svldnt1_vnum_mf8_x4(svcount_t pn, const mfloat8_t *base, int64_t vnum) ATTR
936+
{
937+
return SVE_ACLE_FUNC(svldnt1_vnum,_mf8,_x4,)(pn, base, vnum);
938+
}

0 commit comments

Comments
 (0)