Skip to content

Commit cb999c1

Browse files
committed
[RISCV][SLEEF]: Support SLEEF vector library for RISC-V target.
The SLEEF math vector library supports the RISC-V target. Commit: shibatch/sleef#477. This patch enables the use of auto-vectorization with subsequent replacement by the corresponding SLEEF functions.
1 parent 59085e9 commit cb999c1

File tree

7 files changed

+3226
-21
lines changed

7 files changed

+3226
-21
lines changed

llvm/include/llvm/Analysis/VecFuncs.def

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -892,6 +892,155 @@ TLI_DEFINE_VECFUNC("llvm.tanh.f32", "_ZGVsMxv_tanhf", SCALABLE(4), MASKED, "_ZGV
892892
TLI_DEFINE_VECFUNC("tgamma", "_ZGVsMxv_tgamma", SCALABLE(2), MASKED, "_ZGVsMxv")
893893
TLI_DEFINE_VECFUNC("tgammaf", "_ZGVsMxv_tgammaf", SCALABLE(4), MASKED, "_ZGVsMxv")
894894

895+
#elif defined(TLI_DEFINE_SLEEFGNUABI_SCALABLE_VECFUNCS_RISCV)
896+
897+
TLI_DEFINE_VECFUNC("acos", "Sleef_acosdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
898+
TLI_DEFINE_VECFUNC("acosf", "Sleef_acosfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")
899+
900+
TLI_DEFINE_VECFUNC("acosh", "Sleef_acoshdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
901+
TLI_DEFINE_VECFUNC("acoshf", "Sleef_acoshfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")
902+
903+
TLI_DEFINE_VECFUNC("asin", "Sleef_asindx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
904+
TLI_DEFINE_VECFUNC("asinf", "Sleef_asinfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")
905+
906+
TLI_DEFINE_VECFUNC("asinh", "Sleef_asinhdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
907+
TLI_DEFINE_VECFUNC("asinhf", "Sleef_asinhfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")
908+
909+
TLI_DEFINE_VECFUNC("atan", "Sleef_atandx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
910+
TLI_DEFINE_VECFUNC("atanf", "Sleef_atanfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")
911+
912+
TLI_DEFINE_VECFUNC("atan2", "Sleef_atan2dx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxvv")
913+
TLI_DEFINE_VECFUNC("atan2f", "Sleef_atan2fx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxvv")
914+
915+
TLI_DEFINE_VECFUNC("atanh", "Sleef_atanhdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
916+
TLI_DEFINE_VECFUNC("atanhf", "Sleef_atanhfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")
917+
918+
TLI_DEFINE_VECFUNC("cbrt", "Sleef_cbrtdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
919+
TLI_DEFINE_VECFUNC("cbrtf", "Sleef_cbrtfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")
920+
921+
TLI_DEFINE_VECFUNC("copysign", "Sleef_copysigndx_rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxvv")
922+
TLI_DEFINE_VECFUNC("copysignf", "Sleef_copysignfx_rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxvv")
923+
924+
TLI_DEFINE_VECFUNC("cos", "Sleef_cosdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
925+
TLI_DEFINE_VECFUNC("cosf", "Sleef_cosfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")
926+
TLI_DEFINE_VECFUNC("llvm.cos.f64", "Sleef_cosdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
927+
TLI_DEFINE_VECFUNC("llvm.cos.f32", "Sleef_cosfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")
928+
929+
TLI_DEFINE_VECFUNC("cosh", "Sleef_coshdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
930+
TLI_DEFINE_VECFUNC("coshf", "Sleef_coshfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")
931+
932+
TLI_DEFINE_VECFUNC("cospi", "Sleef_cospidx_u05rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
933+
TLI_DEFINE_VECFUNC("cospif", "Sleef_cospifx_u05rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")
934+
935+
TLI_DEFINE_VECFUNC("erf", "Sleef_erfdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
936+
TLI_DEFINE_VECFUNC("erff", "Sleef_erffx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")
937+
938+
TLI_DEFINE_VECFUNC("erfc", "Sleef_erfcdx_u15rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
939+
TLI_DEFINE_VECFUNC("erfcf", "Sleef_erfcfx_u15rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")
940+
941+
TLI_DEFINE_VECFUNC("exp", "Sleef_expdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
942+
TLI_DEFINE_VECFUNC("llvm.exp.f64", "Sleef_expdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
943+
TLI_DEFINE_VECFUNC("expf", "Sleef_expfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")
944+
TLI_DEFINE_VECFUNC("llvm.exp.f32", "Sleef_expfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")
945+
946+
TLI_DEFINE_VECFUNC("exp10", "Sleef_exp10dx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
947+
TLI_DEFINE_VECFUNC("llvm.exp10.f64", "Sleef_exp10dx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
948+
TLI_DEFINE_VECFUNC("exp10f", "Sleef_exp10fx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")
949+
TLI_DEFINE_VECFUNC("llvm.exp10.f32", "Sleef_exp10fx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")
950+
951+
TLI_DEFINE_VECFUNC("exp2", "Sleef_exp2dx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
952+
TLI_DEFINE_VECFUNC("exp2f", "Sleef_exp2fx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")
953+
TLI_DEFINE_VECFUNC("llvm.exp2.f64", "Sleef_exp2dx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
954+
TLI_DEFINE_VECFUNC("llvm.exp2.f32", "Sleef_exp2fx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")
955+
956+
TLI_DEFINE_VECFUNC("expm1", "Sleef_expm1dx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
957+
TLI_DEFINE_VECFUNC("expm1f", "Sleef_expm1fx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")
958+
959+
TLI_DEFINE_VECFUNC("fdim", "Sleef_fdimdx_rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxvv")
960+
TLI_DEFINE_VECFUNC("fdimf", "Sleef_fdimfx_rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxvv")
961+
962+
TLI_DEFINE_VECFUNC("fma", "Sleef_fmadx_rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxvvv")
963+
TLI_DEFINE_VECFUNC("fmaf", "Sleef_fmafx_rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxvvv")
964+
965+
TLI_DEFINE_VECFUNC("fmax", "Sleef_fmaxdx_rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxvv")
966+
TLI_DEFINE_VECFUNC("fmaxf", "Sleef_fmaxfx_rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxvv")
967+
968+
TLI_DEFINE_VECFUNC("fmin", "Sleef_fmindx_rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxvv")
969+
TLI_DEFINE_VECFUNC("fminf", "Sleef_fminfx_rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxvv")
970+
971+
TLI_DEFINE_VECFUNC("fmod", "Sleef_fmoddx_rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxvv")
972+
TLI_DEFINE_VECFUNC("fmodf", "Sleef_fmodfx_rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxvv")
973+
974+
TLI_DEFINE_VECFUNC("hypot", "Sleef_hypotdx_u05rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxvv")
975+
TLI_DEFINE_VECFUNC("hypotf", "Sleef_hypotfx_u05rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxvv")
976+
977+
TLI_DEFINE_VECFUNC("ilogb", "Sleef_ilogbdx_rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
978+
TLI_DEFINE_VECFUNC("ilogbf", "Sleef_ilogbfx_rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")
979+
980+
TLI_DEFINE_VECFUNC("ldexp", "Sleef_ldexpdx_rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxvv")
981+
TLI_DEFINE_VECFUNC("ldexpf", "Sleef_ldexpfx_rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxvv")
982+
983+
TLI_DEFINE_VECFUNC("lgamma", "Sleef_lgammadx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
984+
TLI_DEFINE_VECFUNC("lgammaf", "Sleef_lgammafx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")
985+
986+
TLI_DEFINE_VECFUNC("log", "Sleef_logdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
987+
TLI_DEFINE_VECFUNC("logf", "Sleef_logfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")
988+
TLI_DEFINE_VECFUNC("llvm.log.f64", "Sleef_logdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
989+
TLI_DEFINE_VECFUNC("llvm.log.f32", "Sleef_logfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")
990+
991+
TLI_DEFINE_VECFUNC("log10", "Sleef_log10dx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
992+
TLI_DEFINE_VECFUNC("llvm.log10.f64", "Sleef_log10dx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
993+
TLI_DEFINE_VECFUNC("log10f", "Sleef_log10fx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")
994+
TLI_DEFINE_VECFUNC("llvm.log10.f32", "Sleef_log10fx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")
995+
996+
TLI_DEFINE_VECFUNC("log1p", "Sleef_log1pdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
997+
TLI_DEFINE_VECFUNC("log1pf", "Sleef_log1pfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")
998+
999+
TLI_DEFINE_VECFUNC("log2", "Sleef_log2dx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
1000+
TLI_DEFINE_VECFUNC("log2f", "Sleef_log2fx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")
1001+
TLI_DEFINE_VECFUNC("llvm.log2.f64", "Sleef_log2dx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
1002+
TLI_DEFINE_VECFUNC("llvm.log2.f32", "Sleef_log2fx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")
1003+
1004+
TLI_DEFINE_VECFUNC("modf", "Sleef_modfdx_rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxvl8")
1005+
TLI_DEFINE_VECFUNC("modff", "Sleef_modffx_rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxvl4")
1006+
1007+
TLI_DEFINE_VECFUNC("nextafter", "Sleef_nextafterdx_rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxvv")
1008+
TLI_DEFINE_VECFUNC("nextafterf", "Sleef_nextafterfx_rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxvv")
1009+
1010+
TLI_DEFINE_VECFUNC("pow", "Sleef_powdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxvv")
1011+
TLI_DEFINE_VECFUNC("powf", "Sleef_powfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxvv")
1012+
TLI_DEFINE_VECFUNC("llvm.pow.f64", "Sleef_powdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxvv")
1013+
TLI_DEFINE_VECFUNC("llvm.pow.f32", "Sleef_powfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxvv")
1014+
1015+
TLI_DEFINE_VECFUNC("sin", "Sleef_sindx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
1016+
TLI_DEFINE_VECFUNC("sinf", "Sleef_sinfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")
1017+
TLI_DEFINE_VECFUNC("llvm.sin.f64", "Sleef_sindx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
1018+
TLI_DEFINE_VECFUNC("llvm.sin.f32", "Sleef_sinfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")
1019+
1020+
TLI_DEFINE_VECFUNC("sincos", "Sleef_sincosdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxvl8l8")
1021+
TLI_DEFINE_VECFUNC("sincosf", "Sleef_sincosfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxvl4l4")
1022+
1023+
TLI_DEFINE_VECFUNC("sincospi", "Sleef_sincospidx_u05rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxvl8l8")
1024+
TLI_DEFINE_VECFUNC("sincospif", "Sleef_sincospifx_u05rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxvl4l4")
1025+
1026+
TLI_DEFINE_VECFUNC("sinh", "Sleef_sinhdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
1027+
TLI_DEFINE_VECFUNC("sinhf", "Sleef_sinhfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")
1028+
1029+
TLI_DEFINE_VECFUNC("sinpi", "Sleef_sinpidx_u05rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
1030+
TLI_DEFINE_VECFUNC("sinpif", "Sleef_sinpifx_u05rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")
1031+
1032+
TLI_DEFINE_VECFUNC("sqrt", "Sleef_sqrtdx_u05rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
1033+
TLI_DEFINE_VECFUNC("sqrtf", "Sleef_sqrtfx_u05rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")
1034+
1035+
TLI_DEFINE_VECFUNC("tan", "Sleef_tandx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
1036+
TLI_DEFINE_VECFUNC("tanf", "Sleef_tanfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")
1037+
1038+
TLI_DEFINE_VECFUNC("tanh", "Sleef_tanhdx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
1039+
TLI_DEFINE_VECFUNC("tanhf", "Sleef_tanhfx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")
1040+
1041+
TLI_DEFINE_VECFUNC("tgamma", "Sleef_tgammadx_u10rvvm2", SCALABLE(2), NOMASK, "_ZGVvNxv")
1042+
TLI_DEFINE_VECFUNC("tgammaf", "Sleef_tgammafx_u10rvvm2", SCALABLE(4), NOMASK, "_ZGVvNxv")
1043+
8951044
#elif defined(TLI_DEFINE_ARMPL_VECFUNCS)
8961045

8971046
TLI_DEFINE_VECFUNC("acos", "armpl_vacosq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")

llvm/include/llvm/IR/VFABIDemangler.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ enum class VFParamKind {
4444
enum class VFISAKind {
4545
AdvancedSIMD, // AArch64 Advanced SIMD (NEON)
4646
SVE, // AArch64 Scalable Vector Extension
47+
RVV, // RISC-V Scalable Vector Extension
4748
SSE, // x86 SSE
4849
AVX, // x86 AVX
4950
AVX2, // x86 AVX2

llvm/lib/Analysis/TargetLibraryInfo.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1322,6 +1322,14 @@ static const VecDesc VecFuncs_SLEEFGNUABI_VFScalable[] = {
13221322
#undef TLI_DEFINE_SLEEFGNUABI_SCALABLE_VECFUNCS
13231323
};
13241324

1325+
static const VecDesc VecFuncs_SLEEFGNUABI_VFScalableRISCV[] = {
1326+
#define TLI_DEFINE_SLEEFGNUABI_SCALABLE_VECFUNCS_RISCV
1327+
#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF, MASK, VABI_PREFIX) \
1328+
{SCAL, VEC, VF, MASK, VABI_PREFIX},
1329+
#include "llvm/Analysis/VecFuncs.def"
1330+
#undef TLI_DEFINE_SLEEFGNUABI_SCALABLE_VECFUNCS_RISCV
1331+
};
1332+
13251333
static const VecDesc VecFuncs_ArmPL[] = {
13261334
#define TLI_DEFINE_ARMPL_VECFUNCS
13271335
#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF, MASK, VABI_PREFIX) \
@@ -1371,6 +1379,9 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
13711379
addVectorizableFunctions(VecFuncs_SLEEFGNUABI_VF4);
13721380
addVectorizableFunctions(VecFuncs_SLEEFGNUABI_VFScalable);
13731381
break;
1382+
case llvm::Triple::riscv64:
1383+
addVectorizableFunctions(VecFuncs_SLEEFGNUABI_VFScalableRISCV);
1384+
break;
13741385
}
13751386
break;
13761387
}

llvm/lib/IR/VFABIDemangler.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ static ParseRet tryParseISA(StringRef &MangledName, VFISAKind &ISA) {
4242
ISA = StringSwitch<VFISAKind>(MangledName.take_front(1))
4343
.Case("n", VFISAKind::AdvancedSIMD)
4444
.Case("s", VFISAKind::SVE)
45+
.Case("v", VFISAKind::RVV)
4546
.Case("b", VFISAKind::SSE)
4647
.Case("c", VFISAKind::AVX)
4748
.Case("d", VFISAKind::AVX2)
@@ -79,9 +80,9 @@ static ParseRet tryParseVLEN(StringRef &ParseString, VFISAKind ISA,
7980
std::pair<unsigned, bool> &ParsedVF) {
8081
if (ParseString.consume_front("x")) {
8182
// SVE and RVV are the only scalable ISAs currently supported.
82-
if (ISA != VFISAKind::SVE) {
83+
if (ISA != VFISAKind::SVE && ISA != VFISAKind::RVV) {
8384
LLVM_DEBUG(dbgs() << "Vector function variant declared with scalable VF "
84-
<< "but ISA is not SVE\n");
85+
<< "but ISA supported for SVE and RVV only\n");
8586
return ParseRet::Error;
8687
}
8788
// We can't determine the VF of a scalable vector by looking at the vlen
@@ -301,9 +302,8 @@ static ParseRet tryParseAlign(StringRef &ParseString, Align &Alignment) {
301302
// the number of elements of the given type which would fit in such a vector.
302303
static std::optional<ElementCount> getElementCountForTy(const VFISAKind ISA,
303304
const Type *Ty) {
304-
// Only AArch64 SVE is supported at present.
305-
assert(ISA == VFISAKind::SVE &&
306-
"Scalable VF decoding only implemented for SVE\n");
305+
assert((ISA == VFISAKind::SVE || ISA == VFISAKind::RVV) &&
306+
"Scalable VF decoding only implemented for SVE and RVV\n");
307307

308308
if (Ty->isIntegerTy(64) || Ty->isDoubleTy() || Ty->isPointerTy())
309309
return ElementCount::getScalable(2);

0 commit comments

Comments
 (0)