Skip to content

Commit 1d35a33

Browse files
jthackray authored and vinay-deshmukh committed
[AArch64][llvm] Add support for new vcvt* intrinsics (llvm#163572)
Add support for these new vcvt* intrinsics:

```
int64_t vcvts_s64_f32(float32_t);
uint64_t vcvts_u64_f32(float32_t);
int32_t vcvtd_s32_f64(float64_t);
uint32_t vcvtd_u32_f64(float64_t);

int64_t vcvtns_s64_f32(float32_t);
uint64_t vcvtns_u64_f32(float32_t);
int32_t vcvtnd_s32_f64(float64_t);
uint32_t vcvtnd_u32_f64(float64_t);

int64_t vcvtms_s64_f32(float32_t);
uint64_t vcvtms_u64_f32(float32_t);
int32_t vcvtmd_s32_f64(float64_t);
uint32_t vcvtmd_u32_f64(float64_t);

int64_t vcvtps_s64_f32(float32_t);
uint64_t vcvtps_u64_f32(float32_t);
int32_t vcvtpd_s32_f64(float64_t);
uint32_t vcvtpd_u32_f64(float64_t);

int64_t vcvtas_s64_f32(float32_t);
uint64_t vcvtas_u64_f32(float32_t);
int32_t vcvtad_s32_f64(float64_t);
uint32_t vcvtad_u32_f64(float64_t);
```
1 parent 5dcfe54 commit 1d35a33

File tree

4 files changed

+272
-22
lines changed

4 files changed

+272
-22
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -572,6 +572,8 @@ X86 Support
572572

573573
Arm and AArch64 Support
574574
^^^^^^^^^^^^^^^^^^^^^^^
575+
- More intrinsics for the following AArch64 instructions:
576+
FCVTZ[US], FCVTN[US], FCVTM[US], FCVTP[US], FCVTA[US]
575577

576578
Android Support
577579
^^^^^^^^^^^^^^^

clang/include/clang/Basic/arm_neon.td

Lines changed: 45 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1466,26 +1466,51 @@ def SCALAR_UCVTFD : SInst<"vcvt_f64", "(1F)(1!)", "SUl">;
14661466
////////////////////////////////////////////////////////////////////////////////
14671467
// Scalar Floating-point Converts
14681468
def SCALAR_FCVTXN : IInst<"vcvtx_f32", "(1F<)(1!)", "Sd">;
1469-
def SCALAR_FCVTNSS : SInst<"vcvtn_s32", "(1S)1", "Sf">;
1470-
def SCALAR_FCVTNUS : SInst<"vcvtn_u32", "(1U)1", "Sf">;
1471-
def SCALAR_FCVTNSD : SInst<"vcvtn_s64", "(1S)1", "Sd">;
1472-
def SCALAR_FCVTNUD : SInst<"vcvtn_u64", "(1U)1", "Sd">;
1473-
def SCALAR_FCVTMSS : SInst<"vcvtm_s32", "(1S)1", "Sf">;
1474-
def SCALAR_FCVTMUS : SInst<"vcvtm_u32", "(1U)1", "Sf">;
1475-
def SCALAR_FCVTMSD : SInst<"vcvtm_s64", "(1S)1", "Sd">;
1476-
def SCALAR_FCVTMUD : SInst<"vcvtm_u64", "(1U)1", "Sd">;
1477-
def SCALAR_FCVTASS : SInst<"vcvta_s32", "(1S)1", "Sf">;
1478-
def SCALAR_FCVTAUS : SInst<"vcvta_u32", "(1U)1", "Sf">;
1479-
def SCALAR_FCVTASD : SInst<"vcvta_s64", "(1S)1", "Sd">;
1480-
def SCALAR_FCVTAUD : SInst<"vcvta_u64", "(1U)1", "Sd">;
1481-
def SCALAR_FCVTPSS : SInst<"vcvtp_s32", "(1S)1", "Sf">;
1482-
def SCALAR_FCVTPUS : SInst<"vcvtp_u32", "(1U)1", "Sf">;
1483-
def SCALAR_FCVTPSD : SInst<"vcvtp_s64", "(1S)1", "Sd">;
1484-
def SCALAR_FCVTPUD : SInst<"vcvtp_u64", "(1U)1", "Sd">;
1485-
def SCALAR_FCVTZSS : SInst<"vcvt_s32", "(1S)1", "Sf">;
1486-
def SCALAR_FCVTZUS : SInst<"vcvt_u32", "(1U)1", "Sf">;
1487-
def SCALAR_FCVTZSD : SInst<"vcvt_s64", "(1S)1", "Sd">;
1488-
def SCALAR_FCVTZUD : SInst<"vcvt_u64", "(1U)1", "Sd">;
1469+
1470+
def SCALAR_FCVTN_F32toSS : SInst<"vcvtn_s32", "(1S)1", "Sf">;
1471+
def SCALAR_FCVTN_F32toUS : SInst<"vcvtn_u32", "(1U)1", "Sf">;
1472+
def SCALAR_FCVTN_F64toSS : SInst<"vcvtn_s32", "(1S<)1", "Sd">;
1473+
def SCALAR_FCVTN_F64toUS : SInst<"vcvtn_u32", "(1U<)1", "Sd">;
1474+
def SCALAR_FCVTN_F32toSD : SInst<"vcvtn_s64", "(1S>)1", "Sf">;
1475+
def SCALAR_FCVTN_F32toUD : SInst<"vcvtn_u64", "(1U>)1", "Sf">;
1476+
def SCALAR_FCVTN_F64toSD : SInst<"vcvtn_s64", "(1S)1", "Sd">;
1477+
def SCALAR_FCVTN_F64toUD : SInst<"vcvtn_u64", "(1U)1", "Sd">;
1478+
1479+
def SCALAR_FCVTM_F32toSS : SInst<"vcvtm_s32", "(1S)1", "Sf">;
1480+
def SCALAR_FCVTM_F32toUS : SInst<"vcvtm_u32", "(1U)1", "Sf">;
1481+
def SCALAR_FCVTM_F64toSS : SInst<"vcvtm_s32", "(1S<)1", "Sd">;
1482+
def SCALAR_FCVTM_F64toUS : SInst<"vcvtm_u32", "(1U<)1", "Sd">;
1483+
def SCALAR_FCVTM_F32toSD : SInst<"vcvtm_s64", "(1S>)1", "Sf">;
1484+
def SCALAR_FCVTM_F32toUD : SInst<"vcvtm_u64", "(1U>)1", "Sf">;
1485+
def SCALAR_FCVTM_F64toSD : SInst<"vcvtm_s64", "(1S)1", "Sd">;
1486+
def SCALAR_FCVTM_F64toUD : SInst<"vcvtm_u64", "(1U)1", "Sd">;
1487+
1488+
def SCALAR_FCVTA_F32toSS : SInst<"vcvta_s32", "(1S)1", "Sf">;
1489+
def SCALAR_FCVTA_F32toUS : SInst<"vcvta_u32", "(1U)1", "Sf">;
1490+
def SCALAR_FCVTA_F64toSS : SInst<"vcvta_s32", "(1S<)1", "Sd">;
1491+
def SCALAR_FCVTA_F64toUS : SInst<"vcvta_u32", "(1U<)1", "Sd">;
1492+
def SCALAR_FCVTA_F32toSD : SInst<"vcvta_s64", "(1S>)1", "Sf">;
1493+
def SCALAR_FCVTA_F32toUD : SInst<"vcvta_u64", "(1U>)1", "Sf">;
1494+
def SCALAR_FCVTA_F64toSD : SInst<"vcvta_s64", "(1S)1", "Sd">;
1495+
def SCALAR_FCVTA_F64toUD : SInst<"vcvta_u64", "(1U)1", "Sd">;
1496+
1497+
def SCALAR_FCVTP_F32toSS : SInst<"vcvtp_s32", "(1S)1", "Sf">;
1498+
def SCALAR_FCVTP_F32toUS : SInst<"vcvtp_u32", "(1U)1", "Sf">;
1499+
def SCALAR_FCVTP_F64toSS : SInst<"vcvtp_s32", "(1S<)1", "Sd">;
1500+
def SCALAR_FCVTP_F64toUS : SInst<"vcvtp_u32", "(1U<)1", "Sd">;
1501+
def SCALAR_FCVTP_F32toSD : SInst<"vcvtp_s64", "(1S>)1", "Sf">;
1502+
def SCALAR_FCVTP_F32toUD : SInst<"vcvtp_u64", "(1U>)1", "Sf">;
1503+
def SCALAR_FCVTP_F64toSD : SInst<"vcvtp_s64", "(1S)1", "Sd">;
1504+
def SCALAR_FCVTP_F64toUD : SInst<"vcvtp_u64", "(1U)1", "Sd">;
1505+
1506+
def SCALAR_FCVTZ_F32toSS : SInst<"vcvt_s32", "(1S)1", "Sf">;
1507+
def SCALAR_FCVTZ_F32toUS : SInst<"vcvt_u32", "(1U)1", "Sf">;
1508+
def SCALAR_FCVTZ_F64toSS : SInst<"vcvt_s32", "(1S<)1", "Sd">;
1509+
def SCALAR_FCVTZ_F64toUS : SInst<"vcvt_u32", "(1U<)1", "Sd">;
1510+
def SCALAR_FCVTZ_F32toSD : SInst<"vcvt_s64", "(1S>)1", "Sf">;
1511+
def SCALAR_FCVTZ_F32toUD : SInst<"vcvt_u64", "(1U>)1", "Sf">;
1512+
def SCALAR_FCVTZ_F64toSD : SInst<"vcvt_s64", "(1S)1", "Sd">;
1513+
def SCALAR_FCVTZ_F64toUD : SInst<"vcvt_u64", "(1U)1", "Sd">;
14891514

14901515
////////////////////////////////////////////////////////////////////////////////
14911516
// Scalar Floating-point Reciprocal Estimate

clang/lib/CodeGen/TargetBuiltins/ARM.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -590,6 +590,7 @@ struct ARMVectorIntrinsicInfo {
590590
Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
591591
TypeModifier }
592592

593+
// clang-format off
593594
static const ARMVectorIntrinsicInfo ARMSIMDIntrinsicMap [] = {
594595
NEONMAP1(__a32_vcvt_bf16_f32, arm_neon_vcvtfp2bf, 0),
595596
NEONMAP0(splat_lane_v),
@@ -1217,35 +1218,55 @@ static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
12171218
NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
12181219
NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
12191220
NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
1221+
NEONMAP1(vcvtad_s32_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
12201222
NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
1223+
NEONMAP1(vcvtad_u32_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
12211224
NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
12221225
NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
1226+
NEONMAP1(vcvtas_s64_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
12231227
NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
1228+
NEONMAP1(vcvtas_u64_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
12241229
NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
12251230
NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
12261231
NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
12271232
NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
1233+
NEONMAP1(vcvtd_s32_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
12281234
NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
1235+
NEONMAP1(vcvtd_u32_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
12291236
NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
12301237
NEONMAP0(vcvth_bf16_f32),
1238+
NEONMAP1(vcvtmd_s32_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
12311239
NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
1240+
NEONMAP1(vcvtmd_u32_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
12321241
NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
12331242
NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
1243+
NEONMAP1(vcvtms_s64_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
12341244
NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
1245+
NEONMAP1(vcvtms_u64_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
1246+
NEONMAP1(vcvtnd_s32_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
12351247
NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
1248+
NEONMAP1(vcvtnd_u32_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
12361249
NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
12371250
NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
1251+
NEONMAP1(vcvtns_s64_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
12381252
NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
1253+
NEONMAP1(vcvtns_u64_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
1254+
NEONMAP1(vcvtpd_s32_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
12391255
NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
1256+
NEONMAP1(vcvtpd_u32_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
12401257
NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
12411258
NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
1259+
NEONMAP1(vcvtps_s64_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
12421260
NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
1261+
NEONMAP1(vcvtps_u64_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
12431262
NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
12441263
NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
12451264
NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
12461265
NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
12471266
NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
1267+
NEONMAP1(vcvts_s64_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType),
12481268
NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
1269+
NEONMAP1(vcvts_u64_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType),
12491270
NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
12501271
NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
12511272
NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
@@ -1446,6 +1467,7 @@ static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
14461467
NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
14471468
NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
14481469
};
1470+
// clang-format on
14491471

14501472
// Some intrinsics are equivalent for codegen.
14511473
static const std::pair<unsigned, unsigned> NEONEquivalentIntrinsicMap[] = {

0 commit comments

Comments
 (0)