@@ -3701,16 +3701,73 @@ static HWY_INLINE V VsxF2INormalizeSrcVals(V v) {
37013701#endif
37023702}
37033703
3704+ template <class VF32 >
3705+ static HWY_INLINE HWY_MAYBE_UNUSED VFromD<Repartition<int64_t , DFromV<VF32>>>
3706+ VsxXvcvspsxds (VF32 vf32) {
3707+ using VI64 = VFromD<Repartition<int64_t , DFromV<VF32>>>;
3708+ #if (HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1500) || \
3709+ HWY_HAS_BUILTIN (__builtin_vsx_xvcvspsxds)
3710+ // Use __builtin_vsx_xvcvspsxds if it is available (which is the case with
3711+ // GCC 4.8 through GCC 14 or Clang 13 or later on PPC8/PPC9/PPC10)
3712+ return VI64{__builtin_vsx_xvcvspsxds (vf32.raw )};
3713+ #elif HWY_COMPILER_GCC_ACTUAL >= 1500 && HWY_IS_LITTLE_ENDIAN
3714+ // On little-endian PPC8/PPC9/PPC10 with GCC 15 or later, use the F32->I64
3715+ // vec_signedo intrinsic (source values are expected in the odd lanes), as
3716+ // the __builtin_vsx_xvcvspsxds intrinsic was removed in GCC 15
3717+ return VI64{vec_signedo (vf32.raw )};
3718+ #elif HWY_COMPILER_GCC_ACTUAL >= 1500 && HWY_IS_BIG_ENDIAN
3719+ // On big-endian PPC8/PPC9/PPC10 with GCC 15 or later, use the F32->I64
3720+ // vec_signede intrinsic (source values are expected in the even lanes), as
3721+ // the __builtin_vsx_xvcvspsxds intrinsic was removed in GCC 15
3722+ return VI64{vec_signede (vf32.raw )};
3723+ #else
3724+ // Inline assembly fallback for older versions of Clang that do not have the
3725+ // __builtin_vsx_xvcvspsxds intrinsic
3726+ __vector signed long long raw_result;
3727+ __asm__ (" xvcvspsxds %x0, %x1" : " =wa" (raw_result) : " wa" (vf32.raw ) :);
3728+ return VI64{raw_result};
3729+ #endif
3730+ }
3731+
3732+ template <class VF32 >
3733+ static HWY_INLINE HWY_MAYBE_UNUSED VFromD<Repartition<uint64_t , DFromV<VF32>>>
3734+ VsxXvcvspuxds (VF32 vf32) {
3735+ using VU64 = VFromD<Repartition<uint64_t , DFromV<VF32>>>;
3736+ #if (HWY_COMPILER_GCC_ACTUAL && HWY_COMPILER_GCC_ACTUAL < 1500) || \
3737+ HWY_HAS_BUILTIN (__builtin_vsx_xvcvspuxds)
3738+ // Use __builtin_vsx_xvcvspuxds if it is available (which is the case with
3739+ // GCC 4.8 through GCC 14 or Clang 13 or later on PPC8/PPC9/PPC10)
3740+ return VU64{reinterpret_cast <__vector unsigned long long >(
3741+ __builtin_vsx_xvcvspuxds (vf32.raw ))};
3742+ #elif HWY_COMPILER_GCC_ACTUAL >= 1500 && HWY_IS_LITTLE_ENDIAN
3743+ // On little-endian PPC8/PPC9/PPC10 with GCC 15 or later, use the F32->U64
3744+ // vec_unsignedo intrinsic (source values are expected in the odd lanes), as
3745+ // the __builtin_vsx_xvcvspuxds intrinsic was removed in GCC 15
3746+ return VU64{vec_unsignedo (vf32.raw )};
3747+ #elif HWY_COMPILER_GCC_ACTUAL >= 1500 && HWY_IS_BIG_ENDIAN
3748+ // On big-endian PPC8/PPC9/PPC10 with GCC 15 or later, use the F32->U64
3749+ // vec_unsignede intrinsic (source values are expected in the even lanes), as
3750+ // the __builtin_vsx_xvcvspuxds intrinsic was removed in GCC 15
3751+ return VU64{vec_unsignede (vf32.raw )};
3752+ #else
3753+ // Inline assembly fallback for older versions of Clang that do not have the
3754+ // __builtin_vsx_xvcvspuxds intrinsic
3755+ __vector unsigned long long raw_result;
3756+ __asm__ (" xvcvspuxds %x0, %x1" : " =wa" (raw_result) : " wa" (vf32.raw ) :);
3757+ return VU64{raw_result};
3758+ #endif
3759+ }
3760+
37043761} // namespace detail
37053762#endif // !HWY_S390X_HAVE_Z14
37063763
37073764template <class D , HWY_IF_I64_D(D)>
37083765HWY_API VFromD<D> PromoteTo (D di64, VFromD<Rebind<float , D>> v) {
3709- #if !HWY_S390X_HAVE_Z14 && \
3710- (HWY_COMPILER_GCC_ACTUAL || HWY_HAS_BUILTIN (__builtin_vsx_xvcvspsxds))
3711- const __vector float raw_v =
3712- detail::VsxF2INormalizeSrcVals ( InterleaveLower (v, v)). raw ;
3713- return VFromD< decltype (di64)>{ __builtin_vsx_xvcvspsxds (raw_v)} ;
3766+ #if !HWY_S390X_HAVE_Z14
3767+ const Repartition< float , decltype (di64)> dt_f32;
3768+ const auto vt_f32 = ResizeBitCast (dt_f32, v);
3769+ return detail::VsxXvcvspsxds (
3770+ detail::VsxF2INormalizeSrcVals ( InterleaveLower (vt_f32, vt_f32))) ;
37143771#else
37153772 const RebindToFloat<decltype (di64)> df64;
37163773 return ConvertTo (di64, PromoteTo (df64, v));
@@ -3719,12 +3776,11 @@ HWY_API VFromD<D> PromoteTo(D di64, VFromD<Rebind<float, D>> v) {
37193776
37203777template <class D , HWY_IF_U64_D(D)>
37213778HWY_API VFromD<D> PromoteTo (D du64, VFromD<Rebind<float , D>> v) {
3722- #if !HWY_S390X_HAVE_Z14 && \
3723- (HWY_COMPILER_GCC_ACTUAL || HWY_HAS_BUILTIN (__builtin_vsx_xvcvspuxds))
3724- const __vector float raw_v =
3725- detail::VsxF2INormalizeSrcVals (InterleaveLower (v, v)).raw ;
3726- return VFromD<decltype (du64)>{reinterpret_cast <__vector unsigned long long >(
3727- __builtin_vsx_xvcvspuxds (raw_v))};
3779+ #if !HWY_S390X_HAVE_Z14
3780+ const Repartition<float , decltype (du64)> dt_f32;
3781+ const auto vt_f32 = ResizeBitCast (dt_f32, v);
3782+ return detail::VsxXvcvspuxds (
3783+ detail::VsxF2INormalizeSrcVals (InterleaveLower (vt_f32, vt_f32)));
37283784#else
37293785 const RebindToFloat<decltype (du64)> df64;
37303786 return ConvertTo (du64, PromoteTo (df64, v));
@@ -3829,12 +3885,10 @@ HWY_API VFromD<D> PromoteUpperTo(D df64, Vec128<uint32_t> v) {
38293885
38303886template <class D , HWY_IF_V_SIZE_D(D, 16 ), HWY_IF_I64_D(D)>
38313887HWY_API VFromD<D> PromoteUpperTo (D di64, Vec128<float > v) {
3832- #if !HWY_S390X_HAVE_Z14 && \
3833- (HWY_COMPILER_GCC_ACTUAL || HWY_HAS_BUILTIN (__builtin_vsx_xvcvspsxds))
3834- const __vector float raw_v =
3835- detail::VsxF2INormalizeSrcVals (InterleaveUpper (Full128<float >(), v, v))
3836- .raw ;
3837- return VFromD<decltype (di64)>{__builtin_vsx_xvcvspsxds (raw_v)};
3888+ #if !HWY_S390X_HAVE_Z14
3889+ (void )di64;
3890+ return detail::VsxXvcvspsxds (
3891+ detail::VsxF2INormalizeSrcVals (InterleaveUpper (Full128<float >(), v, v)));
38383892#else
38393893 const RebindToFloat<decltype (di64)> df64;
38403894 return ConvertTo (di64, PromoteUpperTo (df64, v));
@@ -3843,13 +3897,10 @@ HWY_API VFromD<D> PromoteUpperTo(D di64, Vec128<float> v) {
38433897
38443898template <class D , HWY_IF_V_SIZE_D(D, 16 ), HWY_IF_U64_D(D)>
38453899HWY_API VFromD<D> PromoteUpperTo (D du64, Vec128<float > v) {
3846- #if !HWY_S390X_HAVE_Z14 && \
3847- (HWY_COMPILER_GCC_ACTUAL || HWY_HAS_BUILTIN (__builtin_vsx_xvcvspuxds))
3848- const __vector float raw_v =
3849- detail::VsxF2INormalizeSrcVals (InterleaveUpper (Full128<float >(), v, v))
3850- .raw ;
3851- return VFromD<decltype (du64)>{reinterpret_cast <__vector unsigned long long >(
3852- __builtin_vsx_xvcvspuxds (raw_v))};
3900+ #if !HWY_S390X_HAVE_Z14
3901+ (void )du64;
3902+ return detail::VsxXvcvspuxds (
3903+ detail::VsxF2INormalizeSrcVals (InterleaveUpper (Full128<float >(), v, v)));
38533904#else
38543905 const RebindToFloat<decltype (du64)> df64;
38553906 return ConvertTo (du64, PromoteUpperTo (df64, v));
@@ -3937,20 +3988,18 @@ HWY_INLINE VFromD<D> PromoteEvenTo(hwy::SignedTag /*to_type_tag*/,
39373988 hwy::SizeTag<8 > /* to_lane_size_tag*/ ,
39383989 hwy::FloatTag /* from_type_tag*/ , D d_to,
39393990 V v) {
3940- #if !HWY_S390X_HAVE_Z14 && \
3941- (HWY_COMPILER_GCC_ACTUAL || HWY_HAS_BUILTIN (__builtin_vsx_xvcvspsxds))
3991+ #if !HWY_S390X_HAVE_Z14
39423992 (void )d_to;
39433993 const auto normalized_v = detail::VsxF2INormalizeSrcVals (v);
39443994#if HWY_IS_LITTLE_ENDIAN
3945- // __builtin_vsx_xvcvspsxds expects the source values to be in the odd lanes
3946- // on little-endian PPC, and the vec_sld operation below will shift the even
3995+ // VsxXvcvspsxds expects the source values to be in the odd lanes on
3996+ // little-endian PPC, and the Shuffle2103 operation below will shift the even
39473997 // lanes of normalized_v into the odd lanes.
3948- return VFromD<D>{
3949- __builtin_vsx_xvcvspsxds (vec_sld (normalized_v.raw , normalized_v.raw , 4 ))};
3998+ return VsxXvcvspsxds (Shuffle2103 (normalized_v));
39503999#else
3951- // __builtin_vsx_xvcvspsxds expects the source values to be in the even lanes
3952- // on big-endian PPC.
3953- return VFromD<D>{ __builtin_vsx_xvcvspsxds (normalized_v. raw )} ;
4000+ // VsxXvcvspsxds expects the source values to be in the even lanes on
4001+ // big-endian PPC.
4002+ return VsxXvcvspsxds (normalized_v) ;
39544003#endif
39554004#else
39564005 const RebindToFloat<decltype (d_to)> df64;
@@ -3965,22 +4014,18 @@ HWY_INLINE VFromD<D> PromoteEvenTo(hwy::UnsignedTag /*to_type_tag*/,
39654014 hwy::SizeTag<8 > /* to_lane_size_tag*/ ,
39664015 hwy::FloatTag /* from_type_tag*/ , D d_to,
39674016 V v) {
3968- #if !HWY_S390X_HAVE_Z14 && \
3969- (HWY_COMPILER_GCC_ACTUAL || HWY_HAS_BUILTIN (__builtin_vsx_xvcvspuxds))
4017+ #if !HWY_S390X_HAVE_Z14
39704018 (void )d_to;
39714019 const auto normalized_v = detail::VsxF2INormalizeSrcVals (v);
39724020#if HWY_IS_LITTLE_ENDIAN
3973- // __builtin_vsx_xvcvspuxds expects the source values to be in the odd lanes
3974- // on little-endian PPC, and the vec_sld operation below will shift the even
3975- // lanes of normalized_v into the odd lanes.
3976- return VFromD<D>{
3977- reinterpret_cast <__vector unsigned long long >(__builtin_vsx_xvcvspuxds (
3978- vec_sld (normalized_v.raw , normalized_v.raw , 4 )))};
4021+ // VsxXvcvspuxds expects the source values to be in the odd lanes
4022+ // on little-endian PPC, and the Shuffle2103 operation below will shift the
4023+ // even lanes of normalized_v into the odd lanes.
4024+ return VsxXvcvspuxds (Shuffle2103 (normalized_v));
39794025#else
3980- // __builtin_vsx_xvcvspuxds expects the source values to be in the even lanes
4026+ // VsxXvcvspuxds expects the source values to be in the even lanes
39814027 // on big-endian PPC.
3982- return VFromD<D>{reinterpret_cast <__vector unsigned long long >(
3983- __builtin_vsx_xvcvspuxds (normalized_v.raw ))};
4028+ return VsxXvcvspuxds (normalized_v);
39844029#endif
39854030#else
39864031 const RebindToFloat<decltype (d_to)> df64;
@@ -4022,20 +4067,18 @@ HWY_INLINE VFromD<D> PromoteOddTo(hwy::SignedTag /*to_type_tag*/,
40224067 hwy::SizeTag<8 > /* to_lane_size_tag*/ ,
40234068 hwy::FloatTag /* from_type_tag*/ , D d_to,
40244069 V v) {
4025- #if !HWY_S390X_HAVE_Z14 && \
4026- (HWY_COMPILER_GCC_ACTUAL || HWY_HAS_BUILTIN (__builtin_vsx_xvcvspsxds))
4070+ #if !HWY_S390X_HAVE_Z14
40274071 (void )d_to;
40284072 const auto normalized_v = detail::VsxF2INormalizeSrcVals (v);
40294073#if HWY_IS_LITTLE_ENDIAN
4030- // __builtin_vsx_xvcvspsxds expects the source values to be in the odd lanes
4074+ // VsxXvcvspsxds expects the source values to be in the odd lanes
40314075 // on little-endian PPC
4032- return VFromD<D>{ __builtin_vsx_xvcvspsxds (normalized_v. raw )} ;
4076+ return VsxXvcvspsxds (normalized_v) ;
40334077#else
4034- // __builtin_vsx_xvcvspsxds expects the source values to be in the even lanes
4035- // on big-endian PPC, and the vec_sld operation below will shift the odd lanes
4036- // of normalized_v into the even lanes.
4037- return VFromD<D>{
4038- __builtin_vsx_xvcvspsxds (vec_sld (normalized_v.raw , normalized_v.raw , 4 ))};
4078+ // VsxXvcvspsxds expects the source values to be in the even lanes
4079+ // on big-endian PPC, and the Shuffle0321 operation below will shift the odd
4080+ // lanes of normalized_v into the even lanes.
4081+ return VsxXvcvspsxds (Shuffle0321 (normalized_v));
40394082#endif
40404083#else
40414084 const RebindToFloat<decltype (d_to)> df64;
@@ -4050,22 +4093,18 @@ HWY_INLINE VFromD<D> PromoteOddTo(hwy::UnsignedTag /*to_type_tag*/,
40504093 hwy::SizeTag<8 > /* to_lane_size_tag*/ ,
40514094 hwy::FloatTag /* from_type_tag*/ , D d_to,
40524095 V v) {
4053- #if !HWY_S390X_HAVE_Z14 && \
4054- (HWY_COMPILER_GCC_ACTUAL || HWY_HAS_BUILTIN (__builtin_vsx_xvcvspuxds))
4096+ #if !HWY_S390X_HAVE_Z14
40554097 (void )d_to;
40564098 const auto normalized_v = detail::VsxF2INormalizeSrcVals (v);
40574099#if HWY_IS_LITTLE_ENDIAN
4058- // __builtin_vsx_xvcvspuxds expects the source values to be in the odd lanes
4100+ // VsxXvcvspuxds expects the source values to be in the odd lanes
40594101 // on little-endian PPC
4060- return VFromD<D>{reinterpret_cast <__vector unsigned long long >(
4061- __builtin_vsx_xvcvspuxds (normalized_v.raw ))};
4102+ return VsxXvcvspuxds (normalized_v);
40624103#else
4063- // __builtin_vsx_xvcvspuxds expects the source values to be in the even lanes
4064- // on big-endian PPC, and the vec_sld operation below will shift the odd lanes
4065- // of normalized_v into the even lanes.
4066- return VFromD<D>{
4067- reinterpret_cast <__vector unsigned long long >(__builtin_vsx_xvcvspuxds (
4068- vec_sld (normalized_v.raw , normalized_v.raw , 4 )))};
4104+ // VsxXvcvspuxds expects the source values to be in the even lanes
4105+ // on big-endian PPC, and the Shuffle0321 operation below will shift the odd
4106+ // lanes of normalized_v into the even lanes.
4107+ return VsxXvcvspuxds (Shuffle0321 (normalized_v));
40694108#endif
40704109#else
40714110 const RebindToFloat<decltype (d_to)> df64;
0 commit comments