@@ -1341,24 +1341,48 @@ double findMaximum (const double* src, Size num) noexcept
/** Converts 32-bit signed integers to floats, scaling each one by a multiplier.

    Every element is computed as dest[i] = (float) src[i] * multiplier.

    @param dest        output array that receives num floats
    @param src         input array of num 32-bit signed integers
    @param multiplier  scale factor applied after the int-to-float conversion
    @param num         number of elements to convert
*/
template <typename Size>
void convertFixedToFloat (float* dest, const int* src, float multiplier, Size num) noexcept
{
#if YUP_USE_VDSP_FRAMEWORK
    // vDSP takes an unsigned element count: a negative signed count would wrap
    // to a huge length, whereas the scalar loop below simply does nothing.
    if (num <= 0)
        return;

    vDSP_vflt32 (src, 1, dest, 1, (vDSP_Length) num);
    vDSP_vsmul (dest, 1, &multiplier, dest, 1, (vDSP_Length) num);
#elif YUP_USE_ARM_NEON
    YUP_PERFORM_VEC_OP_SRC_DEST (dest[i] = (float) src[i] * multiplier,
                                 vmulq_n_f32 (vcvtq_f32_s32 (vld1q_s32 (src)), multiplier),
                                 YUP_LOAD_NONE,
                                 YUP_INCREMENT_SRC_DEST, )
#elif YUP_USE_SSE_INTRINSICS
    YUP_PERFORM_VEC_OP_SRC_DEST (dest[i] = (float) src[i] * multiplier,
                                 Mode::mul (mult, _mm_cvtepi32_ps (_mm_loadu_si128 (reinterpret_cast<const __m128i*> (src)))),
                                 YUP_LOAD_NONE,
                                 YUP_INCREMENT_SRC_DEST,
                                 const Mode::ParallelType mult = Mode::load1 (multiplier);)
#else
    // Portable fallback when no vector backend is enabled.
    for (Size i = 0; i < num; ++i)
        dest[i] = (float) src[i] * multiplier;
#endif
}
13571363
13581364template <typename Size>
13591365void convertFloatToFixed (int * dest, const float * src, float multiplier, Size num) noexcept
13601366{
1361- #if YUP_USE_ARM_NEON
1367+ #if YUP_USE_VDSP_FRAMEWORK
1368+ constexpr Size kStackBufferSize = 256 ;
1369+ float stackBuffer[kStackBufferSize ];
1370+
1371+ if (num <= kStackBufferSize )
1372+ {
1373+ vDSP_vsmul (src, 1 , &multiplier, stackBuffer, 1 , (vDSP_Length) num);
1374+ vDSP_vfix32 (stackBuffer, 1 , reinterpret_cast <int *> (dest), 1 , (vDSP_Length) num);
1375+ }
1376+ else
1377+ {
1378+ for (Size i = 0 ; i < num; i += kStackBufferSize )
1379+ {
1380+ const Size currentChunk = jmin (kStackBufferSize , num - i);
1381+ vDSP_vsmul (src + i, 1 , &multiplier, stackBuffer, 1 , (vDSP_Length) currentChunk);
1382+ vDSP_vfix32 (stackBuffer, 1 , reinterpret_cast <int *> (dest + i), 1 , (vDSP_Length) currentChunk);
1383+ }
1384+ }
1385+ #elif YUP_USE_ARM_NEON
13621386 const auto numLongs = num & ~3 ;
13631387
13641388 if (numLongs != 0 )
@@ -1374,7 +1398,6 @@ void convertFloatToFixed (int* dest, const float* src, float multiplier, Size nu
13741398
13751399 for (Size i = numLongs; i < num; ++i)
13761400 dest[i] = (int ) (src[i] * multiplier);
1377-
13781401#elif YUP_USE_SSE_INTRINSICS
13791402 const auto numLongs = num & ~3 ;
13801403 const __m128 mult = _mm_set1_ps (multiplier);
@@ -1392,7 +1415,115 @@ void convertFloatToFixed (int* dest, const float* src, float multiplier, Size nu
13921415
13931416 for (Size i = numLongs; i < num; ++i)
13941417 dest[i] = (int ) (src[i] * multiplier);
1418+ #else
1419+ for (Size i = 0 ; i < num; ++i)
1420+ dest[i] = (int ) (src[i] * multiplier);
1421+ #endif
1422+ }
1423+
/** Converts 32-bit signed integers to doubles, scaling each one by a multiplier.

    Every element is computed as dest[i] = (double) src[i] * multiplier.

    @param dest        output array that receives num doubles
    @param src         input array of num 32-bit signed integers
    @param multiplier  scale factor applied after the int-to-double conversion
    @param num         number of elements to convert
*/
template <typename Size>
void convertFixedToFloat (double* dest, const int* src, double multiplier, Size num) noexcept
{
#if YUP_USE_VDSP_FRAMEWORK
    // vDSP takes an unsigned element count: a negative signed count would wrap
    // to a huge length, whereas the scalar loop below simply does nothing.
    if (num <= 0)
        return;

    vDSP_vflt32D (src, 1, dest, 1, (vDSP_Length) num);
    vDSP_vsmulD (dest, 1, &multiplier, dest, 1, (vDSP_Length) num);
#elif YUP_USE_SSE_INTRINSICS
    // Two elements per iteration using SSE2-only intrinsics. _mm_cvtepi32_pd
    // converts the two low int32 lanes to doubles exactly, so the result matches
    // the scalar expression bit for bit (no SSE4.1 lane extraction required).
    const auto numPairs = num & ~1;
    const __m128d mult = _mm_set1_pd (multiplier);

    for (Size i = 0; i < numPairs; i += 2)
    {
        const __m128i ints = _mm_loadl_epi64 (reinterpret_cast<const __m128i*> (src + i));
        _mm_storeu_pd (dest + i, _mm_mul_pd (_mm_cvtepi32_pd (ints), mult));
    }

    // Odd trailing element, if any.
    for (Size i = numPairs; i < num; ++i)
        dest[i] = (double) src[i] * multiplier;
#else
    // Portable path. It also serves NEON builds: the conversion is done per
    // element there, which this loop expresses directly.
    for (Size i = 0; i < num; ++i)
        dest[i] = (double) src[i] * multiplier;
#endif
}
1470+
1471+ template <typename Size>
1472+ void convertFloatToFixed (int * dest, const double * src, double multiplier, Size num) noexcept
1473+ {
1474+ #if YUP_USE_VDSP_FRAMEWORK
1475+ constexpr Size kStackBufferSize = 256 ;
1476+ double stackBuffer[kStackBufferSize ];
1477+
1478+ if (num <= kStackBufferSize )
1479+ {
1480+ vDSP_vsmulD (src, 1 , &multiplier, stackBuffer, 1 , (vDSP_Length) num);
1481+ vDSP_vfix32D (stackBuffer, 1 , reinterpret_cast <int *> (dest), 1 , (vDSP_Length) num);
1482+ }
1483+ else
1484+ {
1485+ for (Size i = 0 ; i < num; i += kStackBufferSize )
1486+ {
1487+ const Size currentChunk = jmin (kStackBufferSize , num - i);
1488+ vDSP_vsmulD (src + i, 1 , &multiplier, stackBuffer, 1 , (vDSP_Length) currentChunk);
1489+ vDSP_vfix32D (stackBuffer, 1 , reinterpret_cast <int *> (dest + i), 1 , (vDSP_Length) currentChunk);
1490+ }
1491+ }
1492+ #elif YUP_USE_ARM_NEON
1493+ const auto numLongs = num & ~1 ;
1494+
1495+ if (numLongs != 0 )
1496+ {
1497+ for (Size i = 0 ; i < numLongs; i += 2 )
1498+ {
1499+ float64x2_t doubleVec = vld1q_f64 (src + i);
1500+ float64x2_t scaledVec = vmulq_n_f64 (doubleVec, multiplier);
1501+ double d0 = vgetq_lane_f64 (scaledVec, 0 );
1502+ double d1 = vgetq_lane_f64 (scaledVec, 1 );
1503+ dest[i] = (int ) d0;
1504+ dest[i + 1 ] = (int ) d1;
1505+ }
1506+ }
1507+
1508+ for (Size i = numLongs; i < num; ++i)
1509+ dest[i] = (int ) (src[i] * multiplier);
1510+ #elif YUP_USE_SSE_INTRINSICS
1511+ const auto numLongs = num & ~1 ;
1512+ const __m128d mult = _mm_set1_pd (multiplier);
1513+
1514+ if (numLongs != 0 )
1515+ {
1516+ for (Size i = 0 ; i < numLongs; i += 2 )
1517+ {
1518+ __m128d doubleVec = _mm_loadu_pd (src + i);
1519+ __m128d scaledVec = _mm_mul_pd (doubleVec, mult);
1520+ __m128i intVec = _mm_cvtpd_epi32 (scaledVec);
1521+ _mm_storel_epi64 (reinterpret_cast <__m128i*> (dest + i), intVec);
1522+ }
1523+ }
13951524
1525+ for (Size i = numLongs; i < num; ++i)
1526+ dest[i] = (int ) (src[i] * multiplier);
13961527#else
13971528 for (Size i = 0 ; i < num; ++i)
13981529 dest[i] = (int ) (src[i] * multiplier);
@@ -1639,30 +1770,32 @@ FloatType YUP_CALLTYPE FloatVectorOperationsBase<FloatType, CountType>::findMaxi
16391770 return FloatVectorHelpers::findMaximum (src, numValues);
16401771}
16411772
1642- template struct FloatVectorOperationsBase <float , int >;
1643- template struct FloatVectorOperationsBase <float , size_t >;
1644- template struct FloatVectorOperationsBase <double , int >;
1645- template struct FloatVectorOperationsBase <double , size_t >;
1646-
1647- void YUP_CALLTYPE FloatVectorOperations::convertFixedToFloat (float * dest, const int * src, float multiplier, size_t num) noexcept
1773+ template <typename FloatType, typename CountType>
1774+ void YUP_CALLTYPE FloatVectorOperationsBase<FloatType, CountType>::convertFixedToFloat (FloatType* dest,
1775+ const int * src,
1776+ FloatType multiplier,
1777+ CountType numValues) noexcept
16481778{
1649- FloatVectorHelpers::convertFixedToFloat (dest, src, multiplier, num );
1779+ FloatVectorHelpers::convertFixedToFloat (dest, src, multiplier, numValues );
16501780}
16511781
1652- void YUP_CALLTYPE FloatVectorOperations::convertFixedToFloat (float * dest, const int * src, float multiplier, int num) noexcept
1782+ template <typename FloatType, typename CountType>
1783+ void YUP_CALLTYPE FloatVectorOperationsBase<FloatType, CountType>::convertFloatToFixed (int * dest,
1784+ const FloatType* src,
1785+ FloatType multiplier,
1786+ CountType numValues) noexcept
16531787{
1654- FloatVectorHelpers::convertFixedToFloat (dest, src, multiplier, num );
1788+ FloatVectorHelpers::convertFloatToFixed (dest, src, multiplier, numValues );
16551789}
16561790
1657- void YUP_CALLTYPE FloatVectorOperations::convertFloatToFixed (int * dest, const float * src, float multiplier, size_t num) noexcept
1658- {
1659- FloatVectorHelpers::convertFloatToFixed (dest, src, multiplier, num);
1660- }
1791+ // ==============================================================================
16611792
1662- void YUP_CALLTYPE FloatVectorOperations::convertFloatToFixed (int * dest, const float * src, float multiplier, int num) noexcept
1663- {
1664- FloatVectorHelpers::convertFloatToFixed (dest, src, multiplier, num);
1665- }
1793+ template struct FloatVectorOperationsBase <float , int >;
1794+ template struct FloatVectorOperationsBase <float , size_t >;
1795+ template struct FloatVectorOperationsBase <double , int >;
1796+ template struct FloatVectorOperationsBase <double , size_t >;
1797+
1798+ // ==============================================================================
16661799
16671800intptr_t YUP_CALLTYPE FloatVectorOperations::getFpStatusRegister () noexcept
16681801{
@@ -1770,6 +1903,8 @@ bool YUP_CALLTYPE FloatVectorOperations::areDenormalsDisabled() noexcept
17701903#endif
17711904}
17721905
1906+ // ==============================================================================
1907+
17731908ScopedNoDenormals::ScopedNoDenormals () noexcept
17741909{
17751910#if YUP_USE_SSE_INTRINSICS || (YUP_USE_ARM_NEON || (YUP_64BIT && YUP_ARM))
0 commit comments