@@ -2314,7 +2314,7 @@ inline XMVECTOR XM_CALLCONV XMVectorRound(FXMVECTOR V) noexcept
     return Result.v;
 
 #elif defined(_XM_ARM_NEON_INTRINSICS_)
-#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__
+#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __aarch64__
     return vrndnq_f32(V);
 #else
     uint32x4_t sign = vandq_u32(vreinterpretq_u32_f32(V), g_XMNegativeZero);
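The change in every hunk of this commit is the same one-line edit: _M_ARM64EC is the macro MSVC predefines when compiling for ARM64EC ("emulation compatible"), a native ARM64 target that is ABI-compatible with x64. ARM64EC builds do not define _M_ARM64, so before this patch they fell through to the slower ARMv7-era NEON fallback paths instead of the A64-only intrinsics (vrndnq_f32, vdivq_f32, vfmaq_f32, and so on). A minimal sketch of the extended guard, using a hypothetical helper macro (XM_HAS_A64_INTRINSICS is not part of DirectXMath, it is only for illustration):

    #include <arm_neon.h>

    // Sketch only: on any AArch64 target (MSVC _M_ARM64, ARM64EC, the hybrid
    // x86-on-ARM64 target, or a compiler defining __aarch64__) the full A64
    // NEON intrinsic set is available.
    #if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __aarch64__
    #define XM_HAS_A64_INTRINSICS 1
    #else
    #define XM_HAS_A64_INTRINSICS 0
    #endif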
@@ -2374,7 +2374,7 @@ inline XMVECTOR XM_CALLCONV XMVectorTruncate(FXMVECTOR V) noexcept
     return Result;
 
 #elif defined(_XM_ARM_NEON_INTRINSICS_)
-#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__
+#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __aarch64__
     return vrndq_f32(V);
 #else
     float32x4_t vTest = vabsq_f32(V);
@@ -2421,7 +2421,7 @@ inline XMVECTOR XM_CALLCONV XMVectorFloor(FXMVECTOR V) noexcept
         } } };
     return Result.v;
 #elif defined(_XM_ARM_NEON_INTRINSICS_)
-#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__
+#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __aarch64__
     return vrndmq_f32(V);
 #else
     float32x4_t vTest = vabsq_f32(V);
@@ -2472,7 +2472,7 @@ inline XMVECTOR XM_CALLCONV XMVectorCeiling(FXMVECTOR V) noexcept
         } } };
     return Result.v;
 #elif defined(_XM_ARM_NEON_INTRINSICS_)
-#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__
+#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __aarch64__
     return vrndpq_f32(V);
 #else
     float32x4_t vTest = vabsq_f32(V);
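All four rounding functions gain the identical fix, and each maps one-to-one onto an A64 rounding instruction (FRINTN, FRINTZ, FRINTM, FRINTP respectively). A rough reference, as a sketch assuming <arm_neon.h> on an AArch64 target:

    #include <arm_neon.h>

    void round_modes_example()
    {
        float32x4_t v = vdupq_n_f32(-2.5f);
        float32x4_t nearest = vrndnq_f32(v); // XMVectorRound:    -2.0 (ties to even)
        float32x4_t trunc   = vrndq_f32(v);  // XMVectorTruncate: -2.0 (toward zero)
        float32x4_t flr     = vrndmq_f32(v); // XMVectorFloor:    -3.0 (toward -inf)
        float32x4_t ceil    = vrndpq_f32(v); // XMVectorCeiling:  -2.0 (toward +inf)
        (void)nearest; (void)trunc; (void)flr; (void)ceil;
    }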
@@ -2765,7 +2765,7 @@ inline XMVECTOR XM_CALLCONV XMVectorSum(FXMVECTOR V) noexcept
     return Result.v;
 
 #elif defined(_XM_ARM_NEON_INTRINSICS_)
-#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__
+#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __aarch64__
     float32x4_t vTemp = vpaddq_f32(V, V);
     return vpaddq_f32(vTemp, vTemp);
 #else
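vpaddq_f32 is one of the pairwise operations NEON gained only with A64, which is why XMVectorSum needs this guard at all. A sketch of what the two branches compute for V = (x, y, z, w); the ARMv7 version here is an equivalent reconstruction, not a quote of the truncated #else body:

    #include <arm_neon.h>

    // A64 path: two pairwise adds splat the horizontal sum into every lane.
    float32x4_t sum_a64(float32x4_t V)
    {
        float32x4_t t = vpaddq_f32(V, V);   // (x+y, z+w, x+y, z+w)
        return vpaddq_f32(t, t);            // (x+y+z+w) in all four lanes
    }

    // ARMv7-style equivalent (sketch): no vpaddq_f32, so the sum is built
    // from the 64-bit halves instead.
    float32x4_t sum_armv7(float32x4_t V)
    {
        float32x2_t lo = vadd_f32(vget_low_f32(V), vget_high_f32(V)); // (x+z, y+w)
        float32x2_t s  = vpadd_f32(lo, lo);                           // sum in both lanes
        return vcombine_f32(s, s);
    }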
@@ -2970,7 +2970,7 @@ inline XMVECTOR XM_CALLCONV XMVectorMultiplyAdd
         } } };
     return Result.v;
 #elif defined(_XM_ARM_NEON_INTRINSICS_)
-#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__
+#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __aarch64__
     return vfmaq_f32(V3, V1, V2);
 #else
     return vmlaq_f32(V3, V1, V2);
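The A64 branch is not only a codegen improvement: vfmaq_f32 maps to FMLA, a true fused multiply-add with a single rounding, while ARMv7's vmlaq_f32 rounds the product before the accumulate. The scalar analogue, as a sketch:

    #include <cmath>

    // fma() rounds once, like vfmaq_f32/FMLA; the plain expression may round
    // the product first, like vmlaq_f32 on ARMv7 (unless the compiler
    // contracts it into an FMA on its own).
    float mad_fused(float a, float b, float c)   { return std::fma(a, b, c); }
    float mad_unfused(float a, float b, float c) { return a * b + c; }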
@@ -2997,7 +2997,7 @@ inline XMVECTOR XM_CALLCONV XMVectorDivide
         } } };
     return Result.v;
 #elif defined(_XM_ARM_NEON_INTRINSICS_)
-#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__
+#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __aarch64__
     return vdivq_f32(V1, V2);
 #else
     // 2 iterations of Newton-Raphson refinement of reciprocal
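The comment on the last context line refers to the ARMv7 fallback, which has no vector divide. A minimal sketch of that refinement, assuming the standard vrecpeq_f32/vrecpsq_f32 pairing (vrecpsq_f32(a, r) computes 2 - a*r, the Newton-Raphson correction factor):

    #include <arm_neon.h>

    // Sketch of V1 / V2 without vdivq_f32: estimate 1/V2, refine twice, multiply.
    float32x4_t divide_nr(float32x4_t V1, float32x4_t V2)
    {
        float32x4_t r = vrecpeq_f32(V2);        // ~8-bit reciprocal estimate
        r = vmulq_f32(vrecpsq_f32(V2, r), r);   // refinement 1: r *= (2 - V2*r)
        r = vmulq_f32(vrecpsq_f32(V2, r), r);   // refinement 2
        return vmulq_f32(V1, r);                // V1 * (1/V2)
    }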
@@ -3031,7 +3031,7 @@ inline XMVECTOR XM_CALLCONV XMVectorNegativeMultiplySubtract
         } } };
     return Result;
 #elif defined(_XM_ARM_NEON_INTRINSICS_)
-#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__
+#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __aarch64__
     return vfmsq_f32(V3, V1, V2);
 #else
     return vmlsq_f32(V3, V1, V2);
@@ -3097,7 +3097,7 @@ inline XMVECTOR XM_CALLCONV XMVectorReciprocal(FXMVECTOR V) noexcept
         } } };
     return Result.v;
 #elif defined(_XM_ARM_NEON_INTRINSICS_)
-#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__
+#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __aarch64__
     float32x4_t one = vdupq_n_f32(1.0f);
     return vdivq_f32(one, V);
 #else
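Worth noting why the A64 branch divides instead of calling the reciprocal-estimate intrinsic: vrecpeq_f32 alone delivers only about 8 bits of mantissa, whereas vdivq_f32 yields a correctly rounded result. A sketch of the contrast:

    #include <arm_neon.h>

    float32x4_t reciprocal_exact(float32x4_t V)    // what the patched A64 path does
    {
        return vdivq_f32(vdupq_n_f32(1.0f), V);
    }

    float32x4_t reciprocal_estimate(float32x4_t V) // low-precision estimate only
    {
        return vrecpeq_f32(V);
    }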
@@ -8040,7 +8040,7 @@ inline XMFLOAT2* XM_CALLCONV XMVector2TransformCoordStream
 
                 XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 3));
 
-#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__
+#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __aarch64__
                 V.val[0] = vdivq_f32(vResult0, W);
                 V.val[1] = vdivq_f32(vResult1, W);
 #else
@@ -8074,7 +8074,7 @@ inline XMFLOAT2* XM_CALLCONV XMVector2TransformCoordStream
             V = vget_high_f32(vResult);
             float32x2_t W = vdup_lane_f32(V, 1);
 
-#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__
+#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __aarch64__
             V = vget_low_f32(vResult);
             V = vdiv_f32(V, W);
 #else
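The stream hunks from here on are all the perspective divide: after transforming, the components are divided by w. On AArch64 that is a real vector divide (vdivq_f32/vdiv_f32); the #else branches, truncated by the hunk boundaries, refine a reciprocal of W instead, exactly as the "Newton-Raphson refinement" comments below say. A sketch of that fallback for the 64-bit tail case above, using the non-quad vrecpe_f32/vrecps_f32 forms:

    #include <arm_neon.h>

    // Sketch of dividing a float32x2_t by W without vdiv_f32 (ARMv7):
    float32x2_t div_by_w(float32x2_t V, float32x2_t W)
    {
        float32x2_t r = vrecpe_f32(W);      // reciprocal estimate of W
        r = vmul_f32(vrecps_f32(W, r), r);  // Newton-Raphson refinement 1
        r = vmul_f32(vrecps_f32(W, r), r);  // refinement 2
        return vmul_f32(V, r);              // V / W
    }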
@@ -10818,7 +10818,7 @@ inline XMFLOAT3* XM_CALLCONV XMVector3TransformCoordStream
 
                 XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 5));
 
-#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__
+#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __aarch64__
                 V.val[0] = vdivq_f32(vResult0, W);
                 V.val[1] = vdivq_f32(vResult1, W);
                 V.val[2] = vdivq_f32(vResult2, W);
@@ -10857,7 +10857,7 @@ inline XMFLOAT3* XM_CALLCONV XMVector3TransformCoordStream
             VH = vget_high_f32(vResult);
             XMVECTOR W = vdupq_lane_f32(VH, 1);
 
-#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__
+#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __aarch64__
            vResult = vdivq_f32(vResult, W);
 #else
             // 2 iterations of Newton-Raphson refinement of reciprocal for W
@@ -11768,7 +11768,7 @@ inline XMFLOAT3* XM_CALLCONV XMVector3ProjectStream
 
                 XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 5));
 
-#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__
+#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __aarch64__
                 vResult0 = vdivq_f32(vResult0, W);
                 vResult1 = vdivq_f32(vResult1, W);
                 vResult2 = vdivq_f32(vResult2, W);
@@ -11816,7 +11816,7 @@ inline XMFLOAT3* XM_CALLCONV XMVector3ProjectStream
             VH = vget_high_f32(vResult);
             XMVECTOR W = vdupq_lane_f32(VH, 1);
 
-#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__
+#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __aarch64__
            vResult = vdivq_f32(vResult, W);
 #else
             // 2 iterations of Newton-Raphson refinement of reciprocal for W
@@ -12327,7 +12327,7 @@ inline XMFLOAT3* XM_CALLCONV XMVector3UnprojectStream
 
                 XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 5));
 
-#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__
+#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __aarch64__
                 V.val[0] = vdivq_f32(vResult0, W);
                 V.val[1] = vdivq_f32(vResult1, W);
                 V.val[2] = vdivq_f32(vResult2, W);
@@ -12381,7 +12381,7 @@ inline XMFLOAT3* XM_CALLCONV XMVector3UnprojectStream
             VH = vget_high_f32(vResult);
            XMVECTOR W = vdupq_lane_f32(VH, 1);
 
-#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__
+#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __aarch64__
            vResult = vdivq_f32(vResult, W);
 #else
             // 2 iterations of Newton-Raphson refinement of reciprocal for W