Skip to content

Commit d3c5cbd

Browse files
committed
Added _M_ARM64EC support from the Windows build
1 parent 0e6ccda commit d3c5cbd

File tree

4 files changed

+30
-30
lines changed

4 files changed

+30
-30
lines changed

Inc/DirectXMath.h

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
#error DirectX Math requires Visual C++ 2017 or later.
2020
#endif
2121

22-
#if defined(_MSC_VER) && !defined(_M_ARM) && !defined(_M_ARM64) && !defined(_M_HYBRID_X86_ARM64) && (!_MANAGED) && (!_M_CEE) && (!defined(_M_IX86_FP) || (_M_IX86_FP > 1)) && !defined(_XM_NO_INTRINSICS_) && !defined(_XM_VECTORCALL_)
22+
#if defined(_MSC_VER) && !defined(_M_ARM) && !defined(_M_ARM64) && !defined(_M_HYBRID_X86_ARM64) && !defined(_M_ARM64EC) && (!_MANAGED) && (!_M_CEE) && (!defined(_M_IX86_FP) || (_M_IX86_FP > 1)) && !defined(_XM_NO_INTRINSICS_) && !defined(_XM_VECTORCALL_)
2323
#define _XM_VECTORCALL_ 1
2424
#endif
2525

@@ -80,9 +80,9 @@
8080
#endif
8181

8282
#if !defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
83-
#if (defined(_M_IX86) || defined(_M_X64) || __i386__ || __x86_64__) && !defined(_M_HYBRID_X86_ARM64)
83+
#if (defined(_M_IX86) || defined(_M_X64) || __i386__ || __x86_64__) && !defined(_M_HYBRID_X86_ARM64) && !defined(_M_ARM64EC)
8484
#define _XM_SSE_INTRINSICS_
85-
#elif defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __arm__ || __aarch64__
85+
#elif defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __arm__ || __aarch64__
8686
#define _XM_ARM_NEON_INTRINSICS_
8787
#elif !defined(_XM_NO_INTRINSICS_)
8888
#error DirectX Math does not support this target
@@ -135,7 +135,7 @@
135135
#endif
136136

137137
#elif defined(_XM_ARM_NEON_INTRINSICS_)
138-
#if defined(_MSC_VER) && !defined(__clang__) && (defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64))
138+
#if defined(_MSC_VER) && !defined(__clang__) && (defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC))
139139
#include <arm64_neon.h>
140140
#else
141141
#include <arm_neon.h>
@@ -352,14 +352,14 @@ namespace DirectX
352352
#endif
353353

354354
// Fix-up for (4th) XMVECTOR parameter to pass in-register for ARM, ARM64, and vector call; by reference otherwise
355-
#if ( defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || _XM_VECTORCALL_ || __arm__ || __aarch64__ ) && !defined(_XM_NO_INTRINSICS_)
355+
#if ( defined(_M_ARM) || defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || _XM_VECTORCALL_ || __arm__ || __aarch64__ ) && !defined(_XM_NO_INTRINSICS_)
356356
typedef const XMVECTOR GXMVECTOR;
357357
#else
358358
typedef const XMVECTOR& GXMVECTOR;
359359
#endif
360360

361361
// Fix-up for (5th & 6th) XMVECTOR parameter to pass in-register for ARM64 and vector call; by reference otherwise
362-
#if ( defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || _XM_VECTORCALL_ || __aarch64__ ) && !defined(_XM_NO_INTRINSICS_)
362+
#if ( defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || _XM_VECTORCALL_ || __aarch64__ ) && !defined(_XM_NO_INTRINSICS_)
363363
typedef const XMVECTOR HXMVECTOR;
364364
#else
365365
typedef const XMVECTOR& HXMVECTOR;
@@ -478,7 +478,7 @@ namespace DirectX
478478
struct XMMATRIX;
479479

480480
// Fix-up for (1st) XMMATRIX parameter to pass in-register for ARM64 and vector call; by reference otherwise
481-
#if ( defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || _XM_VECTORCALL_ || __aarch64__ ) && !defined(_XM_NO_INTRINSICS_)
481+
#if ( defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || _XM_VECTORCALL_ || __aarch64__ ) && !defined(_XM_NO_INTRINSICS_)
482482
typedef const XMMATRIX FXMMATRIX;
483483
#else
484484
typedef const XMMATRIX& FXMMATRIX;

Inc/DirectXMathMatrix.inl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3173,7 +3173,7 @@ inline XMMATRIX& XMMATRIX::operator/= (float S) noexcept
31733173
r[3] = XMVectorDivide(r[3], vS);
31743174
return *this;
31753175
#elif defined(_XM_ARM_NEON_INTRINSICS_)
3176-
#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__
3176+
#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __aarch64__
31773177
float32x4_t vS = vdupq_n_f32(S);
31783178
r[0] = vdivq_f32(r[0], vS);
31793179
r[1] = vdivq_f32(r[1], vS);
@@ -3260,7 +3260,7 @@ inline XMMATRIX XMMATRIX::operator/ (float S) const noexcept
32603260
R.r[3] = XMVectorDivide(r[3], vS);
32613261
return R;
32623262
#elif defined(_XM_ARM_NEON_INTRINSICS_)
3263-
#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__
3263+
#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __aarch64__
32643264
float32x4_t vS = vdupq_n_f32(S);
32653265
XMMATRIX R;
32663266
R.r[0] = vdivq_f32(r[0], vS);

Inc/DirectXMathVector.inl

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2314,7 +2314,7 @@ inline XMVECTOR XM_CALLCONV XMVectorRound(FXMVECTOR V) noexcept
23142314
return Result.v;
23152315

23162316
#elif defined(_XM_ARM_NEON_INTRINSICS_)
2317-
#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__
2317+
#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __aarch64__
23182318
return vrndnq_f32(V);
23192319
#else
23202320
uint32x4_t sign = vandq_u32(vreinterpretq_u32_f32(V), g_XMNegativeZero);
@@ -2374,7 +2374,7 @@ inline XMVECTOR XM_CALLCONV XMVectorTruncate(FXMVECTOR V) noexcept
23742374
return Result;
23752375

23762376
#elif defined(_XM_ARM_NEON_INTRINSICS_)
2377-
#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__
2377+
#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __aarch64__
23782378
return vrndq_f32(V);
23792379
#else
23802380
float32x4_t vTest = vabsq_f32(V);
@@ -2421,7 +2421,7 @@ inline XMVECTOR XM_CALLCONV XMVectorFloor(FXMVECTOR V) noexcept
24212421
} } };
24222422
return Result.v;
24232423
#elif defined(_XM_ARM_NEON_INTRINSICS_)
2424-
#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__
2424+
#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __aarch64__
24252425
return vrndmq_f32(V);
24262426
#else
24272427
float32x4_t vTest = vabsq_f32(V);
@@ -2472,7 +2472,7 @@ inline XMVECTOR XM_CALLCONV XMVectorCeiling(FXMVECTOR V) noexcept
24722472
} } };
24732473
return Result.v;
24742474
#elif defined(_XM_ARM_NEON_INTRINSICS_)
2475-
#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__
2475+
#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __aarch64__
24762476
return vrndpq_f32(V);
24772477
#else
24782478
float32x4_t vTest = vabsq_f32(V);
@@ -2765,7 +2765,7 @@ inline XMVECTOR XM_CALLCONV XMVectorSum(FXMVECTOR V) noexcept
27652765
return Result.v;
27662766

27672767
#elif defined(_XM_ARM_NEON_INTRINSICS_)
2768-
#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__
2768+
#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __aarch64__
27692769
float32x4_t vTemp = vpaddq_f32(V, V);
27702770
return vpaddq_f32(vTemp, vTemp);
27712771
#else
@@ -2970,7 +2970,7 @@ inline XMVECTOR XM_CALLCONV XMVectorMultiplyAdd
29702970
} } };
29712971
return Result.v;
29722972
#elif defined(_XM_ARM_NEON_INTRINSICS_)
2973-
#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__
2973+
#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __aarch64__
29742974
return vfmaq_f32(V3, V1, V2);
29752975
#else
29762976
return vmlaq_f32(V3, V1, V2);
@@ -2997,7 +2997,7 @@ inline XMVECTOR XM_CALLCONV XMVectorDivide
29972997
} } };
29982998
return Result.v;
29992999
#elif defined(_XM_ARM_NEON_INTRINSICS_)
3000-
#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__
3000+
#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __aarch64__
30013001
return vdivq_f32(V1, V2);
30023002
#else
30033003
// 2 iterations of Newton-Raphson refinement of reciprocal
@@ -3031,7 +3031,7 @@ inline XMVECTOR XM_CALLCONV XMVectorNegativeMultiplySubtract
30313031
} } };
30323032
return Result;
30333033
#elif defined(_XM_ARM_NEON_INTRINSICS_)
3034-
#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__
3034+
#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __aarch64__
30353035
return vfmsq_f32(V3, V1, V2);
30363036
#else
30373037
return vmlsq_f32(V3, V1, V2);
@@ -3097,7 +3097,7 @@ inline XMVECTOR XM_CALLCONV XMVectorReciprocal(FXMVECTOR V) noexcept
30973097
} } };
30983098
return Result.v;
30993099
#elif defined(_XM_ARM_NEON_INTRINSICS_)
3100-
#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__
3100+
#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __aarch64__
31013101
float32x4_t one = vdupq_n_f32(1.0f);
31023102
return vdivq_f32(one, V);
31033103
#else
@@ -8040,7 +8040,7 @@ inline XMFLOAT2* XM_CALLCONV XMVector2TransformCoordStream
80408040

80418041
XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 3));
80428042

8043-
#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__
8043+
#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __aarch64__
80448044
V.val[0] = vdivq_f32(vResult0, W);
80458045
V.val[1] = vdivq_f32(vResult1, W);
80468046
#else
@@ -8074,7 +8074,7 @@ inline XMFLOAT2* XM_CALLCONV XMVector2TransformCoordStream
80748074
V = vget_high_f32(vResult);
80758075
float32x2_t W = vdup_lane_f32(V, 1);
80768076

8077-
#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__
8077+
#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __aarch64__
80788078
V = vget_low_f32(vResult);
80798079
V = vdiv_f32(V, W);
80808080
#else
@@ -10818,7 +10818,7 @@ inline XMFLOAT3* XM_CALLCONV XMVector3TransformCoordStream
1081810818

1081910819
XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 5));
1082010820

10821-
#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__
10821+
#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __aarch64__
1082210822
V.val[0] = vdivq_f32(vResult0, W);
1082310823
V.val[1] = vdivq_f32(vResult1, W);
1082410824
V.val[2] = vdivq_f32(vResult2, W);
@@ -10857,7 +10857,7 @@ inline XMFLOAT3* XM_CALLCONV XMVector3TransformCoordStream
1085710857
VH = vget_high_f32(vResult);
1085810858
XMVECTOR W = vdupq_lane_f32(VH, 1);
1085910859

10860-
#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__
10860+
#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __aarch64__
1086110861
vResult = vdivq_f32(vResult, W);
1086210862
#else
1086310863
// 2 iterations of Newton-Raphson refinement of reciprocal for W
@@ -11768,7 +11768,7 @@ inline XMFLOAT3* XM_CALLCONV XMVector3ProjectStream
1176811768

1176911769
XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 5));
1177011770

11771-
#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__
11771+
#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __aarch64__
1177211772
vResult0 = vdivq_f32(vResult0, W);
1177311773
vResult1 = vdivq_f32(vResult1, W);
1177411774
vResult2 = vdivq_f32(vResult2, W);
@@ -11816,7 +11816,7 @@ inline XMFLOAT3* XM_CALLCONV XMVector3ProjectStream
1181611816
VH = vget_high_f32(vResult);
1181711817
XMVECTOR W = vdupq_lane_f32(VH, 1);
1181811818

11819-
#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__
11819+
#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __aarch64__
1182011820
vResult = vdivq_f32(vResult, W);
1182111821
#else
1182211822
// 2 iterations of Newton-Raphson refinement of reciprocal for W
@@ -12327,7 +12327,7 @@ inline XMFLOAT3* XM_CALLCONV XMVector3UnprojectStream
1232712327

1232812328
XM_PREFETCH(pInputVector + (XM_CACHE_LINE_SIZE * 5));
1232912329

12330-
#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__
12330+
#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __aarch64__
1233112331
V.val[0] = vdivq_f32(vResult0, W);
1233212332
V.val[1] = vdivq_f32(vResult1, W);
1233312333
V.val[2] = vdivq_f32(vResult2, W);
@@ -12381,7 +12381,7 @@ inline XMFLOAT3* XM_CALLCONV XMVector3UnprojectStream
1238112381
VH = vget_high_f32(vResult);
1238212382
XMVECTOR W = vdupq_lane_f32(VH, 1);
1238312383

12384-
#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__
12384+
#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __aarch64__
1238512385
vResult = vdivq_f32(vResult, W);
1238612386
#else
1238712387
// 2 iterations of Newton-Raphson refinement of reciprocal for W

Inc/DirectXPackedVector.inl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ inline float XMConvertHalfToFloat(HALF Value) noexcept
2323
__m128i V1 = _mm_cvtsi32_si128(static_cast<int>(Value));
2424
__m128 V2 = _mm_cvtph_ps(V1);
2525
return _mm_cvtss_f32(V2);
26-
#elif defined(_XM_ARM_NEON_INTRINSICS_) && (defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__) && !defined(_XM_NO_INTRINSICS_) && (!defined(__GNUC__) || (__ARM_FP & 2))
26+
#elif defined(_XM_ARM_NEON_INTRINSICS_) && (defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __aarch64__) && !defined(_XM_NO_INTRINSICS_) && (!defined(__GNUC__) || (__ARM_FP & 2))
2727
uint16x4_t vHalf = vdup_n_u16(Value);
2828
float32x4_t vFloat = vcvt_f32_f16(vreinterpret_f16_u16(vHalf));
2929
return vgetq_lane_f32(vFloat, 0);
@@ -255,7 +255,7 @@ inline float* XMConvertHalfToFloatStream
255255
XM_SFENCE();
256256

257257
return pOutputStream;
258-
#elif defined(_XM_ARM_NEON_INTRINSICS_) && (defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__) && !defined(_XM_NO_INTRINSICS_) && (!defined(__GNUC__) || (__ARM_FP & 2))
258+
#elif defined(_XM_ARM_NEON_INTRINSICS_) && (defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __aarch64__) && !defined(_XM_NO_INTRINSICS_) && (!defined(__GNUC__) || (__ARM_FP & 2))
259259
auto pHalf = reinterpret_cast<const uint8_t*>(pInputStream);
260260
auto pFloat = reinterpret_cast<uint8_t*>(pOutputStream);
261261

@@ -389,7 +389,7 @@ inline HALF XMConvertFloatToHalf(float Value) noexcept
389389
__m128 V1 = _mm_set_ss(Value);
390390
__m128i V2 = _mm_cvtps_ph(V1, _MM_FROUND_TO_NEAREST_INT);
391391
return static_cast<HALF>(_mm_extract_epi16(V2, 0));
392-
#elif defined(_XM_ARM_NEON_INTRINSICS_) && (defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__) && !defined(_XM_NO_INTRINSICS_) && (!defined(__GNUC__) || (__ARM_FP & 2))
392+
#elif defined(_XM_ARM_NEON_INTRINSICS_) && (defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __aarch64__) && !defined(_XM_NO_INTRINSICS_) && (!defined(__GNUC__) || (__ARM_FP & 2))
393393
float32x4_t vFloat = vdupq_n_f32(Value);
394394
float16x4_t vHalf = vcvt_f16_f32(vFloat);
395395
return vget_lane_u16(vreinterpret_u16_f16(vHalf), 0);
@@ -609,7 +609,7 @@ inline HALF* XMConvertFloatToHalfStream
609609
}
610610

611611
return pOutputStream;
612-
#elif defined(_XM_ARM_NEON_INTRINSICS_) && (defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || __aarch64__) && !defined(_XM_NO_INTRINSICS_) && (!defined(__GNUC__) || (__ARM_FP & 2))
612+
#elif defined(_XM_ARM_NEON_INTRINSICS_) && (defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __aarch64__) && !defined(_XM_NO_INTRINSICS_) && (!defined(__GNUC__) || (__ARM_FP & 2))
613613
auto pFloat = reinterpret_cast<const uint8_t*>(pInputStream);
614614
auto pHalf = reinterpret_cast<uint8_t*>(pOutputStream);
615615

0 commit comments

Comments
 (0)