@@ -38,7 +38,30 @@
 #define XM_CTOR_DEFAULT =default;
 #endif

+#if !defined(_XM_F16C_INTRINSICS_) && defined(__AVX2__) && !defined(_XM_NO_INTRINSICS_)
+#define _XM_F16C_INTRINSICS_
+#endif
+
+#ifdef _XM_F16C_INTRINSICS_
+#if defined(_MSC_VER) && (_MSC_VER < 1700)
+#error DirectX Math use of F16C intrinsics requires Visual C++ 2012 or later.
+#endif
+#ifndef _XM_AVX_INTRINSICS_
+#define _XM_AVX_INTRINSICS_
+#endif
+#endif // _XM_F16C_INTRINSICS_
+
+#if !defined(_XM_AVX_INTRINSICS_) && defined(__AVX__) && !defined(_XM_NO_INTRINSICS_)
+#define _XM_AVX_INTRINSICS_
+#endif
+
+#if defined(_XM_AVX_INTRINSICS_) && !defined(_XM_SSE4_INTRINSICS_)
+#define _XM_SSE4_INTRINSICS_
+#endif

+#if defined(_XM_SSE4_INTRINSICS_) && !defined(_XM_SSE_INTRINSICS_)
+#define _XM_SSE_INTRINSICS_
+#endif

 #if !defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
 #if defined(_M_IX86) || defined(_M_X64)
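Taken together, the guards above form an implication chain: F16C implies AVX, AVX implies SSE4, and SSE4 implies SSE, with __AVX__/__AVX2__ letting compilers that define those macros (clang and GCC via -mavx/-mavx2, MSVC via /arch:AVX and /arch:AVX2) opt in automatically. The probe below is a minimal sketch, assuming DirectXMath.h is on the include path, for watching the chain walk upward as the arch flags change:

// probe.cpp - sketch: report the DirectXMath intrinsics tier selected at
// compile time. Build with no arch flag, then /arch:AVX, then /arch:AVX2
// (or -mavx / -mavx2) and compare the output.
#include <DirectXMath.h>
#include <cstdio>

int main()
{
#if defined(_XM_F16C_INTRINSICS_)
    std::puts("F16C tier (implies AVX, SSE4, SSE)");
#elif defined(_XM_AVX_INTRINSICS_)
    std::puts("AVX tier (implies SSE4, SSE)");
#elif defined(_XM_SSE4_INTRINSICS_)
    std::puts("SSE4 tier (implies SSE)");
#elif defined(_XM_SSE_INTRINSICS_)
    std::puts("SSE/SSE2 tier");
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    std::puts("ARM NEON tier");
#else
    std::puts("no-intrinsics scalar fallback");
#endif
    return 0;
}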
@@ -77,7 +100,17 @@
 #endif
 #endif

+#if defined(_XM_SSE4_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
+#pragma warning(push)
+#pragma warning(disable : 4987)
+#include <intrin.h>
+#pragma warning(pop)
+#include <smmintrin.h>
+#endif

+#if defined(_XM_AVX_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
+#include <immintrin.h>
+#endif

 #include <sal.h>
 #include <assert.h>
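C4987 ("nonstandard extension used: 'throw (...)'") is a warning that older <intrin.h> versions trigger at /W4, hence the push/disable/pop fencing that single include; <smmintrin.h> then declares the SSE4.1 intrinsics (_mm_blend_ps below) and <immintrin.h> the AVX ones (_mm_permute_ps below). The same suppression pattern, sketched standalone; the non-MSVC branch is an assumption for illustration, not part of this patch:

// Include <intrin.h> without C4987 noise at /W4 (MSVC); elsewhere fall
// back to the GCC/clang umbrella header (assumption, not from this patch).
#if defined(_MSC_VER)
#pragma warning(push)
#pragma warning(disable : 4987)  // nonstandard extension used: 'throw (...)'
#include <intrin.h>
#pragma warning(pop)
#else
#include <x86intrin.h>
#endif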
@@ -129,7 +162,11 @@
 #define XM_SFENCE() _mm_sfence()
 #endif

+#if defined(_XM_AVX_INTRINSICS_)
+#define XM_PERMUTE_PS( v, c ) _mm_permute_ps( v, c )
+#else
 #define XM_PERMUTE_PS( v, c ) _mm_shuffle_ps( v, v, c )
+#endif

 #endif // _XM_SSE_INTRINSICS_ && !_XM_NO_INTRINSICS_

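_mm_shuffle_ps(v, v, c) and _mm_permute_ps(v, c) produce identical results for a one-source shuffle; the AVX form maps to the single-source VPERMILPS instruction instead of passing the same register twice to SHUFPS. A quick equivalence check, as a sketch (build with -mavx or /arch:AVX):

// permute_demo.cpp - sketch: XM_PERMUTE_PS's two expansions agree.
#include <immintrin.h>
#include <cstdio>

int main()
{
    __m128 v = _mm_setr_ps(10.f, 20.f, 30.f, 40.f);
    // _MM_SHUFFLE(0,1,2,3) reverses the four lanes.
    __m128 a = _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 1, 2, 3)); // pre-AVX form
    __m128 b = _mm_permute_ps(v, _MM_SHUFFLE(0, 1, 2, 3));    // AVX VPERMILPS
    float fa[4], fb[4];
    _mm_storeu_ps(fa, a);
    _mm_storeu_ps(fb, b);
    std::printf("shuffle: %g %g %g %g\n", fa[0], fa[1], fa[2], fa[3]);
    std::printf("permute: %g %g %g %g\n", fb[0], fb[1], fb[2], fb[3]);
    return 0; // both lines print: 40 30 20 10
}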
@@ -1506,6 +1543,22 @@ template<uint32_t PermuteX, uint32_t PermuteY, uint32_t PermuteZ, uint32_t PermuteW>
 template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,1,2,3>(FXMVECTOR V1, FXMVECTOR V2) { (V2); return V1; }
 template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,5,6,7>(FXMVECTOR V1, FXMVECTOR V2) { (V1); return V2; }

+#if defined(_XM_SSE4_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,1,2,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x1); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,5,2,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x2); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,5,2,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x3); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,1,6,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x4); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,1,6,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x5); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,5,6,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x6); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,5,6,3>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x7); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,1,2,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x8); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,1,2,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0x9); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,5,2,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0xA); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,5,2,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0xB); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,1,6,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0xC); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<4,1,6,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0xD); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorPermute<0,5,6,7>(FXMVECTOR V1, FXMVECTOR V2) { return _mm_blend_ps(V1,V2,0xE); }
+#endif

 #if defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)

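Each blend immediate encodes, one bit per lane, "take this lane from V2": the specializations cover masks 0x1 through 0xE, while 0x0 and 0xF are already handled by the two pass-through specializations above. A worked example of one mask, as a standalone sketch (build with -msse4.1 or /arch:AVX):

// blend_demo.cpp - sketch: _mm_blend_ps with mask 0x5 = 0b0101, i.e. lanes
// 0 and 2 from V2, lanes 1 and 3 from V1 -- exactly XMVectorPermute<4,1,6,3>.
#include <smmintrin.h>
#include <cstdio>

int main()
{
    __m128 v1 = _mm_setr_ps(0.f, 1.f, 2.f, 3.f);  // lanes 0..3
    __m128 v2 = _mm_setr_ps(4.f, 5.f, 6.f, 7.f);  // lanes 4..7
    float out[4];
    _mm_storeu_ps(out, _mm_blend_ps(v1, v2, 0x5));
    std::printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]); // 4 1 6 3
    return 0;
}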
@@ -1570,6 +1623,10 @@ template<uint32_t SwizzleX, uint32_t SwizzleY, uint32_t SwizzleZ, uint32_t SwizzleW>
 // Specialized swizzles
 template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0,1,2,3>(FXMVECTOR V) { return V; }

+#if defined(_XM_SSE4_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
+template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<0,0,2,2>(FXMVECTOR V) { return _mm_moveldup_ps(V); }
+template<> inline XMVECTOR XM_CALLCONV XMVectorSwizzle<1,1,3,3>(FXMVECTOR V) { return _mm_movehdup_ps(V); }
+#endif

 #if defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)

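_mm_moveldup_ps and _mm_movehdup_ps are actually SSE3 instructions (MOVSLDUP/MOVSHDUP), so gating them behind the SSE4 tier is conservative but safe: any SSE4.1-capable CPU also has SSE3. The two duplicate-swizzles by hand, as a sketch (build with -msse3 or /arch:AVX):

// swizzle_demo.cpp - sketch: the duplicate-lane swizzle specializations.
#include <pmmintrin.h>  // SSE3: _mm_moveldup_ps, _mm_movehdup_ps
#include <cstdio>

int main()
{
    __m128 v = _mm_setr_ps(0.f, 1.f, 2.f, 3.f);
    float lo[4], hi[4];
    _mm_storeu_ps(lo, _mm_moveldup_ps(v));  // duplicate even lanes: {0,0,2,2}
    _mm_storeu_ps(hi, _mm_movehdup_ps(v));  // duplicate odd lanes:  {1,1,3,3}
    std::printf("moveldup: %g %g %g %g\n", lo[0], lo[1], lo[2], lo[3]);
    std::printf("movehdup: %g %g %g %g\n", hi[0], hi[1], hi[2], hi[3]);
    return 0;
}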