Skip to content

Commit c124f25

Browse files
committed
REVIEWED: SIMD intrinsics must be explicitly enabled by developer, only SSE supported at the moment #5316
1 parent 66755da commit c124f25

File tree

1 file changed

+46
-14
lines changed

1 file changed

+46
-14
lines changed

src/raymath.h

Lines changed: 46 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -19,17 +19,22 @@
1919
*
2020
* CONFIGURATION:
2121
* #define RAYMATH_IMPLEMENTATION
22-
* Generates the implementation of the library into the included file.
22+
* Generates the implementation of the library into the included file
2323
* If not defined, the library is in header only mode and can be included in other headers
24-
* or source files without problems. But only ONE file should hold the implementation.
24+
* or source files without problems. But only ONE file should hold the implementation
2525
*
2626
* #define RAYMATH_STATIC_INLINE
27-
* Define static inline functions code, so #include header suffices for use.
28-
* This may use up lots of memory.
27+
* Define static inline functions code, so #include header suffices for use
28+
* This may use up lots of memory
2929
*
3030
* #define RAYMATH_DISABLE_CPP_OPERATORS
3131
* Disables C++ operator overloads for raymath types.
3232
*
33+
* #define RAYMATH_USE_SIMD_INTRINSICS
34+
* Try to enable SIMD intrinsics for MatrixMultiply()
35+
* NOTE: Users enabling it must make sure the target platform where the application will
35+
* run supports the selected SIMD instruction set; for now, only SSE is supported
37+
*
3338
* LICENSE: zlib/libpng
3439
*
3540
* Copyright (c) 2015-2025 Ramon Santamaria (@raysan5)
@@ -79,7 +84,6 @@
7984
#endif
8085
#endif
8186

82-
8387
//----------------------------------------------------------------------------------
8488
// Defines and Macros
8589
//----------------------------------------------------------------------------------
@@ -170,9 +174,35 @@ typedef struct float16 {
170174

171175
#include <math.h> // Required for: sinf(), cosf(), tan(), atan2f(), sqrtf(), floor(), fminf(), fmaxf(), fabsf()
172176

173-
#if defined(__SSE__) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1)
174-
#include <xmmintrin.h>
175-
#define RAYMATH_SSE_ENABLED
177+
#if defined(RAYMATH_USE_SIMD_INTRINSICS)
178+
// SIMD is used on the most costly raymath function MatrixMultiply()
179+
// NOTE: Only SSE intrinsics support implemented
180+
// TODO: Consider support for other SIMD intrinsics
181+
/*
182+
#if defined(__SSE4_2__)
183+
#define SW_HAS_SSE42
184+
#include <nmmintrin.h>
185+
#elif defined(__SSE4_1__)
186+
#define SW_HAS_SSE41
187+
#include <smmintrin.h>
188+
#elif defined(__SSSE3__)
189+
#define SW_HAS_SSSE3
190+
#include <tmmintrin.h>
191+
#elif defined(__SSE3__)
192+
#define SW_HAS_SSE3
193+
#include <pmmintrin.h>
194+
#elif defined(__SSE2__) || (defined(_M_AMD64) || defined(_M_X64)) // SSE2 x64
195+
#define SW_HAS_SSE2
196+
#include <emmintrin.h>
197+
#elif defined(__SSE__)
198+
#define SW_HAS_SSE
199+
#include <xmmintrin.h>
200+
#endif
201+
*/
202+
#if defined(__SSE__) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 1))
203+
#include <xmmintrin.h>
204+
#define RAYMATH_SSE_ENABLED
205+
#endif
176206
#endif
177207

178208
//----------------------------------------------------------------------------------
@@ -1652,18 +1682,20 @@ RMAPI Matrix MatrixSubtract(Matrix left, Matrix right)
16521682
RMAPI Matrix MatrixMultiply(Matrix left, Matrix right)
16531683
{
16541684
Matrix result = { 0 };
1655-
#ifdef RAYMATH_SSE_ENABLED
1656-
// Load left side and right side.
1685+
1686+
#if defined(RAYMATH_SSE_ENABLED)
1687+
// Load left side and right side
16571688
__m128 c0 = _mm_set_ps(right.m12, right.m8, right.m4, right.m0);
16581689
__m128 c1 = _mm_set_ps(right.m13, right.m9, right.m5, right.m1);
16591690
__m128 c2 = _mm_set_ps(right.m14, right.m10, right.m6, right.m2);
16601691
__m128 c3 = _mm_set_ps(right.m15, right.m11, right.m7, right.m3);
1661-
// Transpose so c0..c3 become *rows* of the right matrix in semantic order.
1692+
1693+
// Transpose so c0..c3 become *rows* of the right matrix in semantic order
16621694
_MM_TRANSPOSE4_PS(c0, c1, c2, c3);
16631695

1696+
float tmp[4] = { 0 };
16641697
__m128 row;
1665-
float tmp[4];
1666-
1698+
16671699
// Row 0 of result: [m0, m1, m2, m3]
16681700
row = _mm_mul_ps(_mm_set1_ps(left.m0), c0);
16691701
row = _mm_add_ps(row, _mm_mul_ps(_mm_set1_ps(left.m1), c1));
@@ -1707,7 +1739,6 @@ RMAPI Matrix MatrixMultiply(Matrix left, Matrix right)
17071739
result.m13 = tmp[1];
17081740
result.m14 = tmp[2];
17091741
result.m15 = tmp[3];
1710-
17111742
#else
17121743
result.m0 = left.m0*right.m0 + left.m1*right.m4 + left.m2*right.m8 + left.m3*right.m12;
17131744
result.m1 = left.m0*right.m1 + left.m1*right.m5 + left.m2*right.m9 + left.m3*right.m13;
@@ -1726,6 +1757,7 @@ RMAPI Matrix MatrixMultiply(Matrix left, Matrix right)
17261757
result.m14 = left.m12*right.m2 + left.m13*right.m6 + left.m14*right.m10 + left.m15*right.m14;
17271758
result.m15 = left.m12*right.m3 + left.m13*right.m7 + left.m14*right.m11 + left.m15*right.m15;
17281759
#endif
1760+
17291761
return result;
17301762
}
17311763

0 commit comments

Comments
 (0)