1919*
2020* CONFIGURATION:
2121* #define RAYMATH_IMPLEMENTATION
22- * Generates the implementation of the library into the included file.
22+ * Generates the implementation of the library into the included file
2323* If not defined, the library is in header only mode and can be included in other headers
24- * or source files without problems. But only ONE file should hold the implementation.
24+ * or source files without problems. But only ONE file should hold the implementation
2525*
2626* #define RAYMATH_STATIC_INLINE
27- * Define static inline functions code, so #include header suffices for use.
28- * This may use up lots of memory.
27+ * Define static inline functions code, so #include header suffices for use
28+ * This may use up lots of memory
2929*
3030* #define RAYMATH_DISABLE_CPP_OPERATORS
3131* Disables C++ operator overloads for raymath types.
3232*
33+ * #define RAYMATH_USE_SIMD_INTRINSICS
34+ * Try to enable SIMD intrinsics for MatrixMultiply()
35+ * Note that users enabling it must make sure the target platform where the application
36+ * will run supports the selected SIMD intrinsics; for now, only SSE is supported
37+ *
3338* LICENSE: zlib/libpng
3439*
3540* Copyright (c) 2015-2025 Ramon Santamaria (@raysan5)
7984 #endif
8085#endif
8186
82-
8387//----------------------------------------------------------------------------------
8488// Defines and Macros
8589//----------------------------------------------------------------------------------
@@ -170,9 +174,35 @@ typedef struct float16 {
170174
171175#include <math.h> // Required for: sinf(), cosf(), tan(), atan2f(), sqrtf(), floor(), fminf(), fmaxf(), fabsf()
172176
173- #if defined(__SSE__ ) || defined(_M_X64 ) || (defined(_M_IX86_FP ) && _M_IX86_FP >= 1 )
174- #include <xmmintrin.h>
175- #define RAYMATH_SSE_ENABLED
177+ #if defined(RAYMATH_USE_SIMD_INTRINSICS )
178+ // SIMD is used on the most costly raymath function MatrixMultiply()
179+ // NOTE: Only SSE intrinsics support is implemented
180+ // TODO: Consider support for other SIMD intrinsics
181+ /*
182+ #if defined(__SSE4_2__)
183+ #define SW_HAS_SSE42
184+ #include <nmmintrin.h>
185+ #elif defined(__SSE4_1__)
186+ #define SW_HAS_SSE41
187+ #include <smmintrin.h>
188+ #elif defined(__SSSE3__)
189+ #define SW_HAS_SSSE3
190+ #include <tmmintrin.h>
191+ #elif defined(__SSE3__)
192+ #define SW_HAS_SSE3
193+ #include <pmmintrin.h>
194+ #elif defined(__SSE2__) || (defined(_M_AMD64) || defined(_M_X64)) // SSE2 x64
195+ #define SW_HAS_SSE2
196+ #include <emmintrin.h>
197+ #elif defined(__SSE__)
198+ #define SW_HAS_SSE
199+ #include <xmmintrin.h>
200+ #endif
201+ */
202+ #if defined(__SSE__ ) || defined(_M_X64 ) || (defined(_M_IX86_FP ) && (_M_IX86_FP >= 1 ))
203+ #include <xmmintrin.h>
204+ #define RAYMATH_SSE_ENABLED
205+ #endif
176206#endif
177207
178208//----------------------------------------------------------------------------------
@@ -1652,18 +1682,20 @@ RMAPI Matrix MatrixSubtract(Matrix left, Matrix right)
16521682RMAPI Matrix MatrixMultiply (Matrix left , Matrix right )
16531683{
16541684 Matrix result = { 0 };
1655- #ifdef RAYMATH_SSE_ENABLED
1656- // Load left side and right side.
1685+
1686+ #if defined(RAYMATH_SSE_ENABLED )
1687+ // Load left side and right side
16571688 __m128 c0 = _mm_set_ps (right .m12 , right .m8 , right .m4 , right .m0 );
16581689 __m128 c1 = _mm_set_ps (right .m13 , right .m9 , right .m5 , right .m1 );
16591690 __m128 c2 = _mm_set_ps (right .m14 , right .m10 , right .m6 , right .m2 );
16601691 __m128 c3 = _mm_set_ps (right .m15 , right .m11 , right .m7 , right .m3 );
1661- // Transpose so c0..c3 become *rows* of the right matrix in semantic order.
1692+
1693+ // Transpose so c0..c3 become *rows* of the right matrix in semantic order
16621694 _MM_TRANSPOSE4_PS (c0 , c1 , c2 , c3 );
16631695
1696+ float tmp [4 ] = { 0 };
16641697 __m128 row ;
1665- float tmp [4 ];
1666-
1698+
16671699 // Row 0 of result: [m0, m1, m2, m3]
16681700 row = _mm_mul_ps (_mm_set1_ps (left .m0 ), c0 );
16691701 row = _mm_add_ps (row , _mm_mul_ps (_mm_set1_ps (left .m1 ), c1 ));
@@ -1707,7 +1739,6 @@ RMAPI Matrix MatrixMultiply(Matrix left, Matrix right)
17071739 result .m13 = tmp [1 ];
17081740 result .m14 = tmp [2 ];
17091741 result .m15 = tmp [3 ];
1710-
17111742#else
17121743 result .m0 = left .m0 * right .m0 + left .m1 * right .m4 + left .m2 * right .m8 + left .m3 * right .m12 ;
17131744 result .m1 = left .m0 * right .m1 + left .m1 * right .m5 + left .m2 * right .m9 + left .m3 * right .m13 ;
@@ -1726,6 +1757,7 @@ RMAPI Matrix MatrixMultiply(Matrix left, Matrix right)
17261757 result .m14 = left .m12 * right .m2 + left .m13 * right .m6 + left .m14 * right .m10 + left .m15 * right .m14 ;
17271758 result .m15 = left .m12 * right .m3 + left .m13 * right .m7 + left .m14 * right .m11 + left .m15 * right .m15 ;
17281759#endif
1760+
17291761 return result ;
17301762}
17311763
0 commit comments