@@ -170,6 +170,11 @@ typedef struct float16 {
170170
171171#include <math.h> // Required for: sinf(), cosf(), tan(), atan2f(), sqrtf(), floor(), fminf(), fmaxf(), fabsf()
172172
173+ #if defined(__SSE__ ) || defined(_M_X64 ) || (defined(_M_IX86_FP ) && _M_IX86_FP >= 1 )
174+ #include <xmmintrin.h>
175+ #define RAYMATH_SSE_ENABLED
176+ #endif
177+
173178//----------------------------------------------------------------------------------
174179// Module Functions Definition - Utils math
175180//----------------------------------------------------------------------------------
@@ -1647,7 +1652,63 @@ RMAPI Matrix MatrixSubtract(Matrix left, Matrix right)
16471652RMAPI Matrix MatrixMultiply (Matrix left , Matrix right )
16481653{
16491654 Matrix result = { 0 };
1655+ #ifdef RAYMATH_SSE_ENABLED
1656+ // Load left side and right side.
1657+ __m128 c0 = _mm_set_ps (right .m12 , right .m8 , right .m4 , right .m0 );
1658+ __m128 c1 = _mm_set_ps (right .m13 , right .m9 , right .m5 , right .m1 );
1659+ __m128 c2 = _mm_set_ps (right .m14 , right .m10 , right .m6 , right .m2 );
1660+ __m128 c3 = _mm_set_ps (right .m15 , right .m11 , right .m7 , right .m3 );
1661+ // Transpose so c0..c3 become *rows* of the right matrix in semantic order.
1662+ _MM_TRANSPOSE4_PS (c0 , c1 , c2 , c3 );
1663+
1664+ __m128 row ;
1665+ float tmp [4 ];
1666+
1667+ // Row 0 of result: [m0, m1, m2, m3]
1668+ row = _mm_mul_ps (_mm_set1_ps (left .m0 ), c0 );
1669+ row = _mm_add_ps (row , _mm_mul_ps (_mm_set1_ps (left .m1 ), c1 ));
1670+ row = _mm_add_ps (row , _mm_mul_ps (_mm_set1_ps (left .m2 ), c2 ));
1671+ row = _mm_add_ps (row , _mm_mul_ps (_mm_set1_ps (left .m3 ), c3 ));
1672+ _mm_storeu_ps (tmp , row );
1673+ result .m0 = tmp [0 ];
1674+ result .m1 = tmp [1 ];
1675+ result .m2 = tmp [2 ];
1676+ result .m3 = tmp [3 ];
1677+
1678+ // Row 1 of result: [m4, m5, m6, m7]
1679+ row = _mm_mul_ps (_mm_set1_ps (left .m4 ), c0 );
1680+ row = _mm_add_ps (row , _mm_mul_ps (_mm_set1_ps (left .m5 ), c1 ));
1681+ row = _mm_add_ps (row , _mm_mul_ps (_mm_set1_ps (left .m6 ), c2 ));
1682+ row = _mm_add_ps (row , _mm_mul_ps (_mm_set1_ps (left .m7 ), c3 ));
1683+ _mm_storeu_ps (tmp , row );
1684+ result .m4 = tmp [0 ];
1685+ result .m5 = tmp [1 ];
1686+ result .m6 = tmp [2 ];
1687+ result .m7 = tmp [3 ];
1688+
1689+ // Row 2 of result: [m8, m9, m10, m11]
1690+ row = _mm_mul_ps (_mm_set1_ps (left .m8 ), c0 );
1691+ row = _mm_add_ps (row , _mm_mul_ps (_mm_set1_ps (left .m9 ), c1 ));
1692+ row = _mm_add_ps (row , _mm_mul_ps (_mm_set1_ps (left .m10 ), c2 ));
1693+ row = _mm_add_ps (row , _mm_mul_ps (_mm_set1_ps (left .m11 ), c3 ));
1694+ _mm_storeu_ps (tmp , row );
1695+ result .m8 = tmp [0 ];
1696+ result .m9 = tmp [1 ];
1697+ result .m10 = tmp [2 ];
1698+ result .m11 = tmp [3 ];
1699+
1700+ // Row 3 of result: [m12, m13, m14, m15]
1701+ row = _mm_mul_ps (_mm_set1_ps (left .m12 ), c0 );
1702+ row = _mm_add_ps (row , _mm_mul_ps (_mm_set1_ps (left .m13 ), c1 ));
1703+ row = _mm_add_ps (row , _mm_mul_ps (_mm_set1_ps (left .m14 ), c2 ));
1704+ row = _mm_add_ps (row , _mm_mul_ps (_mm_set1_ps (left .m15 ), c3 ));
1705+ _mm_storeu_ps (tmp , row );
1706+ result .m12 = tmp [0 ];
1707+ result .m13 = tmp [1 ];
1708+ result .m14 = tmp [2 ];
1709+ result .m15 = tmp [3 ];
16501710
1711+ #else
16511712 result .m0 = left .m0 * right .m0 + left .m1 * right .m4 + left .m2 * right .m8 + left .m3 * right .m12 ;
16521713 result .m1 = left .m0 * right .m1 + left .m1 * right .m5 + left .m2 * right .m9 + left .m3 * right .m13 ;
16531714 result .m2 = left .m0 * right .m2 + left .m1 * right .m6 + left .m2 * right .m10 + left .m3 * right .m14 ;
@@ -1664,7 +1725,7 @@ RMAPI Matrix MatrixMultiply(Matrix left, Matrix right)
16641725 result .m13 = left .m12 * right .m1 + left .m13 * right .m5 + left .m14 * right .m9 + left .m15 * right .m13 ;
16651726 result .m14 = left .m12 * right .m2 + left .m13 * right .m6 + left .m14 * right .m10 + left .m15 * right .m14 ;
16661727 result .m15 = left .m12 * right .m3 + left .m13 * right .m7 + left .m14 * right .m11 + left .m15 * right .m15 ;
1667-
1728+ #endif
16681729 return result ;
16691730}
16701731
0 commit comments