// Memory management hooks
// Define any of these before including this file to replace the
// default C runtime allocator used by the implementation
// NOTE: The parameter list must directly follow the macro name (no space),
// otherwise the preprocessor treats the definition as an object-like macro
#ifndef SW_MALLOC
    #define SW_MALLOC(sz) malloc(sz)
#endif
#ifndef SW_REALLOC
    #define SW_REALLOC(ptr, newSz) realloc(ptr, newSz)
#endif
#ifndef SW_FREE
    #define SW_FREE(ptr) free(ptr)
#endif
152150 #define SW_CLIP_EPSILON 1e-4f
153151#endif
154152
155- #ifdef __cplusplus
156- #define CURLY_INIT (name ) name
157- #else
158- #define CURLY_INIT (name ) (name)
159- #endif
160-
161153//----------------------------------------------------------------------------------
162154// OpenGL Compatibility Types
163155//----------------------------------------------------------------------------------
@@ -610,9 +602,19 @@ SWAPI void swBindTexture(uint32_t id);
610602#define RLSW_IMPLEMENTATION
611603#if defined(RLSW_IMPLEMENTATION )
612604
613- #include <stdlib.h>
614- #include <stddef.h>
615- #include <math.h> // Required for: floorf(), fabsf()
605+ #include <stdlib.h> // Required for: malloc(), free()
606+ #include <stddef.h> // Required for: NULL, size_t (fixed-width integer types come from <stdint.h>)
607+ #include <math.h> // Required for: sinf(), cosf(), floorf(), fabsf(), sqrtf(), roundf()
608+
// Simple log system to avoid printf() calls if required
// NOTE: When the calls are compiled out, their constant format strings are
// not emitted either, which saves memory
// A SW_LOG() macro defined before this point takes precedence over the defaults below
#define SW_SUPPORT_LOG_INFO
#if !defined(SW_LOG)
    #if defined(SW_SUPPORT_LOG_INFO) //&& defined(_DEBUG) // WARNING: LOG() output required for this tool
        #include <stdio.h>
        #define SW_LOG(...) printf(__VA_ARGS__)
    #else
        #define SW_LOG(...)
    #endif
#endif
616618
617619#if defined(_MSC_VER )
618620 #define SW_ALIGN (x ) __declspec(align(x))
@@ -634,56 +636,47 @@ SWAPI void swBindTexture(uint32_t id);
634636 #define SW_ARCH_RISCV
635637#endif
636638
// Check for SIMD vector instructions
// Each SW_HAS_* flag mirrors a compiler-provided feature macro and pulls in
// the intrinsics header for that instruction set
#if defined(__FMA__) && defined(__AVX2__)
    #define SW_HAS_FMA_AVX2
    #include <immintrin.h>
#endif
#if defined(__FMA__) && defined(__AVX__)
    #define SW_HAS_FMA_AVX
    #include <immintrin.h>
#endif
#if defined(__AVX2__)
    #define SW_HAS_AVX2
    #include <immintrin.h>
#endif
#if defined(__AVX__)
    #define SW_HAS_AVX
    #include <immintrin.h>
#endif
#if defined(__SSE4_2__)
    #define SW_HAS_SSE42
    #include <nmmintrin.h>
#endif
#if defined(__SSE4_1__)
    #define SW_HAS_SSE41
    #include <smmintrin.h>
#endif
#if defined(__SSSE3__)
    #define SW_HAS_SSSE3
    #include <tmmintrin.h>
#endif
#if defined(__SSE3__)
    #define SW_HAS_SSE3
    #include <pmmintrin.h>
#endif
// NOTE: _M_AMD64/_M_X64 identify an x64 target on compilers that do not
// provide __SSE2__/__SSE__; SSE and SSE2 are part of the x64 baseline
#if defined(__SSE2__) || (defined(_M_AMD64) || defined(_M_X64)) // SSE2 x64
    #define SW_HAS_SSE2
    #include <emmintrin.h>
#endif
// Keep SSE detection consistent with SSE2: a target flagged for SSE2 is also flagged for SSE
#if defined(__SSE__) || (defined(_M_AMD64) || defined(_M_X64)) // SSE x64
    #define SW_HAS_SSE
    #include <xmmintrin.h>
#endif
686-
687680#if defined(__ARM_NEON ) || defined(__aarch64__ )
688681 #if defined(__ARM_FEATURE_FMA )
689682 #define SW_HAS_NEON_FMA
@@ -692,12 +685,17 @@ SWAPI void swBindTexture(uint32_t id);
692685 #endif
693686 #include <arm_neon.h>
694687#endif
695-
// RISC-V vector extension: flagged when the compiler defines __riscv_vector
#if defined(__riscv_vector)
    #define SW_HAS_RVV
    #include <riscv_vector.h>
#endif
700692
// Aggregate initialization helper, used as: SW_CURLY_INIT(type) { ... }
// C++ brace-initializes the bare type name, while C requires the
// parenthesized type name of a compound literal
#if defined(__cplusplus)
    #define SW_CURLY_INIT(name) name
#else
    #define SW_CURLY_INIT(name) (name)
#endif
698+
701699//----------------------------------------------------------------------------------
702700// Defines and Macros
703701//----------------------------------------------------------------------------------
@@ -717,68 +715,68 @@ SWAPI void swBindTexture(uint32_t id);
717715#endif
718716
// Color buffer format, selected at compile time by SW_COLOR_BUFFER_BITS
//  - 8 bits:  one packed byte, 3 bits red, 3 bits green, 2 bits blue
//  - 16 bits: one packed 16-bit word, 5 bits red, 6 bits green, 5 bits blue
//  - other:   unpacked, 4 components of uint8_t each
// Packed formats also provide:
//  - SW_PACK_COLOR(r,g,b): scales each channel by its field maximum, rounds (+0.5f) and packs
//  - SW_UNPACK_x(p):       extracts the raw field value of a channel
//  - SW_SCALE_x(v):        rescales a raw field value to the 0..255 range, with rounding
//  - SW_TO_FLOAT_x(v):     divides a raw field value by its field maximum
// NOTE: SW_SCALE_x expansions are fully parenthesized so they stay correct
// when used inside a larger expression
#if (SW_COLOR_BUFFER_BITS == 8)
    #define SW_COLOR_TYPE           uint8_t
    #define SW_COLOR_IS_PACKED      1
    #define SW_COLOR_PACK_COMP      1
    #define SW_PACK_COLOR(r,g,b)    ((((uint8_t)((r)*7+0.5f))&0x07)<<5 | (((uint8_t)((g)*7+0.5f))&0x07)<<2 | ((uint8_t)((b)*3+0.5f))&0x03)
    #define SW_UNPACK_R(p)          (((p)>>5)&0x07)
    #define SW_UNPACK_G(p)          (((p)>>2)&0x07)
    #define SW_UNPACK_B(p)          ((p)&0x03)
    #define SW_SCALE_R(v)           (((v)*255+3)/7)
    #define SW_SCALE_G(v)           (((v)*255+3)/7)
    #define SW_SCALE_B(v)           (((v)*255+1)/3)
    #define SW_TO_FLOAT_R(v)        ((v)*(1.0f/7.0f))
    #define SW_TO_FLOAT_G(v)        ((v)*(1.0f/7.0f))
    #define SW_TO_FLOAT_B(v)        ((v)*(1.0f/3.0f))
#elif (SW_COLOR_BUFFER_BITS == 16)
    #define SW_COLOR_TYPE           uint16_t
    #define SW_COLOR_IS_PACKED      1
    #define SW_COLOR_PACK_COMP      1
    #define SW_PACK_COLOR(r,g,b)    ((((uint16_t)((r)*31+0.5f))&0x1F)<<11 | (((uint16_t)((g)*63+0.5f))&0x3F)<<5 | ((uint16_t)((b)*31+0.5f))&0x1F)
    #define SW_UNPACK_R(p)          (((p)>>11)&0x1F)
    #define SW_UNPACK_G(p)          (((p)>>5)&0x3F)
    #define SW_UNPACK_B(p)          ((p)&0x1F)
    #define SW_SCALE_R(v)           (((v)*255+15)/31)
    #define SW_SCALE_G(v)           (((v)*255+31)/63)
    #define SW_SCALE_B(v)           (((v)*255+15)/31)
    #define SW_TO_FLOAT_R(v)        ((v)*(1.0f/31.0f))
    #define SW_TO_FLOAT_G(v)        ((v)*(1.0f/63.0f))
    #define SW_TO_FLOAT_B(v)        ((v)*(1.0f/31.0f))
#else // 32 bits
    #define SW_COLOR_TYPE           uint8_t
    #define SW_COLOR_IS_PACKED      0
    #define SW_COLOR_PACK_COMP      4
#endif
752750
// Depth buffer format, selected at compile time by SW_DEPTH_BUFFER_BITS
//  - 8 bits / 16 bits: one packed unsigned integer per depth value
//  - other:            24 bits stored as 3 separate bytes, most significant byte first
// SW_DEPTH_MAX is the largest stored value and SW_DEPTH_SCALE its reciprocal
// SW_PACK_DEPTH*(d) multiply 'd' by SW_DEPTH_MAX and truncate to an integer
#if (SW_DEPTH_BUFFER_BITS == 8)
    #define SW_DEPTH_TYPE           uint8_t
    #define SW_DEPTH_IS_PACKED      1
    #define SW_DEPTH_PACK_COMP      1
    #define SW_DEPTH_MAX            UINT8_MAX
    #define SW_DEPTH_SCALE          (1.0f/UINT8_MAX)
    #define SW_PACK_DEPTH(d)        ((SW_DEPTH_TYPE)((d)*SW_DEPTH_MAX))
    #define SW_UNPACK_DEPTH(p)      (p)
#elif (SW_DEPTH_BUFFER_BITS == 16)
    #define SW_DEPTH_TYPE           uint16_t
    #define SW_DEPTH_IS_PACKED      1
    #define SW_DEPTH_PACK_COMP      1
    #define SW_DEPTH_MAX            UINT16_MAX
    #define SW_DEPTH_SCALE          (1.0f/UINT16_MAX)
    #define SW_PACK_DEPTH(d)        ((SW_DEPTH_TYPE)((d)*SW_DEPTH_MAX))
    #define SW_UNPACK_DEPTH(p)      (p)
#else // 24 bits
    #define SW_DEPTH_TYPE           uint8_t
    #define SW_DEPTH_IS_PACKED      0
    #define SW_DEPTH_PACK_COMP      3
    #define SW_DEPTH_MAX            0xFFFFFF
    #define SW_DEPTH_SCALE          (1.0f/0xFFFFFF)
    #define SW_PACK_DEPTH_0(d)      (((uint32_t)((d)*SW_DEPTH_MAX)>>16)&0xFF)   // Bits 23..16
    #define SW_PACK_DEPTH_1(d)      (((uint32_t)((d)*SW_DEPTH_MAX)>>8)&0xFF)    // Bits 15..8
    #define SW_PACK_DEPTH_2(d)      ((uint32_t)((d)*SW_DEPTH_MAX)&0xFF)         // Bits 7..0
    #define SW_UNPACK_DEPTH(p)      (((p)[0]<<16)|((p)[1]<<8)|(p)[2])
#endif
780778
// State flag queries
// SW_STATE_CHECK_EX is true only when every bit of 'flags' is set in 'state'
// SW_STATE_CHECK applies the same test to RLSW.stateFlags
#define SW_STATE_CHECK_EX(state, flags) (((state) & (flags)) == (flags))
#define SW_STATE_CHECK(flags)           (SW_STATE_CHECK_EX(RLSW.stateFlags, (flags)))
783781
784782#define SW_STATE_SCISSOR_TEST (1 << 0)
@@ -3607,6 +3605,23 @@ bool swInit(int w, int h)
36073605 RLSW .loadedTextures [0 ].ty = 0.5f ;
36083606
36093607 RLSW .loadedTextureCount = 1 ;
3608+
3609+ SW_LOG ("INFO: RLSW: Software renderer initialized successfully\n" );
3610+ #if defined(SW_HAS_FMA_AVX ) && defined(SW_HAS_FMA_AVX2 )
3611+ SW_LOG ("INFO: RLSW: Using SIMD instructions: FMA AVX\n" );
3612+ #endif
3613+ #if defined(SW_HAS_AVX ) || defined(SW_HAS_AVX2 )
3614+ SW_LOG ("INFO: RLSW: Using SIMD instructions: AVX\n" );
3615+ #endif
3616+ #if defined(SW_HAS_SSE ) || defined(SW_HAS_SSE2 ) || defined(SW_HAS_SSE3 ) || defined(SW_HAS_SSE41 ) || defined(SW_HAS_SSE42 )
3617+ SW_LOG ("INFO: RLSW: Using SIMD instructions: SSE\n" );
3618+ #endif
3619+ #if defined(SW_HAS_NEON_FMA ) || defined(SW_HAS_NEON )
3620+ SW_LOG ("INFO: RLSW: Using SIMD instructions: NEON\n" );
3621+ #endif
3622+ #if defined(SW_HAS_RVV )
3623+ SW_LOG ("INFO: RLSW: Using SIMD instructions: RVV\n" );
3624+ #endif
36103625
36113626 return true;
36123627}
@@ -3626,7 +3641,7 @@ void swClose(void)
36263641 SW_FREE (RLSW .loadedTextures );
36273642 SW_FREE (RLSW .freeTextureIds );
36283643
3629- RLSW = CURLY_INIT (sw_context_t ) { 0 };
3644+ RLSW = SW_CURLY_INIT (sw_context_t ) { 0 };
36303645}
36313646
36323647bool swResizeFramebuffer (int w , int h )
0 commit comments