Skip to content

Commit f16d5ce

Browse files
committed
REVIEWED: Make sure SSE is being used when compiling with MSVC
Added log info and some formatting for visibility
1 parent cbff0fa commit f16d5ce

File tree

1 file changed

+91
-76
lines changed

1 file changed

+91
-76
lines changed

src/external/rlsw.h

Lines changed: 91 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -94,11 +94,9 @@
9494
#ifndef SW_MALLOC
9595
#define SW_MALLOC(sz) malloc(sz)
9696
#endif
97-
9897
#ifndef SW_REALLOC
9998
#define SW_REALLOC(ptr, newSz) realloc(ptr, newSz)
10099
#endif
101-
102100
#ifndef SW_FREE
103101
#define SW_FREE(ptr) free(ptr)
104102
#endif
@@ -152,12 +150,6 @@
152150
#define SW_CLIP_EPSILON 1e-4f
153151
#endif
154152

155-
#ifdef __cplusplus
156-
#define CURLY_INIT(name) name
157-
#else
158-
#define CURLY_INIT(name) (name)
159-
#endif
160-
161153
//----------------------------------------------------------------------------------
162154
// OpenGL Compatibility Types
163155
//----------------------------------------------------------------------------------
@@ -610,9 +602,19 @@ SWAPI void swBindTexture(uint32_t id);
610602
#define RLSW_IMPLEMENTATION
611603
#if defined(RLSW_IMPLEMENTATION)
612604

613-
#include <stdlib.h>
614-
#include <stddef.h>
615-
#include <math.h> // Required for: floorf(), fabsf()
605+
#include <stdlib.h> // Required for: malloc(), free()
606+
#include <stddef.h> // Required for: NULL, size_t (NOTE: uint8_t, uint16_t, uint32_t... are declared by <stdint.h>, not by this header)
607+
#include <math.h> // Required for: sinf(), cosf(), floorf(), fabsf(), sqrtf(), roundf()
608+
609+
// Simple log system to avoid printf() calls if required
610+
// NOTE: Avoiding those calls also avoids the memory used by the constant format strings
611+
#define SW_SUPPORT_LOG_INFO
612+
#if defined(SW_SUPPORT_LOG_INFO) //&& defined(_DEBUG) // WARNING: LOG() output required for this tool
613+
#include <stdio.h>
614+
#define SW_LOG(...) printf(__VA_ARGS__)
615+
#else
616+
#define SW_LOG(...)
617+
#endif
616618

617619
#if defined(_MSC_VER)
618620
#define SW_ALIGN(x) __declspec(align(x))
@@ -634,56 +636,47 @@ SWAPI void swBindTexture(uint32_t id);
634636
#define SW_ARCH_RISCV
635637
#endif
636638

639+
// Check for SIMD vector instructions
637640
#if defined(__FMA__) && defined(__AVX2__)
638641
#define SW_HAS_FMA_AVX2
639642
#include <immintrin.h>
640643
#endif
641-
642644
#if defined(__FMA__) && defined(__AVX__)
643645
#define SW_HAS_FMA_AVX
644646
#include <immintrin.h>
645647
#endif
646-
647648
#if defined(__AVX2__)
648649
#define SW_HAS_AVX2
649650
#include <immintrin.h>
650651
#endif
651-
652652
#if defined(__AVX__)
653653
#define SW_HAS_AVX
654654
#include <immintrin.h>
655655
#endif
656-
657656
#if defined(__SSE4_2__)
658657
#define SW_HAS_SSE42
659658
#include <nmmintrin.h>
660659
#endif
661-
662660
#if defined(__SSE4_1__)
663661
#define SW_HAS_SSE41
664662
#include <smmintrin.h>
665663
#endif
666-
667664
#if defined(__SSSE3__)
668665
#define SW_HAS_SSSE3
669666
#include <tmmintrin.h>
670667
#endif
671-
672668
#if defined(__SSE3__)
673669
#define SW_HAS_SSE3
674670
#include <pmmintrin.h>
675671
#endif
676-
677-
#if defined(__SSE2__)
672+
#if defined(__SSE2__) || (defined(_M_AMD64) || defined(_M_X64)) // SSE2 x64
678673
#define SW_HAS_SSE2
679674
#include <emmintrin.h>
680675
#endif
681-
682676
#if defined(__SSE__)
683677
#define SW_HAS_SSE
684678
#include <xmmintrin.h>
685679
#endif
686-
687680
#if defined(__ARM_NEON) || defined(__aarch64__)
688681
#if defined(__ARM_FEATURE_FMA)
689682
#define SW_HAS_NEON_FMA
@@ -692,12 +685,17 @@ SWAPI void swBindTexture(uint32_t id);
692685
#endif
693686
#include <arm_neon.h>
694687
#endif
695-
696-
#ifdef __riscv_vector
688+
#if defined(__riscv_vector)
697689
#define SW_HAS_RVV
698690
#include <riscv_vector.h>
699691
#endif
700692

693+
#ifdef __cplusplus
694+
#define SW_CURLY_INIT(name) name
695+
#else
696+
#define SW_CURLY_INIT(name) (name)
697+
#endif
698+
701699
//----------------------------------------------------------------------------------
702700
// Defines and Macros
703701
//----------------------------------------------------------------------------------
@@ -717,68 +715,68 @@ SWAPI void swBindTexture(uint32_t id);
717715
#endif
718716

719717
#if (SW_COLOR_BUFFER_BITS == 8)
720-
#define SW_COLOR_TYPE uint8_t
721-
#define SW_COLOR_IS_PACKED 1
722-
#define SW_COLOR_PACK_COMP 1
718+
#define SW_COLOR_TYPE uint8_t
719+
#define SW_COLOR_IS_PACKED 1
720+
#define SW_COLOR_PACK_COMP 1
723721
#define SW_PACK_COLOR(r,g,b) ((((uint8_t)((r)*7+0.5f))&0x07)<<5 | (((uint8_t)((g)*7+0.5f))&0x07)<<2 | ((uint8_t)((b)*3+0.5f))&0x03)
724-
#define SW_UNPACK_R(p) (((p)>>5)&0x07)
725-
#define SW_UNPACK_G(p) (((p)>>2)&0x07)
726-
#define SW_UNPACK_B(p) ((p)&0x03)
727-
#define SW_SCALE_R(v) ((v)*255+3)/7
728-
#define SW_SCALE_G(v) ((v)*255+3)/7
729-
#define SW_SCALE_B(v) ((v)*255+1)/3
730-
#define SW_TO_FLOAT_R(v) ((v)*(1.0f/7.0f))
731-
#define SW_TO_FLOAT_G(v) ((v)*(1.0f/7.0f))
732-
#define SW_TO_FLOAT_B(v) ((v)*(1.0f/3.0f))
722+
#define SW_UNPACK_R(p) (((p)>>5)&0x07)
723+
#define SW_UNPACK_G(p) (((p)>>2)&0x07)
724+
#define SW_UNPACK_B(p) ((p)&0x03)
725+
// Scale a 3-bit red component (0..7) to 0..255, rounding to nearest (+3 is half of the divisor 7)
// NOTE: Whole expansion is parenthesized so the macro is safe inside larger expressions
#define SW_SCALE_R(v) (((v)*255+3)/7)
726+
// Scale a 3-bit green component (0..7) to 0..255, rounding to nearest (+3 is half of the divisor 7)
// NOTE: Whole expansion is parenthesized so the macro is safe inside larger expressions
#define SW_SCALE_G(v) (((v)*255+3)/7)
727+
// Scale a 2-bit blue component (0..3) to 0..255, rounding to nearest (+1 is half of the divisor 3)
// NOTE: Whole expansion is parenthesized so the macro is safe inside larger expressions
#define SW_SCALE_B(v) (((v)*255+1)/3)
728+
#define SW_TO_FLOAT_R(v) ((v)*(1.0f/7.0f))
729+
#define SW_TO_FLOAT_G(v) ((v)*(1.0f/7.0f))
730+
#define SW_TO_FLOAT_B(v) ((v)*(1.0f/3.0f))
733731
#elif (SW_COLOR_BUFFER_BITS == 16)
734-
#define SW_COLOR_TYPE uint16_t
735-
#define SW_COLOR_IS_PACKED 1
736-
#define SW_COLOR_PACK_COMP 1
732+
#define SW_COLOR_TYPE uint16_t
733+
#define SW_COLOR_IS_PACKED 1
734+
#define SW_COLOR_PACK_COMP 1
737735
#define SW_PACK_COLOR(r,g,b) ((((uint16_t)((r)*31+0.5f))&0x1F)<<11 | (((uint16_t)((g)*63+0.5f))&0x3F)<<5 | ((uint16_t)((b)*31+0.5f))&0x1F)
738-
#define SW_UNPACK_R(p) (((p)>>11)&0x1F)
739-
#define SW_UNPACK_G(p) (((p)>>5)&0x3F)
740-
#define SW_UNPACK_B(p) ((p)&0x1F)
741-
#define SW_SCALE_R(v) ((v)*255+15)/31
742-
#define SW_SCALE_G(v) ((v)*255+31)/63
743-
#define SW_SCALE_B(v) ((v)*255+15)/31
744-
#define SW_TO_FLOAT_R(v) ((v)*(1.0f/31.0f))
745-
#define SW_TO_FLOAT_G(v) ((v)*(1.0f/63.0f))
746-
#define SW_TO_FLOAT_B(v) ((v)*(1.0f/31.0f))
736+
#define SW_UNPACK_R(p) (((p)>>11)&0x1F)
737+
#define SW_UNPACK_G(p) (((p)>>5)&0x3F)
738+
#define SW_UNPACK_B(p) ((p)&0x1F)
739+
// Scale a 5-bit red component (0..31) to 0..255, rounding to nearest (+15 is half of the divisor 31)
// NOTE: Whole expansion is parenthesized so the macro is safe inside larger expressions
#define SW_SCALE_R(v) (((v)*255+15)/31)
740+
// Scale a 6-bit green component (0..63) to 0..255, rounding to nearest (+31 is half of the divisor 63)
// NOTE: Whole expansion is parenthesized so the macro is safe inside larger expressions
#define SW_SCALE_G(v) (((v)*255+31)/63)
741+
// Scale a 5-bit blue component (0..31) to 0..255, rounding to nearest (+15 is half of the divisor 31)
// NOTE: Whole expansion is parenthesized so the macro is safe inside larger expressions
#define SW_SCALE_B(v) (((v)*255+15)/31)
742+
#define SW_TO_FLOAT_R(v) ((v)*(1.0f/31.0f))
743+
#define SW_TO_FLOAT_G(v) ((v)*(1.0f/63.0f))
744+
#define SW_TO_FLOAT_B(v) ((v)*(1.0f/31.0f))
747745
#else // 32 bits
748-
#define SW_COLOR_TYPE uint8_t
749-
#define SW_COLOR_IS_PACKED 0
750-
#define SW_COLOR_PACK_COMP 4
746+
#define SW_COLOR_TYPE uint8_t
747+
#define SW_COLOR_IS_PACKED 0
748+
#define SW_COLOR_PACK_COMP 4
751749
#endif
752750

753751
#if (SW_DEPTH_BUFFER_BITS == 8)
754-
#define SW_DEPTH_TYPE uint8_t
755-
#define SW_DEPTH_IS_PACKED 1
756-
#define SW_DEPTH_PACK_COMP 1
757-
#define SW_DEPTH_MAX UINT8_MAX
758-
#define SW_DEPTH_SCALE (1.0f/UINT8_MAX)
759-
#define SW_PACK_DEPTH(d) ((SW_DEPTH_TYPE)((d)*SW_DEPTH_MAX))
760-
#define SW_UNPACK_DEPTH(p) (p)
752+
#define SW_DEPTH_TYPE uint8_t
753+
#define SW_DEPTH_IS_PACKED 1
754+
#define SW_DEPTH_PACK_COMP 1
755+
#define SW_DEPTH_MAX UINT8_MAX
756+
#define SW_DEPTH_SCALE (1.0f/UINT8_MAX)
757+
#define SW_PACK_DEPTH(d) ((SW_DEPTH_TYPE)((d)*SW_DEPTH_MAX))
758+
#define SW_UNPACK_DEPTH(p) (p)
761759
#elif (SW_DEPTH_BUFFER_BITS == 16)
762-
#define SW_DEPTH_TYPE uint16_t
763-
#define SW_DEPTH_IS_PACKED 1
764-
#define SW_DEPTH_PACK_COMP 1
765-
#define SW_DEPTH_MAX UINT16_MAX
766-
#define SW_DEPTH_SCALE (1.0f/UINT16_MAX)
767-
#define SW_PACK_DEPTH(d) ((SW_DEPTH_TYPE)((d)*SW_DEPTH_MAX))
768-
#define SW_UNPACK_DEPTH(p) (p)
760+
#define SW_DEPTH_TYPE uint16_t
761+
#define SW_DEPTH_IS_PACKED 1
762+
#define SW_DEPTH_PACK_COMP 1
763+
#define SW_DEPTH_MAX UINT16_MAX
764+
#define SW_DEPTH_SCALE (1.0f/UINT16_MAX)
765+
#define SW_PACK_DEPTH(d) ((SW_DEPTH_TYPE)((d)*SW_DEPTH_MAX))
766+
#define SW_UNPACK_DEPTH(p) (p)
769767
#else // 24 bits
770-
#define SW_DEPTH_TYPE uint8_t
771-
#define SW_DEPTH_IS_PACKED 0
772-
#define SW_DEPTH_PACK_COMP 3
773-
#define SW_DEPTH_MAX 0xFFFFFF
774-
#define SW_DEPTH_SCALE (1.0f/0xFFFFFF)
775-
#define SW_PACK_DEPTH_0(d) (((uint32_t)((d)*SW_DEPTH_MAX)>>16)&0xFF)
776-
#define SW_PACK_DEPTH_1(d) (((uint32_t)((d)*SW_DEPTH_MAX)>>8)&0xFF)
777-
#define SW_PACK_DEPTH_2(d) ((uint32_t)((d)*SW_DEPTH_MAX)&0xFF)
778-
#define SW_UNPACK_DEPTH(p) (((p)[0]<<16)|((p)[1]<<8)|(p)[2])
768+
#define SW_DEPTH_TYPE uint8_t
769+
#define SW_DEPTH_IS_PACKED 0
770+
#define SW_DEPTH_PACK_COMP 3
771+
#define SW_DEPTH_MAX 0xFFFFFF
772+
#define SW_DEPTH_SCALE (1.0f/0xFFFFFF)
773+
#define SW_PACK_DEPTH_0(d) (((uint32_t)((d)*SW_DEPTH_MAX)>>16)&0xFF)
774+
#define SW_PACK_DEPTH_1(d) (((uint32_t)((d)*SW_DEPTH_MAX)>>8)&0xFF)
775+
#define SW_PACK_DEPTH_2(d) ((uint32_t)((d)*SW_DEPTH_MAX)&0xFF)
776+
#define SW_UNPACK_DEPTH(p) (((p)[0]<<16)|((p)[1]<<8)|(p)[2])
779777
#endif
780778

781-
#define SW_STATE_CHECK(flags) (SW_STATE_CHECK_EX(RLSW.stateFlags, (flags)))
779+
#define SW_STATE_CHECK(flags) (SW_STATE_CHECK_EX(RLSW.stateFlags, (flags)))
782780
#define SW_STATE_CHECK_EX(state, flags) (((state) & (flags)) == (flags))
783781

784782
#define SW_STATE_SCISSOR_TEST (1 << 0)
@@ -3607,6 +3605,23 @@ bool swInit(int w, int h)
36073605
RLSW.loadedTextures[0].ty = 0.5f;
36083606

36093607
RLSW.loadedTextureCount = 1;
3608+
3609+
SW_LOG("INFO: RLSW: Software renderer initialized successfully\n");
3610+
#if defined(SW_HAS_FMA_AVX) && defined(SW_HAS_FMA_AVX2)
3611+
SW_LOG("INFO: RLSW: Using SIMD instructions: FMA AVX\n");
3612+
#endif
3613+
#if defined(SW_HAS_AVX) || defined(SW_HAS_AVX2)
3614+
SW_LOG("INFO: RLSW: Using SIMD instructions: AVX\n");
3615+
#endif
3616+
#if defined(SW_HAS_SSE) || defined(SW_HAS_SSE2) || defined(SW_HAS_SSE3) || defined(SW_HAS_SSE41) || defined(SW_HAS_SSE42)
3617+
SW_LOG("INFO: RLSW: Using SIMD instructions: SSE\n");
3618+
#endif
3619+
#if defined(SW_HAS_NEON_FMA) || defined(SW_HAS_NEON)
3620+
SW_LOG("INFO: RLSW: Using SIMD instructions: NEON\n");
3621+
#endif
3622+
#if defined(SW_HAS_RVV)
3623+
SW_LOG("INFO: RLSW: Using SIMD instructions: RVV\n");
3624+
#endif
36103625

36113626
return true;
36123627
}
@@ -3626,7 +3641,7 @@ void swClose(void)
36263641
SW_FREE(RLSW.loadedTextures);
36273642
SW_FREE(RLSW.freeTextureIds);
36283643

3629-
RLSW = CURLY_INIT(sw_context_t) { 0 };
3644+
RLSW = SW_CURLY_INIT(sw_context_t) { 0 };
36303645
}
36313646

36323647
bool swResizeFramebuffer(int w, int h)

0 commit comments

Comments
 (0)