@@ -531,7 +531,6 @@ SWAPI void swClose(void);
531531SWAPI bool swResizeFramebuffer (int w , int h );
532532SWAPI void swCopyFramebuffer (int x , int y , int w , int h , SWformat format , SWtype type , void * pixels );
533533SWAPI void swBlitFramebuffer (int xDst , int yDst , int wDst , int hDst , int xSrc , int ySrc , int wSrc , int hSrc , SWformat format , SWtype type , void * pixels );
534- SWAPI void * swGetColorBuffer (int * w , int * h );
535534
536535SWAPI void swEnable (SWstate state );
537536SWAPI void swDisable (SWstate state );
@@ -616,12 +615,23 @@ SWAPI void swBindTexture(uint32_t id);
616615#include <math.h> // Required for: floorf(), fabsf()
617616
618617#if defined(_MSC_VER ) // Select the compiler-specific spelling of an alignment attribute
619- #define ALIGNAS (x ) __declspec(align(x))
618+ #define SW_ALIGN (x ) __declspec(align(x))
620619#elif defined(__GNUC__ ) || defined(__clang__ )
621- #define ALIGNAS (x ) __attribute__((aligned(x)))
620+ #define SW_ALIGN (x ) __attribute__((aligned(x)))
622621#else
623- #include <stdalign.h>
624- #define ALIGNAS (x ) alignas(x)
622+ #define SW_ALIGN (x ) // No known alignment attribute for this compiler: expands to nothing, so no alignment is requested
623+ #endif
624+
625+ #if defined(_M_X64 ) || defined(__x86_64__ ) // Detect the target architecture from compiler-predefined macros
626+ #define SW_ARCH_X86_64
627+ #elif defined(_M_IX86 ) || defined(__i386__ )
628+ #define SW_ARCH_X86
629+ #elif defined(_M_ARM ) || defined(__arm__ )
630+ #define SW_ARCH_ARM32
631+ #elif defined(_M_ARM64 ) || defined(__aarch64__ )
632+ #define SW_ARCH_ARM64
633+ #elif defined(__riscv )
634+ #define SW_ARCH_RISCV
625635#endif // No SW_ARCH_* macro is defined when none of the checks above match
626636
627637#if defined(__FMA__ ) && defined(__AVX2__ )
@@ -696,8 +706,15 @@ SWAPI void swBindTexture(uint32_t id);
696706#define SW_DEG2RAD (SW_PI/180.0f)
697707#define SW_RAD2DEG (180.0f/SW_PI)
698708
699- #define SW_COLOR_PIXEL_SIZE 4 // (SW_COLOR_BUFFER_BITS >> 3)
709+ #define SW_COLOR_PIXEL_SIZE (SW_COLOR_BUFFER_BITS >> 3) // Color bits divided by 8
700710#define SW_DEPTH_PIXEL_SIZE (SW_DEPTH_BUFFER_BITS >> 3) // Depth bits divided by 8
711+ #define SW_PIXEL_SIZE (SW_COLOR_PIXEL_SIZE + SW_DEPTH_PIXEL_SIZE) // Sum of the color and depth sizes, before any padding
712+
713+ #if (SW_PIXEL_SIZE <= 4 )
714+ #define SW_PIXEL_ALIGNMENT 4
715+ #else // SW_PIXEL_SIZE > 4 (presumably never above 8 -- TODO confirm against the supported buffer bit depths)
716+ #define SW_PIXEL_ALIGNMENT 8
717+ #endif
701718
702719#if (SW_COLOR_BUFFER_BITS == 8 )
703720 #define SW_COLOR_TYPE uint8_t
@@ -827,10 +844,12 @@ typedef struct {
827844} sw_texture_t ;
828845
829846// Pixel data type: color and depth components stored together, requested alignment is SW_PIXEL_ALIGNMENT
830- // WARNING: ALIGNAS() macro requires a constant value (not operand)
831- typedef ALIGNAS (SW_COLOR_PIXEL_SIZE ) struct {
847+ typedef SW_ALIGN (SW_PIXEL_ALIGNMENT ) struct {
832848 SW_COLOR_TYPE color [SW_COLOR_PACK_COMP ];
833849 SW_DEPTH_TYPE depth [SW_DEPTH_PACK_COMP ];
850+ #if (SW_PIXEL_SIZE % SW_PIXEL_ALIGNMENT != 0 ) // Explicit padding only when SW_PIXEL_SIZE is not already a multiple of the alignment
851+ uint8_t padding [SW_PIXEL_ALIGNMENT - SW_PIXEL_SIZE % SW_PIXEL_ALIGNMENT ]; // Bytes from SW_PIXEL_SIZE up to the next multiple of SW_PIXEL_ALIGNMENT
852+ #endif
834853} sw_pixel_t ;
835854
836855typedef struct {
@@ -2624,25 +2643,38 @@ static inline void sw_quad_clip_and_project(void)
26242643
26252644static inline bool sw_quad_is_axis_aligned (void )
26262645{
2627- int horizontal = 0 ;
2628- int vertical = 0 ;
2629-
2646+ // Reject quads with perspective projection: every vertex must have homogeneous w exactly equal to 1.0
2647+ // The fast path assumes affine (non-perspective) quads,
2648+ // so a single vertex with a different w makes this function return false
26302649 for (int i = 0 ; i < 4 ; i ++ )
26312650 {
26322651 if (RLSW .vertexBuffer [i ].homogeneous [3 ] != 1.0f ) return false;
2633-
2634- const float * v0 = RLSW .vertexBuffer [i ].position ;
2635- const float * v1 = RLSW .vertexBuffer [(i + 1 )%4 ].position ;
2636-
2637- float dx = v1 [0 ] - v0 [0 ];
2638- float dy = v1 [1 ] - v0 [1 ];
2639-
2640- if ((fabsf (dx ) > 1e-6f ) && (fabsf (dy ) < 1e-6f )) horizontal ++ ;
2641- else if ((fabsf (dy ) > 1e-6f ) && (fabsf (dx ) < 1e-6f )) vertical ++ ;
2642- else return false; // Diagonal edge -> not axis-aligned
26432652 }
26442653
2645- return ((horizontal == 2 ) && (vertical == 2 ));
2654+ // Tolerance applied to each edge delta below, in screen space (pixels)
2655+ const float epsilon = 0.5f ;
2656+
2657+ // Screen-space positions of the four quad vertices (entries 0..3 of the vertex buffer)
2658+ const float * p0 = RLSW .vertexBuffer [0 ].screen ;
2659+ const float * p1 = RLSW .vertexBuffer [1 ].screen ;
2660+ const float * p2 = RLSW .vertexBuffer [2 ].screen ;
2661+ const float * p3 = RLSW .vertexBuffer [3 ].screen ;
2662+
2663+ // Edge vectors between consecutive vertices: 0->1, 1->2, 2->3 and 3->0
2664+ // Together they form the closed outline of the quad in screen space
2665+ float dx01 = p1 [0 ] - p0 [0 ], dy01 = p1 [1 ] - p0 [1 ];
2666+ float dx12 = p2 [0 ] - p1 [0 ], dy12 = p2 [1 ] - p1 [1 ];
2667+ float dx23 = p3 [0 ] - p2 [0 ], dy23 = p3 [1 ] - p2 [1 ];
2668+ float dx30 = p0 [0 ] - p3 [0 ], dy30 = p0 [1 ] - p3 [1 ];
2669+
2670+ // Each edge must be horizontal (|dy| < epsilon) or vertical (|dx| < epsilon); any other edge returns false
2671+ // NOTE(review): an edge with both deltas below epsilon (degenerate) also passes, and the number of horizontal/vertical edges is not checked -- confirm the caller tolerates such quads
2672+ if (!((fabsf (dy01 ) < epsilon ) || (fabsf (dx01 ) < epsilon ))) return false;
2673+ if (!((fabsf (dy12 ) < epsilon ) || (fabsf (dx12 ) < epsilon ))) return false;
2674+ if (!((fabsf (dy23 ) < epsilon ) || (fabsf (dx23 ) < epsilon ))) return false;
2675+ if (!((fabsf (dy30 ) < epsilon ) || (fabsf (dx30 ) < epsilon ))) return false;
2676+
2677+ return true;
26462678}
26472679
26482680static inline void sw_quad_sort_cw (const sw_vertex_t * * output )
@@ -3660,11 +3692,6 @@ void swBlitFramebuffer(int xDst, int yDst, int wDst, int hDst, int xSrc, int ySr
36603692{
36613693 sw_pixelformat_t pFormat = (sw_pixelformat_t )sw_get_pixel_format (format , type );
36623694
3663- if (xDst == xSrc && yDst == ySrc && wDst == wSrc && hDst == hSrc )
3664- {
3665- swCopyFramebuffer (xSrc , ySrc , wSrc , hSrc , format , type , pixels );
3666- }
3667-
36683695 if (wSrc <= 0 ) { RLSW .errCode = SW_INVALID_VALUE ; return ; }
36693696 if (hSrc <= 0 ) { RLSW .errCode = SW_INVALID_VALUE ; return ; }
36703697
@@ -3674,6 +3701,13 @@ void swBlitFramebuffer(int xDst, int yDst, int wDst, int hDst, int xSrc, int ySr
36743701 xSrc = sw_clampi (xSrc , 0 , wSrc );
36753702 ySrc = sw_clampi (ySrc , 0 , hSrc );
36763703
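3704+ // Check whether the destination and source rectangles are identical after clamping the source, to avoid unexpected issues
3705+ // REVIEW: This repeats the operations if true, so we could make a copy function without these checks. NOTE(review): no `return` follows the swCopyFramebuffer() call below, so execution continues into the format switch and the blit runs as well -- confirm whether an early return was intended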
3706+ if (xDst == xSrc && yDst == ySrc && wDst == wSrc && hDst == hSrc )
3707+ {
3708+ swCopyFramebuffer (xSrc , ySrc , wSrc , hSrc , format , type , pixels );
3709+ }
3710+
36773711 switch (pFormat )
36783712 {
36793713 case SW_PIXELFORMAT_UNCOMPRESSED_GRAYSCALE : sw_framebuffer_blit_to_GRAYALPHA (xDst , yDst , wDst , hDst , xSrc , ySrc , wSrc , hSrc , (uint8_t * )pixels ); break ;
@@ -3696,14 +3730,6 @@ void swBlitFramebuffer(int xDst, int yDst, int wDst, int hDst, int xSrc, int ySr
36963730 }
36973731}
36983732
3699- void * swGetColorBuffer (int * w , int * h )
3700- {
3701- if (w ) * w = RLSW .framebuffer .width ;
3702- if (h ) * h = RLSW .framebuffer .height ;
3703-
3704- return (void * )RLSW .framebuffer .pixels -> color ;
3705- }
3706-
37073733void swEnable (SWstate state )
37083734{
37093735 switch (state )
0 commit comments