Skip to content

Commit e422ffc

Browse files
committed
Fast math optimization
Implement Quake III fast inverse sqrt and Taylor series sin/cos approximations for non-critical paths. Also, set jack texture to nearest-neighbor to prevent any edge artifacts (white pixels at cube corners). Signed-off-by: Joachim Wiberg <[email protected]>
1 parent 98b6771 commit e422ffc

File tree

1 file changed

+51
-7
lines changed

1 file changed

+51
-7
lines changed

demo.c

Lines changed: 51 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,47 @@ static int HEIGHT = 600;
3535
#define NUM_STARS 200
3636
#define MAX_LOGO_PARTICLES 8192
3737

38+
/* Fast math approximations for better performance */
39+
40+
/*
 * Fast inverse square root (Quake III style) - for normalizing vectors.
 *
 * Returns an approximation of 1/sqrt(x).  A bit-level initial guess is
 * refined with a single Newton-Raphson step, which leaves a relative
 * error of roughly 0.2%.
 *
 * x must be positive and finite; the result for zero, negative or NaN
 * input is not meaningful.  The trick assumes a 32-bit IEEE-754 float
 * and an unsigned int of the same size.
 */
static inline float fast_inv_sqrt(float x)
{
    float xhalf = 0.5f * x;
    union {
        float f;
        unsigned int i; /* unsigned: the shift and subtraction below are fully defined */
    } u;

    u.f = x;
    u.i = 0x5f3759dfu - (u.i >> 1);         /* Magic-constant initial guess */
    u.f = u.f * (1.5f - xhalf * u.f * u.f); /* One Newton iteration */

    return u.f;
}
53+
54+
/*
 * Fast square root approximation built on fast_inv_sqrt():
 * sqrt(x) == x * (1 / sqrt(x)).  Zero and negative input yield 0.
 */
static inline float fast_sqrt(float x)
{
    return (x <= 0.0f) ? 0.0f : x * fast_inv_sqrt(x);
}
60+
61+
/* Fast sine approximation using Taylor series (good for -PI to PI) */
62+
static inline float fast_sin(float x)
63+
{
64+
/* Wrap to -PI..PI range */
65+
while (x > PI) x -= 2.0f * PI;
66+
while (x < -PI) x += 2.0f * PI;
67+
68+
/* Taylor series: sin(x) ≈ x - x³/6 + x⁵/120 */
69+
float x2 = x * x;
70+
return x * (1.0f - x2 * (1.0f/6.0f - x2/120.0f));
71+
}
72+
73+
/* Fast cosine using sin(x + PI/2) */
74+
static inline float fast_cos(float x)
75+
{
76+
return fast_sin(x + PI/2.0f);
77+
}
78+
3879
typedef enum {
3980
SCROLL_NONE,
4081
SCROLL_SINE_WAVE,
@@ -692,7 +733,7 @@ void render_cube(DemoContext *ctx)
692733
float y = vertices[i][1];
693734
float z = vertices[i][2];
694735

695-
/* Rotate around X */
736+
/* Rotate around X (need accuracy for texture mapping) */
696737
float y1 = y * cos(angle_x) - z * sin(angle_x);
697738
float z1 = y * sin(angle_x) + z * cos(angle_x);
698739
y = y1;
@@ -780,11 +821,12 @@ void render_cube(DemoContext *ctx)
780821
verts[v].color.a = 255;
781822
}
782823

783-
/* UV coordinates - map texture to quad */
784-
verts[0].tex_coord.x = 0.0f; verts[0].tex_coord.y = 0.0f;
785-
verts[1].tex_coord.x = 1.0f; verts[1].tex_coord.y = 0.0f;
786-
verts[2].tex_coord.x = 1.0f; verts[2].tex_coord.y = 1.0f;
787-
verts[3].tex_coord.x = 0.0f; verts[3].tex_coord.y = 1.0f;
824+
/* UV coordinates - map texture to quad with tiny inset to avoid edge artifacts */
825+
float inset = 0.001f;
826+
verts[0].tex_coord.x = inset; verts[0].tex_coord.y = inset;
827+
verts[1].tex_coord.x = 1.0f - inset; verts[1].tex_coord.y = inset;
828+
verts[2].tex_coord.x = 1.0f - inset; verts[2].tex_coord.y = 1.0f - inset;
829+
verts[3].tex_coord.x = inset; verts[3].tex_coord.y = 1.0f - inset;
788830

789831
/* Render two triangles to form the quad */
790832
int indices[6] = {0, 1, 2, 0, 2, 3};
@@ -812,7 +854,7 @@ void render_tunnel(DemoContext *ctx)
812854
float dx = x - eye_x;
813855
float dy = y - eye_y;
814856

815-
float distance = sqrtf(dx * dx + dy * dy);
857+
float distance = fast_sqrt(dx * dx + dy * dy);
816858
if (distance < 1.0f) distance = 1.0f; /* Avoid division by zero */
817859

818860
/* Use pre-calculated angle from LUT (reduces atan2 calls) */
@@ -2640,6 +2682,8 @@ int main(int argc, char *argv[])
26402682
ctx.jack_texture = SDL_CreateTextureFromSurface(ctx.renderer, ctx.jack_surface);
26412683
SDL_SetTextureBlendMode(ctx.jack_texture, SDL_BLENDMODE_NONE);
26422684
SDL_SetTextureAlphaMod(ctx.jack_texture, 255);
2685+
/* Use nearest neighbor to prevent edge artifacts from linear filtering */
2686+
SDL_SetTextureScaleMode(ctx.jack_texture, SDL_ScaleModeNearest);
26432687
}
26442688
}
26452689

0 commit comments

Comments
 (0)