Skip to content

Commit 6fede0b

Browse files
committed
Optimize SkyMaterials by removing uses of acos and simplifying logic
The results look almost the same and run much faster.
1 parent 09ea7bc commit 6fede0b

File tree

4 files changed

+159
-73
lines changed

4 files changed

+159
-73
lines changed

drivers/gles3/shaders/sky.glsl

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,32 @@ vec4 fog_process(vec3 view, vec3 sky_color) {
151151
}
152152
#endif // !DISABLE_FOG
153153

154+
// Fast approximation of acos() using the Eberly polynomial from
// https://seblagarde.wordpress.com/2014/12/01/inverse-trigonometric-functions-gpu-optimization-for-amd-gcn-architecture/.
//
// p_x: cosine of the angle, expected in [-1, 1]. Magnitudes marginally above 1.0
//      (e.g. from rounding when normalizing a vector) are treated as exactly 1.0.
// Returns the approximate angle in radians, in [0, PI].
float acos_approx(float p_x) {
	float x = abs(p_x);
	float res = -0.156583 * x + (M_PI / 2.0);
	// Clamp the radicand: sqrt() of a negative value is undefined and would
	// otherwise turn a slightly out-of-range input into NaN.
	res *= sqrt(max(1.0 - x, 0.0));
	// Compare against a float literal: GLSL ES has no implicit int -> float conversion.
	// Negative inputs are mirrored using acos(-x) = PI - acos(x).
	return (p_x >= 0.0) ? res : M_PI - res;
}
162+
163+
// Fast approximation of the two-argument arctangent atan(y, x).
// Based on https://math.stackexchange.com/questions/1098487/atan2-faster-approximation
// but using the Eberly coefficients from https://seblagarde.wordpress.com/2014/12/01/inverse-trigonometric-functions-gpu-optimization-for-amd-gcn-architecture/.
//
// y, x: the two components of the direction, in the same argument order as atan(y, x).
// Returns the approximate angle in radians, in [-PI, PI]. Returns 0.0 when both
// inputs are 0.0 (where the angle is undefined) instead of propagating NaN.
float atan2_approx(float y, float x) {
	float abs_x = abs(x);
	float abs_y = abs(y);
	float largest = max(abs_x, abs_y);

	// Reduce the input to a ratio in [0, 1]. Guard the division so that
	// x == 0 and y == 0 does not evaluate 0 / 0.
	float a = largest > 0.0 ? min(abs_x, abs_y) / largest : 0.0;
	float s = a * a;

	// Odd polynomial in `a` approximating atan(a) on [0, 1], evaluated in Horner form.
	float poly = 0.0872929;
	poly = -0.301895 + poly * s;
	poly = 1.0 + poly * s;
	poly = poly * a;

	// Undo the range reduction: the ratio was inverted when |y| > |x|,
	// then mirror the result into the correct quadrant.
	float r = abs_y > abs_x ? (M_PI / 2.0) - poly : poly;
	r = x < 0.0 ? M_PI - r : r;
	r = y < 0.0 ? -r : r;

	return r;
}
179+
154180
void main() {
155181
vec3 cube_normal;
156182
#ifdef USE_MULTIVIEW
@@ -171,7 +197,7 @@ void main() {
171197

172198
vec2 uv = gl_FragCoord.xy; // uv_interp * 0.5 + 0.5;
173199

174-
vec2 panorama_coords = vec2(atan(cube_normal.x, -cube_normal.z), acos(cube_normal.y));
200+
vec2 panorama_coords = vec2(atan2_approx(cube_normal.x, -cube_normal.z), acos_approx(cube_normal.y));
175201

176202
if (panorama_coords.x < 0.0) {
177203
panorama_coords.x += M_PI * 2.0;

0 commit comments

Comments
 (0)