@@ -27,7 +27,6 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
2727
2828#include "spirv.h"
2929#include "IMF/FP32/asin_s_la.cl"
30- #include "IMF/FP32/atan_s_la.cl"
3130
3231INLINE float __builtin_spirv_OpenCL_fclamp_f32_f32_f32 (float x , float minval , float maxval ){
3332 return __builtin_spirv_OpenCL_fmin_f32_f32 (__builtin_spirv_OpenCL_fmax_f32_f32 (x , minval ), maxval );
@@ -405,7 +404,65 @@ float __builtin_spirv_OpenCL_asin_f32(float value ){
405404
406405INLINE
407406float __builtin_spirv_OpenCL_atan_f32 (float value ){
408- return __ocl_svml_atanf (value );
407+ // The LA atan implementation (IMF/FP32/atan_s_la.cl)
408+ // seems to be slower on Mandelbulb algorithm..
409+ float temp1 = 0.0f ;
410+ float temp2 = 0.0f ;
411+ float temp3 = 0.0f ;
412+ float temp4 = 0.0f ;
413+
414+ float destTemp = 0.0f ;
415+
416+ bool flag = __builtin_spirv_OpenCL_fabs_f32 (value ) > 1.0f ;
417+
418+ temp1 = __builtin_spirv_OpenCL_fabs_f32 (value );
419+
420+ if (flag )
421+ {
422+ temp1 = __builtin_spirv_OpenCL_native_recip_f32 (temp1 );
423+ }
424+
425+ temp2 = temp1 * temp1 ;
426+
427+ destTemp = temp2 * -0.8233629465103149f ;
428+
429+ temp4 = temp2 + 11.33538818359375f ;
430+
431+ destTemp = destTemp + -5.674867153167725f ;
432+
433+ temp4 = temp4 * temp2 ;
434+
435+ destTemp = temp2 * destTemp ;
436+
437+ temp4 = temp4 + 28.84246826171875f ;
438+
439+ destTemp = destTemp + -6.565555095672607f ;
440+
441+ temp4 = temp4 * temp2 ;
442+
443+ destTemp = temp2 * destTemp ;
444+
445+ temp4 = temp4 + 19.696670532226562f ;
446+
447+ destTemp = temp1 * destTemp ;
448+
449+ temp4 = __builtin_spirv_OpenCL_native_recip_f32 (temp4 );
450+
451+ destTemp = temp4 * destTemp ;
452+
453+ destTemp = destTemp + temp1 ;
454+
455+ if (flag )
456+ {
457+ destTemp = - destTemp + 1.5707963705062866f ;
458+ }
459+
460+ if (value < 0.0f )
461+ {
462+ destTemp = - __builtin_spirv_OpenCL_fabs_f32 (destTemp );
463+ }
464+
465+ return destTemp ;
409466}
410467
411468INLINE
0 commit comments