Skip to content

Commit 8a6cb57

Browse files
grey-eminence authored and igcbot committed
IMF LA open-sourcing. Switch back to previous FP32 atan implementation.
1 parent aa2e32e commit 8a6cb57

File tree

1 file changed

+59
-2
lines changed

1 file changed

+59
-2
lines changed

IGC/BiFModule/Implementation/IBiF_Intrinsics_Impl.cl

Lines changed: 59 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
2727

2828
#include "spirv.h"
2929
#include "IMF/FP32/asin_s_la.cl"
30-
#include "IMF/FP32/atan_s_la.cl"
3130

3231
INLINE float __builtin_spirv_OpenCL_fclamp_f32_f32_f32(float x, float minval, float maxval ){
3332
return __builtin_spirv_OpenCL_fmin_f32_f32(__builtin_spirv_OpenCL_fmax_f32_f32(x, minval), maxval);
@@ -405,7 +404,65 @@ float __builtin_spirv_OpenCL_asin_f32(float value ){
405404

406405
// Single-precision arctangent approximation.
//
// Outline of what the body below does:
//   1. t = |value|; when |value| > 1 the reciprocal 1/t is used instead, so
//      the polynomial is only evaluated for t in [0, 1].
//   2. With s = t*t, a rational correction N(s)/Q(s) is evaluated and added
//      to t:   result = t + t*s*((c2*s + c1)*s + c0) / (((s + d2)*s + d1)*s + d0)
//   3. If the reciprocal was taken, result = pi/2 - result.
//   4. If value is negative, the result is made negative.
//
// A NaN input fails both comparisons and propagates through the arithmetic.
// NOTE(review): a -0.0f input does not satisfy `value < 0.0f`, so the result
// is +0.0f rather than -0.0f -- confirm whether callers care.
INLINE
407406
float __builtin_spirv_OpenCL_atan_f32(float value ){
408-
// Line removed by this commit: the body used to forward to the IMF LA routine.
return __ocl_svml_atanf(value);
407+
// The LA atan implementation (IMF/FP32/atan_s_la.cl)
408+
// seems to be slower on the Mandelbulb algorithm, so the previous
// polynomial implementation below is used instead.
409+
// temp1: reduced argument t (|value| or its reciprocal).
float temp1 = 0.0f;
410+
// temp2: s = t * t.
float temp2 = 0.0f;
411+
// NOTE(review): temp3 is never used after this initialization in the visible code.
float temp3 = 0.0f;
412+
// temp4: denominator polynomial Q(s), later replaced by its reciprocal.
float temp4 = 0.0f;
413+
414+
// destTemp: numerator accumulator, then the final result.
float destTemp = 0.0f;
415+
416+
// flag remembers that the argument was inverted so step 3 can undo it.
bool flag = __builtin_spirv_OpenCL_fabs_f32(value) > 1.0f;
417+
418+
temp1 = __builtin_spirv_OpenCL_fabs_f32(value);
419+
420+
if(flag)
421+
{
422+
// Range reduction: work with 1/|value| when |value| > 1.
temp1 = __builtin_spirv_OpenCL_native_recip_f32(temp1);
423+
}
424+
425+
temp2 = temp1 * temp1;
426+
427+
// The numerator (destTemp) and denominator (temp4) are evaluated in
// interleaved Horner steps; the two chains are independent until they are
// combined after the reciprocal of temp4 is taken.
destTemp = temp2 * -0.8233629465103149f;
428+
429+
temp4 = temp2 + 11.33538818359375f;
430+
431+
destTemp = destTemp + -5.674867153167725f;
432+
433+
temp4 = temp4 * temp2;
434+
435+
destTemp = temp2 * destTemp;
436+
437+
temp4 = temp4 + 28.84246826171875f;
438+
439+
destTemp = destTemp + -6.565555095672607f;
440+
441+
temp4 = temp4 * temp2;
442+
443+
destTemp = temp2 * destTemp;
444+
445+
temp4 = temp4 + 19.696670532226562f;
446+
447+
// Numerator is now t * s * ((c2*s + c1)*s + c0).
destTemp = temp1 * destTemp;
448+
449+
// Division N/Q is performed as a multiply by the reciprocal of Q.
temp4 = __builtin_spirv_OpenCL_native_recip_f32(temp4);
450+
451+
destTemp = temp4 * destTemp;
452+
453+
// Add the leading term t of the approximation.
destTemp = destTemp + temp1;
454+
455+
if(flag)
456+
{
457+
// Undo the range reduction: 1.5707963705062866f is pi/2 rounded to float.
destTemp = -destTemp + 1.5707963705062866f;
458+
}
459+
460+
if(value < 0.0f)
461+
{
462+
// The computation above used |value|; force a negative result for negative input.
destTemp = -__builtin_spirv_OpenCL_fabs_f32(destTemp);
463+
}
464+
465+
return destTemp;
409466
}
410467

411468
INLINE

0 commit comments

Comments
 (0)