diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt index fb5f3638bc4e4..8f076be1599db 100644 --- a/libclc/CMakeLists.txt +++ b/libclc/CMakeLists.txt @@ -29,6 +29,7 @@ set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS # CLC internal libraries clc/lib/generic/SOURCES; clc/lib/amdgcn/SOURCES; + clc/lib/amdgpu/SOURCES; clc/lib/clspv/SOURCES; clc/lib/spirv/SOURCES; ) diff --git a/libclc/amdgpu/lib/SOURCES b/libclc/amdgpu/lib/SOURCES index 24f099d049cd3..d7782a2ae14dc 100644 --- a/libclc/amdgpu/lib/SOURCES +++ b/libclc/amdgpu/lib/SOURCES @@ -10,4 +10,3 @@ math/half_log2.cl math/half_recip.cl math/half_rsqrt.cl math/half_sqrt.cl -math/sqrt.cl diff --git a/libclc/clc/include/clc/float/definitions.h b/libclc/clc/include/clc/float/definitions.h index 618d02ab1c090..82ae90155be1d 100644 --- a/libclc/clc/include/clc/float/definitions.h +++ b/libclc/clc/include/clc/float/definitions.h @@ -1,7 +1,6 @@ #define MAXFLOAT 0x1.fffffep127f #define HUGE_VALF __builtin_huge_valf() #define INFINITY __builtin_inff() -#define NAN __builtin_nanf("") #define FLT_DIG 6 #define FLT_MANT_DIG 24 @@ -13,6 +12,7 @@ #define FLT_MAX MAXFLOAT #define FLT_MIN 0x1.0p-126f #define FLT_EPSILON 0x1.0p-23f +#define FLT_NAN __builtin_nanf("") #define FP_ILOGB0 (-2147483647 - 1) #define FP_ILOGBNAN 2147483647 @@ -46,6 +46,7 @@ #define DBL_MAX 0x1.fffffffffffffp1023 #define DBL_MIN 0x1.0p-1022 #define DBL_EPSILON 0x1.0p-52 +#define DBL_NAN __builtin_nan("") #define M_E 0x1.5bf0a8b145769p+1 #define M_LOG2E 0x1.71547652b82fep+0 @@ -80,6 +81,7 @@ #define HALF_MAX 0x1.ffcp15h #define HALF_MIN 0x1.0p-14h #define HALF_EPSILON 0x1.0p-10h +#define HALF_NAN __builtin_nanf16("") #define M_LOG2E_H 0x1.714p+0h diff --git a/libclc/generic/include/math/clc_sqrt.h b/libclc/clc/include/clc/math/clc_sqrt.h similarity index 60% rename from libclc/generic/include/math/clc_sqrt.h rename to libclc/clc/include/clc/math/clc_sqrt.h index 90a7c575c9bad..c16edf196d9f6 100644 --- a/libclc/generic/include/math/clc_sqrt.h +++ b/libclc/clc/include/clc/math/clc_sqrt.h @@ -1,8 +1,12 @@ -#include -#include +#ifndef __CLC_MATH_CLC_SQRT_H__ +#define __CLC_MATH_CLC_SQRT_H__ -#define __CLC_FUNCTION __clc_sqrt #define __CLC_BODY +#define __CLC_FUNCTION __clc_sqrt + #include + #undef __CLC_BODY #undef __CLC_FUNCTION + +#endif // __CLC_MATH_CLC_SQRT_H__ diff --git a/libclc/clc/lib/amdgpu/SOURCES b/libclc/clc/lib/amdgpu/SOURCES new file mode 100644 index 0000000000000..fd64a862021e8 --- /dev/null +++ b/libclc/clc/lib/amdgpu/SOURCES @@ -0,0 +1 @@ +math/clc_sqrt_fp64.cl diff --git a/libclc/amdgpu/lib/math/sqrt.cl b/libclc/clc/lib/amdgpu/math/clc_sqrt_fp64.cl similarity index 64% rename from libclc/amdgpu/lib/math/sqrt.cl rename to libclc/clc/lib/amdgpu/math/clc_sqrt_fp64.cl index 17d77e50d44d3..c5614659eb5ce 100644 --- a/libclc/amdgpu/lib/math/sqrt.cl +++ b/libclc/clc/lib/amdgpu/math/clc_sqrt_fp64.cl @@ -20,52 +20,43 @@ * THE SOFTWARE. */ -#include "math/clc_sqrt.h" -#include #include - -_CLC_DEFINE_UNARY_BUILTIN(float, sqrt, __clc_sqrt, float) - -#ifdef cl_khr_fp16 - -#pragma OPENCL EXTENSION cl_khr_fp16 : enable -_CLC_DEFINE_UNARY_BUILTIN(half, sqrt, __clc_sqrt, half) - -#endif +#include +#include +#include #ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64 : enable #ifdef __AMDGCN__ - #define __clc_builtin_rsq __builtin_amdgcn_rsq +#define __clc_builtin_rsq __builtin_amdgcn_rsq #else - #define __clc_builtin_rsq __builtin_r600_recipsqrt_ieee +#define __clc_builtin_rsq __builtin_r600_recipsqrt_ieee #endif -_CLC_OVERLOAD _CLC_DEF double sqrt(double x) { - +_CLC_OVERLOAD _CLC_DEF double __clc_sqrt(double x) { uint vcc = x < 0x1p-767; uint exp0 = vcc ? 0x100 : 0; unsigned exp1 = vcc ? 0xffffff80 : 0; - double v01 = ldexp(x, exp0); + double v01 = __clc_ldexp(x, exp0); double v23 = __clc_builtin_rsq(v01); double v45 = v01 * v23; v23 = v23 * 0.5; - double v67 = fma(-v23, v45, 0.5); - v45 = fma(v45, v67, v45); - double v89 = fma(-v45, v45, v01); - v23 = fma(v23, v67, v23); - v45 = fma(v89, v23, v45); - v67 = fma(-v45, v45, v01); - v23 = fma(v67, v23, v45); + double v67 = __clc_fma(-v23, v45, 0.5); + v45 = __clc_fma(v45, v67, v45); + double v89 = __clc_fma(-v45, v45, v01); + v23 = __clc_fma(v23, v67, v23); + v45 = __clc_fma(v89, v23, v45); + v67 = __clc_fma(-v45, v45, v01); + v23 = __clc_fma(v67, v23, v45); - v23 = ldexp(v23, exp1); - return ((x == __builtin_inf()) || (x == 0.0)) ? v01 : v23; + v23 = __clc_ldexp(v23, exp1); + return (x == __builtin_inf() || (x == 0.0)) ? v01 : v23; } -_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sqrt, double); +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __clc_sqrt, double); #endif diff --git a/libclc/clc/lib/generic/SOURCES b/libclc/clc/lib/generic/SOURCES index 1d16cd5e2d18a..97c504d21aa88 100644 --- a/libclc/clc/lib/generic/SOURCES +++ b/libclc/clc/lib/generic/SOURCES @@ -33,6 +33,7 @@ math/clc_nan.cl math/clc_nextafter.cl math/clc_rint.cl math/clc_round.cl +math/clc_sqrt.cl math/clc_sw_fma.cl math/clc_trunc.cl relational/clc_all.cl diff --git a/libclc/generic/lib/math/clc_sqrt.cl b/libclc/clc/lib/generic/math/clc_sqrt.cl similarity index 80% rename from libclc/generic/lib/math/clc_sqrt.cl rename to libclc/clc/lib/generic/math/clc_sqrt.cl index 92c7f6e73b11e..620c367dd8510 100644 --- a/libclc/generic/lib/math/clc_sqrt.cl +++ b/libclc/clc/lib/generic/math/clc_sqrt.cl @@ -20,14 +20,8 @@ * THE SOFTWARE. */ -#include +#include +#include -// Map the llvm sqrt intrinsic to an OpenCL function. -#define __CLC_FUNCTION __clc_llvm_intr_sqrt -#define __CLC_INTRINSIC "llvm.sqrt" -#include -#undef __CLC_FUNCTION -#undef __CLC_INTRINSIC - -#define __CLC_BODY +#define __CLC_BODY #include diff --git a/libclc/generic/lib/math/clc_sqrt_impl.inc b/libclc/clc/lib/generic/math/clc_sqrt.inc similarity index 75% rename from libclc/generic/lib/math/clc_sqrt_impl.inc rename to libclc/clc/lib/generic/math/clc_sqrt.inc index fe724e8c14394..6c1c1bb7960e5 100644 --- a/libclc/generic/lib/math/clc_sqrt_impl.inc +++ b/libclc/clc/lib/generic/math/clc_sqrt.inc @@ -20,20 +20,7 @@ * THE SOFTWARE. */ -#if __CLC_FPSIZE == 64 -#define __CLC_NAN __builtin_nan("") -#define ZERO 0.0 -#elif __CLC_FPSIZE == 32 -#define __CLC_NAN NAN -#define ZERO 0.0f -#elif __CLC_FPSIZE == 16 -#define __CLC_NAN (half)NAN -#define ZERO 0.0h -#endif - -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sqrt(__CLC_GENTYPE val) { - return val < ZERO ? __CLC_NAN : __clc_llvm_intr_sqrt(val); +__attribute__((weak)) _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE +__clc_sqrt(__CLC_GENTYPE val) { + return __builtin_elementwise_sqrt(val); } - -#undef __CLC_NAN -#undef ZERO diff --git a/libclc/generic/lib/SOURCES b/libclc/generic/lib/SOURCES index eb14fd84c96b3..5b1c85d6ef75b 100644 --- a/libclc/generic/lib/SOURCES +++ b/libclc/generic/lib/SOURCES @@ -179,7 +179,6 @@ math/sincos.cl math/sincos_helpers.cl math/sinh.cl math/sinpi.cl -math/clc_sqrt.cl math/sqrt.cl math/clc_tan.cl math/tan.cl diff --git a/libclc/generic/lib/math/clc_hypot.cl b/libclc/generic/lib/math/clc_hypot.cl index 5e6a99b70f22a..fdf1e7ffa1def 100644 --- a/libclc/generic/lib/math/clc_hypot.cl +++ b/libclc/generic/lib/math/clc_hypot.cl @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -49,7 +50,7 @@ _CLC_DEF _CLC_OVERLOAD float __clc_hypot(float x, float y) { float fi_exp = as_float((-xexp + EXPBIAS_SP32) << EXPSHIFTBITS_SP32); float fx = as_float(ux) * fi_exp; float fy = as_float(uy) * fi_exp; - retval = sqrt(__clc_mad(fx, fx, fy * fy)) * fx_exp; + retval = __clc_sqrt(__clc_mad(fx, fx, fy * fy)) * fx_exp; retval = ux > PINFBITPATT_SP32 | uy == 0 ? as_float(ux) : retval; retval = ux == PINFBITPATT_SP32 | uy == PINFBITPATT_SP32 @@ -81,7 +82,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_hypot(double x, double y) { double ay = y * preadjust; // The post adjust may overflow, but this can't be avoided in any case - double r = sqrt(__clc_fma(ax, ax, ay * ay)) * postadjust; + double r = __clc_sqrt(__clc_fma(ax, ax, ay * ay)) * postadjust; // If the difference in exponents between x and y is large double s = x + y; diff --git a/libclc/generic/lib/math/sqrt.cl b/libclc/generic/lib/math/sqrt.cl index a9192a9493d17..d60d304fa1e1f 100644 --- a/libclc/generic/lib/math/sqrt.cl +++ b/libclc/generic/lib/math/sqrt.cl @@ -21,7 +21,9 @@ */ #include -#include "math/clc_sqrt.h" +#include -#define __CLC_FUNCTION sqrt -#include +#define FUNCTION sqrt +#define __CLC_BODY + +#include