Skip to content

Commit 45ecca8

Browse files
authored
device-libs: Stop using CORRECTLY_ROUNDED_SQRT32 (llvm#1549)
2 parents 02adca3 + 878c760 commit 45ecca8

File tree

10 files changed

+16
-57
lines changed

10 files changed

+16
-57
lines changed

amd/comgr/test-lit/device-lib-linking.cl

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717

1818
extern const __constant bool __oclc_finite_only_opt;
1919
extern const __constant bool __oclc_unsafe_math_opt;
20-
extern const __constant bool __oclc_correctly_rounded_sqrt32;
2120
extern const __constant bool __oclc_wavefrontsize64;
2221
extern const __constant int __oclc_ISA_version;
2322
extern const __constant int __oclc_ABI_version;
@@ -26,10 +25,9 @@ void kernel device_libs(__global float *status, float x, float y, float z) {
2625

2726
if (__oclc_finite_only_opt) status[0] = 1.0;
2827
if (__oclc_unsafe_math_opt) status[1] = 1.0;
29-
if (__oclc_correctly_rounded_sqrt32) status[2] = 1.0;
30-
if (__oclc_wavefrontsize64) status[3] = 1.0;
31-
if (__oclc_ISA_version) status[4] = 1.0;
32-
if (__oclc_ABI_version) status[5] = 1.0;
28+
if (__oclc_wavefrontsize64) status[2] = 1.0;
29+
if (__oclc_ISA_version) status[3] = 1.0;
30+
if (__oclc_ABI_version) status[4] = 1.0;
3331

3432
// Math functions to test AMDGPULibCalls Folding optimizations
3533
// fold_sincos()

amd/comgr/test/source/device_libs.cl

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
extern const __constant bool __oclc_finite_only_opt;
22
extern const __constant bool __oclc_unsafe_math_opt;
3-
extern const __constant bool __oclc_correctly_rounded_sqrt32;
43
extern const __constant bool __oclc_wavefrontsize64;
54
extern const __constant int __oclc_ISA_version;
65
extern const __constant int __oclc_ABI_version;
@@ -11,8 +10,6 @@ void kernel device_libs(__global float *status) {
1110
status[0] = 1.0;
1211
if (__oclc_unsafe_math_opt)
1312
status[1] = 1.0;
14-
if (__oclc_correctly_rounded_sqrt32)
15-
status[3] = 1.0;
1613
if (__oclc_wavefrontsize64)
1714
status[4] = 1.0;
1815
if (__oclc_ISA_version)

amd/device-libs/doc/OCKL.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,9 @@ taken with no control flow overhead. These functions all have the form (in C)
4040
The currently supported controls are
4141
* `finite_only_opt` - floating point Inf and NaN are never expected to be consumed or produced
4242
* `unsafe_math_opt` - lower accuracy results may be produced with higher performance
43-
* `correctly_rounded_sqrt32` - float square root must be correctly rounded
4443
* `ISA_version` - an integer representation of the ISA version of the target device
4544
* `daz_opt` - unused and deprecated. Will be removed in the future.
45+
* `correctly_rounded_sqrt32` - unused and deprecated. Will be removed in the future.
4646

4747
### Versioning
4848

amd/device-libs/doc/OCML.md

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,11 @@ The currently supported control `<name>`s and values `N` are
4343
* `finite_only_opt` - floating point Inf and NaN are never expected to be consumed or produced. `N` may be 1 (on/true/enabled), or 0 (off/false/disabled).
4444
* `unsafe_math_opt` - lower accuracy results may be produced with higher performance. `N` may be 1 (on/true/enabled) or 0 (off/false/disabled).
4545
* `daz_opt` - subnormal values consumed and produced may be flushed to zero. `N` may be 1 (on/true/enabled) or 0 (off/false/disabled).
46-
* `correctly_rounded_sqrt32` - float square root must be correctly rounded. `N` may be 1 (on/true/enabled) or 0 (off/false/disabled).
4746
* `wavefrontsize64` - the wave front size is 64. `N` may be 1 (on/true/enabled) or 0 (off/false/disabled). Very few current devices support a value of 0.
4847
* `ISA_version` - an integer representation of the ISA version of the target device
4948

5049
The language runtime can link a specific set of OCLC control libraries to properly configure OCML and other device libraries which also use the controls. If linking OCLC libraries is used to define the control variables, then the runtime must link in:
5150

52-
- Exactly one of `oclc_correctly_rounded_sqrt_on.amdgcn.bc` or `oclc_correctly_rounded_sqrt_off.amdgcn.bc` depending on the kernel's requirements
5351
- Exactly one of `oclc_daz_opt_on.amdgcn.bc` or `oclc_daz_opt_off.amdgcn.bc` depending on the kernel's requirements
5452
- Exactly one of `oclc_finite_only_on.amdgcn.bc` or `oclc_finite_only_off.amdgcn.bc` depending on the kernel's requirements
5553
- Exactly one of `oclc_unsafe_math_on.amdgcn.bc` or `oclc_unsafe_math_off.amdgcn.bc` depending on the kernel's requirements
@@ -84,7 +82,7 @@ where `{function}` is generally the familiar libm name of the function, and `{ty
8482

8583
For example, `__ocml_sqrt_f32` is the name of the OCML single precision square root function.
8684

87-
OCML does not currently support higher precision than double precision due to the lack of hardware support for such precisions.
85+
OCML does not currently support higher precision than double precision due to the lack of hardware support for such precisions.
8886

8987
### Supported functions
9088

amd/device-libs/oclc/inc/oclc.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,6 @@
1919
// __constant bool __oclc_unsafe_math_opt
2020
// - the application accepts optimizations that may lower the accuracy of the results
2121
//
22-
// __constant bool __oclc_correctly_rounded_sqrt32(void)
23-
// - the application is expecting sqrt(float) to produce a correctly rounded result
24-
//
2522
// __constant bool __oclc_wavefrontsize64
2623
// - the application is being compiled for a wavefront size of 64
2724
//
@@ -40,7 +37,6 @@
4037

4138
extern const __constant bool __oclc_finite_only_opt;
4239
extern const __constant bool __oclc_unsafe_math_opt;
43-
extern const __constant bool __oclc_correctly_rounded_sqrt32;
4440
extern const __constant bool __oclc_wavefrontsize64;
4541
extern const __constant uint __oclc_wavefrontsize_log2;
4642
extern const __constant int __oclc_ISA_version;
Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1 @@
1-
/*===--------------------------------------------------------------------------
2-
* ROCm Device Libraries
3-
*
4-
* This file is distributed under the University of Illinois Open Source
5-
* License. See LICENSE.TXT for details.
6-
*===------------------------------------------------------------------------*/
7-
8-
#include "oclc.h"
9-
10-
const __constant bool __oclc_correctly_rounded_sqrt32 = 0;
1+
// Placeholder until clang stops trying to link this
Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1 @@
1-
/*===--------------------------------------------------------------------------
2-
* ROCm Device Libraries
3-
*
4-
* This file is distributed under the University of Illinois Open Source
5-
* License. See LICENSE.TXT for details.
6-
*===------------------------------------------------------------------------*/
7-
8-
#include "oclc.h"
9-
10-
const __constant bool __oclc_correctly_rounded_sqrt32 = 1;
11-
1+
// Placeholder until clang stops trying to link this

amd/device-libs/ocml/CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,4 +21,11 @@ set_source_files_properties(
2121
${CMAKE_CURRENT_SOURCE_DIR}/src/native_expF.cl
2222
PROPERTIES COMPILE_FLAGS "${native_func_flags}")
2323

24+
25+
# This implementation of sqrt will not be used through opencl, openmp,
26+
# or hip. Compile to be correctly rounded just in case
27+
set_source_files_properties(
28+
${CMAKE_CURRENT_SOURCE_DIR}/src/sqrtF.cl
29+
PROPERTIES COMPILE_FLAGS -cl-fp32-correctly-rounded-divide-sqrt)
30+
2431
opencl_bc_lib(NAME ocml SOURCES ${sources})

amd/device-libs/ocml/src/opts.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,4 @@
1111
#define FINITE_ONLY_OPT() __oclc_finite_only_opt
1212
#define UNSAFE_MATH_OPT() __oclc_unsafe_math_opt
1313

14-
1514
#define DAZ_OPT() __builtin_isfpclass(__builtin_canonicalizef(0x1p-149f), __FPCLASS_POSZERO)
16-
17-
#define CORRECTLY_ROUNDED_SQRT32() __oclc_correctly_rounded_sqrt32

amd/device-libs/ocml/src/sqrtF.cl

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -7,32 +7,17 @@
77

88
#include "mathF.h"
99

10-
// 1ulp sqrt that handles denormals, should be used without
11-
// -cl-fp32-correctly-rounded-divide-sqrt
12-
static float sqrt_scale_denormal(float x) {
13-
bool need_scale = x < 0x1p-126f;
14-
float scaled = BUILTIN_FLDEXP_F32(x, need_scale ? 32 : 0);
15-
float sqrt_scaled = BUILTIN_AMDGPU_SQRT_F32(scaled);
16-
return BUILTIN_FLDEXP_F32(sqrt_scaled, need_scale ? -16 : 0);
17-
}
18-
1910
CONSTATTR float
2011
MATH_MANGLE(sqrt)(float x)
2112
{
22-
if (CORRECTLY_ROUNDED_SQRT32()) {
23-
return MATH_SQRT(x);
24-
} else {
25-
if (DAZ_OPT())
26-
return BUILTIN_AMDGPU_SQRT_F32(x);
27-
return sqrt_scale_denormal(x);
28-
}
13+
return __builtin_elementwise_sqrt(x);
2914
}
3015

3116
#define GEN(LN,UN) \
3217
CONSTATTR float \
3318
MATH_MANGLE(LN)(float x) \
3419
{ \
35-
return BUILTIN_##UN##_F32(x); \
20+
return __builtin_elementwise_sqrt(x); \
3621
}
3722

3823
// GEN(sqrt_rte,SQRT_RTE)

0 commit comments

Comments
 (0)