Skip to content

Commit 143f044

Browse files
committed
device-libs: Implement rsqrt by enabling contract on fdiv+sqrt intrinsic
This was already how the half version is implemented. The test already showed this was preserved, but tighten up the checks to make sure there aren't any additional instructions. Change-Id: I0dbb6198e1f8e3a0168c73007cb06a72da6c36d9
1 parent 0b36efe commit 143f044

File tree

2 files changed

+14
-3
lines changed

2 files changed

+14
-3
lines changed

amd/device-libs/ocml/src/nativeF.cl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@ MATH_MANGLE(native_sqrt)(float x)
2424
CONSTATTR float
2525
MATH_MANGLE(native_rsqrt)(float x)
2626
{
27-
return __builtin_amdgcn_rsqf(x);
27+
#pragma clang fp contract(fast)
28+
return 1.0f / __builtin_sqrtf(x);
2829
}
2930

3031
CONSTATTR float

amd/device-libs/test/compile/native_rsqrt.cl

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,24 @@ half __ocml_native_rsqrt_f16(half);
1414
// GFX700: v_sqrt_f32
1515
// GFX700: v_rcp_f32
1616

17-
// GFX803: v_rsq_f16
17+
// GFX803: {{(flat|global|buffer)}}_load_{{(ushort|b16)}} [[VAL:v[0-9]+]],
18+
// GFX803-NOT: [[VAL]]
19+
// GFX803: v_rsq_f16{{(_e32)?}} [[RESULT:v[0-9]+]], [[VAL]]
20+
// GFX803-NOT: [[RESULT]]
21+
// GFX803: [[RESULT]]
22+
// GFX803-NOT: [[RESULT]]
1823
kernel void test_native_rsqrt_f16(global half* restrict out, global half* restrict in) {
1924
int id = get_local_id(0);
2025
out[id] = __ocml_native_rsqrt_f16(in[id]);
2126
}
2227

2328
// GCN-LABEL: {{^}}test_native_rsqrt_f32:
24-
// GCN: v_rsq_f32
29+
// GCN: {{(flat|global|buffer)}}_load_{{(dword|b32)}} [[VAL:v[0-9]+]],
30+
// GCN-NOT: [[VAL]]
31+
// GCN: v_rsq_f32{{(_e32)?}} [[RESULT:v[0-9]+]], [[VAL]]
32+
// GCN-NOT: [[RESULT]]
33+
// GCN: [[RESULT]]
34+
// GCN-NOT: [[RESULT]]
2535
kernel void test_native_rsqrt_f32(global float* restrict out, global float* restrict in) {
2636
int id = get_local_id(0);
2737
out[id] = native_rsqrt(in[id]);

0 commit comments

Comments
 (0)