diff --git a/libclc/amdgpu/lib/SOURCES_3.9 b/libclc/amdgpu/lib/SOURCES_3.9 deleted file mode 100644 index 69c5e5ce9fbac..0000000000000 --- a/libclc/amdgpu/lib/SOURCES_3.9 +++ /dev/null @@ -1,2 +0,0 @@ -shared/vload_half_helpers.ll -shared/vstore_half_helpers.ll diff --git a/libclc/amdgpu/lib/SOURCES_4.0 b/libclc/amdgpu/lib/SOURCES_4.0 deleted file mode 100644 index 69c5e5ce9fbac..0000000000000 --- a/libclc/amdgpu/lib/SOURCES_4.0 +++ /dev/null @@ -1,2 +0,0 @@ -shared/vload_half_helpers.ll -shared/vstore_half_helpers.ll diff --git a/libclc/amdgpu/lib/SOURCES_5.0 b/libclc/amdgpu/lib/SOURCES_5.0 deleted file mode 100644 index 69c5e5ce9fbac..0000000000000 --- a/libclc/amdgpu/lib/SOURCES_5.0 +++ /dev/null @@ -1,2 +0,0 @@ -shared/vload_half_helpers.ll -shared/vstore_half_helpers.ll diff --git a/libclc/amdgpu/lib/shared/vload_half_helpers.ll b/libclc/amdgpu/lib/shared/vload_half_helpers.ll deleted file mode 100644 index bf7d544afbf55..0000000000000 --- a/libclc/amdgpu/lib/shared/vload_half_helpers.ll +++ /dev/null @@ -1,31 +0,0 @@ -;;===----------------------------------------------------------------------===;; -; -; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -; See https://llvm.org/LICENSE.txt for license information. 
-; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -; -;;===----------------------------------------------------------------------===;; - -define float @__clc_vload_half_float_helper__private(half addrspace(0)* nocapture %ptr) nounwind alwaysinline { - %data = load half, half addrspace(0)* %ptr - %res = fpext half %data to float - ret float %res -} - -define float @__clc_vload_half_float_helper__global(half addrspace(1)* nocapture %ptr) nounwind alwaysinline { - %data = load half, half addrspace(1)* %ptr - %res = fpext half %data to float - ret float %res -} - -define float @__clc_vload_half_float_helper__local(half addrspace(3)* nocapture %ptr) nounwind alwaysinline { - %data = load half, half addrspace(3)* %ptr - %res = fpext half %data to float - ret float %res -} - -define float @__clc_vload_half_float_helper__constant(half addrspace(2)* nocapture %ptr) nounwind alwaysinline { - %data = load half, half addrspace(2)* %ptr - %res = fpext half %data to float - ret float %res -} diff --git a/libclc/amdgpu/lib/shared/vstore_half_helpers.ll b/libclc/amdgpu/lib/shared/vstore_half_helpers.ll deleted file mode 100644 index 5ec193d6d40a0..0000000000000 --- a/libclc/amdgpu/lib/shared/vstore_half_helpers.ll +++ /dev/null @@ -1,43 +0,0 @@ -;;===----------------------------------------------------------------------===;; -; -; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -; See https://llvm.org/LICENSE.txt for license information. 
-; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -; -;;===----------------------------------------------------------------------===;; - -define void @__clc_vstore_half_float_helper__private(float %data, half addrspace(0)* nocapture %ptr) nounwind alwaysinline { - %res = fptrunc float %data to half - store half %res, half addrspace(0)* %ptr - ret void -} - -define void @__clc_vstore_half_float_helper__global(float %data, half addrspace(1)* nocapture %ptr) nounwind alwaysinline { - %res = fptrunc float %data to half - store half %res, half addrspace(1)* %ptr - ret void -} - -define void @__clc_vstore_half_float_helper__local(float %data, half addrspace(3)* nocapture %ptr) nounwind alwaysinline { - %res = fptrunc float %data to half - store half %res, half addrspace(3)* %ptr - ret void -} - -define void @__clc_vstore_half_double_helper__private(double %data, half addrspace(0)* nocapture %ptr) nounwind alwaysinline { - %res = fptrunc double %data to half - store half %res, half addrspace(0)* %ptr - ret void -} - -define void @__clc_vstore_half_double_helper__global(double %data, half addrspace(1)* nocapture %ptr) nounwind alwaysinline { - %res = fptrunc double %data to half - store half %res, half addrspace(1)* %ptr - ret void -} - -define void @__clc_vstore_half_double_helper__local(double %data, half addrspace(3)* nocapture %ptr) nounwind alwaysinline { - %res = fptrunc double %data to half - store half %res, half addrspace(3)* %ptr - ret void -} diff --git a/libclc/generic/lib/shared/vload.cl b/libclc/generic/lib/shared/vload.cl index a0306c500d5cd..05737aaa88b71 100644 --- a/libclc/generic/lib/shared/vload.cl +++ b/libclc/generic/lib/shared/vload.cl @@ -81,18 +81,7 @@ VLOAD_ADDR_SPACES(half) /* vload_half are legal even without cl_khr_fp16 */ /* no vload_half for double */ -#if __clang_major__ < 6 -float __clc_vload_half_float_helper__constant(const __constant half *); -float __clc_vload_half_float_helper__global(const __global half *); -float 
__clc_vload_half_float_helper__local(const __local half *); -float __clc_vload_half_float_helper__private(const __private half *); - -#define VEC_LOAD1(val, AS) \ - val = __clc_vload_half_float_helper##AS(&mem[offset++]); -#else #define VEC_LOAD1(val, AS) val = __builtin_load_halff(&mem[offset++]); -#endif - #define VEC_LOAD2(val, AS) \ VEC_LOAD1(val.lo, AS) \ VEC_LOAD1(val.hi, AS) diff --git a/libclc/generic/lib/shared/vstore.cl b/libclc/generic/lib/shared/vstore.cl index 525f3d08bf0d8..0c0513f08a778 100644 --- a/libclc/generic/lib/shared/vstore.cl +++ b/libclc/generic/lib/shared/vstore.cl @@ -75,57 +75,35 @@ VSTORE_ADDR_SPACES(double) VSTORE_ADDR_SPACES(half) #endif -/* vstore_half are legal even without cl_khr_fp16 */ -#if __clang_major__ < 6 -#define DECLARE_HELPER(STYPE, AS, builtin) \ - void __clc_vstore_half_##STYPE##_helper##AS(STYPE, AS half *); -#else -#define DECLARE_HELPER(STYPE, AS, __builtin) \ - _CLC_DEF void __clc_vstore_half_##STYPE##_helper##AS(STYPE s, AS half *d) { \ - __builtin(s, d); \ - } -#endif - -DECLARE_HELPER(float, __private, __builtin_store_halff); -DECLARE_HELPER(float, __global, __builtin_store_halff); -DECLARE_HELPER(float, __local, __builtin_store_halff); - -#ifdef cl_khr_fp64 -DECLARE_HELPER(double, __private, __builtin_store_half); -DECLARE_HELPER(double, __global, __builtin_store_half); -DECLARE_HELPER(double, __local, __builtin_store_half); -#endif - -#define VEC_STORE1(STYPE, AS, val, ROUNDF) \ - __clc_vstore_half_##STYPE##_helper##AS(ROUNDF(val), &mem[offset++]); - -#define VEC_STORE2(STYPE, AS, val, ROUNDF) \ - VEC_STORE1(STYPE, AS, val.lo, ROUNDF) \ - VEC_STORE1(STYPE, AS, val.hi, ROUNDF) -#define VEC_STORE3(STYPE, AS, val, ROUNDF) \ - VEC_STORE1(STYPE, AS, val.s0, ROUNDF) \ - VEC_STORE1(STYPE, AS, val.s1, ROUNDF) \ - VEC_STORE1(STYPE, AS, val.s2, ROUNDF) -#define VEC_STORE4(STYPE, AS, val, ROUNDF) \ - VEC_STORE2(STYPE, AS, val.lo, ROUNDF) \ - VEC_STORE2(STYPE, AS, val.hi, ROUNDF) -#define VEC_STORE8(STYPE, AS, val, ROUNDF) 
\ - VEC_STORE4(STYPE, AS, val.lo, ROUNDF) \ - VEC_STORE4(STYPE, AS, val.hi, ROUNDF) -#define VEC_STORE16(STYPE, AS, val, ROUNDF) \ - VEC_STORE8(STYPE, AS, val.lo, ROUNDF) \ - VEC_STORE8(STYPE, AS, val.hi, ROUNDF) - -#define __FUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS, ROUNDF) \ +#define VEC_STORE1(val, ROUNDF, BUILTIN) BUILTIN(ROUNDF(val), &mem[offset++]); + +#define VEC_STORE2(val, ROUNDF, BUILTIN) \ + VEC_STORE1(val.lo, ROUNDF, BUILTIN) \ + VEC_STORE1(val.hi, ROUNDF, BUILTIN) +#define VEC_STORE3(val, ROUNDF, BUILTIN) \ + VEC_STORE1(val.s0, ROUNDF, BUILTIN) \ + VEC_STORE1(val.s1, ROUNDF, BUILTIN) \ + VEC_STORE1(val.s2, ROUNDF, BUILTIN) +#define VEC_STORE4(val, ROUNDF, BUILTIN) \ + VEC_STORE2(val.lo, ROUNDF, BUILTIN) \ + VEC_STORE2(val.hi, ROUNDF, BUILTIN) +#define VEC_STORE8(val, ROUNDF, BUILTIN) \ + VEC_STORE4(val.lo, ROUNDF, BUILTIN) \ + VEC_STORE4(val.hi, ROUNDF, BUILTIN) +#define VEC_STORE16(val, ROUNDF, BUILTIN) \ + VEC_STORE8(val.lo, ROUNDF, BUILTIN) \ + VEC_STORE8(val.hi, ROUNDF, BUILTIN) + +#define __FUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, AS, ROUNDF, BUILTIN) \ _CLC_OVERLOAD _CLC_DEF void vstore_half##SUFFIX(TYPE vec, size_t offset, \ AS half *mem) { \ offset *= VEC_SIZE; \ - VEC_STORE##VEC_SIZE(STYPE, AS, vec, ROUNDF) \ + VEC_STORE##VEC_SIZE(vec, ROUNDF, BUILTIN) \ } \ _CLC_OVERLOAD _CLC_DEF void vstorea_half##SUFFIX(TYPE vec, size_t offset, \ AS half *mem) { \ offset *= OFFSET; \ - VEC_STORE##VEC_SIZE(STYPE, AS, vec, ROUNDF) \ + VEC_STORE##VEC_SIZE(vec, ROUNDF, BUILTIN) \ } _CLC_DEF _CLC_OVERLOAD float __clc_noop(float x) { return x; } @@ -246,15 +224,15 @@ _CLC_DEF _CLC_OVERLOAD double __clc_rte(double x) { } #endif -#define __XFUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS) \ - __FUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS, __clc_noop) \ - __FUNC(SUFFIX##_rtz, VEC_SIZE, OFFSET, TYPE, STYPE, AS, __clc_rtz) \ - __FUNC(SUFFIX##_rtn, VEC_SIZE, OFFSET, TYPE, STYPE, AS, __clc_rtn) \ - __FUNC(SUFFIX##_rtp, VEC_SIZE, OFFSET, TYPE, STYPE, AS, __clc_rtp) \ 
- __FUNC(SUFFIX##_rte, VEC_SIZE, OFFSET, TYPE, STYPE, AS, __clc_rte) +#define __XFUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, AS, BUILTIN) \ + __FUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, AS, __clc_noop, BUILTIN) \ + __FUNC(SUFFIX##_rtz, VEC_SIZE, OFFSET, TYPE, AS, __clc_rtz, BUILTIN) \ + __FUNC(SUFFIX##_rtn, VEC_SIZE, OFFSET, TYPE, AS, __clc_rtn, BUILTIN) \ + __FUNC(SUFFIX##_rtp, VEC_SIZE, OFFSET, TYPE, AS, __clc_rtp, BUILTIN) \ + __FUNC(SUFFIX##_rte, VEC_SIZE, OFFSET, TYPE, AS, __clc_rte, BUILTIN) -#define FUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS) \ - __XFUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS) +#define FUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, AS, BUILTIN) \ + __XFUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, AS, BUILTIN) #define __CLC_BODY "vstore_half.inc" #include <clc/math/gentype.inc> diff --git a/libclc/generic/lib/shared/vstore_half.inc b/libclc/generic/lib/shared/vstore_half.inc index 138c19ae78b3f..78f137f76e832 100644 --- a/libclc/generic/lib/shared/vstore_half.inc +++ b/libclc/generic/lib/shared/vstore_half.inc @@ -8,6 +8,15 @@ // This does not exist for fp16 #if __CLC_FPSIZE > 16 + +#if __CLC_FPSIZE == 32 +#define STORE_HALF_BUILTIN __builtin_store_halff +#elif __CLC_FPSIZE == 64 +#define STORE_HALF_BUILTIN __builtin_store_half +#else +#error "Invalid FP size" +#endif + #ifndef __CLC_SCALAR #if __CLC_VECSIZE == 3 @@ -16,17 +25,20 @@ #define __CLC_OFFSET __CLC_VECSIZE #endif -FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, - __CLC_SCALAR_GENTYPE, __private); -FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, - __CLC_SCALAR_GENTYPE, __local); -FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, - __CLC_SCALAR_GENTYPE, __global); +FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __private, + STORE_HALF_BUILTIN); +FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __local, + STORE_HALF_BUILTIN); +FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __global, + STORE_HALF_BUILTIN); #undef __CLC_OFFSET #else 
-FUNC(, 1, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __private); -FUNC(, 1, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __local); -FUNC(, 1, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __global); +FUNC(, 1, 1, __CLC_GENTYPE, __private, STORE_HALF_BUILTIN); +FUNC(, 1, 1, __CLC_GENTYPE, __local, STORE_HALF_BUILTIN); +FUNC(, 1, 1, __CLC_GENTYPE, __global, STORE_HALF_BUILTIN); #endif + +#undef STORE_HALF_BUILTIN + #endif diff --git a/libclc/ptx/lib/SOURCES_3.9 b/libclc/ptx/lib/SOURCES_3.9 deleted file mode 100644 index 69c5e5ce9fbac..0000000000000 --- a/libclc/ptx/lib/SOURCES_3.9 +++ /dev/null @@ -1,2 +0,0 @@ -shared/vload_half_helpers.ll -shared/vstore_half_helpers.ll diff --git a/libclc/ptx/lib/SOURCES_4.0 b/libclc/ptx/lib/SOURCES_4.0 deleted file mode 100644 index 69c5e5ce9fbac..0000000000000 --- a/libclc/ptx/lib/SOURCES_4.0 +++ /dev/null @@ -1,2 +0,0 @@ -shared/vload_half_helpers.ll -shared/vstore_half_helpers.ll diff --git a/libclc/ptx/lib/SOURCES_5.0 b/libclc/ptx/lib/SOURCES_5.0 deleted file mode 100644 index 69c5e5ce9fbac..0000000000000 --- a/libclc/ptx/lib/SOURCES_5.0 +++ /dev/null @@ -1,2 +0,0 @@ -shared/vload_half_helpers.ll -shared/vstore_half_helpers.ll diff --git a/libclc/ptx/lib/shared/vload_half_helpers.ll b/libclc/ptx/lib/shared/vload_half_helpers.ll deleted file mode 100644 index 5fbae19fde287..0000000000000 --- a/libclc/ptx/lib/shared/vload_half_helpers.ll +++ /dev/null @@ -1,31 +0,0 @@ -;;===----------------------------------------------------------------------===;; -; -; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -; See https://llvm.org/LICENSE.txt for license information. 
-; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -; -;;===----------------------------------------------------------------------===;; - -define float @__clc_vload_half_float_helper__private(half addrspace(0)* nocapture %ptr) nounwind alwaysinline { - %data = load half, half addrspace(0)* %ptr - %res = fpext half %data to float - ret float %res -} - -define float @__clc_vload_half_float_helper__global(half addrspace(1)* nocapture %ptr) nounwind alwaysinline { - %data = load half, half addrspace(1)* %ptr - %res = fpext half %data to float - ret float %res -} - -define float @__clc_vload_half_float_helper__local(half addrspace(3)* nocapture %ptr) nounwind alwaysinline { - %data = load half, half addrspace(3)* %ptr - %res = fpext half %data to float - ret float %res -} - -define float @__clc_vload_half_float_helper__constant(half addrspace(4)* nocapture %ptr) nounwind alwaysinline { - %data = load half, half addrspace(4)* %ptr - %res = fpext half %data to float - ret float %res -} diff --git a/libclc/ptx/lib/shared/vstore_half_helpers.ll b/libclc/ptx/lib/shared/vstore_half_helpers.ll deleted file mode 100644 index 5ec193d6d40a0..0000000000000 --- a/libclc/ptx/lib/shared/vstore_half_helpers.ll +++ /dev/null @@ -1,43 +0,0 @@ -;;===----------------------------------------------------------------------===;; -; -; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -; See https://llvm.org/LICENSE.txt for license information. 
-; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -; -;;===----------------------------------------------------------------------===;; - -define void @__clc_vstore_half_float_helper__private(float %data, half addrspace(0)* nocapture %ptr) nounwind alwaysinline { - %res = fptrunc float %data to half - store half %res, half addrspace(0)* %ptr - ret void -} - -define void @__clc_vstore_half_float_helper__global(float %data, half addrspace(1)* nocapture %ptr) nounwind alwaysinline { - %res = fptrunc float %data to half - store half %res, half addrspace(1)* %ptr - ret void -} - -define void @__clc_vstore_half_float_helper__local(float %data, half addrspace(3)* nocapture %ptr) nounwind alwaysinline { - %res = fptrunc float %data to half - store half %res, half addrspace(3)* %ptr - ret void -} - -define void @__clc_vstore_half_double_helper__private(double %data, half addrspace(0)* nocapture %ptr) nounwind alwaysinline { - %res = fptrunc double %data to half - store half %res, half addrspace(0)* %ptr - ret void -} - -define void @__clc_vstore_half_double_helper__global(double %data, half addrspace(1)* nocapture %ptr) nounwind alwaysinline { - %res = fptrunc double %data to half - store half %res, half addrspace(1)* %ptr - ret void -} - -define void @__clc_vstore_half_double_helper__local(double %data, half addrspace(3)* nocapture %ptr) nounwind alwaysinline { - %res = fptrunc double %data to half - store half %res, half addrspace(3)* %ptr - ret void -}