@@ -32,7 +32,7 @@ IN THE SOFTWARE.
3232//*****************************************************************************/
3333
3434#define VLOAD_MACRO (addressSpace , scalarType , numElements , offsetType , mangle ) \
35- INLINE scalarType##numElements __builtin_spirv_OpenCL_vload## numElements##_##mangle(offsetType offset, const addressSpace scalarType *p) \
35+ INLINE scalarType##numElements SPIRV_OVERLOADABLE SPIRV_OCL_BUILTIN(vload, numElements##_##mangle, n_R##scalarType##numElements) (offsetType offset, addressSpace scalarType *p) \
3636{ \
3737 const addressSpace scalarType *pOffset = p + offset * numElements; \
3838 scalarType##numElements ret; \
@@ -49,16 +49,16 @@ INLINE void __builtin_spirv_OpenCL_vstore##numElements##_##mangle(scalarType##nu
4949}
5050
5151#define ELEM_ARG (addressSpace , scalarType , mang ) \
52- VLOAD_MACRO(addressSpace, scalarType, 2, ulong , i64_##mang) \
53- VLOAD_MACRO(addressSpace, scalarType, 2, uint , i32_##mang) \
54- VLOAD_MACRO(addressSpace, scalarType, 3, ulong , i64_##mang) \
55- VLOAD_MACRO(addressSpace, scalarType, 3, uint , i32_##mang) \
56- VLOAD_MACRO(addressSpace, scalarType, 4, ulong , i64_##mang) \
57- VLOAD_MACRO(addressSpace, scalarType, 4, uint , i32_##mang) \
58- VLOAD_MACRO(addressSpace, scalarType, 8, ulong , i64_##mang) \
59- VLOAD_MACRO(addressSpace, scalarType, 8, uint , i32_##mang) \
60- VLOAD_MACRO(addressSpace, scalarType, 16, ulong , i64_##mang) \
61- VLOAD_MACRO(addressSpace, scalarType, 16, uint , i32_##mang)
52+ VLOAD_MACRO(addressSpace, scalarType, 2, long , i64_##mang) \
53+ VLOAD_MACRO(addressSpace, scalarType, 2, int , i32_##mang) \
54+ VLOAD_MACRO(addressSpace, scalarType, 3, long , i64_##mang) \
55+ VLOAD_MACRO(addressSpace, scalarType, 3, int , i32_##mang) \
56+ VLOAD_MACRO(addressSpace, scalarType, 4, long , i64_##mang) \
57+ VLOAD_MACRO(addressSpace, scalarType, 4, int , i32_##mang) \
58+ VLOAD_MACRO(addressSpace, scalarType, 8, long , i64_##mang) \
59+ VLOAD_MACRO(addressSpace, scalarType, 8, int , i32_##mang) \
60+ VLOAD_MACRO(addressSpace, scalarType, 16, long , i64_##mang) \
61+ VLOAD_MACRO(addressSpace, scalarType, 16, int , i32_##mang)
6262
6363#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0 )
6464#define TYPE_ARG (TYPE , TYPEMANG ) \
@@ -75,10 +75,10 @@ ELEM_ARG(local, TYPE, p3##TYPEMANG) \
7575ELEM_ARG(private, TYPE, p0##TYPEMANG)
7676#endif // __OPENCL_C_VERSION__ >= CL_VERSION_2_0
7777
78- TYPE_ARG (uchar , i8 )
79- TYPE_ARG (ushort , i16 )
80- TYPE_ARG (uint , i32 )
81- TYPE_ARG (ulong , i64 )
78+ TYPE_ARG (char , i8 )
79+ TYPE_ARG (short , i16 )
80+ TYPE_ARG (int , i32 )
81+ TYPE_ARG (long , i64 )
8282TYPE_ARG (half , f16 )
8383TYPE_ARG (float , f32 )
8484#if defined(cl_khr_fp64 )
@@ -129,17 +129,17 @@ TYPE_ARG(double, f64)
129129//*****************************************************************************/
130130// vload macros
131131//*****************************************************************************/
132- static OVERLOADABLE float __intel_spirv_half2float (ushort h )
132+ static OVERLOADABLE float __intel_spirv_half2float (short h )
133133{
134134 return SPIRV_BUILTIN (FConvert , _f32_f16 , _Rfloat )(as_half (h ));
135135}
136136
137137#define VLOAD_SHORT (addressSpace , ASNUM ) \
138- INLINE static ushort __builtin_spirv_OpenCL_vload_i64_p ##ASNUM##i16(ulong offset, const addressSpace ushort * p) \
138+ INLINE static short SPIRV_OVERLOADABLE SPIRV_OCL_BUILTIN(vload, _i64_p ##ASNUM##i16, n_Rshort)(long offset, addressSpace short * p) \
139139{ \
140140 return *(p + offset); \
141141} \
142- INLINE static ushort __builtin_spirv_OpenCL_vload_i32_p ##ASNUM##i16(uint offset, const addressSpace ushort * p) \
142+ INLINE static short SPIRV_OVERLOADABLE SPIRV_OCL_BUILTIN(vload, _i32_p ##ASNUM##i16, n_Rshort)(int offset, addressSpace short * p) \
143143{ \
144144 return *(p + offset); \
145145}
@@ -152,55 +152,55 @@ VLOAD_SHORT(__local, 3)
152152VLOAD_SHORT (__constant , 2 )
153153VLOAD_SHORT (__private , 0 )
154154
155- GENERATE_VECTOR_FUNCTIONS_1ARG_NO_MANG (__intel_spirv_half2float , float , ushort )
155+ GENERATE_VECTOR_FUNCTIONS_1ARG_NO_MANG (__intel_spirv_half2float , float , short )
156156
157157// Two copies for the i32 and i64 size_t offsets.
158158#define __CLFN_DEF_F_VLOAD_SCALAR_HALF (addressSpace , ASNUM ) \
159- INLINE half __builtin_spirv_OpenCL_vload_i32_p ##ASNUM##f16(uint offset, const addressSpace half* p) { \
159+ INLINE half SPIRV_OVERLOADABLE SPIRV_OCL_BUILTIN(vload, _i32_p ##ASNUM##f16, _Rhalf)(int offset, addressSpace half* p) { \
160160 return *(p + offset); \
161161} \
162- INLINE half __builtin_spirv_OpenCL_vload_i64_p ##ASNUM##f16(ulong offset, const addressSpace half* p) { \
162+ INLINE half SPIRV_OVERLOADABLE SPIRV_OCL_BUILTIN(vload, _i64_p ##ASNUM##f16, _Rhalf)(long offset, addressSpace half* p) { \
163163 return *(p + offset); \
164164}
165165
166- #define __CLFN_DEF_F_VLOAD_HALFX (addressSpace , ASNUM , MANGSIZE , SIZETYPE , numElements ) \
167- INLINE float##numElements __builtin_spirv_OpenCL_vload_half## numElements##_##MANGSIZE##_p##ASNUM##f16(SIZETYPE offset, const addressSpace half* p) { \
168- return __intel_spirv_half2float(__builtin_spirv_OpenCL_vload## numElements##_##MANGSIZE##_p##ASNUM##i16(offset, (const addressSpace ushort *)p)); \
166+ #define __CLFN_DEF_F_VLOAD_HALFX (addressSpace , ASNUM , MANGSIZE , SIZETYPE , numElements , postfix ) \
167+ INLINE float##numElements SPIRV_OVERLOADABLE SPIRV_OCL_BUILTIN(vload_half, numElements##_##MANGSIZE##_p##ASNUM##f16, postfix##_Rfloat##numElements) (SIZETYPE offset, addressSpace half* p) { \
168+ return __intel_spirv_half2float(SPIRV_OCL_BUILTIN(vload, numElements##_##MANGSIZE##_p##ASNUM##i16, n_Rshort##numElements) (offset, (addressSpace short *)p)); \
169169}
170170
171171#define __CLFN_DEF_F_VLOADA_HALFX (addressSpace , ASNUM , MANGSIZE , SIZETYPE , step , numElements ) \
172- INLINE float##numElements __builtin_spirv_OpenCL_vloada_half## numElements##_##MANGSIZE##_p##ASNUM##f16(SIZETYPE offset, const addressSpace half* p) { \
173- const addressSpace ushort ##numElements* pHalf = (const addressSpace ushort ##numElements*)(p + offset * step); \
174- return __intel_spirv_half2float(*pHalf); \
172+ INLINE float##numElements SPIRV_OVERLOADABLE SPIRV_OCL_BUILTIN(vloada_half, numElements##_##MANGSIZE##_p##ASNUM##f16, n_Rfloat##numElements) (SIZETYPE offset, addressSpace half* p) { \
173+ const addressSpace short ##numElements* pHalf = (const addressSpace short ##numElements*)(p + offset * step); \
174+ return __intel_spirv_half2float(*pHalf); \
175175}
176176
177177#define __CLFN_DEF_F_VLOAD_HALFX_AS (addressSpace , ASNUM ) \
178- __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i64, ulong, ) \
179- __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i64, ulong , 2) \
180- __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i64, ulong , 3) \
181- __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i64, ulong , 4) \
182- __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i64, ulong , 8) \
183- __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i64, ulong , 16) \
184- __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i32, uint, ) \
185- __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i32, uint , 2) \
186- __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i32, uint , 3) \
187- __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i32, uint , 4) \
188- __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i32, uint , 8) \
189- __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i32, uint , 16)
190-
191- #define __CLFN_DEF_F_VLOADA_HALFX_AS (addressSpace , ASNUM ) \
192- __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i64, ulong , 1, ) \
193- __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i64, ulong , 2, 2) \
194- __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i64, ulong , 4, 3) \
195- __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i64, ulong , 4, 4) \
196- __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i64, ulong , 8, 8) \
197- __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i64, ulong , 16, 16) \
198- __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i32, uint , 1, ) \
199- __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i32, uint , 2, 2) \
200- __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i32, uint , 4, 3) \
201- __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i32, uint , 4, 4) \
202- __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i32, uint , 8, 8) \
203- __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i32, uint , 16, 16)
178+ __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i64, long, , ) \
179+ __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i64, long , 2, n) \
180+ __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i64, long , 3, n) \
181+ __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i64, long , 4, n) \
182+ __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i64, long , 8, n) \
183+ __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i64, long , 16, n) \
184+ __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i32, int, , ) \
185+ __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i32, int , 2, n) \
186+ __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i32, int , 3, n) \
187+ __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i32, int , 4, n) \
188+ __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i32, int , 8, n) \
189+ __CLFN_DEF_F_VLOAD_HALFX(addressSpace, ASNUM, i32, int , 16, n )
190+
191+ #define __CLFN_DEF_F_VLOADA_HALFX_AS (addressSpace , ASNUM ) \
192+ __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i64, long , 1, ) \
193+ __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i64, long , 2, 2) \
194+ __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i64, long , 4, 3) \
195+ __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i64, long , 4, 4) \
196+ __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i64, long , 8, 8) \
197+ __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i64, long , 16, 16) \
198+ __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i32, int , 1, ) \
199+ __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i32, int , 2, 2) \
200+ __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i32, int , 4, 3) \
201+ __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i32, int , 4, 4) \
202+ __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i32, int , 8, 8) \
203+ __CLFN_DEF_F_VLOADA_HALFX(addressSpace, ASNUM, i32, int , 16, 16)
204204
205205#if (__OPENCL_C_VERSION__ >= CL_VERSION_2_0 )
206206#define __CLFN_DEF_F_VLOAD_HALF_ALL () \
0 commit comments