@@ -75,57 +75,35 @@ VSTORE_ADDR_SPACES(double)
7575VSTORE_ADDR_SPACES (half )
7676#endif
7777
/*
 * Removed side of the diff ('-' lines): the DECLARE_HELPER machinery.
 *
 * DECLARE_HELPER(STYPE, AS, builtin) produces a function named
 * __clc_vstore_half_<STYPE>_helper<AS> taking (STYPE, AS half *):
 *   - when __clang_major__ < 6 it only emits a prototype; the third macro
 *     argument is not used and no body is provided in this view;
 *   - otherwise it emits a _CLC_DEF definition whose body is a single call
 *     to the builtin passed as the third argument, forwarding (s, d).
 *
 * The helper is instantiated for float with __builtin_store_halff in the
 * __private, __global and __local address spaces, and, only when
 * cl_khr_fp64 is defined, for double with __builtin_store_half in the same
 * three address spaces.
 */
78- /* vstore_half are legal even without cl_khr_fp16 */
79- #if __clang_major__ < 6
80- #define DECLARE_HELPER (STYPE , AS , builtin ) \
81- void __clc_vstore_half_ ##STYPE ##_helper##AS(STYPE, AS half *);
82- #else
83- #define DECLARE_HELPER (STYPE , AS , __builtin ) \
84- _CLC_DEF void __clc_vstore_half_##STYPE##_helper##AS(STYPE s, AS half *d) { \
85- __builtin(s, d); \
86- }
87- #endif
88-
89- DECLARE_HELPER (float , __private , __builtin_store_halff );
90- DECLARE_HELPER (float , __global , __builtin_store_halff );
91- DECLARE_HELPER (float , __local , __builtin_store_halff );
92-
93- #ifdef cl_khr_fp64
94- DECLARE_HELPER (double , __private , __builtin_store_half );
95- DECLARE_HELPER (double , __global , __builtin_store_half );
96- DECLARE_HELPER (double , __local , __builtin_store_half );
97- #endif
98-
/*
 * Removed side: scalar store step. Applies ROUNDF to val and passes the
 * result, together with the address of mem[offset], to the helper
 * __clc_vstore_half_<STYPE>_helper<AS>. offset is post-incremented, so each
 * expansion advances to the next element. `mem` and `offset` are not macro
 * parameters; they must be in scope where the macro is expanded.
 */
99- #define VEC_STORE1 (STYPE , AS , val , ROUNDF ) \
100- __clc_vstore_half_##STYPE##_helper##AS(ROUNDF(val), &mem[offset++]);
101-
/*
 * Removed side: vector store steps for widths 2, 3, 4, 8 and 16.
 * Each width is expressed through narrower ones until VEC_STORE1 is reached:
 * widths 2/4/8/16 expand the .lo half first and then the .hi half, and
 * width 3 expands components .s0, .s1, .s2 in that order. STYPE, AS and
 * ROUNDF are forwarded unchanged at every level.
 */
102- #define VEC_STORE2 (STYPE , AS , val , ROUNDF ) \
103- VEC_STORE1(STYPE, AS, val.lo, ROUNDF) \
104- VEC_STORE1(STYPE, AS, val.hi, ROUNDF)
105- #define VEC_STORE3 (STYPE , AS , val , ROUNDF ) \
106- VEC_STORE1(STYPE, AS, val.s0, ROUNDF) \
107- VEC_STORE1(STYPE, AS, val.s1, ROUNDF) \
108- VEC_STORE1(STYPE, AS, val.s2, ROUNDF)
109- #define VEC_STORE4 (STYPE , AS , val , ROUNDF ) \
110- VEC_STORE2(STYPE, AS, val.lo, ROUNDF) \
111- VEC_STORE2(STYPE, AS, val.hi, ROUNDF)
112- #define VEC_STORE8 (STYPE , AS , val , ROUNDF ) \
113- VEC_STORE4(STYPE, AS, val.lo, ROUNDF) \
114- VEC_STORE4(STYPE, AS, val.hi, ROUNDF)
115- #define VEC_STORE16 (STYPE , AS , val , ROUNDF ) \
116- VEC_STORE8(STYPE, AS, val.lo, ROUNDF) \
117- VEC_STORE8(STYPE, AS, val.hi, ROUNDF)
118-
/*
 * Removed side: old parameter list of __FUNC, which carried STYPE. In this
 * diff rendering the macro body is not adjacent; it appears after the added
 * VEC_STORE macros, beneath the replacement __FUNC signature.
 */
119- #define __FUNC (SUFFIX , VEC_SIZE , OFFSET , TYPE , STYPE , AS , ROUNDF ) \
78+ #define VEC_STORE1 (val , ROUNDF , BUILTIN ) BUILTIN (ROUNDF (val ), & mem [offset ++ ]);
/*
 * Added side (documents the VEC_STORE1 definition directly above): scalar
 * store step. Applies ROUNDF to val and calls BUILTIN directly with the
 * result and the address of mem[offset]; offset is post-incremented so the
 * next expansion targets the following element. `mem` and `offset` are not
 * macro parameters and must be in scope at the expansion site. The
 * expansion already ends with a semicolon.
 */
79+
/*
 * Added side: vector store steps for widths 2, 3, 4, 8 and 16.
 * Each width is built from narrower ones down to VEC_STORE1: widths
 * 2/4/8/16 expand the .lo half first and then the .hi half, and width 3
 * expands components .s0, .s1, .s2 in that order. ROUNDF and BUILTIN are
 * forwarded unchanged at every level.
 */
80+ #define VEC_STORE2 (val , ROUNDF , BUILTIN ) \
81+ VEC_STORE1 (val .lo , ROUNDF , BUILTIN ) \
82+ VEC_STORE1 (val .hi , ROUNDF , BUILTIN )
83+ #define VEC_STORE3 (val , ROUNDF , BUILTIN ) \
84+ VEC_STORE1 (val .s0 , ROUNDF , BUILTIN ) \
85+ VEC_STORE1 (val .s1 , ROUNDF , BUILTIN ) \
86+ VEC_STORE1 (val .s2 , ROUNDF , BUILTIN )
87+ #define VEC_STORE4 (val , ROUNDF , BUILTIN ) \
88+ VEC_STORE2 (val .lo , ROUNDF , BUILTIN ) \
89+ VEC_STORE2 (val .hi , ROUNDF , BUILTIN )
90+ #define VEC_STORE8 (val , ROUNDF , BUILTIN ) \
91+ VEC_STORE4 (val .lo , ROUNDF , BUILTIN ) \
92+ VEC_STORE4 (val .hi , ROUNDF , BUILTIN )
93+ #define VEC_STORE16 (val , ROUNDF , BUILTIN ) \
94+ VEC_STORE8 (val .lo , ROUNDF , BUILTIN ) \
95+ VEC_STORE8 (val .hi , ROUNDF , BUILTIN )
96+
/*
 * __FUNC generates two overloaded functions for one (SUFFIX, TYPE, AS)
 * combination:
 *   - vstore_half<SUFFIX>(vec, offset, mem): multiplies offset by VEC_SIZE,
 *     then expands VEC_STORE<VEC_SIZE> to store the components of vec;
 *   - vstorea_half<SUFFIX>(vec, offset, mem): identical except that offset
 *     is multiplied by OFFSET instead of VEC_SIZE.
 * The first line is the added ('+') signature, which takes BUILTIN in place
 * of the old STYPE. Inside the body, each '-' line is the old VEC_STORE
 * invocation and the '+' line after it is its replacement.
 */
97+ #define __FUNC (SUFFIX , VEC_SIZE , OFFSET , TYPE , AS , ROUNDF , BUILTIN ) \
12098 _CLC_OVERLOAD _CLC_DEF void vstore_half ##SUFFIX (TYPE vec, size_t offset, \
12199 AS half *mem) { \
122100 offset *= VEC_SIZE; \
123- VEC_STORE##VEC_SIZE(STYPE, AS, vec, ROUNDF) \
101+ VEC_STORE##VEC_SIZE(vec, ROUNDF, BUILTIN) \
124102 } \
125103 _CLC_OVERLOAD _CLC_DEF void vstorea_half##SUFFIX(TYPE vec, size_t offset, \
126104 AS half *mem) { \
127105 offset *= OFFSET; \
128- VEC_STORE##VEC_SIZE(STYPE, AS, vec, ROUNDF) \
106+ VEC_STORE##VEC_SIZE(vec, ROUNDF, BUILTIN) \
129107 }
130108
/*
 * Identity function for float: returns x unchanged. __XFUNC passes it as
 * ROUNDF for the variant whose name carries no rounding-mode suffix.
 */
131109_CLC_DEF _CLC_OVERLOAD float __clc_noop (float x ) { return x ; }
@@ -246,15 +224,15 @@ _CLC_DEF _CLC_OVERLOAD double __clc_rte(double x) {
246224}
247225#endif
248226
/*
 * __XFUNC expands __FUNC five times for one (SUFFIX, VEC_SIZE, OFFSET, TYPE,
 * AS) combination, once per rounding variant:
 *   SUFFIX        -> __clc_noop
 *   SUFFIX##_rtz  -> __clc_rtz
 *   SUFFIX##_rtn  -> __clc_rtn
 *   SUFFIX##_rtp  -> __clc_rtp
 *   SUFFIX##_rte  -> __clc_rte
 * The first six lines ('-') are the old version, which forwarded STYPE; the
 * last six ('+') are the replacement, which drops STYPE and forwards BUILTIN
 * as the final __FUNC argument.
 */
249- #define __XFUNC (SUFFIX , VEC_SIZE , OFFSET , TYPE , STYPE , AS ) \
250- __FUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS, __clc_noop) \
251- __FUNC(SUFFIX##_rtz, VEC_SIZE, OFFSET, TYPE, STYPE, AS, __clc_rtz) \
252- __FUNC(SUFFIX##_rtn, VEC_SIZE, OFFSET, TYPE, STYPE, AS, __clc_rtn) \
253- __FUNC(SUFFIX##_rtp, VEC_SIZE, OFFSET, TYPE, STYPE, AS, __clc_rtp) \
254- __FUNC(SUFFIX##_rte, VEC_SIZE, OFFSET, TYPE, STYPE, AS, __clc_rte)
227+ #define __XFUNC (SUFFIX , VEC_SIZE , OFFSET , TYPE , AS , BUILTIN ) \
228+ __FUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, AS, __clc_noop, BUILTIN) \
229+ __FUNC(SUFFIX##_rtz, VEC_SIZE, OFFSET, TYPE, AS, __clc_rtz, BUILTIN) \
230+ __FUNC(SUFFIX##_rtn, VEC_SIZE, OFFSET, TYPE, AS, __clc_rtn, BUILTIN) \
231+ __FUNC(SUFFIX##_rtp, VEC_SIZE, OFFSET, TYPE, AS, __clc_rtp, BUILTIN) \
232+ __FUNC(SUFFIX##_rte, VEC_SIZE, OFFSET, TYPE, AS, __clc_rte, BUILTIN )
255233
/*
 * FUNC forwards all of its arguments to __XFUNC unchanged. The '-' pair is
 * the old six-argument form carrying STYPE; the '+' pair is the replacement
 * carrying BUILTIN instead.
 * NOTE(review): the extra level of indirection presumably lets macro
 * arguments be fully expanded before __XFUNC/__FUNC paste them with ## --
 * confirm against the callers in the included body file.
 */
256- #define FUNC (SUFFIX , VEC_SIZE , OFFSET , TYPE , STYPE , AS ) \
257- __XFUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS )
234+ #define FUNC (SUFFIX , VEC_SIZE , OFFSET , TYPE , AS , BUILTIN ) \
235+ __XFUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, AS, BUILTIN )
258236
/*
 * Sets __CLC_BODY to "vstore_half.inc" and then includes the generic-type
 * driver header.
 * NOTE(review): presumably gentype.inc includes __CLC_BODY once per
 * supported type, and vstore_half.inc is where FUNC gets invoked -- neither
 * file is visible here, so confirm.
 */
259237#define __CLC_BODY "vstore_half.inc"
260238#include <clc/math/gentype.inc>
0 commit comments