22#include <clc/integer/definitions.h>
33#include <clc/internal/clc.h>
44
// TODO: Replace with __clc_convert_<type> when available
//
// Converts the vector X to the vector type TY via __builtin_convertvector,
// which converts element by element and requires TY to have the same number
// of elements as X.
//
// The parameter list must follow the macro name with no intervening
// whitespace; otherwise the preprocessor defines an object-like macro and
// every use expands incorrectly.
#define __CLC_CONVERT_TY(X, TY) __builtin_convertvector(X, TY)
7+
// Defines a vector overload of __clc_mul_hi(x, y) that works by widening:
// both operands are converted to BGENTYPE, multiplied there, and the product
// is shifted right by GENSIZE before being converted back to GENTYPE.
//
//   BGENTYPE - vector type the multiplication is carried out in; presumably
//              its elements are at least twice as wide as GENTYPE's so the
//              full product fits (confirm against the instantiation list).
//   GENTYPE  - vector type of both operands and of the result.
//   GENSIZE  - shift count applied to every element of the product; callers
//              are expected to pass the bit width of one GENTYPE element.
//
// Note: the parameter list must touch the macro name. A space before '('
// would turn this into an object-like macro.
#define __CLC_MUL_HI_VEC_IMPL(BGENTYPE, GENTYPE, GENSIZE)                      \
  _CLC_OVERLOAD _CLC_DEF GENTYPE __clc_mul_hi(GENTYPE x, GENTYPE y) {          \
    /* Widen first so the multiply is performed in BGENTYPE. */                \
    BGENTYPE large_x = __CLC_CONVERT_TY(x, BGENTYPE);                          \
    BGENTYPE large_y = __CLC_CONVERT_TY(y, BGENTYPE);                          \
    /* Shift the product right by GENSIZE, then convert it back down. */       \
    BGENTYPE large_mul_hi = (large_x * large_y) >> (BGENTYPE)GENSIZE;          \
    return __CLC_CONVERT_TY(large_mul_hi, GENTYPE);                            \
  }
15+
516// For all types EXCEPT long, which is implemented separately
617#define __CLC_MUL_HI_IMPL (BGENTYPE , GENTYPE , GENSIZE ) \
718 _CLC_OVERLOAD _CLC_DEF GENTYPE __clc_mul_hi(GENTYPE x, GENTYPE y) { \
1425// (a+b) * (c+d) where a and c are the high-order parts of x and y respectively
1526// and b and d are the low-order parts of x and y.
1627// Thinking back to algebra, we use FOIL to do the work.
17-
1828_CLC_OVERLOAD _CLC_DEF long __clc_mul_hi (long x , long y ) {
1929 long f , o , i ;
2030 ulong l ;
@@ -92,7 +102,11 @@ _CLC_OVERLOAD _CLC_DEF ulong __clc_mul_hi(ulong x, ulong y) {
92102
// Emits every __clc_mul_hi overload for one element type: the scalar version
// through __CLC_MUL_HI_IMPL, followed by the 2-, 3-, 4-, 8- and 16-element
// vector versions through __CLC_MUL_HI_VEC_IMPL.
//
//   BTYPE - scalar type the product is computed in; the vector forms are
//           derived by pasting the element count onto it (BTYPE##2, ...).
//   TYPE  - scalar operand/result type; vector forms derived the same way.
//   BITS  - forwarded unchanged as the shift count of each implementation.
//
// Every line except the last needs a trailing backslash, and the parameter
// list must touch the macro name, or the definition silently changes meaning.
#define __CLC_MUL_HI_DEC_IMPL(BTYPE, TYPE, BITS)                               \
  __CLC_MUL_HI_IMPL(BTYPE, TYPE, BITS)                                         \
  __CLC_MUL_HI_VEC_IMPL(BTYPE##2, TYPE##2, BITS)                               \
  __CLC_MUL_HI_VEC_IMPL(BTYPE##3, TYPE##3, BITS)                               \
  __CLC_MUL_HI_VEC_IMPL(BTYPE##4, TYPE##4, BITS)                               \
  __CLC_MUL_HI_VEC_IMPL(BTYPE##8, TYPE##8, BITS)                               \
  __CLC_MUL_HI_VEC_IMPL(BTYPE##16, TYPE##16, BITS)
96110
97111#define __CLC_MUL_HI_TYPES () \
98112 __CLC_MUL_HI_DEC_IMPL(short, char, 8) \
@@ -110,4 +124,5 @@ __CLC_MUL_HI_TYPES()
110124#undef __CLC_MUL_HI_DEC_IMPL
111125#undef __CLC_MUL_HI_IMPL
112126#undef __CLC_MUL_HI_VEC
113- #undef __CLC_B32
127+ #undef __CLC_MUL_HI_VEC_IMPL
128+ #undef __CLC_CONVERT_TY
0 commit comments