Skip to content

Commit 00b13c4

Browse files
authored
[NFC][libclc] Replace _CLC_V_V_VP_VECTORIZE macro with use of unary_def_with_ptr_scalarize.inc (#157002)
Commit d50f2ef removed the _CLC_V_V_VP_VECTORIZE macro from the header file, but the macro is still used in our downstream code (https://github.com/intel/llvm/blob/0433e4d6f5c9/libclc/libspirv/lib/ptx-nvidiacl/math/modf.cl#L30 and https://github.com/intel/llvm/blob/0433e4d6f5c9/libclc/libspirv/lib/ptx-nvidiacl/math/sincos.cl#L31). We can either revert d50f2ef or replace the macro with the use of unary_def_with_ptr_scalarize.inc. This PR takes the latter approach.
1 parent c55708c commit 00b13c4

File tree

2 files changed

+115
-62
lines changed

2 files changed

+115
-62
lines changed
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <clc/utils.h>
10+
11+
#ifdef __CLC_SCALAR
12+
13+
#ifndef __CLC_IMPL_FUNCTION
14+
#define __CLC_IMPL_FUNCTION __CLC_FUNCTION
15+
#endif
16+
17+
#ifndef __CLC_RET_TYPE
18+
#define __CLC_RET_TYPE __CLC_GENTYPE
19+
#endif
20+
21+
#ifndef __CLC_ARG1_TYPE
22+
#define __CLC_ARG1_TYPE __CLC_GENTYPE
23+
#endif
24+
25+
#ifndef __CLC_ARG2_TYPE
26+
#define __CLC_ARG2_TYPE __CLC_GENTYPE
27+
#endif
28+
29+
#define __CLC_RET_VECTYPE __CLC_XCONCAT(__CLC_RET_TYPE, __CLC_VECTOR_SIZE)
30+
#define __CLC_ARG1_VECTYPE __CLC_XCONCAT(__CLC_ARG1_TYPE, __CLC_VECTOR_SIZE)
31+
#define __CLC_ARG2_VECTYPE __CLC_XCONCAT(__CLC_ARG2_TYPE, __CLC_VECTOR_SIZE)
32+
33+
#define __CLC_VECTOR_SIZE 2
34+
_CLC_OVERLOAD _CLC_DEF __CLC_RET_VECTYPE
35+
__CLC_FUNCTION(__CLC_ARG1_VECTYPE x, __CLC_ADDRSPACE __CLC_ARG2_VECTYPE *ptr) {
36+
__CLC_ADDRSPACE __CLC_ARG2_TYPE *p = (__CLC_ADDRSPACE __CLC_ARG2_TYPE *)ptr;
37+
38+
return (__CLC_RET_VECTYPE)(__CLC_IMPL_FUNCTION(x.s0, p),
39+
__CLC_IMPL_FUNCTION(x.s1, p + 1));
40+
}
41+
#undef __CLC_VECTOR_SIZE
42+
43+
#define __CLC_VECTOR_SIZE 3
44+
_CLC_OVERLOAD _CLC_DEF __CLC_RET_VECTYPE
45+
__CLC_FUNCTION(__CLC_ARG1_VECTYPE x, __CLC_ADDRSPACE __CLC_ARG2_VECTYPE *ptr) {
46+
__CLC_ADDRSPACE __CLC_ARG2_TYPE *p = (__CLC_ADDRSPACE __CLC_ARG2_TYPE *)ptr;
47+
return (__CLC_RET_VECTYPE)(__CLC_IMPL_FUNCTION(x.s0, p),
48+
__CLC_IMPL_FUNCTION(x.s1, p + 1),
49+
__CLC_IMPL_FUNCTION(x.s2, p + 2));
50+
}
51+
#undef __CLC_VECTOR_SIZE
52+
53+
#define __CLC_VECTOR_SIZE 4
54+
_CLC_OVERLOAD _CLC_DEF __CLC_RET_VECTYPE
55+
__CLC_FUNCTION(__CLC_ARG1_VECTYPE x, __CLC_ADDRSPACE __CLC_ARG2_VECTYPE *ptr) {
56+
__CLC_ADDRSPACE __CLC_ARG2_TYPE *p = (__CLC_ADDRSPACE __CLC_ARG2_TYPE *)ptr;
57+
return (__CLC_RET_VECTYPE)(__CLC_IMPL_FUNCTION(x.s0, p),
58+
__CLC_IMPL_FUNCTION(x.s1, p + 1),
59+
__CLC_IMPL_FUNCTION(x.s2, p + 2),
60+
__CLC_IMPL_FUNCTION(x.s3, p + 3));
61+
}
62+
#undef __CLC_VECTOR_SIZE
63+
64+
#define __CLC_VECTOR_SIZE 8
65+
_CLC_OVERLOAD _CLC_DEF __CLC_RET_VECTYPE
66+
__CLC_FUNCTION(__CLC_ARG1_VECTYPE x, __CLC_ADDRSPACE __CLC_ARG2_VECTYPE *ptr) {
67+
__CLC_ADDRSPACE __CLC_ARG2_TYPE *p = (__CLC_ADDRSPACE __CLC_ARG2_TYPE *)ptr;
68+
return (__CLC_RET_VECTYPE)(__CLC_IMPL_FUNCTION(x.s0, p),
69+
__CLC_IMPL_FUNCTION(x.s1, p + 1),
70+
__CLC_IMPL_FUNCTION(x.s2, p + 2),
71+
__CLC_IMPL_FUNCTION(x.s3, p + 3),
72+
__CLC_IMPL_FUNCTION(x.s4, p + 4),
73+
__CLC_IMPL_FUNCTION(x.s5, p + 5),
74+
__CLC_IMPL_FUNCTION(x.s6, p + 6),
75+
__CLC_IMPL_FUNCTION(x.s7, p + 7));
76+
}
77+
#undef __CLC_VECTOR_SIZE
78+
79+
#define __CLC_VECTOR_SIZE 16
80+
_CLC_OVERLOAD _CLC_DEF __CLC_RET_VECTYPE
81+
__CLC_FUNCTION(__CLC_ARG1_VECTYPE x, __CLC_ADDRSPACE __CLC_ARG2_VECTYPE *ptr) {
82+
__CLC_ADDRSPACE __CLC_ARG2_TYPE *p = (__CLC_ADDRSPACE __CLC_ARG2_TYPE *)ptr;
83+
return (__CLC_RET_VECTYPE)(__CLC_IMPL_FUNCTION(x.s0, p),
84+
__CLC_IMPL_FUNCTION(x.s1, p + 1),
85+
__CLC_IMPL_FUNCTION(x.s2, p + 2),
86+
__CLC_IMPL_FUNCTION(x.s3, p + 3),
87+
__CLC_IMPL_FUNCTION(x.s4, p + 4),
88+
__CLC_IMPL_FUNCTION(x.s5, p + 5),
89+
__CLC_IMPL_FUNCTION(x.s6, p + 6),
90+
__CLC_IMPL_FUNCTION(x.s7, p + 7),
91+
__CLC_IMPL_FUNCTION(x.s8, p + 8),
92+
__CLC_IMPL_FUNCTION(x.s9, p + 9),
93+
__CLC_IMPL_FUNCTION(x.sa, p + 10),
94+
__CLC_IMPL_FUNCTION(x.sb, p + 11),
95+
__CLC_IMPL_FUNCTION(x.sc, p + 12),
96+
__CLC_IMPL_FUNCTION(x.sd, p + 13),
97+
__CLC_IMPL_FUNCTION(x.se, p + 14),
98+
__CLC_IMPL_FUNCTION(x.sf, p + 15));
99+
}
100+
#undef __CLC_VECTOR_SIZE
101+
102+
#undef __CLC_RET_VECTYPE
103+
#undef __CLC_ARG1_VECTYPE
104+
#undef __CLC_ARG2_VECTYPE
105+
106+
#endif // __CLC_SCALAR

libclc/clc/lib/generic/math/clc_lgamma_r.cl

Lines changed: 9 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -16,60 +16,6 @@
1616
#include <clc/math/clc_sinpi.h>
1717
#include <clc/math/math.h>
1818

19-
#define _CLC_V_V_VP_VECTORIZE(DECLSPEC, RET_TYPE, __CLC_FUNCTION, ARG1_TYPE, \
20-
ADDR_SPACE, ARG2_TYPE) \
21-
DECLSPEC __CLC_XCONCAT(RET_TYPE, 2) \
22-
__CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 2) x, \
23-
ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 2) * y) { \
24-
ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \
25-
return (__CLC_XCONCAT(RET_TYPE, 2))(__CLC_FUNCTION(x.s0, ptr), \
26-
__CLC_FUNCTION(x.s1, ptr + 1)); \
27-
} \
28-
\
29-
DECLSPEC __CLC_XCONCAT(RET_TYPE, 3) \
30-
__CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 3) x, \
31-
ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 3) * y) { \
32-
ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \
33-
return (__CLC_XCONCAT(RET_TYPE, 3))(__CLC_FUNCTION(x.s0, ptr), \
34-
__CLC_FUNCTION(x.s1, ptr + 1), \
35-
__CLC_FUNCTION(x.s2, ptr + 2)); \
36-
} \
37-
\
38-
DECLSPEC __CLC_XCONCAT(RET_TYPE, 4) \
39-
__CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 4) x, \
40-
ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 4) * y) { \
41-
ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \
42-
return (__CLC_XCONCAT(RET_TYPE, 4))( \
43-
__CLC_FUNCTION(x.s0, ptr), __CLC_FUNCTION(x.s1, ptr + 1), \
44-
__CLC_FUNCTION(x.s2, ptr + 2), __CLC_FUNCTION(x.s3, ptr + 3)); \
45-
} \
46-
\
47-
DECLSPEC __CLC_XCONCAT(RET_TYPE, 8) \
48-
__CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 8) x, \
49-
ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 8) * y) { \
50-
ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \
51-
return (__CLC_XCONCAT(RET_TYPE, 8))( \
52-
__CLC_FUNCTION(x.s0, ptr), __CLC_FUNCTION(x.s1, ptr + 1), \
53-
__CLC_FUNCTION(x.s2, ptr + 2), __CLC_FUNCTION(x.s3, ptr + 3), \
54-
__CLC_FUNCTION(x.s4, ptr + 4), __CLC_FUNCTION(x.s5, ptr + 5), \
55-
__CLC_FUNCTION(x.s6, ptr + 6), __CLC_FUNCTION(x.s7, ptr + 7)); \
56-
} \
57-
\
58-
DECLSPEC __CLC_XCONCAT(RET_TYPE, 16) \
59-
__CLC_FUNCTION(__CLC_XCONCAT(ARG1_TYPE, 16) x, \
60-
ADDR_SPACE __CLC_XCONCAT(ARG2_TYPE, 16) * y) { \
61-
ADDR_SPACE ARG2_TYPE *ptr = (ADDR_SPACE ARG2_TYPE *)y; \
62-
return (__CLC_XCONCAT(RET_TYPE, 16))( \
63-
__CLC_FUNCTION(x.s0, ptr), __CLC_FUNCTION(x.s1, ptr + 1), \
64-
__CLC_FUNCTION(x.s2, ptr + 2), __CLC_FUNCTION(x.s3, ptr + 3), \
65-
__CLC_FUNCTION(x.s4, ptr + 4), __CLC_FUNCTION(x.s5, ptr + 5), \
66-
__CLC_FUNCTION(x.s6, ptr + 6), __CLC_FUNCTION(x.s7, ptr + 7), \
67-
__CLC_FUNCTION(x.s8, ptr + 8), __CLC_FUNCTION(x.s9, ptr + 9), \
68-
__CLC_FUNCTION(x.sa, ptr + 10), __CLC_FUNCTION(x.sb, ptr + 11), \
69-
__CLC_FUNCTION(x.sc, ptr + 12), __CLC_FUNCTION(x.sd, ptr + 13), \
70-
__CLC_FUNCTION(x.se, ptr + 14), __CLC_FUNCTION(x.sf, ptr + 15)); \
71-
}
72-
7319
// ====================================================
7420
// Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
7521
//
@@ -333,9 +279,6 @@ _CLC_OVERLOAD _CLC_DEF float __clc_lgamma_r(float x, private int *signp) {
333279
return r;
334280
}
335281

336-
_CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __clc_lgamma_r, float,
337-
private, int)
338-
339282
#ifdef cl_khr_fp64
340283
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
341284
// ====================================================
@@ -639,8 +582,6 @@ _CLC_OVERLOAD _CLC_DEF double __clc_lgamma_r(double x, private int *ip) {
639582
return r;
640583
}
641584

642-
_CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, __clc_lgamma_r, double,
643-
private, int)
644585
#endif
645586

646587
#ifdef cl_khr_fp16
@@ -651,11 +592,17 @@ _CLC_OVERLOAD _CLC_DEF half __clc_lgamma_r(half x, private int *iptr) {
651592
return (half)__clc_lgamma_r((float)x, iptr);
652593
}
653594

654-
_CLC_V_V_VP_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __clc_lgamma_r, half,
655-
private, int);
656-
657595
#endif
658596

597+
#define __CLC_FUNCTION __clc_lgamma_r
598+
#define __CLC_ARG2_TYPE int
599+
#define __CLC_ADDRSPACE private
600+
#define __CLC_BODY <clc/shared/unary_def_with_ptr_scalarize.inc>
601+
#include <clc/math/gentype.inc>
602+
#undef __CLC_ADDRSPACE
603+
#undef __CLC_ARG2_TYPE
604+
#undef __CLC_FUNCTION
605+
659606
#define __CLC_ADDRSPACE global
660607
#define __CLC_BODY <clc_lgamma_r.inc>
661608
#include <clc/math/gentype.inc>

0 commit comments

Comments
 (0)