Skip to content

Commit e4634b0

Browse files
committed
Added Scope parameter to declarations and removed calls to non-existent entry points.
Moved xteamr and xteamr_extended from smoke to smoke-dev since the APIs changed.
1 parent e6ad65a commit e4634b0

File tree

10 files changed

+447
-1626
lines changed

10 files changed

+447
-1626
lines changed
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

test/smoke/xteamr/test_xteamr.h renamed to test/smoke-dev/xteamr/test_xteamr.h

Lines changed: 32 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -18,67 +18,67 @@ extern "C" {
1818
void _INLINE_ATTR_ __kmpc_xteamr_d_16x64
1919
(double v, double *r_ptr, double *tvs, uint32_t *td, void (*_rf)(double *, double),
2020
void (*_rf_lds)(_RF_LDS double *, _RF_LDS double *), const double iv,
21-
const uint64_t k, const uint32_t numteams);
21+
const uint64_t k, const uint32_t numteams, int32_t Scope = 1 /* device */);
2222
void _INLINE_ATTR_ __kmpc_xteamr_f_16x64
2323
(float v, float *r_ptr, float *tvs, uint32_t *td, void (*_rf)(float *, float),
2424
void (*_rf_lds)(_RF_LDS float *, _RF_LDS float *), const float iv,
25-
const uint64_t k, const uint32_t numteams);
25+
const uint64_t k, const uint32_t numteams, int32_t Scope = 1 /* device */);
2626
void _INLINE_ATTR_ __kmpc_xteamr_cd_16x64
2727
(_CD v, _CD *r_ptr, _CD *tvs, uint32_t *td, void (*_rf)(_CD *, _CD),
2828
void (*_rf_lds)(_RF_LDS _CD *, _RF_LDS _CD *), const _CD iv,
29-
const uint64_t k, const uint32_t numteams);
29+
const uint64_t k, const uint32_t numteams, int32_t Scope = 1 /* device */);
3030
void _INLINE_ATTR_ __kmpc_xteamr_cf_16x64
3131
(_CF v, _CF *r_ptr, _CF *tvs, uint32_t *td, void (*_rf)(_CF *, _CF),
3232
void (*_rf_lds)(_RF_LDS _CF *, _RF_LDS _CF *), const _CF iv,
33-
const uint64_t k, const uint32_t numteams);
33+
const uint64_t k, const uint32_t numteams, int32_t Scope = 1 /* device */);
3434
void _INLINE_ATTR_ __kmpc_xteamr_i_16x64
3535
(int v, int *r_ptr, int *tvs, uint32_t *td, void (*_rf)(int *, int),
3636
void (*_rf_lds)(_RF_LDS int *, _RF_LDS int *), const int iv,
37-
const uint64_t k, const uint32_t numteams);
37+
const uint64_t k, const uint32_t numteams, int32_t Scope = 1 /* device */);
3838
void _INLINE_ATTR_ __kmpc_xteamr_ui_16x64
3939
(_UI v, _UI *r_ptr, _UI *tvs, uint32_t *td, void (*_rf)(_UI *, _UI),
4040
void (*_rf_lds)(_RF_LDS _UI *, _RF_LDS _UI *), const _UI iv,
41-
const uint64_t k, const uint32_t numteams);
41+
const uint64_t k, const uint32_t numteams, int32_t Scope = 1 /* device */);
4242
void _INLINE_ATTR_ __kmpc_xteamr_l_16x64
4343
(long v, long *r_ptr, long *tvs, uint32_t *td, void (*_rf)(long *, long),
4444
void (*_rf_lds)(_RF_LDS long *, _RF_LDS long *), const long iv,
45-
const uint64_t k, const uint32_t numteams);
45+
const uint64_t k, const uint32_t numteams, int32_t Scope = 1 /* device */);
4646
void _INLINE_ATTR_ __kmpc_xteamr_ul_16x64
4747
(_UL v, _UL *r_ptr, _UL *tvs, uint32_t *td, void (*_rf)(_UL *, _UL),
4848
void (*_rf_lds)(_RF_LDS _UL *, _RF_LDS _UL *), const _UL iv,
49-
const uint64_t k, const uint32_t numteams);
49+
const uint64_t k, const uint32_t numteams, int32_t Scope = 1 /* device */);
5050
void _INLINE_ATTR_ __kmpc_xteamr_d_32x32
5151
(double v, double *r_ptr, double *tvs, uint32_t *td, void (*_rf)(double *, double),
5252
void (*_rf_lds)(_RF_LDS double *, _RF_LDS double *), const double iv,
53-
const uint64_t k, const uint32_t numteams);
53+
const uint64_t k, const uint32_t numteams, int32_t Scope = 1 /* device */);
5454
void _INLINE_ATTR_ __kmpc_xteamr_f_32x32
5555
(float v, float *r_ptr, float *tvs, uint32_t *td, void (*_rf)(float *, float),
5656
void (*_rf_lds)(_RF_LDS float *, _RF_LDS float *), const float iv,
57-
const uint64_t k, const uint32_t numteams);
57+
const uint64_t k, const uint32_t numteams, int32_t Scope = 1 /* device */);
5858
void _INLINE_ATTR_ __kmpc_xteamr_cd_32x32
5959
(_CD v, _CD *r_ptr, _CD *tvs, uint32_t *td, void (*_rf)(_CD *, _CD),
6060
void (*_rf_lds)(_RF_LDS _CD *, _RF_LDS _CD *), const _CD iv,
61-
const uint64_t k, const uint32_t numteams);
61+
const uint64_t k, const uint32_t numteams, int32_t Scope = 1 /* device */);
6262
void _INLINE_ATTR_ __kmpc_xteamr_cf_32x32
6363
(_CF v, _CF *r_ptr, _CF *tvs, uint32_t *td, void (*_rf)(_CF *, _CF),
6464
void (*_rf_lds)(_RF_LDS _CF *, _RF_LDS _CF *), const _CF iv,
65-
const uint64_t k, const uint32_t numteams);
65+
const uint64_t k, const uint32_t numteams, int32_t Scope = 1 /* device */);
6666
void _INLINE_ATTR_ __kmpc_xteamr_i_32x32
6767
(int v, int *r_ptr, int *tvs, uint32_t *td, void (*_rf)(int *, int),
6868
void (*_rf_lds)(_RF_LDS int *, _RF_LDS int *), const int iv,
69-
const uint64_t k, const uint32_t numteams);
69+
const uint64_t k, const uint32_t numteams, int32_t Scope = 1 /* device */);
7070
void _INLINE_ATTR_ __kmpc_xteamr_ui_32x32
7171
(_UI v, _UI *r_ptr, _UI *tvs, uint32_t *td, void (*_rf)(_UI *, _UI),
7272
void (*_rf_lds)(_RF_LDS _UI *, _RF_LDS _UI *), const _UI iv,
73-
const uint64_t k, const uint32_t numteams);
73+
const uint64_t k, const uint32_t numteams, int32_t Scope = 1 /* device */);
7474
void _INLINE_ATTR_ __kmpc_xteamr_l_32x32
7575
(long v, long *r_ptr, long *tvs, uint32_t *td, void (*_rf)(long *, long),
7676
void (*_rf_lds)(_RF_LDS long *, _RF_LDS long *), const long iv,
77-
const uint64_t k, const uint32_t numteams);
77+
const uint64_t k, const uint32_t numteams, int32_t Scope = 1 /* device */);
7878
void _INLINE_ATTR_ __kmpc_xteamr_ul_32x32
7979
(_UL v, _UL *r_ptr, _UL *tvs, uint32_t *td, void (*_rf)(_UL *, _UL),
8080
void (*_rf_lds)(_RF_LDS _UL *, _RF_LDS _UL *), const _UL iv,
81-
const uint64_t k, const uint32_t numteams);
81+
const uint64_t k, const uint32_t numteams, int32_t Scope = 1 /* device */);
8282
void __kmpc_rfun_sum_d(double *val, double otherval);
8383
void __kmpc_rfun_sum_lds_d(_RF_LDS double *val, _RF_LDS double *otherval);
8484
void __kmpc_rfun_sum_f(float *val, float otherval);
@@ -133,67 +133,67 @@ extern "C" {
133133
void __kmpc_xteamr_d_16x64
134134
(double v, double *r_ptr, double *tvs, uint32_t *td, void (*_rf)(double *, double),
135135
void (*_rf_lds)(_RF_LDS double *, _RF_LDS double *), const double iv,
136-
const uint64_t k, const uint32_t numteams){};
136+
const uint64_t k, const uint32_t numteams, int32_t Scope = 1 /* device */){};
137137
void __kmpc_xteamr_f_16x64
138138
(float v, float *r_ptr, float *tvs, uint32_t *td, void (*_rf)(float *, float),
139139
void (*_rf_lds)(_RF_LDS float *, _RF_LDS float *), const float iv,
140-
const uint64_t k, const uint32_t numteams){};
140+
const uint64_t k, const uint32_t numteams, int32_t Scope = 1 /* device */){};
141141
void __kmpc_xteamr_cd_16x64
142142
(_CD v, _CD *r_ptr, _CD *tvs, uint32_t *td, void (*_rf)(_CD *, _CD),
143143
void (*_rf_lds)(_RF_LDS _CD *, _RF_LDS _CD *), const _CD iv,
144-
const uint64_t k, const uint32_t numteams){};
144+
const uint64_t k, const uint32_t numteams, int32_t Scope = 1 /* device */){};
145145
void __kmpc_xteamr_cf_16x64
146146
(_CF v, _CF *r_ptr, _CF *tvs, uint32_t *td, void (*_rf)(_CF *, _CF),
147147
void (*_rf_lds)(_RF_LDS _CF *, _RF_LDS _CF *), const _CF iv,
148-
const uint64_t k, const uint32_t numteams){};
148+
const uint64_t k, const uint32_t numteams, int32_t Scope = 1 /* device */){};
149149
void __kmpc_xteamr_i_16x64
150150
(int v, int *r_ptr, int *tvs, uint32_t *td, void (*_rf)(int *, int),
151151
void (*_rf_lds)(_RF_LDS int *, _RF_LDS int *), const int iv,
152-
const uint64_t k, const uint32_t numteams){};
152+
const uint64_t k, const uint32_t numteams, int32_t Scope = 1 /* device */){};
153153
void __kmpc_xteamr_ui_16x64
154154
(_UI v, _UI *r_ptr, _UI *tvs, uint32_t *td, void (*_rf)(_UI *, _UI),
155155
void (*_rf_lds)(_RF_LDS _UI *, _RF_LDS _UI *), const _UI iv,
156-
const uint64_t k, const uint32_t numteams){};
156+
const uint64_t k, const uint32_t numteams, int32_t Scope = 1 /* device */){};
157157
void __kmpc_xteamr_l_16x64
158158
(long v, long *r_ptr, long *tvs, uint32_t *td, void (*_rf)(long *, long),
159159
void (*_rf_lds)(_RF_LDS long *, _RF_LDS long *), const long iv,
160-
const uint64_t k, const uint32_t numteams){};
160+
const uint64_t k, const uint32_t numteams, int32_t Scope = 1 /* device */){};
161161
void __kmpc_xteamr_ul_16x64
162162
(_UL v, _UL *r_ptr, _UL *tvs, uint32_t *td, void (*_rf)(_UL *, _UL),
163163
void (*_rf_lds)(_RF_LDS _UL *, _RF_LDS _UL *), const _UL iv,
164-
const uint64_t k, const uint32_t numteams){};
164+
const uint64_t k, const uint32_t numteams, int32_t Scope = 1 /* device */){};
165165
void __kmpc_xteamr_d_32x32
166166
(double v, double *r_ptr, double *tvs, uint32_t *td, void (*_rf)(double *, double),
167167
void (*_rf_lds)(_RF_LDS double *, _RF_LDS double *), const double iv,
168-
const uint64_t k, const uint32_t numteams){};
168+
const uint64_t k, const uint32_t numteams, int32_t Scope = 1 /* device */){};
169169
void __kmpc_xteamr_f_32x32
170170
(float v, float *r_ptr, float *tvs, uint32_t *td, void (*_rf)(float *, float),
171171
void (*_rf_lds)(_RF_LDS float *, _RF_LDS float *), const float iv,
172-
const uint64_t k, const uint32_t numteams){};
172+
const uint64_t k, const uint32_t numteams, int32_t Scope = 1 /* device */){};
173173
void __kmpc_xteamr_cd_32x32
174174
(_CD v, _CD *r_ptr, _CD *tvs, uint32_t *td, void (*_rf)(_CD *, _CD),
175175
void (*_rf_lds)(_RF_LDS _CD *, _RF_LDS _CD *), const _CD iv,
176-
const uint64_t k, const uint32_t numteams){};
176+
const uint64_t k, const uint32_t numteams, int32_t Scope = 1 /* device */){};
177177
void __kmpc_xteamr_cf_32x32
178178
(_CF v, _CF *r_ptr, _CF *tvs, uint32_t *td, void (*_rf)(_CF *, _CF),
179179
void (*_rf_lds)(_RF_LDS _CF *, _RF_LDS _CF *), const _CF iv,
180-
const uint64_t k, const uint32_t numteams){};
180+
const uint64_t k, const uint32_t numteams, int32_t Scope = 1 /* device */){};
181181
void __kmpc_xteamr_i_32x32
182182
(int v, int *r_ptr, int *tvs, uint32_t *td, void (*_rf)(int *, int),
183183
void (*_rf_lds)(_RF_LDS int *, _RF_LDS int *), const int iv,
184-
const uint64_t k, const uint32_t numteams){};
184+
const uint64_t k, const uint32_t numteams, int32_t Scope = 1 /* device */){};
185185
void __kmpc_xteamr_ui_32x32
186186
(_UI v, _UI *r_ptr, _UI *tvs, uint32_t *td, void (*_rf)(_UI *, _UI),
187187
void (*_rf_lds)(_RF_LDS _UI *, _RF_LDS _UI *), const _UI iv,
188-
const uint64_t k, const uint32_t numteams){};
188+
const uint64_t k, const uint32_t numteams, int32_t Scope = 1 /* device */){};
189189
void __kmpc_xteamr_l_32x32
190190
(long v, long *r_ptr, long *tvs, uint32_t *td, void (*_rf)(long *, long),
191191
void (*_rf_lds)(_RF_LDS long *, _RF_LDS long *), const long iv,
192-
const uint64_t k, const uint32_t numteams){};
192+
const uint64_t k, const uint32_t numteams, int32_t Scope = 1 /* device */){};
193193
void __kmpc_xteamr_ul_32x32
194194
(_UL v, _UL *r_ptr, _UL *tvs, uint32_t *td, void (*_rf)(_UL *, _UL),
195195
void (*_rf_lds)(_RF_LDS _UL *, _RF_LDS _UL *), const _UL iv,
196-
const uint64_t k, const uint32_t numteams){};
196+
const uint64_t k, const uint32_t numteams, int32_t Scope = 1 /* device */){};
197197
void __kmpc_rfun_sum_d(double *val, double otherval){}
198198
void __kmpc_rfun_sum_lds_d(_RF_LDS double *val, _RF_LDS double *otherval){}
199199
void __kmpc_rfun_sum_f(float *val, float otherval){}
File renamed without changes.
File renamed without changes.

test/smoke/xteamr_extended/test_xteamr.cpp renamed to test/smoke-dev/xteamr_extended/test_xteamr.cpp

Lines changed: 3 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -58,43 +58,16 @@ unsigned int ignore_times = 2; // ignore this many timings first
5858
#define _XTEAM_NUM_TEAMS 104
5959
#endif
6060

61-
#if _XTEAM_NUM_THREADS == 1024
61+
#if (_XTEAM_NUM_THREADS <= 1024) && \
62+
((_XTEAM_NUM_THREADS & (_XTEAM_NUM_THREADS - 1)) == 0)
6263
#define _SUM_OVERLOAD_64_FCT _overload_to_extern_sum_16x64
6364
#define _SUM_OVERLOAD_32_FCT _overload_to_extern_sum_32x32
6465
#define _MAX_OVERLOAD_64_FCT _overload_to_extern_max_16x64
6566
#define _MAX_OVERLOAD_32_FCT _overload_to_extern_max_32x32
6667
#define _MIN_OVERLOAD_64_FCT _overload_to_extern_min_16x64
6768
#define _MIN_OVERLOAD_32_FCT _overload_to_extern_min_32x32
68-
#elif _XTEAM_NUM_THREADS == 512
69-
#define _SUM_OVERLOAD_64_FCT _overload_to_extern_sum_8x64
70-
#define _SUM_OVERLOAD_32_FCT _overload_to_extern_sum_16x32
71-
#define _MAX_OVERLOAD_64_FCT _overload_to_extern_max_8x64
72-
#define _MAX_OVERLOAD_32_FCT _overload_to_extern_max_16x32
73-
#define _MIN_OVERLOAD_64_FCT _overload_to_extern_min_8x64
74-
#define _MIN_OVERLOAD_32_FCT _overload_to_extern_min_16x32
75-
#elif _XTEAM_NUM_THREADS == 256
76-
#define _SUM_OVERLOAD_64_FCT _overload_to_extern_sum_4x64
77-
#define _SUM_OVERLOAD_32_FCT _overload_to_extern_sum_8x32
78-
#define _MAX_OVERLOAD_64_FCT _overload_to_extern_max_4x64
79-
#define _MAX_OVERLOAD_32_FCT _overload_to_extern_max_8x32
80-
#define _MIN_OVERLOAD_64_FCT _overload_to_extern_min_4x64
81-
#define _MIN_OVERLOAD_32_FCT _overload_to_extern_min_8x32
82-
#elif _XTEAM_NUM_THREADS == 128
83-
#define _SUM_OVERLOAD_64_FCT _overload_to_extern_sum_2x64
84-
#define _SUM_OVERLOAD_32_FCT _overload_to_extern_sum_4x32
85-
#define _MAX_OVERLOAD_64_FCT _overload_to_extern_max_2x64
86-
#define _MAX_OVERLOAD_32_FCT _overload_to_extern_max_4x32
87-
#define _MIN_OVERLOAD_64_FCT _overload_to_extern_min_2x64
88-
#define _MIN_OVERLOAD_32_FCT _overload_to_extern_min_4x32
89-
#elif _XTEAM_NUM_THREADS == 64
90-
#define _SUM_OVERLOAD_64_FCT _overload_to_extern_sum_1x64
91-
#define _SUM_OVERLOAD_32_FCT _overload_to_extern_sum_2x32
92-
#define _MAX_OVERLOAD_64_FCT _overload_to_extern_max_1x64
93-
#define _MAX_OVERLOAD_32_FCT _overload_to_extern_max_2x32
94-
#define _MIN_OVERLOAD_64_FCT _overload_to_extern_min_1x64
95-
#define _MIN_OVERLOAD_32_FCT _overload_to_extern_min_2x32
9669
#else
97-
#error Invalid value for _XTEAM_NUM_THREADS. Must be 1024, 512, 256, 128, or 64
70+
#error Invalid value for _XTEAM_NUM_THREADS. Expected upper limit: 1024 and a power of 2.
9871
#endif
9972

10073
// Question to Dhruva, should the limiter include the stride?

0 commit comments

Comments
 (0)