@@ -27,11 +27,6 @@ inline int64_t hdk_double_as_int64_t(const double x) {
27
27
// Returns the double whose object representation is the 64-bit integer `x`
// (a bit-for-bit reinterpretation, not a numeric conversion).
// NOTE(review): the value is read through a reinterpret_cast'ed pointer; this
// relies on the compiler tolerating int64_t/double aliasing - confirm this is
// acceptable for every toolchain that builds this file.
inline double hdk_int64_t_as_double (const int64_t x) {
28
28
return *reinterpret_cast <const double *>(&x);
29
29
}
30
-
31
- template <class T >
32
- inline constexpr T hdk_min (const T lhs, const T rhs) {
33
- return lhs < rhs ? lhs : rhs;
34
- }
35
30
} // namespace
36
31
37
32
extern " C" {
@@ -51,6 +46,8 @@ int64_t get_thread_index();
51
46
int64_t get_block_dim ();
52
47
53
48
// Forward declarations of the aggregate helpers called by the *_skip_val_shared
// wrappers in this file; their definitions are not visible here.
int32_t agg_sum_int32_shared (GENERIC_ADDR_SPACE int32_t * agg, const int32_t val);
49
// Floating-point sum helpers. The accumulator pointer is integer-typed while
// `val` is float/double - presumably the slot holds the raw bit pattern of the
// floating-point value (the callers in this file treat it that way); verify
// against the definitions.
+ void agg_sum_float_shared (GENERIC_ADDR_SPACE int32_t * agg, const float val);
50
+ void agg_sum_double_shared (GENERIC_ADDR_SPACE int64_t * agg, const double val);
54
51
int64_t agg_sum_shared (GENERIC_ADDR_SPACE int64_t * agg, const int64_t val);
55
52
void agg_max_int32_shared (GENERIC_ADDR_SPACE int32_t * agg, const int32_t val);
56
53
void agg_max_shared (GENERIC_ADDR_SPACE int64_t * agg, const int64_t val);
@@ -93,7 +90,7 @@ double atomic_min_float(GENERIC_ADDR_SPACE float* addr, const float val) {
93
90
old = atomic_cas_int_32 (
94
91
address_as_ull,
95
92
assumed,
96
- hdk_float_as_int32_t (hdk_min (val, hdk_int32_t_as_float (assumed))));
93
+ hdk_float_as_int32_t (std::min (val, hdk_int32_t_as_float (assumed))));
97
94
} while (assumed != old);
98
95
99
96
return hdk_int32_t_as_float (old);
@@ -109,7 +106,7 @@ double atomic_min_double(GENERIC_ADDR_SPACE double* addr, const double val) {
109
106
old = atomic_cas_int_64 (
110
107
address_as_ull,
111
108
assumed,
112
- hdk_double_as_int64_t (hdk_min (val, hdk_int64_t_as_double (assumed))));
109
+ hdk_double_as_int64_t (std::min (val, hdk_int64_t_as_double (assumed))));
113
110
} while (assumed != old);
114
111
115
112
return hdk_int64_t_as_double (old);
@@ -123,7 +120,7 @@ void atomicMinFltSkipVal(GENERIC_ADDR_SPACE int32_t* addr,
123
120
agg_min_float_shared (addr,
124
121
old == hdk_float_as_int32_t (skip_val)
125
122
? val
126
- : hdk_min (hdk_int32_t_as_float (old), val));
123
+ : std::min (hdk_int32_t_as_float (old), val));
127
124
}
128
125
129
126
void atomicMinDblSkipVal (GENERIC_ADDR_SPACE int64_t * addr,
@@ -134,7 +131,7 @@ void atomicMinDblSkipVal(GENERIC_ADDR_SPACE int64_t* addr,
134
131
agg_min_double_shared (addr,
135
132
old == hdk_double_as_int64_t (skip_val)
136
133
? val
137
- : hdk_min (hdk_int64_t_as_double (old), val));
134
+ : std::min (hdk_int64_t_as_double (old), val));
138
135
}
139
136
140
137
void agg_min_float_skip_val_shared (GENERIC_ADDR_SPACE int32_t * agg,
@@ -163,16 +160,26 @@ void agg_max_double_shared(GENERIC_ADDR_SPACE int64_t* agg, const double val) {
163
160
// MAX aggregate for float values with a skip sentinel.
//
// Removed ('-') version: compared `val != skip_val` as floats and forwarded
// `val` straight to agg_max_float_shared.
//
// Added ('+') version:
//   * compares `val` and `skip_val` by bit pattern instead of by float value;
//   * when they differ, exchanges `*agg` with the bit pattern of -HDK_FLT_MAX
//     and keeps what atomic_xchg_int_32 returns in `old` (presumably the
//     previous contents of the slot - confirm against its definition);
//   * then calls agg_max_float_shared with `val` if `old` is the bit pattern
//     of `skip_val`, otherwise with std::max(old-as-float, val).
// When `val` has the same bits as `skip_val` the function does nothing.
void agg_max_float_skip_val_shared (GENERIC_ADDR_SPACE int32_t * agg,
164
161
const float val,
165
162
const float skip_val) {
166
- if (val != skip_val) {
167
- agg_max_float_shared (agg, val);
163
+ if (hdk_float_as_int32_t (val) != hdk_float_as_int32_t (skip_val)) {
164
+ const int32_t flt_max = hdk_float_as_int32_t (-HDK_FLT_MAX);
165
+ int32_t old = atomic_xchg_int_32 (agg, flt_max);
166
+ agg_max_float_shared (agg,
167
+ old == hdk_float_as_int32_t (skip_val)
168
+ ? val
169
+ : std::max (hdk_int32_t_as_float (old), val));
168
170
}
169
171
}
170
172
171
173
// MAX aggregate for double values with a skip sentinel.
//
// Removed ('-') version: compared `val != skip_val` as doubles and forwarded
// `val` straight to agg_max_double_shared.
//
// Added ('+') version:
//   * compares `val` and `skip_val` by bit pattern instead of by double value;
//   * when they differ, exchanges `*agg` with the bit pattern of -HDK_DBL_MAX
//     and keeps what atomic_xchg_int_64 returns in `old` (presumably the
//     previous contents of the slot - confirm against its definition);
//   * then calls agg_max_double_shared with `val` if `old` is the bit pattern
//     of `skip_val`, otherwise with std::max(old-as-double, val).
// When `val` has the same bits as `skip_val` the function does nothing.
void agg_max_double_skip_val_shared (GENERIC_ADDR_SPACE int64_t * agg,
172
174
const double val,
173
175
const double skip_val) {
174
- if (val != skip_val) {
175
- agg_max_double_shared (agg, val);
176
+ if (hdk_double_as_int64_t (val) != hdk_double_as_int64_t (skip_val)) {
177
+ const int64_t dbl_max = hdk_double_as_int64_t (-HDK_DBL_MAX);
178
+ int64_t old = atomic_xchg_int_64 (agg, dbl_max);
179
+ agg_max_double_shared (agg,
180
+ old == hdk_double_as_int64_t (skip_val)
181
+ ? val
182
+ : std::max (hdk_int64_t_as_double (old), val));
176
183
}
177
184
}
178
185
@@ -202,6 +209,25 @@ int32_t agg_sum_int32_skip_val_shared(GENERIC_ADDR_SPACE int32_t* agg,
202
209
return 0 ;
203
210
}
204
211
212
+ void agg_sum_float_skip_val_shared (GENERIC_ADDR_SPACE int32_t * agg,
213
+ const float val,
214
+ const float skip_val) {
215
+ if (hdk_float_as_int32_t (val) != hdk_float_as_int32_t (skip_val)) {
216
+ int32_t old = atomic_xchg_int_32 (agg, hdk_float_as_int32_t (0 .f ));
217
+ agg_sum_float_shared (agg, old == hdk_float_as_int32_t (skip_val) ? val : (val + old));
218
+ }
219
+ }
220
+
221
+ void agg_sum_double_skip_val_shared (GENERIC_ADDR_SPACE int64_t * agg,
222
+ const double val,
223
+ const double skip_val) {
224
+ if (hdk_double_as_int64_t (val) != hdk_double_as_int64_t (skip_val)) {
225
+ int64_t old = atomic_xchg_int_64 (agg, hdk_double_as_int64_t (0 .));
226
+ agg_sum_double_shared (agg,
227
+ old == hdk_double_as_int64_t (skip_val) ? val : (val + old));
228
+ }
229
+ }
230
+
205
231
int64_t agg_sum_int64_skip_val_shared (GENERIC_ADDR_SPACE int64_t * agg,
206
232
const int64_t val,
207
233
const int64_t skip_val) {
0 commit comments