Skip to content
This repository was archived by the owner on May 9, 2024. It is now read-only.

Commit da49f54

Browse files
committed
Make float sums not depend on zero initialization
1 parent d1ec6a4 commit da49f54

File tree

2 files changed

+39
-34
lines changed

2 files changed

+39
-34
lines changed

omniscidb/QueryEngine/Compiler/genx.cpp

Lines changed: 39 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,6 @@ inline int64_t hdk_double_as_int64_t(const double x) {
2727
inline double hdk_int64_t_as_double(const int64_t x) {
2828
return *reinterpret_cast<const double*>(&x);
2929
}
30-
31-
template <class T>
32-
inline constexpr T hdk_min(const T lhs, const T rhs) {
33-
return lhs < rhs ? lhs : rhs;
34-
}
3530
} // namespace
3631

3732
extern "C" {
@@ -51,6 +46,8 @@ int64_t get_thread_index();
5146
int64_t get_block_dim();
5247

5348
int32_t agg_sum_int32_shared(GENERIC_ADDR_SPACE int32_t* agg, const int32_t val);
49+
void agg_sum_float_shared(GENERIC_ADDR_SPACE int32_t* agg, const float val);
50+
void agg_sum_double_shared(GENERIC_ADDR_SPACE int64_t* agg, const double val);
5451
int64_t agg_sum_shared(GENERIC_ADDR_SPACE int64_t* agg, const int64_t val);
5552
void agg_max_int32_shared(GENERIC_ADDR_SPACE int32_t* agg, const int32_t val);
5653
void agg_max_shared(GENERIC_ADDR_SPACE int64_t* agg, const int64_t val);
@@ -93,7 +90,7 @@ double atomic_min_float(GENERIC_ADDR_SPACE float* addr, const float val) {
9390
old = atomic_cas_int_32(
9491
address_as_ull,
9592
assumed,
96-
hdk_float_as_int32_t(hdk_min(val, hdk_int32_t_as_float(assumed))));
93+
hdk_float_as_int32_t(std::min(val, hdk_int32_t_as_float(assumed))));
9794
} while (assumed != old);
9895

9996
return hdk_int32_t_as_float(old);
@@ -109,7 +106,7 @@ double atomic_min_double(GENERIC_ADDR_SPACE double* addr, const double val) {
109106
old = atomic_cas_int_64(
110107
address_as_ull,
111108
assumed,
112-
hdk_double_as_int64_t(hdk_min(val, hdk_int64_t_as_double(assumed))));
109+
hdk_double_as_int64_t(std::min(val, hdk_int64_t_as_double(assumed))));
113110
} while (assumed != old);
114111

115112
return hdk_int64_t_as_double(old);
@@ -123,7 +120,7 @@ void atomicMinFltSkipVal(GENERIC_ADDR_SPACE int32_t* addr,
123120
agg_min_float_shared(addr,
124121
old == hdk_float_as_int32_t(skip_val)
125122
? val
126-
: hdk_min(hdk_int32_t_as_float(old), val));
123+
: std::min(hdk_int32_t_as_float(old), val));
127124
}
128125

129126
void atomicMinDblSkipVal(GENERIC_ADDR_SPACE int64_t* addr,
@@ -134,7 +131,7 @@ void atomicMinDblSkipVal(GENERIC_ADDR_SPACE int64_t* addr,
134131
agg_min_double_shared(addr,
135132
old == hdk_double_as_int64_t(skip_val)
136133
? val
137-
: hdk_min(hdk_int64_t_as_double(old), val));
134+
: std::min(hdk_int64_t_as_double(old), val));
138135
}
139136

140137
void agg_min_float_skip_val_shared(GENERIC_ADDR_SPACE int32_t* agg,
@@ -163,16 +160,26 @@ void agg_max_double_shared(GENERIC_ADDR_SPACE int64_t* agg, const double val) {
163160
void agg_max_float_skip_val_shared(GENERIC_ADDR_SPACE int32_t* agg,
164161
const float val,
165162
const float skip_val) {
166-
if (val != skip_val) {
167-
agg_max_float_shared(agg, val);
163+
if (hdk_float_as_int32_t(val) != hdk_float_as_int32_t(skip_val)) {
164+
const int32_t flt_max = hdk_float_as_int32_t(-HDK_FLT_MAX);
165+
int32_t old = atomic_xchg_int_32(agg, flt_max);
166+
agg_max_float_shared(agg,
167+
old == hdk_float_as_int32_t(skip_val)
168+
? val
169+
: std::max(hdk_int32_t_as_float(old), val));
168170
}
169171
}
170172

171173
void agg_max_double_skip_val_shared(GENERIC_ADDR_SPACE int64_t* agg,
172174
const double val,
173175
const double skip_val) {
174-
if (val != skip_val) {
175-
agg_max_double_shared(agg, val);
176+
if (hdk_double_as_int64_t(val) != hdk_double_as_int64_t(skip_val)) {
177+
const int64_t dbl_max = hdk_double_as_int64_t(-HDK_DBL_MAX);
178+
int64_t old = atomic_xchg_int_64(agg, dbl_max);
179+
agg_max_double_shared(agg,
180+
old == hdk_double_as_int64_t(skip_val)
181+
? val
182+
: std::max(hdk_int64_t_as_double(old), val));
176183
}
177184
}
178185

@@ -202,6 +209,25 @@ int32_t agg_sum_int32_skip_val_shared(GENERIC_ADDR_SPACE int32_t* agg,
202209
return 0;
203210
}
204211

212+
void agg_sum_float_skip_val_shared(GENERIC_ADDR_SPACE int32_t* agg,
213+
const float val,
214+
const float skip_val) {
215+
if (hdk_float_as_int32_t(val) != hdk_float_as_int32_t(skip_val)) {
216+
int32_t old = atomic_xchg_int_32(agg, hdk_float_as_int32_t(0.f));
217+
agg_sum_float_shared(agg, old == hdk_float_as_int32_t(skip_val) ? val : (val + old));
218+
}
219+
}
220+
221+
void agg_sum_double_skip_val_shared(GENERIC_ADDR_SPACE int64_t* agg,
222+
const double val,
223+
const double skip_val) {
224+
if (hdk_double_as_int64_t(val) != hdk_double_as_int64_t(skip_val)) {
225+
int64_t old = atomic_xchg_int_64(agg, hdk_double_as_int64_t(0.));
226+
agg_sum_double_shared(agg,
227+
old == hdk_double_as_int64_t(skip_val) ? val : (val + old));
228+
}
229+
}
230+
205231
int64_t agg_sum_int64_skip_val_shared(GENERIC_ADDR_SPACE int64_t* agg,
206232
const int64_t val,
207233
const int64_t skip_val) {

omniscidb/QueryEngine/Compiler/genx.ll

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -128,17 +128,6 @@ define void @agg_sum_float_shared(i32 addrspace(4)* %agg, float noundef %val) {
128128
ret void
129129
}
130130

131-
; fixme
132-
define void @agg_sum_float_skip_val_shared(i32 addrspace(4)* %agg, float noundef %val, float noundef %skip_val) {
133-
%no_skip = fcmp one float %val, %skip_val
134-
br i1 %no_skip, label %.noskip, label %.skip
135-
.noskip:
136-
call void @agg_sum_float_shared(i32 addrspace(4)* %agg, float noundef %val)
137-
br label %.skip
138-
.skip:
139-
ret void
140-
}
141-
142131
define void @agg_sum_double_shared(i64 addrspace(4)* %agg, double noundef %val) {
143132
.entry:
144133
%orig = load atomic i64, i64 addrspace(4)* %agg unordered, align 8
@@ -157,16 +146,6 @@ define void @agg_sum_double_shared(i64 addrspace(4)* %agg, double noundef %val)
157146
ret void
158147
}
159148

160-
define void @agg_sum_double_skip_val_shared(i64 addrspace(4)* %agg, double noundef %val, double noundef %skip_val) {
161-
%no_skip = fcmp one double %val, %skip_val
162-
br i1 %no_skip, label %.noskip, label %.skip
163-
.noskip:
164-
call void @agg_sum_double_shared(i64 addrspace(4)* %agg, double noundef %val)
165-
br label %.skip
166-
.skip:
167-
ret void
168-
}
169-
170149
define void @atomic_or(i32 addrspace(4)* %addr, i32 noundef %val) {
171150
.entry:
172151
%orig = load atomic i32, i32 addrspace(4)* %addr unordered, align 8

0 commit comments

Comments
 (0)