Skip to content
This repository was archived by the owner on May 9, 2024. It is now read-only.

Commit f01cc56

Browse files
committed
Add C++ agg_min_*_shared implementations
1 parent 8758084 commit f01cc56

File tree

2 files changed

+84
-43
lines changed

2 files changed

+84
-43
lines changed

omniscidb/QueryEngine/Compiler/genx.cpp

Lines changed: 83 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,34 @@
66

77
#include <algorithm>
#include <cstdint>
#include <cstring>
#include <limits>

#include "Shared/funcannotations.h"
1112

13+
namespace {
// Floating-point limits used as aggregate sentinels.
// Note: like C's FLT_MIN/DBL_MIN, the *_MIN constants are the smallest positive
// normalized values (std::numeric_limits<T>::min()), not the most negative ones.
constexpr float HDK_FLT_MIN = std::numeric_limits<float>::min();
constexpr float HDK_FLT_MAX = std::numeric_limits<float>::max();
constexpr double HDK_DBL_MIN = std::numeric_limits<double>::min();
constexpr double HDK_DBL_MAX = std::numeric_limits<double>::max();

// Copies the object representation of `from` into a value of type `To`.
// Dereferencing a reinterpret_cast'ed pointer of an unrelated type violates the
// strict-aliasing rule; a fixed-size memcpy is the well-defined equivalent and
// is normally folded into a plain register move by the optimizer.
template <class To, class From>
inline To hdk_bit_cast(const From& from) {
  static_assert(sizeof(To) == sizeof(From),
                "bit cast requires source and destination of equal size");
  To to;
  std::memcpy(&to, &from, sizeof(To));
  return to;
}

// Returns the 32-bit pattern of `x` as a signed integer.
inline int32_t hdk_float_as_int32_t(const float x) {
  return hdk_bit_cast<int32_t>(x);
}

// Returns the float whose 32-bit pattern is `x`.
inline float hdk_int32_t_as_float(const int32_t x) {
  return hdk_bit_cast<float>(x);
}

// Returns the 64-bit pattern of `x` as a signed integer.
inline int64_t hdk_double_as_int64_t(const double x) {
  return hdk_bit_cast<int64_t>(x);
}

// Returns the double whose 64-bit pattern is `x`.
inline double hdk_int64_t_as_double(const int64_t x) {
  return hdk_bit_cast<double>(x);
}

// Returns `lhs` when `lhs < rhs`, otherwise `rhs`. For floating-point operands
// this means `rhs` is returned whenever the comparison is unordered (NaN).
template <class T>
inline constexpr T hdk_min(const T lhs, const T rhs) {
  return lhs < rhs ? lhs : rhs;
}
}  // namespace
36+
1237
extern "C" {
1338
int64_t atomic_cas_int_64(GENERIC_ADDR_SPACE int64_t*, int64_t, int64_t);
1439
int32_t atomic_cas_int_32(GENERIC_ADDR_SPACE int32_t*, int32_t, int32_t);
@@ -58,19 +83,73 @@ void agg_min_double_shared(GENERIC_ADDR_SPACE int64_t* agg, const double val) {
5883
atomic_min_double(reinterpret_cast<GENERIC_ADDR_SPACE double*>(agg), val);
5984
}
6085

86+
// Lowers the float stored at `addr` to min(val, stored) with a compare-and-swap
// retry loop that operates on the value's 32-bit pattern.
// Returns the bit pattern last reported by atomic_cas_int_32, reinterpreted as
// float and widened to double by the return type.
double atomic_min_float(GENERIC_ADDR_SPACE float* addr, const float val) {
  GENERIC_ADDR_SPACE int32_t* const slot =
      reinterpret_cast<GENERIC_ADDR_SPACE int32_t*>(addr);
  int32_t observed = *slot;

  for (;;) {
    const int32_t expected = observed;
    const float smaller = hdk_min(val, hdk_int32_t_as_float(expected));
    observed = atomic_cas_int_32(slot, expected, hdk_float_as_int32_t(smaller));
    // Stop once the CAS reports the same bits we based the computation on.
    if (observed == expected) {
      break;
    }
  }

  return hdk_int32_t_as_float(observed);
}
101+
102+
// Lowers the double stored at `addr` to min(val, stored) with a compare-and-swap
// retry loop that operates on the value's 64-bit pattern.
// Returns the bit pattern last reported by atomic_cas_int_64, reinterpreted as
// double.
double atomic_min_double(GENERIC_ADDR_SPACE double* addr, const double val) {
  GENERIC_ADDR_SPACE int64_t* const slot =
      reinterpret_cast<GENERIC_ADDR_SPACE int64_t*>(addr);
  int64_t observed = *slot;

  for (;;) {
    const int64_t expected = observed;
    const double smaller = hdk_min(val, hdk_int64_t_as_double(expected));
    observed = atomic_cas_int_64(slot, expected, hdk_double_as_int64_t(smaller));
    // Stop once the CAS reports the same bits we based the computation on.
    if (observed == expected) {
      break;
    }
  }

  return hdk_int64_t_as_double(observed);
}
117+
118+
// Min-aggregation of `val` into the float slot at `addr` when the slot may still
// hold `skip_val`.
// The slot is first exchanged with the bit pattern of HDK_FLT_MAX; the previous
// content is then folded back in through agg_min_float_shared: `val` alone if
// the previous bits equal those of `skip_val`, otherwise min(previous, val).
void atomicMinFltSkipVal(GENERIC_ADDR_SPACE int32_t* addr,
                         const float val,
                         const float skip_val) {
  const int32_t previous_bits =
      atomic_xchg_int_32(addr, hdk_float_as_int32_t(HDK_FLT_MAX));

  float candidate = val;
  if (previous_bits != hdk_float_as_int32_t(skip_val)) {
    candidate = hdk_min(hdk_int32_t_as_float(previous_bits), val);
  }
  agg_min_float_shared(addr, candidate);
}
128+
129+
// Min-aggregation of `val` into the double slot at `addr` when the slot may
// still hold `skip_val`.
// The slot is first exchanged with the bit pattern of HDK_DBL_MAX; the previous
// content is then folded back in through agg_min_double_shared: `val` alone if
// the previous bits equal those of `skip_val`, otherwise min(previous, val).
void atomicMinDblSkipVal(GENERIC_ADDR_SPACE int64_t* addr,
                         const double val,
                         const double skip_val) {
  const int64_t previous_bits =
      atomic_xchg_int_64(addr, hdk_double_as_int64_t(HDK_DBL_MAX));

  double candidate = val;
  if (previous_bits != hdk_double_as_int64_t(skip_val)) {
    candidate = hdk_min(hdk_int64_t_as_double(previous_bits), val);
  }
  agg_min_double_shared(addr, candidate);
}
139+
61140
void agg_min_float_skip_val_shared(GENERIC_ADDR_SPACE int32_t* agg,
62141
const float val,
63142
const float skip_val) {
64-
if (val != skip_val) {
65-
agg_min_float_shared(agg, val);
143+
if (hdk_float_as_int32_t(val) != hdk_float_as_int32_t(skip_val)) {
144+
atomicMinFltSkipVal(agg, val, skip_val);
66145
}
67146
}
68147

69148
void agg_min_double_skip_val_shared(GENERIC_ADDR_SPACE int64_t* agg,
70149
const double val,
71150
const double skip_val) {
72-
if (val != skip_val) {
73-
agg_min_double_shared(agg, val);
151+
if (hdk_double_as_int64_t(val) != hdk_double_as_int64_t(skip_val)) {
152+
atomicMinDblSkipVal(agg, val, skip_val);
74153
}
75154
}
76155

omniscidb/QueryEngine/Compiler/genx.ll

Lines changed: 1 addition & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ define void @agg_sum_float_shared(i32 addrspace(4)* %agg, float noundef %val) {
128128
ret void
129129
}
130130

131+
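; FIXME: follow-up needed for agg_sum_float_skip_val_shared (details not recorded here).
; NOTE(review): the skip test below is an ordered value compare (fcmp one), so a NaN %val
; branches to .skip; the C++ agg_min_*_skip_val_shared compares bit patterns instead - confirm
; whether aligning the two is the intended fix.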
131132
define void @agg_sum_float_skip_val_shared(i32 addrspace(4)* %agg, float noundef %val, float noundef %skip_val) {
132133
%no_skip = fcmp one float %val, %skip_val
133134
br i1 %no_skip, label %.noskip, label %.skip
@@ -195,45 +196,6 @@ define void @atomic_or(i32 addrspace(4)* %addr, i32 noundef %val) {
195196
ret void
196197
}
197198

198-
; Lowers the float at %addr to min(%val, stored) with a retry loop around
; @atomic_cas_int_32 on the value's i32 bit pattern.
; Each iteration selects %val only when `fcmp olt` holds (ordered less-than, so
; the loaded value is kept whenever either operand is NaN), then attempts the
; swap; the loop exits when the i32 returned by the CAS equals the expected bits.
; Returns the last value reported by the CAS, extended from float to double.
; NOTE(review): the initial load is annotated `align 8` although the element is a
; 4-byte float - confirm that %addr is always 8-byte aligned.
define double @atomic_min_float(float addrspace(4)* %addr, float noundef %val) {
199-
.entry:
200-
%orig = load float, float addrspace(4)* %addr, align 8
201-
br label %.loop
202-
.loop:
203-
%loaded = phi float [ %orig, %.entry], [ %old.cst, %.loop ]
204-
%isless = fcmp olt float %val, %loaded
205-
%min = select i1 %isless, float %val, float %loaded
206-
%min.cst = bitcast float %min to i32
207-
%loaded.cst = bitcast float %loaded to i32
208-
%addr.cst = bitcast float addrspace(4)* %addr to i32 addrspace(4)*
209-
%old = call i32 @atomic_cas_int_32(i32 addrspace(4)* %addr.cst, i32 %loaded.cst, i32 %min.cst)
210-
%old.cst = bitcast i32 %old to float
211-
%success = icmp eq i32 %old, %loaded.cst
212-
br i1 %success, label %.exit, label %.loop
213-
.exit:
214-
%res = fpext float %old.cst to double
215-
ret double %res
216-
}
217-
218-
; Lowers the double at %addr to min(%val, stored) with a retry loop around
; @atomic_cas_int_64 on the value's i64 bit pattern.
; Each iteration selects %val only when `fcmp olt` holds (ordered less-than, so
; the loaded value is kept whenever either operand is NaN), then attempts the
; swap; the loop exits when the i64 returned by the CAS equals the expected bits.
; Returns the last value reported by the CAS, bitcast back to double.
define double @atomic_min_double(double addrspace(4)* %addr, double noundef %val) {
219-
.entry:
220-
%orig = load double, double addrspace(4)* %addr, align 8
221-
br label %.loop
222-
.loop:
223-
%loaded = phi double [ %orig, %.entry], [ %old.cst, %.loop ]
224-
%isless = fcmp olt double %val, %loaded
225-
%min = select i1 %isless, double %val, double %loaded
226-
%min.cst = bitcast double %min to i64
227-
%loaded.cst = bitcast double %loaded to i64
228-
%addr.cst = bitcast double addrspace(4)* %addr to i64 addrspace(4)*
229-
%old = call i64 @atomic_cas_int_64(i64 addrspace(4)* %addr.cst, i64 %loaded.cst, i64 %min.cst)
230-
%old.cst = bitcast i64 %old to double
231-
%success = icmp eq i64 %old, %loaded.cst
232-
br i1 %success, label %.exit, label %.loop
233-
.exit:
234-
ret double %old.cst
235-
}
236-
237199
define double @atomic_max_float(float addrspace(4)* %addr, float noundef %val) {
238200
.entry:
239201
%orig = load float, float addrspace(4)* %addr, align 8

0 commit comments

Comments
 (0)