Skip to content
This repository was archived by the owner on May 9, 2024. It is now read-only.

Commit 8758084

Browse files
committed
Add smem aggregations implementations
1 parent 7c45fa5 commit 8758084

File tree

2 files changed

+71
-0
lines changed

2 files changed

+71
-0
lines changed

omniscidb/QueryEngine/Compiler/genx.cpp

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@ void sync_threadblock();
2525
int64_t get_thread_index();
2626
int64_t get_block_dim();
2727

28+
int32_t agg_sum_int32_shared(GENERIC_ADDR_SPACE int32_t* agg, const int32_t val);
29+
int64_t agg_sum_shared(GENERIC_ADDR_SPACE int64_t* agg, const int64_t val);
30+
void agg_max_int32_shared(GENERIC_ADDR_SPACE int32_t* agg, const int32_t val);
2831
void agg_max_shared(GENERIC_ADDR_SPACE int64_t* agg, const int64_t val);
2932
int64_t agg_count_shared(GENERIC_ADDR_SPACE int64_t* agg, const int64_t val);
3033
uint32_t agg_count_int32_shared(GENERIC_ADDR_SPACE uint32_t* agg, const int32_t val);
@@ -94,6 +97,50 @@ void agg_max_double_skip_val_shared(GENERIC_ADDR_SPACE int64_t* agg,
9497
}
9598
}
9699

100+
/**
 * Adds `val` into the 32-bit slot at `addr`, treating a slot that currently
 * holds `skip_val` as empty.
 *
 * The slot is first exchanged with 0 so the sentinel never takes part in the
 * addition. The swapped-out contents are then added back together with `val`
 * through agg_sum_int32_shared(), unless they were the sentinel.
 *
 * NOTE(review): the exchange and the add are two separate operations, so
 * another thread can observe the intermediate 0.
 *
 * @param addr     slot to accumulate into
 * @param val      value to add
 * @param skip_val sentinel marking an empty slot
 * @return the result of agg_sum_int32_shared(), plus the swapped-out contents
 *         when those were not `skip_val`
 */
int32_t atomicSum32SkipVal(GENERIC_ADDR_SPACE int32_t* addr,
                           const int32_t val,
                           const int32_t skip_val) {
  const int32_t taken = atomic_xchg_int_32(addr, 0);
  if (taken == skip_val) {
    // Slot was empty: only the new value is contributed.
    return agg_sum_int32_shared(addr, val);
  }
  // Slot held a real partial sum: put it back together with the new value.
  return agg_sum_int32_shared(addr, val + taken) + taken;
}
107+
108+
/**
 * Adds `val` into the 64-bit slot at `addr`, treating a slot that currently
 * holds `skip_val` as empty.
 *
 * The slot is first exchanged with 0 so the sentinel never takes part in the
 * addition. The swapped-out contents are then added back together with `val`
 * through agg_sum_shared(), unless they were the sentinel.
 *
 * NOTE(review): the exchange and the add are two separate operations, so
 * another thread can observe the intermediate 0.
 *
 * @param addr     slot to accumulate into
 * @param val      value to add
 * @param skip_val sentinel marking an empty slot
 * @return the result of agg_sum_shared(), plus the swapped-out contents when
 *         those were not `skip_val`
 */
int64_t atomicSum64SkipVal(GENERIC_ADDR_SPACE int64_t* addr,
                           const int64_t val,
                           const int64_t skip_val) {
  // Both temporaries must be 64-bit: narrower locals would truncate the slot
  // contents and make the comparison against the 64-bit sentinel unreliable.
  const int64_t old = atomic_xchg_int_64(addr, 0);
  const int64_t old2 = agg_sum_shared(addr, old == skip_val ? val : (val + old));
  return old == skip_val ? old2 : (old2 + old);
}
115+
116+
/**
 * 32-bit SUM aggregation that ignores inputs equal to `skip_val`.
 *
 * @param agg      slot to accumulate into
 * @param val      incoming value
 * @param skip_val sentinel; an incoming value equal to it is not accumulated
 * @return 0 when `val` is the sentinel, otherwise whatever
 *         atomicSum32SkipVal() returns
 */
int32_t agg_sum_int32_skip_val_shared(GENERIC_ADDR_SPACE int32_t* agg,
                                      const int32_t val,
                                      const int32_t skip_val) {
  if (val == skip_val) {
    // Sentinel input: leave the slot untouched.
    return 0;
  }
  return atomicSum32SkipVal(agg, val, skip_val);
}
125+
126+
/**
 * 64-bit SUM aggregation that ignores inputs equal to `skip_val`.
 *
 * @param agg      slot to accumulate into
 * @param val      incoming value
 * @param skip_val sentinel; an incoming value equal to it is not accumulated
 * @return 0 when `val` is the sentinel, otherwise whatever
 *         atomicSum64SkipVal() returns
 */
int64_t agg_sum_int64_skip_val_shared(GENERIC_ADDR_SPACE int64_t* agg,
                                      const int64_t val,
                                      const int64_t skip_val) {
  if (val == skip_val) {
    // Sentinel input: leave the slot untouched.
    return 0;
  }
  return atomicSum64SkipVal(agg, val, skip_val);
}
135+
136+
/**
 * 32-bit MAX aggregation that ignores inputs equal to `skip_val`.
 *
 * @param agg      slot holding the running maximum
 * @param val      incoming value
 * @param skip_val sentinel; an incoming value equal to it is dropped
 */
void agg_max_int32_skip_val_shared(GENERIC_ADDR_SPACE int32_t* agg,
                                   const int32_t val,
                                   const int32_t skip_val) {
  if (val == skip_val) {
    // Sentinel input: nothing to aggregate.
    return;
  }
  agg_max_int32_shared(agg, val);
}
143+
97144
const GENERIC_ADDR_SPACE int64_t* init_shared_mem(
98145
const GENERIC_ADDR_SPACE int64_t* global_groups_buffer,
99146
const int32_t groups_buffer_size) {

omniscidb/QueryEngine/Compiler/genx.ll

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,11 @@ define i32 @atomic_xchg_int_32(i32 addrspace(4)* %p, i32 %val) {
309309
ret i32 %old
310310
}
311311

312+
; Updates the i32 slot at %agg to the signed maximum of its current contents
; and %val, using a single atomic read-modify-write with monotonic ordering.
; The previous slot contents are not returned to the caller.
define void @agg_max_int32_shared(i32 addrspace(4)* %agg, i32 noundef %val) {
  %prev = atomicrmw max i32 addrspace(4)* %agg, i32 %val monotonic
  ret void
}
316+
312317
define void @agg_max_shared(i64 addrspace(4)* %agg, i64 noundef %val) {
313318
%old = atomicrmw max i64 addrspace(4)* %agg, i64 %val monotonic
314319
ret void
@@ -330,6 +335,7 @@ define void @agg_min_shared(i64 addrspace(4)* %agg, i64 noundef %val) {
330335
}
331336

332337
declare i64 @llvm.smin.i64(i64, i64)
338+
declare i32 @llvm.smin.i32(i32, i32)
333339

334340
define void @agg_min_skip_val_shared(i64 addrspace(4)* %agg, i64 noundef %val, i64 noundef %skip_val) {
335341
%no_skip = icmp ne i64 %val, %skip_val
@@ -348,3 +354,21 @@ define void @agg_min_skip_val_shared(i64 addrspace(4)* %agg, i64 noundef %val, i
348354
.skip:
349355
ret void
350356
}
357+
358+
; 32-bit MIN aggregation that ignores inputs equal to %skip_val and treats a
; slot that currently holds %skip_val as empty.
;
;   %agg      - slot holding the running minimum
;   %val      - incoming value; dropped when equal to %skip_val
;   %skip_val - sentinel marking "no value"
;
; Implemented as a compare-and-swap retry loop: compute the desired new slot
; contents from the last observed contents, try to install them, and retry
; with the value the CAS reported if the slot changed in the meantime.
define void @agg_min_int32_skip_val_shared(i32 addrspace(4)* %agg, i32 noundef %val, i32 noundef %skip_val) {
  %no_skip = icmp ne i32 %val, %skip_val
  br i1 %no_skip, label %.noskip, label %.skip
.noskip:
  ; An i32 slot is only guaranteed its natural 4-byte alignment; claiming a
  ; larger alignment than the pointer really has is undefined behavior.
  %orig = load atomic i32, i32 addrspace(4)* %agg unordered, align 4
  br label %.loop
.loop:
  ; %loaded is the most recently observed slot contents: the initial load on
  ; the first iteration, the value returned by the failed CAS afterwards.
  %loaded = phi i32 [ %orig, %.noskip ], [ %old, %.loop ]
  %isnull = icmp eq i32 %loaded, %skip_val
  %min = call i32 @llvm.smin.i32(i32 %loaded, i32 %val)
  ; An empty slot takes %val directly so the sentinel never wins the min.
  %st = select i1 %isnull, i32 %val, i32 %min
  %old = call i32 @atomic_cas_int_32(i32 addrspace(4)* %agg, i32 %loaded, i32 %st)
  ; The CAS took effect only if the slot still held what we based %st on.
  %success = icmp eq i32 %old, %loaded
  br i1 %success, label %.skip, label %.loop
.skip:
  ret void
}

0 commit comments

Comments
 (0)