Skip to content
This repository was archived by the owner on May 9, 2024. It is now read-only.

Commit 65346ab

Browse files
committed
Replace init_shared_mem definition with a C++ one
1 parent 1dac6a1 commit 65346ab

File tree

2 files changed

+23
-24
lines changed

2 files changed

+23
-24
lines changed

omniscidb/QueryEngine/Compiler/genx.cpp

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,11 @@ double atomic_min_double(GENERIC_ADDR_SPACE double* addr, const double val);
1818
double atomic_min_float(GENERIC_ADDR_SPACE float* addr, const float val);
1919
double atomic_max_double(GENERIC_ADDR_SPACE double* addr, const double val);
2020
double atomic_max_float(GENERIC_ADDR_SPACE float* addr, const float val);
21+
GENERIC_ADDR_SPACE int64_t* declare_dynamic_shared_memory();
22+
23+
void sync_threadblock();
24+
int64_t get_thread_index();
25+
int64_t get_block_dim();
2126

2227
void agg_max_shared(GENERIC_ADDR_SPACE int64_t* agg, const int64_t val);
2328
int64_t agg_count_shared(GENERIC_ADDR_SPACE int64_t* agg, const int64_t val);
@@ -87,4 +92,17 @@ void agg_max_double_skip_val_shared(GENERIC_ADDR_SPACE int64_t* agg,
8792
agg_max_double_shared(agg, val);
8893
}
8994
}
95+
96+
const GENERIC_ADDR_SPACE int64_t* init_shared_mem(
97+
const GENERIC_ADDR_SPACE int64_t* global_groups_buffer,
98+
const int32_t groups_buffer_size) {
99+
auto shared_groups_buffer = declare_dynamic_shared_memory();
100+
const int32_t buffer_units = groups_buffer_size >> 3;
101+
102+
for (int32_t pos = get_thread_index(); pos < buffer_units; pos += get_block_dim()) {
103+
shared_groups_buffer[pos] = global_groups_buffer[pos];
104+
}
105+
sync_threadblock();
106+
return shared_groups_buffer;
107+
}
90108
}

omniscidb/QueryEngine/Compiler/genx.ll

Lines changed: 5 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,11 @@ define void @sync_threadblock() {
2525
ret void
2626
}
2727

28+
define i64 @get_block_dim() {
29+
%gsize = call i64 @__spirv_BuiltInWorkgroupSize(i32 0)
30+
ret i64 %gsize
31+
}
32+
2833
define i32 @pos_start_impl(i32 addrspace(4)* %0) readnone nounwind alwaysinline {
2934
%gid = call i64 @__spirv_BuiltInWorkgroupId(i32 0)
3035
%gsize = call i64 @__spirv_BuiltInWorkgroupSize(i32 0)
@@ -322,30 +327,6 @@ define void @agg_count_distinct_bitmap_gpu(i64 addrspace(4)* %agg, i64 noundef %
322327
ret void
323328
}
324329

325-
define i64 addrspace(4)* @init_shared_mem(i64 addrspace(4)* %agg_init_val, i32 noundef %groups_buffer_size) {
326-
.entry:
327-
%buf.units = ashr i32 %groups_buffer_size, 3
328-
%buf.units.i64 = sext i32 %buf.units to i64
329-
%pos = call i64 @get_thread_index()
330-
%wgnum = call i64 @__spirv_BuiltInNumWorkgroups(i32 0)
331-
%loop.cond = icmp slt i64 %pos, %buf.units.i64
332-
br i1 %loop.cond, label %.for_body, label %.exit
333-
.for_body:
334-
%pos.idx = phi i64 [ %pos, %.entry ], [ %pos.idx.new, %.for_body ]
335-
%agg_init_val.idx = getelementptr inbounds i64, i64 addrspace(4)* %agg_init_val, i64 %pos.idx
336-
%slm.idx = getelementptr inbounds [4096 x i64], [4096 x i64] addrspace(3)* @slm.buf.i64, i64 0, i64 %pos.idx
337-
%val = load i64, i64 addrspace(4)* %agg_init_val.idx
338-
store i64 %val, i64 addrspace(3)* %slm.idx
339-
%pos.idx.new = add nsw i64 %pos.idx, %wgnum
340-
%cond = icmp slt i64 %pos.idx.new, %buf.units.i64
341-
br i1 %cond, label %.for_body, label %.exit
342-
.exit:
343-
call void @sync_threadblock()
344-
%res.ptr = bitcast [4096 x i64] addrspace(3)* @slm.buf.i64 to i64 addrspace(3)*
345-
%res.ptr.casted = addrspacecast i64 addrspace(3)* %res.ptr to i64 addrspace(4)*
346-
ret i64 addrspace(4)* %res.ptr.casted
347-
}
348-
349330
define void @write_back_non_grouped_agg(i64 addrspace(4)* %input_buffer, i64 addrspace(4)* %output_buffer, i32 noundef %agg_idx) {
350331
%tid = call i64 @get_thread_index()
351332
%agg_idx.i64 = sext i32 %agg_idx to i64

0 commit comments

Comments
 (0)