Skip to content
This repository was archived by the owner on May 9, 2024. It is now read-only.

Commit 90d5908

Browse files
committed
Replace agg_count_distinct_bitmap_gpu definition with a C++ one
1 parent 65346ab commit 90d5908

File tree

2 files changed

+35
-54
lines changed

2 files changed

+35
-54
lines changed

omniscidb/QueryEngine/Compiler/genx.cpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ double atomic_min_double(GENERIC_ADDR_SPACE double* addr, const double val);
1818
double atomic_min_float(GENERIC_ADDR_SPACE float* addr, const float val);
1919
double atomic_max_double(GENERIC_ADDR_SPACE double* addr, const double val);
2020
double atomic_max_float(GENERIC_ADDR_SPACE float* addr, const float val);
21+
void atomic_or(GENERIC_ADDR_SPACE int32_t* addr, const int32_t val);
2122
GENERIC_ADDR_SPACE int64_t* declare_dynamic_shared_memory();
2223

2324
void sync_threadblock();
@@ -105,4 +106,38 @@ const GENERIC_ADDR_SPACE int64_t* init_shared_mem(
105106
sync_threadblock();
106107
return shared_groups_buffer;
107108
}
109+
110+
/**
 * Atomically sets bit number (val - min_val) in a bitmap made of 32-bit words.
 *
 * The bitmap's device address is computed as
 *   base_dev_addr + (*agg - base_host_addr)
 *     + (get_thread_index() & (sub_bitmap_count - 1)) * bitmap_bytes
 * i.e. the value stored in *agg is rebased from base_host_addr to
 * base_dev_addr, and a per-thread offset of whole sub-bitmaps is added.
 *
 * NOTE(review): by its name this looks like the COUNT(DISTINCT) bitmap
 * aggregate; the `& (sub_bitmap_count - 1)` masking presumably assumes
 * sub_bitmap_count is a power of two - confirm against the callers.
 *
 * @param agg               slot holding the address that gets rebased
 * @param val               value whose bit is set
 * @param min_val           value that maps to bit 0
 * @param base_dev_addr     base address added when rebasing
 * @param base_host_addr    base address subtracted when rebasing
 * @param sub_bitmap_count  used as a mask (count - 1) on the thread index
 * @param bitmap_bytes      byte stride between consecutive sub-bitmaps
 */
void agg_count_distinct_bitmap_gpu(GENERIC_ADDR_SPACE int64_t* agg,
                                   const int64_t val,
                                   const int64_t min_val,
                                   const int64_t base_dev_addr,
                                   const int64_t base_host_addr,
                                   const uint64_t sub_bitmap_count,
                                   const uint64_t bitmap_bytes) {
  const uint64_t bitmap_idx = val - min_val;
  // The byte index is deliberately narrowed to 32 bits before deriving the
  // word index, matching the previous implementation.
  const uint32_t byte_idx = bitmap_idx >> 3;
  const uint32_t word_idx = byte_idx >> 2;
  const int64_t host_addr = *agg;
  GENERIC_ADDR_SPACE int32_t* bitmap =
      (GENERIC_ADDR_SPACE int32_t*)(base_dev_addr + host_addr - base_host_addr +
                                    (get_thread_index() & (sub_bitmap_count - 1)) *
                                        bitmap_bytes);
  // Bit position inside the 32-bit word. This equals
  // (bitmap_idx & 7) + 8 * (byte_idx & 3), which the former four-way switch
  // computed one byte lane at a time (its default arm was unreachable).
  const uint32_t bit_in_word = (uint32_t)(bitmap_idx & 31);
  // Build the mask with an unsigned shift so that bit 31 does not rely on
  // shifting a signed 1 into the sign bit.
  atomic_or(&bitmap[word_idx], (int32_t)(1u << bit_in_word));
}
108143
}

omniscidb/QueryEngine/Compiler/genx.ll

Lines changed: 0 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -273,60 +273,6 @@ define double @atomic_max_double(double addrspace(4)* %addr, double noundef %val
273273
ret double %old.cst
274274
}
275275

276-
277-
; Sets one bit in a bitmap of i32 words through @atomic_or.
;   bit index   = %val - %min_val
;   bitmap base = %base_dev_addr + load(%agg) - %base_host_addr
;                 + (get_thread_index() & (%sub_bitmap_count - 1)) * %bitmap_bytes
; NOTE(review): the `& (count - 1)` masking presumably assumes
; %sub_bitmap_count is a power of two - confirm against the callers.
define void @agg_count_distinct_bitmap_gpu(i64 addrspace(4)* %agg, i64 noundef %val, i64 noundef %min_val, i64 noundef %base_dev_addr, i64 noundef %base_host_addr, i64 noundef %sub_bitmap_count, i64 noundef %bitmap_bytes) {
278-
%bitmap_idx = sub nsw i64 %val, %min_val
279-
%bitmap_idx.i32 = trunc i64 %bitmap_idx to i32 ; low 32 bits; only bits 0..2 are used below
280-
%byte_idx.i64 = lshr i64 %bitmap_idx, 3 ; bit index / 8
281-
%byte_idx = trunc i64 %byte_idx.i64 to i32
282-
%word_idx = lshr i32 %byte_idx, 2 ; byte index / 4 = index of the i32 word
283-
%word_idx.i64 = zext i32 %word_idx to i64
284-
%byte_word_idx = and i32 %byte_idx, 3 ; byte lane inside the word, always 0..3
285-
%host_addr = load i64, i64 addrspace(4)* %agg
286-
%sub_bm_m1 = sub i64 %sub_bitmap_count, 1
287-
%tid = call i64 @get_thread_index()
288-
%sub_tid = and i64 %sub_bm_m1, %tid
289-
%rhs = mul i64 %sub_tid, %bitmap_bytes ; byte offset of the selected sub-bitmap
290-
%base_dev = add nsw i64 %base_dev_addr, %host_addr
291-
%lhs = sub nsw i64 %base_dev, %base_host_addr
292-
%bitmap_addr = add i64 %lhs, %rhs
293-
%bitmap = inttoptr i64 %bitmap_addr to i32 addrspace(4)*
294-
; All four values of %byte_word_idx are listed, so the %.exit default target
; of this switch is never taken.
switch i32 %byte_word_idx, label %.exit [
295-
i32 0, label %.c0
296-
i32 1, label %.c1
297-
i32 2, label %.c2
298-
i32 3, label %.c3
299-
]
300-
301-
; Each case computes the same word pointer plus the shift amount
; (bit index & 7) + 8 * lane. `or` acts as an add here because the `and 7`
; result only has bits 0..2 set.
.c0:
302-
%btwi0 = getelementptr inbounds i32, i32 addrspace(4)* %bitmap, i64 %word_idx.i64
303-
%res0 = and i32 %bitmap_idx.i32, 7
304-
br label %.default
305-
.c1:
306-
%btwi1 = getelementptr inbounds i32, i32 addrspace(4)* %bitmap, i64 %word_idx.i64
307-
%btidx71 = and i32 %bitmap_idx.i32, 7
308-
%res1 = or i32 %btidx71, 8
309-
br label %.default
310-
.c2:
311-
%btwi2 = getelementptr inbounds i32, i32 addrspace(4)* %bitmap, i64 %word_idx.i64
312-
%btidx72 = and i32 %bitmap_idx.i32, 7
313-
%res2 = or i32 %btidx72, 16
314-
br label %.default
315-
.c3:
316-
%btwi3 = getelementptr inbounds i32, i32 addrspace(4)* %bitmap, i64 %word_idx.i64
317-
%btidx73 = and i32 %bitmap_idx.i32, 7
318-
%res3 = or i32 %btidx73, 24
319-
br label %.default
320-
; Join block for the four cases (despite its name it is not the switch's
; default target, which is %.exit).
.default:
321-
%res = phi i32 [ %res0, %.c0 ], [ %res1, %.c1 ], [ %res2, %.c2], [ %res3, %.c3 ]
322-
%arg0 = phi i32 addrspace(4)* [ %btwi0, %.c0 ], [ %btwi1, %.c1 ], [ %btwi2, %.c2], [ %btwi3, %.c3 ]
323-
%arg1 = shl nuw i32 1, %res ; single-bit mask, %res is in 0..31
324-
tail call void @atomic_or(i32 addrspace(4)* %arg0, i32 noundef %arg1)
325-
br label %.exit
326-
.exit:
327-
ret void
328-
}
329-
330276
define void @write_back_non_grouped_agg(i64 addrspace(4)* %input_buffer, i64 addrspace(4)* %output_buffer, i32 noundef %agg_idx) {
331277
%tid = call i64 @get_thread_index()
332278
%agg_idx.i64 = sext i32 %agg_idx to i64

0 commit comments

Comments
 (0)