@@ -25,6 +25,11 @@ define void @sync_threadblock() {
25
25
ret void
26
26
}
27
27
28
; get_block_dim: takes no arguments and returns, unchanged, the i64 result of
; calling @__spirv_BuiltInWorkgroupSize with the constant i32 0.
; NOTE(review): the i32 0 argument presumably selects the first (x) dimension
; of the workgroup size -- confirm against the builtin's declaration.
+ define i64 @get_block_dim () {
29
+ %gsize = call i64 @__spirv_BuiltInWorkgroupSize (i32 0 )
30
+ ret i64 %gsize
31
+ }
32
+
28
33
define i32 @pos_start_impl (i32 addrspace (4 )* %0 ) readnone nounwind alwaysinline {
29
34
%gid = call i64 @__spirv_BuiltInWorkgroupId (i32 0 )
30
35
%gsize = call i64 @__spirv_BuiltInWorkgroupSize (i32 0 )
@@ -322,30 +327,6 @@ define void @agg_count_distinct_bitmap_gpu(i64 addrspace(4)* %agg, i64 noundef %
322
327
ret void
323
328
}
324
329
325
; init_shared_mem (every line of this definition carries the '-' removal marker
; in this diff).
; Copies i64 values from %agg_init_val into the addrspace(3) array @slm.buf.i64,
; calls @sync_threadblock, and returns a pointer to @slm.buf.i64 cast to
; addrspace(4).
;   %agg_init_val       - addrspace(4) pointer the i64 values are loaded from.
;   %groups_buffer_size - i32; arithmetically shifted right by 3 to obtain the
;                         loop bound (presumably a byte count converted to a
;                         count of 8-byte elements -- TODO confirm with callers).
; NOTE(review): the loop starts at @get_thread_index() but advances by the value
; of @__spirv_BuiltInNumWorkgroups(i32 0). If @get_thread_index is local to a
; workgroup, a stride of the workgroup size would be expected instead -- confirm.
; NOTE(review): @slm.buf.i64 is indexed as [4096 x i64] with an inbounds GEP;
; nothing in this function checks the loop bound against 4096.
- define i64 addrspace (4 )* @init_shared_mem (i64 addrspace (4 )* %agg_init_val , i32 noundef %groups_buffer_size ) {
326
- .entry:
327
- %buf.units = ashr i32 %groups_buffer_size , 3
328
- %buf.units.i64 = sext i32 %buf.units to i64
329
- %pos = call i64 @get_thread_index ()
330
- %wgnum = call i64 @__spirv_BuiltInNumWorkgroups (i32 0 )
331
; Skip the copy loop entirely when the starting index is already past the bound.
- %loop.cond = icmp slt i64 %pos , %buf.units.i64
332
- br i1 %loop.cond , label %.for_body , label %.exit
333
- .for_body:
334
- %pos.idx = phi i64 [ %pos , %.entry ], [ %pos.idx.new , %.for_body ]
335
- %agg_init_val.idx = getelementptr inbounds i64 , i64 addrspace (4 )* %agg_init_val , i64 %pos.idx
336
- %slm.idx = getelementptr inbounds [4096 x i64 ], [4096 x i64 ] addrspace (3 )* @slm.buf.i64 , i64 0 , i64 %pos.idx
337
- %val = load i64 , i64 addrspace (4 )* %agg_init_val.idx
338
- store i64 %val , i64 addrspace (3 )* %slm.idx
339
; Advance by %wgnum and repeat while the new index is still below the bound.
- %pos.idx.new = add nsw i64 %pos.idx , %wgnum
340
- %cond = icmp slt i64 %pos.idx.new , %buf.units.i64
341
- br i1 %cond , label %.for_body , label %.exit
342
- .exit:
343
; Both the skip path and the loop exit reach this call before returning.
- call void @sync_threadblock ()
344
- %res.ptr = bitcast [4096 x i64 ] addrspace (3 )* @slm.buf.i64 to i64 addrspace (3 )*
345
- %res.ptr.casted = addrspacecast i64 addrspace (3 )* %res.ptr to i64 addrspace (4 )*
346
- ret i64 addrspace (4 )* %res.ptr.casted
347
- }
348
-
349
330
define void @write_back_non_grouped_agg (i64 addrspace (4 )* %input_buffer , i64 addrspace (4 )* %output_buffer , i32 noundef %agg_idx ) {
350
331
%tid = call i64 @get_thread_index ()
351
332
%agg_idx.i64 = sext i32 %agg_idx to i64
0 commit comments