@@ -12,10 +12,10 @@ declare i64 @__spirv_BuiltInSubgroupSize(i32 %dimention)
12
12
13
13
; Declaration only (no body in this view) of @__spirv_ControlBarrier: returns void and takes
; three i32 arguments named %execution_scope, %memory_scope and %memory_semantics.
declare void @__spirv_ControlBarrier (i32 %execution_scope , i32 %memory_scope , i32 %memory_semantics )
14
14
15
; @slm.buf.i64: module-internal, zero-initialized array of i64 placed in addrspace(3).
; The removed line declares 1024 elements; the added line declares 4096 elements.
; NOTE(review): the global is declared `align 4` while its element type is i64, and the
; load/store on it visible in @init_shared_mem carry no explicit alignment - confirm this
; matches the i64 ABI alignment of the target datalayout (not visible here).
- @slm.buf.i64 = internal local_unnamed_addr addrspace (3 ) global [1024 x i64 ] zeroinitializer , align 4
15
+ @slm.buf.i64 = internal local_unnamed_addr addrspace (3 ) global [4096 x i64 ] zeroinitializer , align 4
16
16
17
17
; Returns an addrspace(4) i64 pointer to the start of the global @slm.buf.i64.
; Takes no arguments and performs no loads or stores; it only converts the pointer type.
; NOTE(review): despite "dynamic" in the name, the returned pointer refers to the
; fixed-size global array @slm.buf.i64 - confirm callers never need more than its element count.
define i64 addrspace (4 )* @declare_dynamic_shared_memory () {
18
; Reinterpret the array pointer as a pointer to i64, still in addrspace(3).
; The removed/added pair differs only in the array length (1024 -> 4096), which must
; match the type declared on @slm.buf.i64.
- %res.share = bitcast [1024 x i64 ] addrspace (3 )* @slm.buf.i64 to i64 addrspace (3 )*
18
+ %res.share = bitcast [4096 x i64 ] addrspace (3 )* @slm.buf.i64 to i64 addrspace (3 )*
19
19
; Convert the addrspace(3) pointer into an addrspace(4) pointer, the function's return type.
%res = addrspacecast i64 addrspace (3 )* %res.share to i64 addrspace (4 )*
20
20
ret i64 addrspace (4 )* %res
21
21
}
@@ -333,15 +333,15 @@ define i64 addrspace(4)* @init_shared_mem(i64 addrspace(4)* %agg_init_val, i32 n
333
333
.for_body:
334
334
%pos.idx = phi i64 [ %pos , %.entry ], [ %pos.idx.new , %.for_body ]
335
335
%agg_init_val.idx = getelementptr inbounds i64 , i64 addrspace (4 )* %agg_init_val , i64 %pos.idx
336
- %slm.idx = getelementptr inbounds [1024 x i64 ], [1024 x i64 ] addrspace (3 )* @slm.buf.i64 , i64 0 , i64 %pos.idx
336
+ %slm.idx = getelementptr inbounds [4096 x i64 ], [4096 x i64 ] addrspace (3 )* @slm.buf.i64 , i64 0 , i64 %pos.idx
337
337
%val = load i64 , i64 addrspace (4 )* %agg_init_val.idx
338
338
store i64 %val , i64 addrspace (3 )* %slm.idx
339
339
%pos.idx.new = add nsw i64 %pos.idx , %wgnum
340
340
%cond = icmp slt i64 %pos.idx.new , %buf.units.i64
341
341
br i1 %cond , label %.for_body , label %.exit
342
342
.exit:
343
343
call void @sync_threadblock ()
344
- %res.ptr = bitcast [1024 x i64 ] addrspace (3 )* @slm.buf.i64 to i64 addrspace (3 )*
344
+ %res.ptr = bitcast [4096 x i64 ] addrspace (3 )* @slm.buf.i64 to i64 addrspace (3 )*
345
345
%res.ptr.casted = addrspacecast i64 addrspace (3 )* %res.ptr to i64 addrspace (4 )*
346
346
ret i64 addrspace (4 )* %res.ptr.casted
347
347
}
0 commit comments