@@ -8,13 +8,14 @@ use rustc_codegen_ssa::traits::BaseTypeCodegenMethods;
8
8
use crate :: builder:: SBuilder ;
9
9
use crate :: common:: AsCCharPtr ;
10
10
use crate :: llvm:: AttributePlace :: Function ;
11
- use crate :: llvm:: { self , Linkage } ;
11
+ use crate :: llvm:: { self , Linkage , Type , Value } ;
12
12
use crate :: { LlvmCodegenBackend , SimpleCx , attributes} ;
13
13
14
14
pub ( crate ) fn handle_gpu_code < ' ll > (
15
15
_cgcx : & CodegenContext < LlvmCodegenBackend > ,
16
16
cx : & ' ll SimpleCx < ' _ > ,
17
17
) {
18
+ // The offload memory transfer type for each kernel
18
19
let mut o_types = vec ! [ ] ;
19
20
let mut kernels = vec ! [ ] ;
20
21
let offload_entry_ty = add_tgt_offload_entry ( & cx) ;
@@ -43,7 +44,7 @@ fn generate_at_one<'ll>(cx: &'ll SimpleCx<'_>) -> &'ll llvm::Value {
43
44
44
45
// @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8
45
46
let struct_ident_ty = cx. type_named_struct ( "struct.ident_t" ) ;
46
- let struct_elems: Vec < & llvm :: Value > = vec ! [
47
+ let struct_elems = vec ! [
47
48
cx. get_const_i32( 0 ) ,
48
49
cx. get_const_i32( 2 ) ,
49
50
cx. get_const_i32( 0 ) ,
@@ -163,7 +164,7 @@ pub(crate) fn add_unnamed_global<'ll>(
163
164
l : Linkage ,
164
165
) -> & ' ll llvm:: Value {
165
166
let llglobal = add_global ( cx, name, initializer, l) ;
166
- unsafe { llvm:: LLVMSetUnnamedAddress ( llglobal, llvm:: UnnamedAddr :: Global ) } ;
167
+ llvm:: SetUnnamedAddress ( llglobal, llvm:: UnnamedAddr :: Global ) ;
167
168
llglobal
168
169
}
169
170
@@ -220,24 +221,20 @@ fn gen_define_handling<'ll>(
220
221
let initializer = crate :: common:: bytes_in_context ( cx. llcx , c_val) ;
221
222
let llglobal = add_unnamed_global ( & cx, & offload_entry_name, initializer, InternalLinkage ) ;
222
223
llvm:: set_alignment ( llglobal, Align :: ONE ) ;
223
- let c_section_name = CString :: new ( ".llvm.rodata.offloading" ) . unwrap ( ) ;
224
- llvm:: set_section ( llglobal, & c_section_name) ;
224
+ llvm:: set_section ( llglobal, c".llvm.rodata.offloading" ) ;
225
225
226
226
// Not actively used yet, for calling real kernels
227
227
let name = format ! ( ".offloading.entry.kernel_{num}" ) ;
228
- let ci64_0 = cx. get_const_i64 ( 0 ) ;
229
- let ci16_1 = cx. get_const_i16 ( 1 ) ;
230
- let elems: Vec < & llvm:: Value > = vec ! [
231
- ci64_0,
232
- ci16_1,
233
- ci16_1,
234
- cx. get_const_i32( 0 ) ,
235
- region_id,
236
- llglobal,
237
- ci64_0,
238
- ci64_0,
239
- cx. const_null( cx. type_ptr( ) ) ,
240
- ] ;
228
+
229
+ // See the __tgt_offload_entry documentation above.
230
+ let reserved = cx. get_const_i64 ( 0 ) ;
231
+ let version = cx. get_const_i16 ( 1 ) ;
232
+ let kind = cx. get_const_i16 ( 1 ) ;
233
+ let flags = cx. get_const_i32 ( 0 ) ;
234
+ let size = cx. get_const_i64 ( 0 ) ;
235
+ let data = cx. get_const_i64 ( 0 ) ;
236
+ let aux_addr = cx. const_null ( cx. type_ptr ( ) ) ;
237
+ let elems = vec ! [ reserved, version, kind, flags, region_id, llglobal, size, data, aux_addr] ;
241
238
242
239
let initializer = crate :: common:: named_struct ( offload_entry_ty, & elems) ;
243
240
let c_name = CString :: new ( name) . unwrap ( ) ;
@@ -353,12 +350,7 @@ fn gen_call_handling<'ll>(
353
350
354
351
// Step 1)
355
352
unsafe { llvm:: LLVMRustPositionBefore ( builder. llbuilder , kernel_call) } ;
356
- builder. memset (
357
- tgt_bin_desc_alloca,
358
- cx. get_const_i8 ( 0 ) ,
359
- cx. get_const_i64 ( 32 ) ,
360
- Align :: from_bytes ( 8 ) . unwrap ( ) ,
361
- ) ;
353
+ builder. memset ( tgt_bin_desc_alloca, cx. get_const_i8 ( 0 ) , cx. get_const_i64 ( 32 ) , Align :: EIGHT ) ;
362
354
363
355
let mapper_fn_ty = cx. type_func ( & [ cx. type_ptr ( ) ] , cx. type_void ( ) ) ;
364
356
let register_lib_decl = declare_offload_fn ( & cx, "__tgt_register_lib" , mapper_fn_ty) ;
@@ -384,26 +376,48 @@ fn gen_call_handling<'ll>(
384
376
builder. store ( cx. get_const_i64 ( 1024 ) , gep3, Align :: EIGHT ) ;
385
377
}
386
378
387
- // Step 2)
388
- let gep1 = builder. inbounds_gep ( ty, a1, & [ i32_0, i32_0] ) ;
389
- let gep2 = builder. inbounds_gep ( ty, a2, & [ i32_0, i32_0] ) ;
390
- let gep3 = builder. inbounds_gep ( ty2, a4, & [ i32_0, i32_0] ) ;
379
+ // For now we have a very simplistic indexing scheme into our
380
+ // offload_{baseptrs,ptrs,sizes}. We will probably improve this along with our GPU frontend PR.
381
+ fn get_geps < ' a , ' ll > (
382
+ builder : & mut SBuilder < ' a , ' ll > ,
383
+ cx : & ' ll SimpleCx < ' ll > ,
384
+ ty : & ' ll Type ,
385
+ ty2 : & ' ll Type ,
386
+ a1 : & ' ll Value ,
387
+ a2 : & ' ll Value ,
388
+ a4 : & ' ll Value ,
389
+ ) -> ( & ' ll Value , & ' ll Value , & ' ll Value ) {
390
+ let i32_0 = cx. get_const_i32 ( 0 ) ;
391
+
392
+ let gep1 = builder. inbounds_gep ( ty, a1, & [ i32_0, i32_0] ) ;
393
+ let gep2 = builder. inbounds_gep ( ty, a2, & [ i32_0, i32_0] ) ;
394
+ let gep3 = builder. inbounds_gep ( ty2, a4, & [ i32_0, i32_0] ) ;
395
+ ( gep1, gep2, gep3)
396
+ }
391
397
392
- let nullptr = cx. const_null ( cx. type_ptr ( ) ) ;
393
- let o_type = o_types[ 0 ] ;
398
+ fn generate_mapper_call < ' a , ' ll > (
399
+ builder : & mut SBuilder < ' a , ' ll > ,
400
+ cx : & ' ll SimpleCx < ' ll > ,
401
+ geps : ( & ' ll Value , & ' ll Value , & ' ll Value ) ,
402
+ o_type : & ' ll Value ,
403
+ fn_to_call : & ' ll Value ,
404
+ fn_ty : & ' ll Type ,
405
+ num_args : u64 ,
406
+ s_ident_t : & ' ll Value ,
407
+ ) {
408
+ let nullptr = cx. const_null ( cx. type_ptr ( ) ) ;
409
+ let i64_max = cx. get_const_i64 ( u64:: MAX ) ;
410
+ let num_args = cx. get_const_i32 ( num_args) ;
411
+ let args =
412
+ vec ! [ s_ident_t, i64_max, num_args, geps. 0 , geps. 1 , geps. 2 , o_type, nullptr, nullptr] ;
413
+ builder. call ( fn_ty, fn_to_call, & args, None ) ;
414
+ }
415
+
416
+ // Step 2)
394
417
let s_ident_t = generate_at_one ( & cx) ;
395
- let args = vec ! [
396
- s_ident_t,
397
- cx. get_const_i64( u64 :: MAX ) ,
398
- cx. get_const_i32( num_args) ,
399
- gep1,
400
- gep2,
401
- gep3,
402
- o_type,
403
- nullptr,
404
- nullptr,
405
- ] ;
406
- builder. call ( fn_ty, begin_mapper_decl, & args, None ) ;
418
+ let o = o_types[ 0 ] ;
419
+ let geps = get_geps ( & mut builder, & cx, ty, ty2, a1, a2, a4) ;
420
+ generate_mapper_call ( & mut builder, & cx, geps, o, begin_mapper_decl, fn_ty, num_args, s_ident_t) ;
407
421
408
422
// Step 3)
409
423
// Here we will add code for the actual kernel launches in a follow-up PR.
@@ -412,24 +426,9 @@ fn gen_call_handling<'ll>(
412
426
// Step 4)
413
427
unsafe { llvm:: LLVMRustPositionAfter ( builder. llbuilder , kernel_call) } ;
414
428
415
- let gep1 = builder. inbounds_gep ( ty, a1, & [ i32_0, i32_0] ) ;
416
- let gep2 = builder. inbounds_gep ( ty, a2, & [ i32_0, i32_0] ) ;
417
- let gep3 = builder. inbounds_gep ( ty2, a4, & [ i32_0, i32_0] ) ;
418
-
419
- let nullptr = cx. const_null ( cx. type_ptr ( ) ) ;
420
- let o_type = o_types[ 0 ] ;
421
- let args = vec ! [
422
- s_ident_t,
423
- cx. get_const_i64( u64 :: MAX ) ,
424
- cx. get_const_i32( num_args) ,
425
- gep1,
426
- gep2,
427
- gep3,
428
- o_type,
429
- nullptr,
430
- nullptr,
431
- ] ;
432
- builder. call ( fn_ty, end_mapper_decl, & args, None ) ;
429
+ let geps = get_geps ( & mut builder, & cx, ty, ty2, a1, a2, a4) ;
430
+ generate_mapper_call ( & mut builder, & cx, geps, o, end_mapper_decl, fn_ty, num_args, s_ident_t) ;
431
+
433
432
builder. call ( mapper_fn_ty, unregister_lib_decl, & [ tgt_bin_desc_alloca] , None ) ;
434
433
435
434
// With this we generated the following begin and end mappers. We could easily generate the
0 commit comments