11// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
22
3- module attributes {llvm.target_triple = " amdgcn-amd-amdhsa" , omp.is_target_device = true } {
3+ module attributes {dlti.dl_spec = #dlti.dl_spec < #dlti.dl_entry < " dlti.alloca_memory_space " , 5 : ui32 >>, llvm.target_triple = " amdgcn-amd-amdhsa" , omp.is_target_device = true } {
44 llvm.func @omp_target_region_ () {
55 %0 = llvm.mlir.constant (20 : i32 ) : i32
66 %1 = llvm.mlir.constant (10 : i32 ) : i32
77 %2 = llvm.mlir.constant (1 : i64 ) : i64
8- %3 = llvm.alloca %2 x i32 {bindc_name = " a" , in_type = i32 , operandSegmentSizes = array<i32 : 0 , 0 >, uniq_name = " _QFomp_target_regionEa" } : (i64 ) -> !llvm.ptr
8+ %3 = llvm.alloca %2 x i32 {bindc_name = " a" , in_type = i32 , operandSegmentSizes = array<i32 : 0 , 0 >, uniq_name = " _QFomp_target_regionEa" } : (i64 ) -> !llvm.ptr < 5 >
99 %4 = llvm.mlir.constant (1 : i64 ) : i64
10- %5 = llvm.alloca %4 x i32 {bindc_name = " b" , in_type = i32 , operandSegmentSizes = array<i32 : 0 , 0 >, uniq_name = " _QFomp_target_regionEb" } : (i64 ) -> !llvm.ptr
10+ %5 = llvm.alloca %4 x i32 {bindc_name = " b" , in_type = i32 , operandSegmentSizes = array<i32 : 0 , 0 >, uniq_name = " _QFomp_target_regionEb" } : (i64 ) -> !llvm.ptr < 5 >
1111 %6 = llvm.mlir.constant (1 : i64 ) : i64
12- %7 = llvm.alloca %6 x i32 {bindc_name = " c" , in_type = i32 , operandSegmentSizes = array<i32 : 0 , 0 >, uniq_name = " _QFomp_target_regionEc" } : (i64 ) -> !llvm.ptr
13- llvm.store %1 , %3 : i32 , !llvm.ptr
14- llvm.store %0 , %5 : i32 , !llvm.ptr
15- %map1 = omp.map.info var_ptr (%3 : !llvm.ptr , i32 ) map_clauses (tofrom ) capture (ByRef ) -> !llvm.ptr {name = " " }
16- %map2 = omp.map.info var_ptr (%5 : !llvm.ptr , i32 ) map_clauses (tofrom ) capture (ByRef ) -> !llvm.ptr {name = " " }
17- %map3 = omp.map.info var_ptr (%7 : !llvm.ptr , i32 ) map_clauses (tofrom ) capture (ByRef ) -> !llvm.ptr {name = " " }
12+ %7 = llvm.alloca %6 x i32 {bindc_name = " c" , in_type = i32 , operandSegmentSizes = array<i32 : 0 , 0 >, uniq_name = " _QFomp_target_regionEc" } : (i64 ) -> !llvm.ptr <5 >
13+ %8 = llvm.addrspacecast %3 : !llvm.ptr <5 > to !llvm.ptr
14+ %9 = llvm.addrspacecast %5 : !llvm.ptr <5 > to !llvm.ptr
15+ %10 = llvm.addrspacecast %7 : !llvm.ptr <5 > to !llvm.ptr
16+ llvm.store %1 , %8 : i32 , !llvm.ptr
17+ llvm.store %0 , %9 : i32 , !llvm.ptr
18+ %map1 = omp.map.info var_ptr (%8 : !llvm.ptr , i32 ) map_clauses (tofrom ) capture (ByRef ) -> !llvm.ptr {name = " " }
19+ %map2 = omp.map.info var_ptr (%9 : !llvm.ptr , i32 ) map_clauses (tofrom ) capture (ByRef ) -> !llvm.ptr {name = " " }
20+ %map3 = omp.map.info var_ptr (%10 : !llvm.ptr , i32 ) map_clauses (tofrom ) capture (ByRef ) -> !llvm.ptr {name = " " }
1821 omp.target map_entries (%map1 -> %arg0 , %map2 -> %arg1 , %map3 -> %arg2 : !llvm.ptr , !llvm.ptr , !llvm.ptr ) {
19- %8 = llvm.load %arg0 : !llvm.ptr -> i32
20- %9 = llvm.load %arg1 : !llvm.ptr -> i32
21- %10 = llvm.add %8 , %9 : i32
22- llvm.store %10 , %arg2 : i32 , !llvm.ptr
22+ %11 = llvm.load %arg0 : !llvm.ptr -> i32
23+ %12 = llvm.load %arg1 : !llvm.ptr -> i32
24+ %13 = llvm.add %11 , %12 : i32
25+ llvm.store %13 , %arg2 : i32 , !llvm.ptr
2326 omp.terminator
2427 }
2528 llvm.return
@@ -31,19 +34,22 @@ module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_devic
3134// CHECK: @[[DYNA_ENV:.*]] = weak_odr protected global %struct.DynamicEnvironmentTy zeroinitializer
3235// CHECK: @[[KERNEL_ENV:.*]] = weak_odr protected constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 1, i32 1, i32 256, i32 -1, i32 -1, i32 0, i32 0 }, ptr @[[IDENT]], ptr @[[DYNA_ENV]] }
3336// CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{[^_]+}}_{{[^_]+}}_omp_target_region__l{{[0-9]+}}(ptr %[[DYN_PTR:.*]], ptr %[[ADDR_A:.*]], ptr %[[ADDR_B:.*]], ptr %[[ADDR_C:.*]])
34- // CHECK: %[[TMP_A:.*]] = alloca ptr, align 8
35- // CHECK: store ptr %[[ADDR_A]], ptr %[[TMP_A]], align 8
37+ // CHECK: %[[TMP_A:.*]] = alloca ptr, align 8, addrspace(5)
38+ // CHECK: %[[ASCAST_A:.*]] = addrspacecast ptr addrspace(5) %[[TMP_A]] to ptr
39+ // CHECK: store ptr %[[ADDR_A]], ptr %[[ASCAST_A]], align 8
3640// CHECK: %[[TMP_B:.*]] = alloca ptr, align 8
37- // CHECK: store ptr %[[ADDR_B]], ptr %[[TMP_B]], align 8
41+ // CHECK: %[[ASCAST_B:.*]] = addrspacecast ptr addrspace(5) %[[TMP_B]] to ptr
42+ // CHECK: store ptr %[[ADDR_B]], ptr %[[ASCAST_B]], align 8
3843// CHECK: %[[TMP_C:.*]] = alloca ptr, align 8
39- // CHECK: store ptr %[[ADDR_C]], ptr %[[TMP_C]], align 8
44+ // CHECK: %[[ASCAST_C:.*]] = addrspacecast ptr addrspace(5) %[[TMP_C]] to ptr
45+ // CHECK: store ptr %[[ADDR_C]], ptr %[[ASCAST_C]], align 8
4046// CHECK: %[[INIT:.*]] = call i32 @__kmpc_target_init(ptr @[[KERNEL_ENV]], ptr %[[DYN_PTR]])
4147// CHECK-NEXT: %[[CMP:.*]] = icmp eq i32 %[[INIT]], -1
4248// CHECK-NEXT: br i1 %[[CMP]], label %[[LABEL_ENTRY:.*]], label %[[LABEL_EXIT:.*]]
4349// CHECK: [[LABEL_ENTRY]]:
44- // CHECK: %[[PTR_A:.*]] = load ptr, ptr %[[TMP_A ]], align 8
45- // CHECK: %[[PTR_B:.*]] = load ptr, ptr %[[TMP_B ]], align 8
46- // CHECK: %[[PTR_C:.*]] = load ptr, ptr %[[TMP_C ]], align 8
50+ // CHECK: %[[PTR_A:.*]] = load ptr, ptr %[[ASCAST_A ]], align 8
51+ // CHECK: %[[PTR_B:.*]] = load ptr, ptr %[[ASCAST_B ]], align 8
52+ // CHECK: %[[PTR_C:.*]] = load ptr, ptr %[[ASCAST_C ]], align 8
4753// CHECK-NEXT: br label %[[LABEL_TARGET:.*]]
4854// CHECK: [[LABEL_TARGET]]:
4955// CHECK: %[[A:.*]] = load i32, ptr %[[PTR_A]], align 4
0 commit comments