@@ -131,3 +131,38 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<!llvm.ptr<272>, d
 // CHECK-LABEL: llvm.func @_QQmain()
 // CHECK: %[[KERNEL_PTR:.*]] = llvm.mlir.addressof @_QMmod1Psub1
 // CHECK: llvm.call @_FortranACUFLaunchClusterKernel(%[[KERNEL_PTR]], {{.*}})
+
+// -----
+
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<i1, dense<8> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr, dense<64> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<270>, dense<32> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<271>, dense<32> : vector<4xi64>>, #dlti.dl_entry<i8, dense<8> : vector<2xi64>>, #dlti.dl_entry<i16, dense<16> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr<272>, dense<64> : vector<4xi64>>, #dlti.dl_entry<i64, dense<64> : vector<2xi64>>, #dlti.dl_entry<i32, dense<32> : vector<2xi64>>, #dlti.dl_entry<f128, dense<128> : vector<2xi64>>, #dlti.dl_entry<i128, dense<128> : vector<2xi64>>, #dlti.dl_entry<f64, dense<64> : vector<2xi64>>, #dlti.dl_entry<f80, dense<128> : vector<2xi64>>, #dlti.dl_entry<f16, dense<16> : vector<2xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", gpu.container_module, llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 20.0.0 (git@github.com:clementval/llvm-project.git ddcfd4d2dc17bf66cee8c3ef6284118684a2b0e6)", llvm.target_triple = "x86_64-unknown-linux-gnu"} {
+  llvm.func @_QMmod1Phost_sub() {
+    %0 = llvm.mlir.constant(1 : i32) : i32
+    %1 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+    %2 = llvm.mlir.constant(40 : i64) : i64
+    %3 = llvm.mlir.constant(16 : i32) : i32
+    %4 = llvm.mlir.constant(25 : i32) : i32
+    %5 = llvm.mlir.constant(21 : i32) : i32
+    %6 = llvm.mlir.constant(17 : i32) : i32
+    %7 = llvm.mlir.constant(1 : index) : i64
+    %8 = llvm.mlir.constant(27 : i32) : i32
+    %9 = llvm.mlir.constant(6 : i32) : i32
+    %10 = llvm.mlir.constant(1 : i32) : i32
+    %11 = llvm.mlir.constant(0 : i32) : i32
+    %12 = llvm.mlir.constant(10 : index) : i64
+    %13 = llvm.mlir.addressof @_QQclX91d13f6e74caa2f03965d7a7c6a8fdd5 : !llvm.ptr
+    %14 = llvm.call @_FortranACUFMemAlloc(%2, %11, %13, %6) : (i64, i32, !llvm.ptr, i32) -> !llvm.ptr
+    gpu.launch_func @cuda_device_mod::@_QMmod1Psub1 blocks in (%7, %7, %7) threads in (%12, %7, %7) : i64 dynamic_shared_memory_size %11 args(%14 : !llvm.ptr) {cuf.proc_attr = #cuf.cuda_proc<grid_global>}
+    llvm.return
+  }
+  llvm.func @_QMmod1Psub1(!llvm.ptr) -> ()
+  llvm.mlir.global linkonce constant @_QQclX91d13f6e74caa2f03965d7a7c6a8fdd5() {addr_space = 0 : i32} : !llvm.array<2 x i8> {
+    %0 = llvm.mlir.constant("a\00") : !llvm.array<2 x i8>
+    llvm.return %0 : !llvm.array<2 x i8>
+  }
+  llvm.func @_FortranACUFMemAlloc(i64, i32, !llvm.ptr, i32) -> !llvm.ptr attributes {fir.runtime, sym_visibility = "private"}
+  llvm.func @_FortranACUFMemFree(!llvm.ptr, i32, !llvm.ptr, i32) -> !llvm.struct<()> attributes {fir.runtime, sym_visibility = "private"}
+  gpu.binary @cuda_device_mod [#gpu.object<#nvvm.target, "">]
+}
+
+// CHECK-LABEL: llvm.func @_QMmod1Phost_sub()
+// CHECK: llvm.call @_FortranACUFLaunchCooperativeKernel