@@ -113,7 +113,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<i1, dense<8> : ve
113113// -----
114114
115115module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<!llvm.ptr<272>, dense<64> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr, dense<64> : vector<4xi64>>, #dlti.dl_entry<i64, dense<64> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr<270>, dense<32> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<271>, dense<32> : vector<4xi64>>, #dlti.dl_entry<f64, dense<64> : vector<2xi64>>, #dlti.dl_entry<f128, dense<128> : vector<2xi64>>, #dlti.dl_entry<f16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i32, dense<32> : vector<2xi64>>, #dlti.dl_entry<f80, dense<128> : vector<2xi64>>, #dlti.dl_entry<i8, dense<8> : vector<2xi64>>, #dlti.dl_entry<i16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i128, dense<128> : vector<2xi64>>, #dlti.dl_entry<i1, dense<8> : vector<2xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", gpu.container_module, llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 20.0.0 (
git@github.com:clementval/llvm-project.git 4116c1370ff76adf1e58eb3c39d0a14721794c70)", llvm.target_triple = "x86_64-unknown-linux-gnu"} {
116- llvm.func @_FortranACUFLaunchClusterKernel (!llvm.ptr , i64 , i64 , i64 , i64 , i64 , i64 , i64 , i64 , i64 , i64 , i32 , !llvm.ptr , !llvm.ptr ) attributes {sym_visibility = " private" }
116+ llvm.func @_FortranACUFLaunchClusterKernel (!llvm.ptr , i64 , i64 , i64 , i64 , i64 , i64 , i64 , i64 , i64 , !llvm.ptr , i32 , !llvm.ptr , !llvm.ptr ) attributes {sym_visibility = " private" }
117117 llvm.func @_QMmod1Psub1 () attributes {cuf.cluster_dims = #cuf.cluster_dims <x = 2 : i64 , y = 2 : i64 , z = 1 : i64 >} {
118118 llvm.return
119119 }
@@ -166,3 +166,66 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<i1, dense<8> : ve
166166
167167// CHECK-LABEL: llvm.func @_QMmod1Phost_sub()
168168// CHECK: llvm.call @_FortranACUFLaunchCooperativeKernel
169+
170+ // -----
171+
172+ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<!llvm.ptr<272>, dense<64> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr, dense<64> : vector<4xi64>>, #dlti.dl_entry<i64, dense<64> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr<270>, dense<32> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<271>, dense<32> : vector<4xi64>>, #dlti.dl_entry<f64, dense<64> : vector<2xi64>>, #dlti.dl_entry<f128, dense<128> : vector<2xi64>>, #dlti.dl_entry<f16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i32, dense<32> : vector<2xi64>>, #dlti.dl_entry<f80, dense<128> : vector<2xi64>>, #dlti.dl_entry<i8, dense<8> : vector<2xi64>>, #dlti.dl_entry<i16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i128, dense<128> : vector<2xi64>>, #dlti.dl_entry<i1, dense<8> : vector<2xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", gpu.container_module, llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 20.0.0 (
git@github.com:clementval/llvm-project.git 4116c1370ff76adf1e58eb3c39d0a14721794c70)", llvm.target_triple = "x86_64-unknown-linux-gnu"} {
173+ llvm.func @_QMmod1Psub1 () attributes {cuf.cluster_dims = #cuf.cluster_dims <x = 2 : i64 , y = 2 : i64 , z = 1 : i64 >} { // empty kernel stub; cluster_dims requests a 2x2x1 thread-block cluster
174+ llvm.return
175+ }
176+ llvm.func @_QQmain () attributes {fir.bindc_name = " test" } { // host driver: launches @_QMmod1Psub1 on a user-provided stream
177+ %0 = llvm.mlir.constant (1 : index ) : i64
178+ %stream = llvm.alloca %0 x i64 : (i64 ) -> !llvm.ptr // stack slot holding the stream handle (CHECK binds this as STREAM)
179+ %1 = llvm.mlir.constant (2 : index ) : i64
180+ %2 = llvm.mlir.constant (0 : i32 ) : i32 // dynamic shared memory size = 0
181+ %3 = llvm.mlir.constant (10 : index ) : i64
182+ %token = cuf.stream_cast %stream : !llvm.ptr // wrap the raw stream pointer as an async token for the launch
183+ gpu.launch_func [%token ] @cuda_device_mod ::@_QMmod1Psub1 blocks in (%3 , %3 , %0 ) threads in (%3 , %3 , %0 ) : i64 dynamic_shared_memory_size %2 // 10x10x1 grid/block; expected to lower to _FortranACUFLaunchKernel with %stream passed through
184+ llvm.return
185+ }
186+ gpu.binary @cuda_device_mod [#gpu.object <#nvvm.target , " " >] // placeholder device binary so gpu.launch_func resolves
187+ }
188+
189+ // CHECK-LABEL: llvm.func @_QQmain()
190+ // CHECK: %[[STREAM:.*]] = llvm.alloca %{{.*}} x i64 : (i64) -> !llvm.ptr
191+ // CHECK: %[[KERNEL_PTR:.*]] = llvm.mlir.addressof @_QMmod1Psub1
192+ // CHECK: llvm.call @_FortranACUFLaunchKernel(%[[KERNEL_PTR]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %[[STREAM]], %{{.*}}, %{{.*}}, %{{.*}}) : (!llvm.ptr, i64, i64, i64, i64, i64, i64, !llvm.ptr, i32, !llvm.ptr, !llvm.ptr) -> ()
193+
194+ // -----
195+
196+ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<i1, dense<8> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr, dense<64> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<270>, dense<32> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<271>, dense<32> : vector<4xi64>>, #dlti.dl_entry<i8, dense<8> : vector<2xi64>>, #dlti.dl_entry<i16, dense<16> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr<272>, dense<64> : vector<4xi64>>, #dlti.dl_entry<i64, dense<64> : vector<2xi64>>, #dlti.dl_entry<i32, dense<32> : vector<2xi64>>, #dlti.dl_entry<f128, dense<128> : vector<2xi64>>, #dlti.dl_entry<i128, dense<128> : vector<2xi64>>, #dlti.dl_entry<f64, dense<64> : vector<2xi64>>, #dlti.dl_entry<f80, dense<128> : vector<2xi64>>, #dlti.dl_entry<f16, dense<16> : vector<2xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", gpu.container_module, llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 20.0.0 (
git@github.com:clementval/llvm-project.git ddcfd4d2dc17bf66cee8c3ef6284118684a2b0e6)", llvm.target_triple = "x86_64-unknown-linux-gnu"} {
197+ llvm.func @_QMmod1Phost_sub () { // host driver: device-allocates a buffer and launches a grid_global kernel on a user stream
198+ %0 = llvm.mlir.constant (1 : i32 ) : i32
199+ %one = llvm.mlir.constant (1 : i64 ) : i64
200+ %1 = llvm.alloca %0 x !llvm.struct <(ptr , i64 , i32 , i8 , i8 , i8 , i8 , array <1 x array <3 x i64 >>)> {alignment = 8 : i64 } : (i32 ) -> !llvm.ptr // unused here; shape matches a Fortran rank-1 descriptor — NOTE(review): confirm
201+ %stream = llvm.alloca %one x i64 : (i64 ) -> !llvm.ptr // stack slot holding the stream handle (CHECK binds this as STREAM)
202+ %2 = llvm.mlir.constant (40 : i64 ) : i64 // allocation size in bytes passed to CUFMemAlloc
203+ %3 = llvm.mlir.constant (16 : i32 ) : i32
204+ %4 = llvm.mlir.constant (25 : i32 ) : i32
205+ %5 = llvm.mlir.constant (21 : i32 ) : i32
206+ %6 = llvm.mlir.constant (17 : i32 ) : i32
207+ %7 = llvm.mlir.constant (1 : index ) : i64
208+ %8 = llvm.mlir.constant (27 : i32 ) : i32
209+ %9 = llvm.mlir.constant (6 : i32 ) : i32
210+ %10 = llvm.mlir.constant (1 : i32 ) : i32
211+ %11 = llvm.mlir.constant (0 : i32 ) : i32 // reused as both CUFMemAlloc flag and dynamic shared memory size
212+ %12 = llvm.mlir.constant (10 : index ) : i64
213+ %13 = llvm.mlir.addressof @_QQclX91d13f6e74caa2f03965d7a7c6a8fdd5 : !llvm.ptr // presumably a source-file name string for the runtime call; verify
214+ %14 = llvm.call @_FortranACUFMemAlloc (%2 , %11 , %13 , %6 ) : (i64 , i32 , !llvm.ptr , i32 ) -> !llvm.ptr // device allocation passed as the kernel argument
215+ %token = cuf.stream_cast %stream : !llvm.ptr // wrap the raw stream pointer as an async token for the launch
216+ gpu.launch_func [%token ] @cuda_device_mod ::@_QMmod1Psub1 blocks in (%7 , %7 , %7 ) threads in (%12 , %7 , %7 ) : i64 dynamic_shared_memory_size %11 args (%14 : !llvm.ptr ) {cuf.proc_attr = #cuf.cuda_proc <grid_global >} // grid_global attr: expected to lower to _FortranACUFLaunchCooperativeKernel with %stream passed through
217+ llvm.return
218+ }
219+ llvm.func @_QMmod1Psub1 (!llvm.ptr ) -> () // forward declaration of the device kernel launched above
220+ llvm.mlir.global linkonce constant @_QQclX91d13f6e74caa2f03965d7a7c6a8fdd5 () {addr_space = 0 : i32 } : !llvm.array <2 x i8 > {
221+ %0 = llvm.mlir.constant (" a\00" ) : !llvm.array <2 x i8 > // NUL-terminated string handed to the CUF runtime
222+ llvm.return %0 : !llvm.array <2 x i8 >
223+ }
224+ llvm.func @_FortranACUFMemAlloc (i64 , i32 , !llvm.ptr , i32 ) -> !llvm.ptr attributes {fir.runtime , sym_visibility = " private" } // CUF runtime: device memory allocation
225+ llvm.func @_FortranACUFMemFree (!llvm.ptr , i32 , !llvm.ptr , i32 ) -> !llvm.struct <()> attributes {fir.runtime , sym_visibility = " private" } // CUF runtime: device memory release (declared, unused in this test)
226+ gpu.binary @cuda_device_mod [#gpu.object <#nvvm.target , " " >] // placeholder device binary so gpu.launch_func resolves
227+ }
228+
229+ // CHECK-LABEL: llvm.func @_QMmod1Phost_sub()
230+ // CHECK: %[[STREAM:.*]] = llvm.alloca %{{.*}} x i64 : (i64) -> !llvm.ptr
231+ // CHECK: llvm.call @_FortranACUFLaunchCooperativeKernel(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %[[STREAM]], %{{.*}}, %{{.*}}, %{{.*}}) : (!llvm.ptr, i64, i64, i64, i64, i64, i64, !llvm.ptr, i32, !llvm.ptr, !llvm.ptr) -> ()
0 commit comments