@@ -121,4 +121,40 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<!llvm.ptr, dense<
// CHECK-LABEL: gpu.func @_QPnoshared()
// CHECK-NOT: fir.global internal @_QPnoshared__shared_mem

// -----

// Extraction artifacts repaired: rejoined SSA names split by inserted spaces
// (%c -1 _0 -> %c-1_0), closed up type brackets (!fir.ref <...> -> !fir.ref<...>),
// and removed whitespace injected inside string attribute values so that the
// bindc_name/uniq_name attributes match the CHECK lines below.
module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<!llvm.ptr, dense<64> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<271>, dense<32> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<270>, dense<32> : vector<4xi64>>, #dlti.dl_entry<f128, dense<128> : vector<2xi64>>, #dlti.dl_entry<f64, dense<64> : vector<2xi64>>, #dlti.dl_entry<f80, dense<128> : vector<2xi64>>, #dlti.dl_entry<f16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i32, dense<32> : vector<2xi64>>, #dlti.dl_entry<i16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i128, dense<128> : vector<2xi64>>, #dlti.dl_entry<i8, dense<8> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr<272>, dense<64> : vector<4xi64>>, #dlti.dl_entry<i64, dense<64> : vector<2xi64>>, #dlti.dl_entry<i1, dense<8> : vector<2xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", gpu.container_module, llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 20.0.0 (https://github.com/llvm/llvm-project.git cae351f3453a0a26ec8eb2ddaf773c24a29d929e)", llvm.target_triple = "x86_64-unknown-linux-gnu"} {
  gpu.module @cuda_device_mod {
    // Kernel with two assumed-size (dynamic extent -1) shared-memory arrays;
    // the pass under test must assign both a shared-memory offset operand.
    gpu.func @_QMmtestsPtestany(%arg0: !fir.ref<!fir.array<?xf32>> {cuf.data_attr = #cuf.cuda<device>, fir.bindc_name = "a"}) attributes {cuf.proc_attr = #cuf.cuda_proc<global>} {
      %0 = fir.dummy_scope : !fir.dscope
      %c-1 = arith.constant -1 : index
      %1 = fir.shape %c-1 : (index) -> !fir.shape<1>
      %2:2 = hlfir.declare %arg0(%1) dummy_scope %0 {data_attr = #cuf.cuda<device>, uniq_name = "_QMmtestsFtestanyEa"} : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
      %3 = fir.address_of(@_QM__fortran_builtinsE__builtin_blockdim) : !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>
      %4:2 = hlfir.declare %3 {uniq_name = "_QM__fortran_builtinsE__builtin_blockdim"} : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>) -> (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>, !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>)
      %5 = fir.address_of(@_QM__fortran_builtinsE__builtin_blockidx) : !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>
      %6:2 = hlfir.declare %5 {uniq_name = "_QM__fortran_builtinsE__builtin_blockidx"} : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>) -> (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>, !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>)
      %c-1_0 = arith.constant -1 : index
      %7 = cuf.shared_memory !fir.array<?xf64>, %c-1_0 : index {bindc_name = "dmasks", uniq_name = "_QMmtestsFtestanyEdmasks"} -> !fir.ref<!fir.array<?xf64>>
      %8 = fir.shape %c-1_0 : (index) -> !fir.shape<1>
      %9:2 = hlfir.declare %7(%8) {data_attr = #cuf.cuda<shared>, uniq_name = "_QMmtestsFtestanyEdmasks"} : (!fir.ref<!fir.array<?xf64>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xf64>>, !fir.ref<!fir.array<?xf64>>)
      %10 = fir.address_of(@_QM__fortran_builtinsE__builtin_griddim) : !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>
      %11:2 = hlfir.declare %10 {uniq_name = "_QM__fortran_builtinsE__builtin_griddim"} : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>) -> (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>, !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>)
      %12 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QMmtestsFtestanyEi"}
      %13:2 = hlfir.declare %12 {uniq_name = "_QMmtestsFtestanyEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
      %14 = fir.alloca i32 {bindc_name = "iam", uniq_name = "_QMmtestsFtestanyEiam"}
      %15:2 = hlfir.declare %14 {uniq_name = "_QMmtestsFtestanyEiam"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
      %16 = fir.alloca i32 {bindc_name = "j", uniq_name = "_QMmtestsFtestanyEj"}
      %17:2 = hlfir.declare %16 {uniq_name = "_QMmtestsFtestanyEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
      %c-1_1 = arith.constant -1 : index
      %18 = cuf.shared_memory !fir.array<?xf32>, %c-1_1 : index {bindc_name = "smasks", uniq_name = "_QMmtestsFtestanyEsmasks"} -> !fir.ref<!fir.array<?xf32>>
      %19 = fir.shape %c-1_1 : (index) -> !fir.shape<1>
      %20:2 = hlfir.declare %18(%19) {data_attr = #cuf.cuda<shared>, uniq_name = "_QMmtestsFtestanyEsmasks"} : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
      gpu.return
    }
  }
}
// CHECK-LABEL: gpu.func @_QMmtestsPtestany
// CHECK: %{{.*}} = cuf.shared_memory[%c0{{.*}} : i32] !fir.array<?xf64>, %c-1{{.*}} : index {bindc_name = "dmasks", uniq_name = "_QMmtestsFtestanyEdmasks"} -> !fir.ref<!fir.array<?xf64>>
// CHECK: %{{.*}} = cuf.shared_memory[%c0{{.*}} : i32] !fir.array<?xf32>, %c-1{{.*}} : index {bindc_name = "smasks", uniq_name = "_QMmtestsFtestanyEsmasks"} -> !fir.ref<!fir.array<?xf32>>