// Patch summary (preamble only; the diff text below is unchanged).
//
// Files touched:
//   * flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp
//   * flang/test/Fir/CUDA/cuda-implicit-device-global.f90
//
// Hunk 1 (processAddrOfOp): adds an early return at the very top of the
// function, before the symbol-table lookup. The function returns without
// doing anything when all of the following hold:
//   1. the address-of operation has exactly one use,
//   2. the owner of that single use passes the `mlir::isa` check, and
//   3. that owner itself has no uses.
// The added in-code comment describes this as checking whether there is a
// "real use" of the global.
//
// Hunk 2 (lit test): appends a new case after a `// -----` separator. It
// defines a constant global, then a function carrying a `cuf.proc_attr`
// attribute whose body takes `fir.address_of` of that global and feeds it to
// a single `fir.declare` whose result (%20) is never used before `return`.
// The CHECK-NOT line expects the global's name not to appear after the
// `gpu.module @cuda_device_mod` label.
//
// NOTE(review): angle-bracketed text looks stripped in this copy of the
// patch (`llvm::DenseSet &candidates`, `mlir::isa(`, `symbolTable.lookup(`,
// `!fir.ref>`, `#cuf.cuda_proc`, `#fir.var_attrs`). The `isa` target is
// presumably the declare op exercised by the new test -- confirm against the
// upstream commit before applying.
// NOTE(review): the diff's line breaks appear to have been collapsed onto a
// single line; they likely need to be restored before patch tools will
// accept it -- TODO confirm.
diff --git a/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp b/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp index 3f13a182ad0c3..328e2374115b0 100644 --- a/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp +++ b/flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp @@ -32,6 +32,15 @@ static void processAddrOfOp(fir::AddrOfOp addrOfOp, mlir::SymbolTable &symbolTable, llvm::DenseSet &candidates, bool recurseInGlobal) { + + // Check if there is a real use of the global. + if (addrOfOp.getOperation()->hasOneUse()) { + mlir::OpOperand &addrUse = *addrOfOp.getOperation()->getUses().begin(); + if (mlir::isa(addrUse.getOwner()) && + addrUse.getOwner()->use_empty()) + return; + } + if (auto globalOp = symbolTable.lookup( addrOfOp.getSymbol().getRootReference().getValue())) { // TO DO: limit candidates to non-scalars. Scalars appear to have been diff --git a/flang/test/Fir/CUDA/cuda-implicit-device-global.f90 b/flang/test/Fir/CUDA/cuda-implicit-device-global.f90 index 11866d871a607..758c2e2244257 100644 --- a/flang/test/Fir/CUDA/cuda-implicit-device-global.f90 +++ b/flang/test/Fir/CUDA/cuda-implicit-device-global.f90 @@ -329,3 +329,16 @@ // attributes(global) subroutine kernel4() // CHECK-LABEL: fir.global internal @_QFkernel4Ea : i32 // CHECK-LABEL: gpu.module @cuda_device_mod // CHECK: fir.global internal @_QFkernel4Ea : i32 + +// ----- + +fir.global @_QMiso_c_bindingECc_alert constant : !fir.char<1> +func.func @_QMcudafor_lib_internalsPfoo() attributes {cuf.proc_attr = #cuf.cuda_proc} { + %19 = fir.address_of(@_QMiso_c_bindingECc_alert) : !fir.ref> + %c1 = arith.constant 1 : index + %20 = fir.declare %19 typeparams %c1 {fortran_attrs = #fir.var_attrs, uniq_name = "_QMiso_c_bindingECc_alert"} : (!fir.ref>, index) -> !fir.ref> + return +} + +// CHECK-LABEL: gpu.module @cuda_device_mod +// CHECK-NOT: _QMiso_c_bindingECc_alert