diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h index 778fcf1d93d8b..54a9e0b867d6c 100644 --- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h +++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h @@ -187,9 +187,11 @@ struct IntrinsicLibrary { mlir::Value genAtanpi(mlir::Type, llvm::ArrayRef); mlir::Value genAtomicAdd(mlir::Type, llvm::ArrayRef); mlir::Value genAtomicAnd(mlir::Type, llvm::ArrayRef); - mlir::Value genAtomicCas(mlir::Type, llvm::ArrayRef); + fir::ExtendedValue genAtomicCas(mlir::Type, + llvm::ArrayRef); mlir::Value genAtomicDec(mlir::Type, llvm::ArrayRef); - mlir::Value genAtomicExch(mlir::Type, llvm::ArrayRef); + fir::ExtendedValue genAtomicExch(mlir::Type, + llvm::ArrayRef); mlir::Value genAtomicInc(mlir::Type, llvm::ArrayRef); mlir::Value genAtomicMax(mlir::Type, llvm::ArrayRef); mlir::Value genAtomicMin(mlir::Type, llvm::ArrayRef); diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index 0a70ed3c6bdf1..63127f046cf42 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -2740,15 +2740,17 @@ mlir::Value IntrinsicLibrary::genAtomicOr(mlir::Type resultType, } // ATOMICCAS -mlir::Value IntrinsicLibrary::genAtomicCas(mlir::Type resultType, - llvm::ArrayRef args) { +fir::ExtendedValue +IntrinsicLibrary::genAtomicCas(mlir::Type resultType, + llvm::ArrayRef args) { assert(args.size() == 3); auto successOrdering = mlir::LLVM::AtomicOrdering::acq_rel; auto failureOrdering = mlir::LLVM::AtomicOrdering::monotonic; auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(resultType.getContext()); - mlir::Value arg1 = args[1]; - mlir::Value arg2 = args[2]; + mlir::Value arg0 = fir::getBase(args[0]); + mlir::Value arg1 = fir::getBase(args[1]); + mlir::Value arg2 = fir::getBase(args[2]); auto bitCastFloat = [&](mlir::Value arg) -> mlir::Value { if (mlir::isa(arg.getType())) @@ -2769,7 +2771,7 @@ mlir::Value IntrinsicLibrary::genAtomicCas(mlir::Type resultType, } auto address = - builder.create(loc, llvmPtrTy, args[0]) + builder.create(loc, llvmPtrTy, arg0) .getResult(0); auto cmpxchg = builder.create( loc, address, arg1, arg2, successOrdering, failureOrdering); @@ -2786,13 +2788,16 @@ mlir::Value IntrinsicLibrary::genAtomicDec(mlir::Type resultType, } // ATOMICEXCH -mlir::Value IntrinsicLibrary::genAtomicExch(mlir::Type resultType, - llvm::ArrayRef args) { +fir::ExtendedValue +IntrinsicLibrary::genAtomicExch(mlir::Type resultType, + llvm::ArrayRef args) { assert(args.size() == 2); - assert(args[1].getType().isIntOrFloat()); + mlir::Value arg0 = fir::getBase(args[0]); + mlir::Value arg1 = fir::getBase(args[1]); + assert(arg1.getType().isIntOrFloat()); mlir::LLVM::AtomicBinOp binOp = mlir::LLVM::AtomicBinOp::xchg; - return genAtomBinOp(builder, loc, binOp, args[0], args[1]); + return genAtomBinOp(builder, loc, binOp, arg0, arg1); } mlir::Value IntrinsicLibrary::genAtomicInc(mlir::Type resultType, diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf index 5c7f334bae8ba..d683e8b4caf85 100644 --- a/flang/test/Lower/CUDA/cuda-device-proc.cuf +++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf @@ -161,6 +161,8 @@ attributes(device) subroutine testAtomic(aa, n) istat = atomiccas(a, i, 14) do i = 1, n istat = atomicxor(aa, i) + istat = atomiccas(aa, i, 14) + istat = atomicexch(aa, 0) end do end subroutine @@ -172,6 +174,9 @@ end subroutine ! CHECK: llvm.cmpxchg %[[ADDR]], %{{.*}}, %c14{{.*}} acq_rel monotonic : !llvm.ptr, i32 ! CHECK: fir.do_loop ! CHECK: llvm.atomicrmw _xor %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32 +! CHECK: %[[ADDR:.*]] = builtin.unrealized_conversion_cast %{{.*}}#1 : !fir.ref> to !llvm.ptr +! CHECK: llvm.cmpxchg %[[ADDR]], %{{.*}}, %c14{{.*}} acq_rel monotonic : !llvm.ptr, i32 +! CHECK: llvm.atomicrmw xchg %{{.*}}, %c0{{.*}} seq_cst : !llvm.ptr, i32 attributes(device) subroutine testAtomic2() integer(8) :: a, i, istat