diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 537c817e32ad8..9a10ce949290a 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -2734,6 +2734,22 @@ mlir::Value IntrinsicLibrary::genAtomicCas(mlir::Type resultType,
 
   mlir::Value arg1 = args[1];
   mlir::Value arg2 = args[2];
+
+  // llvm.cmpxchg only accepts integer or pointer operands, so reinterpret
+  // f32/f64 values as same-width integers; other types pass through as is.
+  auto bitCastFloat = [&](mlir::Value arg) -> mlir::Value {
+    if (mlir::isa<mlir::Float32Type>(arg.getType()))
+      return builder.create<mlir::LLVM::BitcastOp>(loc, builder.getI32Type(),
+                                                   arg);
+    if (mlir::isa<mlir::Float64Type>(arg.getType()))
+      return builder.create<mlir::LLVM::BitcastOp>(loc, builder.getI64Type(),
+                                                   arg);
+    return arg;
+  };
+
+  arg1 = bitCastFloat(arg1);
+  arg2 = bitCastFloat(arg2);
+
   if (arg1.getType() != arg2.getType()) {
     // arg1 and arg2 need to have the same type in AtomicCmpXchgOp.
     arg2 = builder.createConvert(loc, arg1.getType(), arg2);
diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf
index f2b4eb57ad555..c651d34c55093 100644
--- a/flang/test/Lower/CUDA/cuda-device-proc.cuf
+++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf
@@ -175,3 +175,26 @@ end subroutine
 ! CHECK: %[[VAL:.*]] = fir.convert %c14{{.*}} : (i32) -> i64
 ! CHECK: %[[ADDR:.*]] = builtin.unrealized_conversion_cast %{{.*}}#1 : !fir.ref<i64> to !llvm.ptr
 ! CHECK: llvm.cmpxchg %{{.*}}, %{{.*}}, %[[VAL]] acq_rel monotonic : !llvm.ptr, i64
+
+attributes(device) subroutine testAtomic3()
+  real :: a, i, istat
+  istat = atomiccas(a, i, 14.0)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtestatomic3()
+! CHECK: %[[BCAST1:.*]] = llvm.bitcast %{{.*}} : f32 to i32
+! CHECK: %[[BCAST2:.*]] = llvm.bitcast %{{.*}} : f32 to i32
+! CHECK: %[[CAST:.*]] = builtin.unrealized_conversion_cast %{{.*}}#1 : !fir.ref<f32> to !llvm.ptr
+! CHECK: llvm.cmpxchg %[[CAST]], %[[BCAST1]], %[[BCAST2]] acq_rel monotonic : !llvm.ptr, i32
+
+attributes(device) subroutine testAtomic4()
+  real(8) :: a, i, istat
+  istat = atomiccas(a, i, 14.0d0)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtestatomic4()
+! CHECK: %[[BCAST1:.*]] = llvm.bitcast %{{.*}} : f64 to i64
+! CHECK: %[[BCAST2:.*]] = llvm.bitcast %{{.*}} : f64 to i64
+! CHECK: %[[CAST:.*]] = builtin.unrealized_conversion_cast %{{.*}}#1 : !fir.ref<f64> to !llvm.ptr
+! CHECK: %[[ATOMIC:.*]] = llvm.cmpxchg %[[CAST]], %[[BCAST1]], %[[BCAST2]] acq_rel monotonic : !llvm.ptr, i64
+! CHECK: %[[RES:.*]] = llvm.extractvalue %[[ATOMIC]][1] : !llvm.struct<(i64, i1)>