From ac74d6a7170ddd4c8c09c6b5b0400e229e72bbe4 Mon Sep 17 00:00:00 2001 From: Renaud-K Date: Mon, 27 Jan 2025 17:57:17 -0800 Subject: [PATCH 1/2] Adding support for more atomic calls --- .../flang/Optimizer/Builder/IntrinsicCall.h | 7 ++ flang/lib/Optimizer/Builder/IntrinsicCall.cpp | 88 +++++++++++++ flang/module/cudadevice.f90 | 117 +++++++++++++++++- flang/test/Lower/CUDA/cuda-device-proc.cuf | 40 ++++++ 4 files changed, 248 insertions(+), 4 deletions(-) diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h index e2ea89483ef11..52ada48503332 100644 --- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h +++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h @@ -186,6 +186,13 @@ struct IntrinsicLibrary { fir::ExtendedValue genAny(mlir::Type, llvm::ArrayRef); mlir::Value genAtanpi(mlir::Type, llvm::ArrayRef); mlir::Value genAtomicAdd(mlir::Type, llvm::ArrayRef); + mlir::Value genAtomicAnd(mlir::Type, llvm::ArrayRef); + mlir::Value genAtomicOr(mlir::Type, llvm::ArrayRef); + mlir::Value genAtomicDec(mlir::Type, llvm::ArrayRef); + mlir::Value genAtomicInc(mlir::Type, llvm::ArrayRef); + mlir::Value genAtomicMax(mlir::Type, llvm::ArrayRef); + mlir::Value genAtomicMin(mlir::Type, llvm::ArrayRef); + mlir::Value genAtomicSub(mlir::Type, llvm::ArrayRef); fir::ExtendedValue genCommandArgumentCount(mlir::Type, llvm::ArrayRef); mlir::Value genAsind(mlir::Type, llvm::ArrayRef); diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index db9918c265164..5edc050f7f82c 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -46,12 +46,15 @@ #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/LLVMIR/LLVMTypes.h" #include "mlir/Dialect/Math/IR/Math.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/Vector/IR/VectorOps.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include // temporary -- only used in genIeeeGetOrSetModesOrStatus +#include +#include #include #define DEBUG_TYPE "flang-lower-intrinsic" @@ -151,6 +154,22 @@ static constexpr IntrinsicHandler handlers[]{ {"atomicaddf", &I::genAtomicAdd, {{{"a", asAddr}, {"v", asValue}}}, false}, {"atomicaddi", &I::genAtomicAdd, {{{"a", asAddr}, {"v", asValue}}}, false}, {"atomicaddl", &I::genAtomicAdd, {{{"a", asAddr}, {"v", asValue}}}, false}, + {"atomicandi", &I::genAtomicAnd, {{{"a", asAddr}, {"v", asValue}}}, false}, + {"atomicdeci", &I::genAtomicDec, {{{"a", asAddr}, {"v", asValue}}}, false}, + {"atomicinci", &I::genAtomicInc, {{{"a", asAddr}, {"v", asValue}}}, false}, + {"atomicmaxd", &I::genAtomicMax, {{{"a", asAddr}, {"v", asValue}}}, false}, + {"atomicmaxf", &I::genAtomicMax, {{{"a", asAddr}, {"v", asValue}}}, false}, + {"atomicmaxi", &I::genAtomicMax, {{{"a", asAddr}, {"v", asValue}}}, false}, + {"atomicmaxl", &I::genAtomicMax, {{{"a", asAddr}, {"v", asValue}}}, false}, + {"atomicmind", &I::genAtomicMin, {{{"a", asAddr}, {"v", asValue}}}, false}, + {"atomicminf", &I::genAtomicMin, {{{"a", asAddr}, {"v", asValue}}}, false}, + {"atomicmini", &I::genAtomicMin, {{{"a", asAddr}, {"v", asValue}}}, false}, + {"atomicminl", &I::genAtomicMin, {{{"a", asAddr}, {"v", asValue}}}, false}, + {"atomicori", &I::genAtomicOr, {{{"a", asAddr}, {"v", asValue}}}, false}, + {"atomicsubd", &I::genAtomicSub, {{{"a", asAddr}, {"v", asValue}}}, false}, + {"atomicsubf", &I::genAtomicSub, {{{"a", asAddr}, {"v", asValue}}}, false}, + {"atomicsubi", &I::genAtomicSub, {{{"a", asAddr}, {"v", asValue}}}, false}, + {"atomicsubl", &I::genAtomicSub, {{{"a", asAddr}, {"v", asValue}}}, false}, {"bessel_jn", &I::genBesselJn, {{{"n1", asValue}, {"n2", asValue}, {"x", asValue}}}, @@ -2600,6 +2619,75 @@ mlir::Value IntrinsicLibrary::genAtomicAdd(mlir::Type resultType, return genAtomBinOp(builder, loc, binOp, args[0], args[1]); } +mlir::Value IntrinsicLibrary::genAtomicSub(mlir::Type resultType, + llvm::ArrayRef args) { + assert(args.size() == 2); + + mlir::LLVM::AtomicBinOp binOp = + mlir::isa(args[1].getType()) + ? mlir::LLVM::AtomicBinOp::sub + : mlir::LLVM::AtomicBinOp::fsub; + return genAtomBinOp(builder, loc, binOp, args[0], args[1]); +} + +mlir::Value IntrinsicLibrary::genAtomicAnd(mlir::Type resultType, + llvm::ArrayRef args) { + assert(args.size() == 2); + assert(mlir::isa(args[1].getType())); + + mlir::LLVM::AtomicBinOp binOp = mlir::LLVM::AtomicBinOp::_and; + return genAtomBinOp(builder, loc, binOp, args[0], args[1]); +} + +mlir::Value IntrinsicLibrary::genAtomicOr(mlir::Type resultType, + llvm::ArrayRef args) { + assert(args.size() == 2); + assert(mlir::isa(args[1].getType())); + + mlir::LLVM::AtomicBinOp binOp = mlir::LLVM::AtomicBinOp::_or; + return genAtomBinOp(builder, loc, binOp, args[0], args[1]); +} + +mlir::Value IntrinsicLibrary::genAtomicDec(mlir::Type resultType, + llvm::ArrayRef args) { + assert(args.size() == 2); + assert(mlir::isa(args[1].getType())); + + mlir::LLVM::AtomicBinOp binOp = mlir::LLVM::AtomicBinOp::udec_wrap; + return genAtomBinOp(builder, loc, binOp, args[0], args[1]); +} + +mlir::Value IntrinsicLibrary::genAtomicInc(mlir::Type resultType, + llvm::ArrayRef args) { + assert(args.size() == 2); + assert(mlir::isa(args[1].getType())); + + mlir::LLVM::AtomicBinOp binOp = mlir::LLVM::AtomicBinOp::uinc_wrap; + return genAtomBinOp(builder, loc, binOp, args[0], args[1]); +} + +mlir::Value IntrinsicLibrary::genAtomicMax(mlir::Type resultType, + llvm::ArrayRef args) { + assert(args.size() == 2); + + mlir::LLVM::AtomicBinOp binOp = + mlir::isa(args[1].getType()) + ? mlir::LLVM::AtomicBinOp::max + : mlir::LLVM::AtomicBinOp::fmax; + return genAtomBinOp(builder, loc, binOp, args[0], args[1]); +} + +mlir::Value IntrinsicLibrary::genAtomicMin(mlir::Type resultType, + llvm::ArrayRef args) { + assert(args.size() == 2); + + mlir::LLVM::AtomicBinOp binOp = + mlir::isa(args[1].getType()) + ? mlir::LLVM::AtomicBinOp::min + : mlir::LLVM::AtomicBinOp::fmin; + return genAtomBinOp(builder, loc, binOp, args[0], args[1]); +} + // ASSOCIATED fir::ExtendedValue IntrinsicLibrary::genAssociated(mlir::Type resultType, diff --git a/flang/module/cudadevice.f90 b/flang/module/cudadevice.f90 index 53b6beaaf1ad8..af516a1866fa9 100644 --- a/flang/module/cudadevice.f90 +++ b/flang/module/cudadevice.f90 @@ -106,10 +106,10 @@ attributes(device) pure real function atomicaddf(address, val) real, intent(inout) :: address real, value :: val end function - attributes(device) pure real*8 function atomicaddd(address, val) + attributes(device) pure real(8) function atomicaddd(address, val) !dir$ ignore_tkr (d) address, (d) val - real*8, intent(inout) :: address - real*8, value :: val + real(8), intent(inout) :: address + real(8), value :: val end function attributes(device) pure integer(8) function atomicaddl(address, val) !dir$ ignore_tkr (d) address, (d) val @@ -117,6 +117,115 @@ attributes(device) pure integer(8) function atomicaddl(address, val) integer(8), value :: val end function end interface -public :: atomicadd + public :: atomicadd + + interface atomicsub + attributes(device) pure integer function atomicsubi(address, val) + !dir$ ignore_tkr (d) address, (d) val + integer, intent(inout) :: address + integer, value :: val + end function + attributes(device) pure real function atomicsubf(address, val) + !dir$ ignore_tkr (d) address, (d) val + real, intent(inout) :: address + real, value :: val + end function + attributes(device) pure real(8) function atomicsubd(address, val) + !dir$ ignore_tkr (d) address, (d) val + real(8), intent(inout) :: address + real(8), value :: val + end function + attributes(device) pure integer(8) function atomicsubl(address, val) + !dir$ ignore_tkr (d) address, (dk) val + integer(8), intent(inout) :: address + integer(8), value :: val + end function + end interface + public :: atomicsub + + interface atomicmax + attributes(device) pure integer function atomicmaxi(address, val) + !dir$ ignore_tkr (d) address, (d) val + integer, intent(inout) :: address + integer, value :: val + end function + attributes(device) pure real function atomicmaxf(address, val) + !dir$ ignore_tkr (d) address, (d) val + real, intent(inout) :: address + real, value :: val + end function + attributes(device) pure real(8) function atomicmaxd(address, val) + !dir$ ignore_tkr (d) address, (d) val + real(8), intent(inout) :: address + real(8), value :: val + end function + attributes(device) pure integer(8) function atomicmaxl(address, val) + !dir$ ignore_tkr (d) address, (dk) val + integer(8), intent(inout) :: address + integer(8), value :: val + end function + end interface + public :: atomicmax + + interface atomicmin + attributes(device) pure integer function atomicmini(address, val) + !dir$ ignore_tkr (d) address, (d) val + integer, intent(inout) :: address + integer, value :: val + end function + attributes(device) pure real function atomicminf(address, val) + !dir$ ignore_tkr (d) address, (d) val + real, intent(inout) :: address + real, value :: val + end function + attributes(device) pure real(8) function atomicmind(address, val) + !dir$ ignore_tkr (d) address, (d) val + real(8), intent(inout) :: address + real(8), value :: val + end function + attributes(device) pure integer(8) function atomicminl(address, val) + !dir$ ignore_tkr (d) address, (dk) val + integer(8), intent(inout) :: address + integer(8), value :: val + end function + end interface + public :: atomicmin + + interface atomicand + attributes(device) pure integer function atomicandi(address, val) + !dir$ ignore_tkr (d) address, (d) val + integer, intent(inout) :: address + integer, value :: val + end function + end interface + public :: atomicand + + interface atomicor + attributes(device) pure integer function atomicori(address, val) + !dir$ ignore_tkr (d) address, (d) val + integer, intent(inout) :: address + integer, value :: val + end function + end interface + public :: atomicor + + interface atomicinc + attributes(device) pure integer function atomicinci(address, val) + !dir$ ignore_tkr (d) address, (d) val + integer, intent(inout) :: address + integer, value :: val + end function + end interface + public :: atomicinc + + interface atomicdec + attributes(device) pure integer function atomicdeci(address, val) + !dir$ ignore_tkr (d) address, (d) val + integer, intent(inout) :: address + integer, value :: val + end function + end interface + public :: atomicdec + end module diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf index 661e5728bf85b..7ef391c7d308b 100644 --- a/flang/test/Lower/CUDA/cuda-device-proc.cuf +++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf @@ -23,6 +23,26 @@ attributes(global) subroutine devsub() al = atomicadd(al, 1_8) af = atomicadd(af, 1.0_4) ad = atomicadd(ad, 1.0_8) + + ai = atomicsub(ai, 1_4) + al = atomicsub(al, 1_8) + af = atomicsub(af, 1.0_4) + ad = atomicsub(ad, 1.0_8) + + ai = atomicmax(ai, 1_4) + al = atomicmax(al, 1_8) + af = atomicmax(af, 1.0_4) + ad = atomicmax(ad, 1.0_8) + + ai = atomicmin(ai, 1_4) + al = atomicmin(al, 1_8) + af = atomicmin(af, 1.0_4) + ad = atomicmin(ad, 1.0_8) + + ai = atomicand(ai, 1_4) + ai = atomicor(ai, 1_4) + ai = atomicinc(ai, 1_4) + ai = atomicdec(ai, 1_4) end ! CHECK-LABEL: func.func @_QPdevsub() attributes {cuf.proc_attr = #cuf.cuda_proc} @@ -39,6 +59,26 @@ end ! CHECK: %{{.*}} = llvm.atomicrmw fadd %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f32 ! CHECK: %{{.*}} = llvm.atomicrmw fadd %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f64 +! CHECK: %{{.*}} = llvm.atomicrmw sub %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32 +! CHECK: %{{.*}} = llvm.atomicrmw sub %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i64 +! CHECK: %{{.*}} = llvm.atomicrmw fsub %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f32 +! CHECK: %{{.*}} = llvm.atomicrmw fsub %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f64 + +! CHECK: %{{.*}} = llvm.atomicrmw max %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32 +! CHECK: %{{.*}} = llvm.atomicrmw max %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i64 +! CHECK: %{{.*}} = llvm.atomicrmw fmax %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f32 +! CHECK: %{{.*}} = llvm.atomicrmw fmax %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f64 + +! CHECK: %{{.*}} = llvm.atomicrmw min %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32 +! CHECK: %{{.*}} = llvm.atomicrmw min %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i64 +! CHECK: %{{.*}} = llvm.atomicrmw fmin %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f32 +! CHECK: %{{.*}} = llvm.atomicrmw fmin %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f64 + +! CHECK: %{{.*}} = llvm.atomicrmw _and %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32 +! CHECK: %{{.*}} = llvm.atomicrmw _or %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32 +! CHECK: %{{.*}} = llvm.atomicrmw uinc_wrap %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32 +! CHECK: %{{.*}} = llvm.atomicrmw udec_wrap %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32 + ! CHECK: func.func private @llvm.nvvm.barrier0() ! CHECK: func.func private @__syncwarp(!fir.ref {cuf.data_attr = #cuf.cuda}) attributes {cuf.proc_attr = #cuf.cuda_proc, fir.bindc_name = "__syncwarp", fir.proc_attrs = #fir.proc_attrs} ! CHECK: func.func private @llvm.nvvm.membar.gl() From 6f7108f891f4730d662f35798d3c0a80787aa35f Mon Sep 17 00:00:00 2001 From: Renaud-K Date: Mon, 27 Jan 2025 18:01:44 -0800 Subject: [PATCH 2/2] Removing includes from MemRef experiments --- flang/lib/Optimizer/Builder/IntrinsicCall.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index 5edc050f7f82c..e75a29c968d17 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -46,15 +46,12 @@ #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/LLVMIR/LLVMTypes.h" #include "mlir/Dialect/Math/IR/Math.h" -#include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/Vector/IR/VectorOps.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include // temporary -- only used in genIeeeGetOrSetModesOrStatus -#include -#include #include #define DEBUG_TYPE "flang-lower-intrinsic"