Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions flang/include/flang/Optimizer/Builder/IntrinsicCall.h
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ struct IntrinsicLibrary {
mlir::Value genAtomicAdd(mlir::Type, llvm::ArrayRef<mlir::Value>);
fir::ExtendedValue genAtomicAddR2(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);
template <int extent>
fir::ExtendedValue genAtomicAddVector(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genAtomicAnd(mlir::Type, llvm::ArrayRef<mlir::Value>);
Expand Down
71 changes: 43 additions & 28 deletions flang/lib/Optimizer/Builder/IntrinsicCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -290,12 +290,12 @@ static constexpr IntrinsicHandler handlers[]{
{"atan2pi", &I::genAtanpi},
{"atand", &I::genAtand},
{"atanpi", &I::genAtanpi},
{"atomicadd_r2x2",
&I::genAtomicAddVector,
{"atomicadd_r4x2",
&I::genAtomicAddVector<2>,
{{{"a", asAddr}, {"v", asAddr}}},
false},
{"atomicadd_r4x2",
&I::genAtomicAddVector,
{"atomicadd_r4x4",
&I::genAtomicAddVector<4>,
{{{"a", asAddr}, {"v", asAddr}}},
false},
{"atomicaddd", &I::genAtomicAdd, {{{"a", asAddr}, {"v", asValue}}}, false},
Expand All @@ -306,6 +306,14 @@ static constexpr IntrinsicHandler handlers[]{
&I::genAtomicAddR2,
{{{"a", asAddr}, {"v", asAddr}}},
false},
{"atomicaddvector_r2x2",
&I::genAtomicAddVector<2>,
{{{"a", asAddr}, {"v", asAddr}}},
false},
{"atomicaddvector_r4x2",
&I::genAtomicAddVector<2>,
{{{"a", asAddr}, {"v", asAddr}}},
false},
{"atomicandi", &I::genAtomicAnd, {{{"a", asAddr}, {"v", asValue}}}, false},
{"atomiccasd",
&I::genAtomicCas,
Expand Down Expand Up @@ -3176,44 +3184,51 @@ IntrinsicLibrary::genAtomicAddR2(mlir::Type resultType,
mlir::ArrayRef<int64_t>{0});
}

/// Lower a CUDA Fortran vector atomic add on `extent` contiguous elements.
///
/// This is the handler registered for the `atomicadd_r4x2`, `atomicadd_r4x4`,
/// `atomicaddvector_r2x2` and `atomicaddvector_r4x2` intrinsic names.
///
/// \tparam extent  number of elements packed into the vector operand.
/// \param resultType  element type; used both for the `extent`-element
///                    result array and for the `extent`-wide vector type.
/// \param args  exactly two arguments: args[0] is the address operand (a box
///              is unwrapped to its base address), args[1] is the array
///              holding the `extent` values to add.
/// \return an array value of `extent` elements backed by a `fir.alloca`
///         temporary and filled with the elements of the vector produced by
///         the atomic `fadd` operation.
template <int extent>
fir::ExtendedValue
IntrinsicLibrary::genAtomicAddVector(mlir::Type resultType,
                                     llvm::ArrayRef<fir::ExtendedValue> args) {
  static_assert(extent > 0, "vector atomic add needs at least one element");
  assert(args.size() == 2 && "expected address and value arguments");

  // Temporary that receives the elements returned by the atomic operation.
  mlir::Value res = fir::AllocaOp::create(
      builder, loc, fir::SequenceType::get({extent}, resultType));

  // The atomic operation needs a raw address, so unwrap a boxed argument.
  mlir::Value a = fir::getBase(args[0]);
  if (mlir::isa<fir::BaseBoxType>(a.getType()))
    a = fir::BoxAddrOp::create(builder, loc, a);

  auto vecTy = mlir::VectorType::get({extent}, resultType);
  auto refTy = fir::ReferenceType::get(resultType);
  mlir::Type i32Ty = builder.getI32Type();
  mlir::Type idxTy = builder.getIndexType();

  // Extract the values from the array.
  mlir::Value valueBase = fir::getBase(args[1]);
  llvm::SmallVector<mlir::Value> values;
  values.reserve(extent);
  for (int i = 0; i < extent; ++i) {
    mlir::Value pos = builder.createIntegerConstant(loc, idxTy, i);
    mlir::Value coord =
        fir::CoordinateOp::create(builder, loc, refTy, valueBase, pos);
    values.push_back(fir::LoadOp::create(builder, loc, coord));
  }

  // Pack extracted values into a vector to call the atomic add. Every
  // insertelement yields a new SSA value, so thread the partially built
  // vector through `vec`, starting from an undef vector.
  mlir::Value vec = mlir::LLVM::UndefOp::create(builder, loc, vecTy);
  for (int i = 0; i < extent; ++i) {
    vec = mlir::LLVM::InsertElementOp::create(
        builder, loc, vec, values[i],
        builder.createIntegerConstant(loc, i32Ty, i));
  }

  // Atomic operation with a vector of values.
  mlir::Value add =
      genAtomBinOp(builder, loc, mlir::LLVM::AtomicBinOp::fadd, a, vec);

  // Store results in the result array.
  for (int i = 0; i < extent; ++i) {
    mlir::Value r = mlir::LLVM::ExtractElementOp::create(
        builder, loc, add, builder.createIntegerConstant(loc, i32Ty, i));
    mlir::Value c = fir::CoordinateOp::create(
        builder, loc, refTy, res, builder.createIntegerConstant(loc, idxTy, i));
    fir::StoreOp::create(builder, loc, r, c);
  }

  mlir::Value ext = builder.createIntegerConstant(loc, idxTy, extent);
  return fir::ArrayBoxValue(res, {ext});
}

Expand Down
20 changes: 19 additions & 1 deletion flang/module/cudadevice.f90
Original file line number Diff line number Diff line change
Expand Up @@ -1179,13 +1179,22 @@ attributes(device) pure integer(4) function atomicaddr2(address, val)
end interface

! Generic vector atomic add over two-element real arrays.
! The specific procedure names match the "atomicaddvector_r2x2" and
! "atomicaddvector_r4x2" keys of the intrinsic handler table, both of which
! dispatch to genAtomicAddVector<2>.
interface atomicaddvector
  ! real(2) x 2 variant.
  attributes(device) pure function atomicaddvector_r2x2(address, val) result(z)
    !dir$ ignore_tkr (rd) address, (d) val
    real(2), dimension(2), intent(inout) :: address
    real(2), dimension(2), intent(in) :: val
    real(2), dimension(2) :: z
  end function

  ! real(4) x 2 variant.
  attributes(device) pure function atomicaddvector_r4x2(address, val) result(z)
    !dir$ ignore_tkr (rd) address, (d) val
    real(4), dimension(2), intent(inout) :: address
    real(4), dimension(2), intent(in) :: val
    real(4), dimension(2) :: z
  end function
end interface

interface atomicaddreal4x2
attributes(device) pure function atomicadd_r4x2(address, val) result(z)
!dir$ ignore_tkr (rd) address, (d) val
real(4), dimension(2), intent(inout) :: address
Expand All @@ -1194,6 +1203,15 @@ attributes(device) pure function atomicadd_r4x2(address, val) result(z)
end function
end interface

! Vector atomic add on four real(4) elements.
! The specific procedure name matches the "atomicadd_r4x4" key of the
! intrinsic handler table, which dispatches to genAtomicAddVector<4>.
! address: four-element real(4) array, intent(inout).
! val:     four-element real(4) array of addends, intent(in).
! z:       four-element real(4) result.
interface atomicaddreal4x4
attributes(device) pure function atomicadd_r4x4(address, val) result(z)
!dir$ ignore_tkr (rd) address, (d) val
real(4), dimension(4), intent(inout) :: address
real(4), dimension(4), intent(in) :: val
real(4), dimension(4) :: z
end function
end interface

interface atomicsub
attributes(device) pure integer function atomicsubi(address, val)
!dir$ ignore_tkr (d) address, (d) val
Expand Down
24 changes: 20 additions & 4 deletions flang/test/Lower/CUDA/cuda-atomicadd.cuf
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,34 @@

! Test CUDA Fortran atomicadd functions available in the cudadevice module

! Kernel exercising the generic atomicAddVector on two real(2) elements.
attributes(global) subroutine test_atomicaddvector_r2()
  real(2), device :: a(2), tmp1(2), tmp2(2)
  tmp1 = atomicAddVector(a, tmp2)
end subroutine

! CHECK-LABEL: func.func @_QPtest_atomicaddvector_r2() attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
! CHECK: llvm.atomicrmw fadd %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, vector<2xf16>

! Kernel exercising the generic atomicAddVector on two real(4) elements.
attributes(global) subroutine test_atomicaddvector_r4()
  real(4), device :: a(2), tmp1(2), tmp2(2)
  tmp1 = atomicAddVector(a, tmp2)
end subroutine

! CHECK-LABEL: func.func @_QPtest_atomicaddvector_r4() attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
! CHECK: llvm.atomicrmw fadd %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, vector<2xf32>

! Kernel exercising atomicaddreal4x2 on two real(4) elements.
! NOTE(review): the subroutine is named "r2x4" although it calls the
! real(4) x 2 variant; "r4x2" looks intended. Renaming requires updating the
! matching CHECK-LABEL as well -- confirm before changing.
attributes(global) subroutine test_atomicadd_r2x4()
real(4), device :: a(2), tmp1(2), tmp2(2)
tmp1 = atomicaddreal4x2(a, tmp2)
end subroutine

! CHECK-LABEL: func.func @_QPtest_atomicadd_r2x4() attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
! CHECK: llvm.atomicrmw fadd %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, vector<2xf32>

! Kernel exercising atomicaddreal4x4 on four real(4) elements.
attributes(global) subroutine test_atomicadd_r4x4()
real(4), device :: a(4), tmp1(4), tmp2(4)
tmp1 = atomicaddreal4x4(a, tmp2)
end subroutine

! CHECK-LABEL: func.func @_QPtest_atomicadd_r4x4() attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
! CHECK: llvm.atomicrmw fadd %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, vector<4xf32>
Loading