Skip to content

Commit d400fe5

Browse files
committed
[flang][cuda] Add interface and lowering for fence_proxy_async
1 parent ca55c07 commit d400fe5

File tree

4 files changed

+28
-0
lines changed

4 files changed

+28
-0
lines changed

flang/include/flang/Optimizer/Builder/IntrinsicCall.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,7 @@ struct IntrinsicLibrary {
274274
llvm::ArrayRef<fir::ExtendedValue>);
275275
template <Extremum, ExtremumBehavior>
276276
mlir::Value genExtremum(mlir::Type, llvm::ArrayRef<mlir::Value>);
277+
void genFenceProxyAsync(llvm::ArrayRef<fir::ExtendedValue>);
277278
mlir::Value genFloor(mlir::Type, llvm::ArrayRef<mlir::Value>);
278279
mlir::Value genFraction(mlir::Type resultType,
279280
mlir::ArrayRef<mlir::Value> args);

flang/lib/Optimizer/Builder/IntrinsicCall.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -502,6 +502,10 @@ static constexpr IntrinsicHandler handlers[]{
502502
&I::genExtendsTypeOf,
503503
{{{"a", asBox}, {"mold", asBox}}},
504504
/*isElemental=*/false},
505+
{"fence_proxy_async",
506+
&I::genFenceProxyAsync,
507+
{},
508+
/*isElemental=*/false},
505509
{"findloc",
506510
&I::genFindloc,
507511
{{{"array", asBox},
@@ -4354,6 +4358,17 @@ IntrinsicLibrary::genExtendsTypeOf(mlir::Type resultType,
43544358
fir::getBase(args[1])));
43554359
}
43564360

4361+
// FENCE_PROXY_ASYNC (CUDA)
// Lowers a call to the CUDA Fortran device subroutine fence_proxy_async()
// into a single NVVM fence-proxy operation. The operation is built with proxy
// kind `async_shared` and shared space `shared_cta`; nothing is returned to
// the caller.
4362+
void IntrinsicLibrary::genFenceProxyAsync(
4363+
llvm::ArrayRef<fir::ExtendedValue> args) {
4364+
assert(args.size() == 0); // the subroutine is registered with no arguments
4365+
auto kind = mlir::NVVM::ProxyKindAttr::get(
4366+
builder.getContext(), mlir::NVVM::ProxyKind::async_shared);
4367+
auto space = mlir::NVVM::SharedSpaceAttr::get(
4368+
builder.getContext(), mlir::NVVM::SharedSpace::shared_cta);
4369+
mlir::NVVM::FenceProxyOp::create(builder, loc, kind, space); // inserted at the builder's current location
4370+
}
4371+
43574372
// FINDLOC
43584373
fir::ExtendedValue
43594374
IntrinsicLibrary::genFindloc(mlir::Type resultType,

flang/module/cudadevice.f90

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2008,6 +2008,11 @@ attributes(device) function barrier_arrive_cnt(barrier, count) result(token)
20082008
end function
20092009
end interface
20102010

2011+
! Explicit interface for the device-only subroutine fence_proxy_async, which
! takes no arguments. The name is registered in the compiler's intrinsic
! handler table (genFenceProxyAsync), which emits an NVVM fence-proxy
! operation for each call.
interface
2012+
attributes(device) subroutine fence_proxy_async()
2013+
end subroutine
2014+
end interface
2015+
20112016
contains
20122017

20132018
attributes(device) subroutine syncthreads()

flang/test/Lower/CUDA/cuda-device-proc.cuf

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -419,3 +419,10 @@ end subroutine
419419
! CHECK: %[[LLVM_PTR:.*]] = fir.convert %[[DECL_SHARED]]#0 : (!fir.ref<i64>) -> !llvm.ptr
420420
! CHECK: %[[SHARED_PTR:.*]] = llvm.addrspacecast %[[LLVM_PTR]] : !llvm.ptr to !llvm.ptr<3>
421421
! CHECK: nvvm.mbarrier.arrive.expect_tx %[[SHARED_PTR]], %{{.*}} : !llvm.ptr<3>, i32
422+
423+
! Kernel whose only statement is a call to fence_proxy_async(); the check
! directives that follow verify the nvvm.fence.proxy operation emitted for it.
attributes(global) subroutine test_fence()
424+
call fence_proxy_async()
425+
end subroutine
426+
427+
! CHECK-LABEL: func.func @_QPtest_fence()
428+
! CHECK: nvvm.fence.proxy {kind = #nvvm.proxy_kind<async.shared>, space = #nvvm.shared_space<cta>}

0 commit comments

Comments
 (0)