Skip to content

Commit 5829652

Browse files
authored
[MLIR][NVVM] Add clusterlaunchcontrol Ops (llvm#156585)
This change adds the `clusterlaunchcontrol.try.cancel` and `clusterlaunchcontrol.query.cancel` Ops to the NVVM dialect. Tests are added in `clusterlaunchcontrol.mlir`. PTX Reference: https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-clusterlaunchcontrol-try-cancel
1 parent 50f8153 commit 5829652

File tree

4 files changed

+244
-0
lines changed

4 files changed

+244
-0
lines changed

mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4431,6 +4431,116 @@ def NVVM_DotAccumulate2WayOp : NVVM_Op<"dot.accumulate.2way"> {
44314431
}];
44324432
}
44334433

4434+
//===----------------------------------------------------------------------===//
4435+
// NVVM clusterlaunchcontrol Ops.
4436+
//===----------------------------------------------------------------------===//
4437+
4438+
// Op for `nvvm.clusterlaunchcontrol.try.cancel`. It carries the
// NVVMRequiresSM<100> trait, takes two shared-memory pointers plus an optional
// `multicast` unit attribute, and produces no result.
def NVVM_ClusterLaunchControlTryCancelOp
    : NVVM_Op<"clusterlaunchcontrol.try.cancel", [NVVMRequiresSM<100>]> {
  let summary = "Request atomically canceling the launch of a cluster that has not started running yet";
  let description = [{
    `clusterlaunchcontrol.try.cancel` requests atomically canceling the launch
    of a cluster that has not started running yet. It asynchronously writes an
    opaque response to shared memory indicating whether the operation succeeded
    or failed.

    Operand `smemAddress` specifies the naturally aligned address of the
    16-byte wide shared memory location where the request's response is written.

    Operand `mbarrier` specifies the mbarrier object used to track the
    completion of the asynchronous operation.

    If `multicast` is specified, the response is asynchronously written to the
    corresponding local shared memory location (specified by `smemAddress`) of
    each CTA in the requesting cluster.

    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-clusterlaunchcontrol-try-cancel)
  }];

  // `multicast` is a presence-only flag; both pointer operands are typed as
  // shared-memory pointers.
  let arguments = (ins UnitAttr:$multicast,
                       LLVM_PointerShared:$smemAddress,
                       LLVM_PointerShared:$mbarrier);

  // The optional `multicast` keyword, when present, is followed by a comma.
  let assemblyFormat = "(`multicast` $multicast^ `,`)? $smemAddress `,` $mbarrier attr-dict";

  let extraClassDeclaration = [{
    static mlir::NVVM::IDArgPair
    getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
                          llvm::IRBuilderBase &builder);
  }];

  // LLVM IR translation: the static helper picks the intrinsic ID and its
  // operands, and the call is emitted without binding any result.
  string llvmBuilder = [{
    auto [id, args] =
        NVVM::ClusterLaunchControlTryCancelOp::getIntrinsicIDAndArgs(
            *op, moduleTranslation, builder);
    createIntrinsicCall(builder, id, args);
  }];
}
4479+
4480+
// Enumerators of ClusterLaunchControlQueryType. Each case gives the C++
// symbol, its i32 value, and the keyword used in the textual form.
def ClusterLaunchControlIsCanceled : I32EnumCase<"IS_CANCELED", 0, "is_canceled">;
def ClusterLaunchControlGetFirstCTAIDX : I32EnumCase<"GET_FIRST_CTA_ID_X", 1, "get_first_cta_id_x">;
def ClusterLaunchControlGetFirstCTAIDY : I32EnumCase<"GET_FIRST_CTA_ID_Y", 2, "get_first_cta_id_y">;
def ClusterLaunchControlGetFirstCTAIDZ : I32EnumCase<"GET_FIRST_CTA_ID_Z", 3, "get_first_cta_id_z">;

// The enum itself, generated into the ::mlir::NVVM C++ namespace.
def ClusterLaunchControlQueryType : I32Enum<
    "ClusterLaunchControlQueryType",
    "NVVM ClusterLaunchControlQueryType",
    [
      ClusterLaunchControlIsCanceled,
      ClusterLaunchControlGetFirstCTAIDX,
      ClusterLaunchControlGetFirstCTAIDY,
      ClusterLaunchControlGetFirstCTAIDZ
    ]> {
  let cppNamespace = "::mlir::NVVM";
}
4496+
4497+
// Attribute wrapper around ClusterLaunchControlQueryType for the NVVM dialect,
// with mnemonic `cluster_launch_control_query_type`. Its assembly format is
// just the enum value.
def ClusterLaunchControlQueryTypeAttr : EnumAttr<
    NVVM_Dialect,
    ClusterLaunchControlQueryType,
    "cluster_launch_control_query_type"> {
  let assemblyFormat = "$value";
}
4502+
4503+
// Op for `nvvm.clusterlaunchcontrol.query.cancel`. It carries the
// NVVMRequiresSM<100> trait, takes a query-type attribute plus an i128
// response value, and yields either an i1 or an i32.
def NVVM_ClusterLaunchControlQueryCancelOp
    : NVVM_Op<"clusterlaunchcontrol.query.cancel", [NVVMRequiresSM<100>]> {
  let summary = "Query the response of a clusterlaunchcontrol.try.cancel operation";
  let description = [{
    `clusterlaunchcontrol.query.cancel` queries the response of a
    `clusterlaunchcontrol.try.cancel` operation specified by operand
    `try_cancel_response`.

    Operand `query_type` specifies the type of query to perform and can be one
    of the following:
    - `is_canceled` : Returns true if the try cancel request succeeded,
    and false otherwise.
    - `get_first_cta_id_{x/y/z}` : Returns the x, y, or z coordinate of the
    first CTA in the canceled cluster. Behaviour is defined only if the try
    cancel request succeeded.

    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/#parallel-synchronization-and-communication-instructions-clusterlaunchcontrol-query-cancel)
  }];

  let arguments = (ins
      ClusterLaunchControlQueryTypeAttr:$query_type,
      I128:$try_cancel_response);

  // The type constraint admits both result widths; the op verifier (enabled
  // below) is declared separately in C++.
  let results = (outs AnyTypeOf<[I1, I32]>:$res);

  let assemblyFormat = "`query` `=` $query_type `,` $try_cancel_response attr-dict `:` type($res)";

  let hasVerifier = 1;

  let extraClassDeclaration = [{
    static mlir::NVVM::IDArgPair
    getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
                          llvm::IRBuilderBase &builder);
  }];

  // LLVM IR translation: the static helper picks the intrinsic ID and its
  // operands, and the emitted call is bound to the op result.
  string llvmBuilder = [{
    auto [id, args] =
        NVVM::ClusterLaunchControlQueryCancelOp::getIntrinsicIDAndArgs(
            *op, moduleTranslation, builder);
    $res = createIntrinsicCall(builder, id, args);
  }];
}
4543+
44344544
//===----------------------------------------------------------------------===//
44354545
// NVVM target attribute.
44364546
//===----------------------------------------------------------------------===//

mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1401,6 +1401,24 @@ LogicalResult NVVM::PrefetchOp::verify() {
14011401
return success();
14021402
}
14031403

1404+
/// Verifies that the result type matches the requested query kind:
/// `is_canceled` must yield an i1, and each `get_first_cta_id_{x,y,z}` query
/// must yield an i32. Emits an op error on mismatch.
LogicalResult NVVM::ClusterLaunchControlQueryCancelOp::verify() {
  using QueryKind = NVVM::ClusterLaunchControlQueryType;
  const auto resultType = getType();

  switch (getQueryType()) {
  case QueryKind::IS_CANCELED:
    if (resultType.isInteger(1))
      return success();
    return emitOpError("is_canceled query type returns an i1");

  case QueryKind::GET_FIRST_CTA_ID_X:
  case QueryKind::GET_FIRST_CTA_ID_Y:
  case QueryKind::GET_FIRST_CTA_ID_Z:
    if (resultType.isInteger(32))
      return success();
    return emitOpError("get_first_cta_id_x, get_first_cta_id_y, "
                       "get_first_cta_id_z query types return an i32");
  }

  // A query value that matches no case above is accepted as-is.
  return success();
}
1421+
14041422
/// Packs the given `field` into the `result`.
14051423
/// The `result` is 64-bits and each `field` can be 32-bits or narrower.
14061424
static llvm::Value *
@@ -2087,6 +2105,51 @@ bool NVVM::InlinePtxOp::getAsmValues(
20872105
return false; // No manual mapping needed
20882106
}
20892107

2108+
/// Builds the LLVM intrinsic ID and argument list for a
/// `clusterlaunchcontrol.try.cancel` op. The arguments are the translated
/// shared-memory address followed by the translated mbarrier pointer; the
/// multicast intrinsic variant is chosen when the `multicast` flag is set.
/// `builder` is not used by this helper.
NVVM::IDArgPair ClusterLaunchControlTryCancelOp::getIntrinsicIDAndArgs(
    Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) {
  auto tryCancelOp = cast<NVVM::ClusterLaunchControlTryCancelOp>(op);

  // Operand order: response address first, then the mbarrier.
  llvm::SmallVector<llvm::Value *> operands{
      mt.lookupValue(tryCancelOp.getSmemAddress()),
      mt.lookupValue(tryCancelOp.getMbarrier())};

  llvm::Intrinsic::ID id =
      llvm::Intrinsic::nvvm_clusterlaunchcontrol_try_cancel_async_shared;
  if (tryCancelOp.getMulticast())
    id = llvm::Intrinsic::
        nvvm_clusterlaunchcontrol_try_cancel_async_multicast_shared;

  return {id, operands};
}
2123+
2124+
/// Builds the LLVM intrinsic ID and argument list for a
/// `clusterlaunchcontrol.query.cancel` op. The single argument is the
/// translated `try_cancel_response` value; the intrinsic is selected from the
/// op's query type. `builder` is not used by this helper.
NVVM::IDArgPair ClusterLaunchControlQueryCancelOp::getIntrinsicIDAndArgs(
    Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) {
  auto curOp = cast<NVVM::ClusterLaunchControlQueryCancelOp>(op);
  llvm::SmallVector<llvm::Value *> args;
  args.push_back(mt.lookupValue(curOp.getTryCancelResponse()));

  // Start from a defined sentinel so the ID is never read uninitialized if
  // the query type holds a value not covered by the switch below.
  llvm::Intrinsic::ID intrinsicID = llvm::Intrinsic::not_intrinsic;

  switch (curOp.getQueryType()) {
  case NVVM::ClusterLaunchControlQueryType::IS_CANCELED:
    intrinsicID =
        llvm::Intrinsic::nvvm_clusterlaunchcontrol_query_cancel_is_canceled;
    break;
  case NVVM::ClusterLaunchControlQueryType::GET_FIRST_CTA_ID_X:
    intrinsicID = llvm::Intrinsic::
        nvvm_clusterlaunchcontrol_query_cancel_get_first_ctaid_x;
    break;
  case NVVM::ClusterLaunchControlQueryType::GET_FIRST_CTA_ID_Y:
    intrinsicID = llvm::Intrinsic::
        nvvm_clusterlaunchcontrol_query_cancel_get_first_ctaid_y;
    break;
  case NVVM::ClusterLaunchControlQueryType::GET_FIRST_CTA_ID_Z:
    intrinsicID = llvm::Intrinsic::
        nvvm_clusterlaunchcontrol_query_cancel_get_first_ctaid_z;
    break;
  }
  return {intrinsicID, args};
}
2152+
20902153
//===----------------------------------------------------------------------===//
20912154
// NVVMDialect initialization, type parsing, and registration.
20922155
//===----------------------------------------------------------------------===//
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
2+
3+
llvm.func @clusterlaunchcontrol_try_cancel(%addr: !llvm.ptr<3>, %mbar: !llvm.ptr<3>) {
4+
// CHECK-LABEL: define void @clusterlaunchcontrol_try_cancel(ptr addrspace(3) %0, ptr addrspace(3) %1) {
5+
// CHECK-NEXT: call void @llvm.nvvm.clusterlaunchcontrol.try_cancel.async.shared(ptr addrspace(3) %0, ptr addrspace(3) %1)
6+
// CHECK-NEXT: ret void
7+
// CHECK-NEXT: }
8+
nvvm.clusterlaunchcontrol.try.cancel %addr, %mbar
9+
llvm.return
10+
}
11+
12+
llvm.func @clusterlaunchcontrol_try_cancel_multicast(%addr: !llvm.ptr<3>, %mbar: !llvm.ptr<3>) {
13+
// CHECK-LABEL: define void @clusterlaunchcontrol_try_cancel_multicast(ptr addrspace(3) %0, ptr addrspace(3) %1) {
14+
// CHECK-NEXT: call void @llvm.nvvm.clusterlaunchcontrol.try_cancel.async.multicast.shared(ptr addrspace(3) %0, ptr addrspace(3) %1)
15+
// CHECK-NEXT: ret void
16+
// CHECK-NEXT: }
17+
nvvm.clusterlaunchcontrol.try.cancel multicast, %addr, %mbar
18+
llvm.return
19+
}
20+
21+
llvm.func @clusterlaunchcontrol_query_cancel_is_canceled(%try_cancel_response: i128) {
22+
// CHECK-LABEL: define void @clusterlaunchcontrol_query_cancel_is_canceled(i128 %0) {
23+
// CHECK-NEXT: %2 = call i1 @llvm.nvvm.clusterlaunchcontrol.query_cancel.is_canceled(i128 %0)
24+
// CHECK-NEXT: ret void
25+
// CHECK-NEXT: }
26+
%res = nvvm.clusterlaunchcontrol.query.cancel query = is_canceled, %try_cancel_response : i1
27+
llvm.return
28+
}
29+
30+
llvm.func @clusterlaunchcontrol_query_cancel_get_first_cta_id_x(%try_cancel_response: i128) {
31+
// CHECK-LABEL: define void @clusterlaunchcontrol_query_cancel_get_first_cta_id_x(i128 %0) {
32+
// CHECK-NEXT: %2 = call i32 @llvm.nvvm.clusterlaunchcontrol.query_cancel.get_first_ctaid.x(i128 %0)
33+
// CHECK-NEXT: ret void
34+
// CHECK-NEXT: }
35+
%res = nvvm.clusterlaunchcontrol.query.cancel query = get_first_cta_id_x, %try_cancel_response : i32
36+
llvm.return
37+
}
38+
39+
llvm.func @clusterlaunchcontrol_query_cancel_get_first_cta_id_y(%try_cancel_response: i128) {
40+
// CHECK-LABEL: define void @clusterlaunchcontrol_query_cancel_get_first_cta_id_y(i128 %0) {
41+
// CHECK-NEXT: %2 = call i32 @llvm.nvvm.clusterlaunchcontrol.query_cancel.get_first_ctaid.y(i128 %0)
42+
// CHECK-NEXT: ret void
43+
// CHECK-NEXT: }
44+
%res = nvvm.clusterlaunchcontrol.query.cancel query = get_first_cta_id_y, %try_cancel_response : i32
45+
llvm.return
46+
}
47+
48+
llvm.func @clusterlaunchcontrol_query_cancel_get_first_cta_id_z(%try_cancel_response: i128) {
49+
// CHECK-LABEL: define void @clusterlaunchcontrol_query_cancel_get_first_cta_id_z(i128 %0) {
50+
// CHECK-NEXT: %2 = call i32 @llvm.nvvm.clusterlaunchcontrol.query_cancel.get_first_ctaid.z(i128 %0)
51+
// CHECK-NEXT: ret void
52+
// CHECK-NEXT: }
53+
%res = nvvm.clusterlaunchcontrol.query.cancel query = get_first_cta_id_z, %try_cancel_response : i32
54+
llvm.return
55+
}

mlir/test/Target/LLVMIR/nvvmir-invalid.mlir

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -535,3 +535,19 @@ llvm.func @nanosleep() {
535535
nvvm.nanosleep 100000000000000
536536
llvm.return
537537
}
538+
539+
// -----
540+
541+
llvm.func @clusterlaunchcontrol_query_cancel_is_canceled_invalid_return_type(%try_cancel_response: i128) {
542+
// expected-error@+1 {{'nvvm.clusterlaunchcontrol.query.cancel' op is_canceled query type returns an i1}}
543+
%res = nvvm.clusterlaunchcontrol.query.cancel query = is_canceled, %try_cancel_response : i32
544+
llvm.return
545+
}
546+
547+
// -----
548+
549+
llvm.func @clusterlaunchcontrol_query_cancel_get_first_cta_id_invalid_return_type(%try_cancel_response: i128) {
550+
// expected-error@+1 {{'nvvm.clusterlaunchcontrol.query.cancel' op get_first_cta_id_x, get_first_cta_id_y, get_first_cta_id_z query types return an i32}}
551+
%res = nvvm.clusterlaunchcontrol.query.cancel query = get_first_cta_id_x, %try_cancel_response : i1
552+
llvm.return
553+
}

0 commit comments

Comments
 (0)