Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 109 additions & 0 deletions mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -2617,6 +2617,30 @@ def Tcgen05GroupKindAttr :
let assemblyFormat = "`<` $value `>`";
}

def Tcgen05FenceBefore : I32EnumAttrCase<"BEFORE_THREAD_SYNC", 0, "before">;
def Tcgen05FenceAfter : I32EnumAttrCase<"AFTER_THREAD_SYNC", 1, "after">;
def Tcgen05FenceKind : I32EnumAttr<"Tcgen05FenceKind", "NVVM Tcgen05 fence kind",
[Tcgen05FenceBefore, Tcgen05FenceAfter]> {
let genSpecializedAttr = 0;
let cppNamespace = "::mlir::NVVM";
}
def Tcgen05FenceKindAttr :
EnumAttr<NVVM_Dialect, Tcgen05FenceKind, "tcgen05_fence"> {
let assemblyFormat = "`<` $value `>`";
}

def Tcgen05WaitLoad : I32EnumAttrCase<"LOAD", 0, "load">;
def Tcgen05WaitStore : I32EnumAttrCase<"STORE", 1, "store">;
def Tcgen05WaitKind : I32EnumAttr<"Tcgen05WaitKind", "NVVM Tcgen05 wait kind",
[Tcgen05WaitLoad, Tcgen05WaitStore]> {
let genSpecializedAttr = 0;
let cppNamespace = "::mlir::NVVM";
}
def Tcgen05WaitKindAttr :
EnumAttr<NVVM_Dialect, Tcgen05WaitKind, "tcgen05_wait"> {
let assemblyFormat = "`<` $value `>`";
}

def NVVM_Tcgen05AllocOp : NVVM_Op<"tcgen05.alloc"> {
let summary = "Tcgen05 alloc operation";
let description = [{
Expand Down Expand Up @@ -2701,6 +2725,91 @@ def NVVM_Tcgen05RelinquishAllocPermitOp : NVVM_Op<"tcgen05.relinquish_alloc_perm
}];
}

def NVVM_Tcgen05FenceOp : NVVM_Op<"tcgen05.fence"> {
let summary = "Tcgen05 fence operations";
let description = [{
The `tcgen05.fence<before>` orders all prior async tcgen05 operations
with respect to the subsequent tcgen05 and execution ordering operations.
The `tcgen05.fence<after>` orders all subsequent async tcgen05 operations
with respect to the prior tcgen05 and execution ordering operations.

[For more information refer to the PTX ISA]
(https://docs.nvidia.com/cuda/parallel-thread-execution/#tensorcore-5th-generation-instructions-tcgen05-fence)
}];

let arguments = (ins Tcgen05FenceKindAttr:$kind);
let assemblyFormat = "$kind attr-dict";

string llvmBuilder = [{
auto id = ($kind == NVVM::Tcgen05FenceKind::BEFORE_THREAD_SYNC)
? llvm::Intrinsic::nvvm_tcgen05_fence_before_thread_sync
: llvm::Intrinsic::nvvm_tcgen05_fence_after_thread_sync;
createIntrinsicCall(builder, id);
}];
}

def NVVM_Tcgen05WaitOp : NVVM_Op<"tcgen05.wait"> {
let summary = "Tcgen05 wait operations";
let description = [{
The `tcgen05.wait<load>` causes the executing thread to block until
all prior `tcgen05.ld` operations issued by the executing thread
have completed. Similarly, the `tcgen05.wait<store>` causes the executing
thread to block until all prior `tcgen05.st` operations issued by the
executing thread have completed.
[For more information refer PTX ISA]
(https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen05-instructions-tcgen05-wait)
}];

let arguments = (ins Tcgen05WaitKindAttr:$kind);
let assemblyFormat = "$kind attr-dict";

string llvmBuilder = [{
auto id = ($kind == NVVM::Tcgen05WaitKind::LOAD)
? llvm::Intrinsic::nvvm_tcgen05_wait_ld
: llvm::Intrinsic::nvvm_tcgen05_wait_st;
createIntrinsicCall(builder, id);
}];
}

def NVVM_Tcgen05CommitOp : NVVM_Op<"tcgen05.commit"> {
let summary = "Tcgen05 commit operations";
let description = [{
The `tcgen05.commit` makes the mbarrier object, specified by
the operand `addr`, track the completion of all the prior
async-tcgen05 operations initiated by the executing thread.
The multicast variants allow signaling on the mbarrier objects
of multiple CTAs within the cluster. Operand `multicastMask`,
when present, specifies the destination CTAs in the cluster such
that each bit position in the 16-bit `multicastMask` operand
corresponds to the `nvvm.read.ptx.sreg.ctaid` of the destination CTA.
[For more information refer PTX ISA]
(https://docs.nvidia.com/cuda/parallel-thread-execution/#tcgen-async-sync-operations-commit)
}];

let arguments = (ins
AnyTypeOf<[LLVM_AnyPointer, LLVM_PointerShared]>:$addr,
Optional<I16>:$multicastMask,
DefaultValuedAttr<Tcgen05GroupKindAttr, "Tcgen05GroupKind::CTA_1">:$group);

let assemblyFormat = [{
$addr (`,` `multicast_mask` `=` $multicastMask^)?
attr-dict `:` type(operands)
}];

let extraClassDeclaration = [{
static llvm::Intrinsic::ID
getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
llvm::SmallVector<llvm::Value *> &args);
}];

string llvmBuilder = [{
llvm::SmallVector<llvm::Value *> args;
auto id = NVVM::Tcgen05CommitOp::getIntrinsicIDAndArgs(
*op, moduleTranslation, args);
createIntrinsicCall(builder, id, args);
}];
}

//===----------------------------------------------------------------------===//
// NVVM target attribute.
//===----------------------------------------------------------------------===//
Expand Down
30 changes: 30 additions & 0 deletions mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1284,6 +1284,36 @@ llvm::Intrinsic::ID Tcgen05DeallocOp::getIntrinsicIDAndArgs(
return id;
}

#define TCGEN05_COMMIT_IMPL(cg, is_shared, mc) \
is_shared ? llvm::Intrinsic::nvvm_tcgen05_commit##mc##_shared##_##cg \
: llvm::Intrinsic::nvvm_tcgen05_commit##mc##_##cg

#define GET_TCGEN05_COMMIT_ID(cta_group, is_shared, has_mc) \
has_mc ? TCGEN05_COMMIT_IMPL(cta_group, is_shared, _mc) \
: TCGEN05_COMMIT_IMPL(cta_group, is_shared, )

llvm::Intrinsic::ID
Tcgen05CommitOp::getIntrinsicIDAndArgs(Operation &op,
LLVM::ModuleTranslation &mt,
llvm::SmallVector<llvm::Value *> &args) {
auto curOp = cast<NVVM::Tcgen05CommitOp>(op);
unsigned AS = llvm::cast<LLVM::LLVMPointerType>(curOp.getAddr().getType())
.getAddressSpace();
bool isShared = AS == NVVMMemorySpace::kSharedMemorySpace;
bool hasMulticast = curOp.getMulticastMask() ? true : false;
bool is2CTAMode = curOp.getGroup() == Tcgen05GroupKind::CTA_2;

auto id = is2CTAMode ? GET_TCGEN05_COMMIT_ID(cg2, isShared, hasMulticast)
: GET_TCGEN05_COMMIT_ID(cg1, isShared, hasMulticast);

// Fill the Intrinsic Args
args.push_back(mt.lookupValue(curOp.getAddr()));
if (hasMulticast)
args.push_back(mt.lookupValue(curOp.getMulticastMask()));

return id;
}

/// Infer the result ranges for the NVVM SpecialRangeableRegisterOp that might
/// have ConstantRangeAttr.
static void nvvmInferResultRanges(Operation *op, Value result,
Expand Down
56 changes: 56 additions & 0 deletions mlir/test/Target/LLVMIR/nvvm/tcgen05-barriers.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// RUN: mlir-opt -split-input-file -verify-diagnostics %s
// RUN: mlir-translate -mlir-to-llvmir -split-input-file -verify-diagnostics %s | FileCheck %s --check-prefix=CHECK-LLVM

// CHECK-LABEL: @llvm_nvvm_tcgen05_fence
llvm.func @llvm_nvvm_tcgen05_fence() {
// CHECK-LLVM: call void @llvm.nvvm.tcgen05.fence.before.thread.sync()
nvvm.tcgen05.fence #nvvm.tcgen05_fence<before>

// CHECK-LLVM: call void @llvm.nvvm.tcgen05.fence.after.thread.sync()
nvvm.tcgen05.fence #nvvm.tcgen05_fence<after>

llvm.return
}

// CHECK-LABEL: @llvm_nvvm_tcgen05_wait
llvm.func @llvm_nvvm_tcgen05_wait() {
// CHECK-LLVM: call void @llvm.nvvm.tcgen05.wait.ld()
nvvm.tcgen05.wait #nvvm.tcgen05_wait<load>

// CHECK-LLVM: call void @llvm.nvvm.tcgen05.wait.st()
nvvm.tcgen05.wait #nvvm.tcgen05_wait<store>

llvm.return
}

// CHECK-LABEL: @llvm_nvvm_tcgen05_commit_generic
llvm.func @llvm_nvvm_tcgen05_commit_generic(%barrier : !llvm.ptr, %cta_mask : i16) {
// CHECK-LLVM: call void @llvm.nvvm.tcgen05.commit.cg1(ptr %{{.*}})
nvvm.tcgen05.commit %barrier : !llvm.ptr

// CHECK-LLVM: call void @llvm.nvvm.tcgen05.commit.cg2(ptr %{{.*}})
nvvm.tcgen05.commit %barrier {group = #nvvm.tcgen05_group<cta_2>} : !llvm.ptr

// CHECK-LLVM: call void @llvm.nvvm.tcgen05.commit.mc.cg1(ptr %{{.*}}, i16 %{{.*}})
nvvm.tcgen05.commit %barrier, multicast_mask = %cta_mask : !llvm.ptr, i16

// CHECK-LLVM: call void @llvm.nvvm.tcgen05.commit.mc.cg2(ptr %{{.*}}, i16 %{{.*}})
nvvm.tcgen05.commit %barrier, multicast_mask = %cta_mask {group = #nvvm.tcgen05_group<cta_2>} : !llvm.ptr, i16
llvm.return
}

// CHECK-LABEL: @llvm_nvvm_tcgen05_commit_shared
llvm.func @llvm_nvvm_tcgen05_commit_shared(%barrier : !llvm.ptr<3>, %cta_mask : i16) {
// CHECK-LLVM: call void @llvm.nvvm.tcgen05.commit.shared.cg1(ptr addrspace(3) %{{.*}})
nvvm.tcgen05.commit %barrier : !llvm.ptr<3>

// CHECK-LLVM: call void @llvm.nvvm.tcgen05.commit.shared.cg2(ptr addrspace(3) %{{.*}})
nvvm.tcgen05.commit %barrier {group = #nvvm.tcgen05_group<cta_2>} : !llvm.ptr<3>

// CHECK-LLVM: call void @llvm.nvvm.tcgen05.commit.mc.shared.cg1(ptr addrspace(3) %{{.*}}, i16 %{{.*}})
nvvm.tcgen05.commit %barrier, multicast_mask = %cta_mask : !llvm.ptr<3>, i16

// CHECK-LLVM: call void @llvm.nvvm.tcgen05.commit.mc.shared.cg2(ptr addrspace(3) %{{.*}}, i16 %{{.*}})
nvvm.tcgen05.commit %barrier, multicast_mask = %cta_mask {group = #nvvm.tcgen05_group<cta_2>} : !llvm.ptr<3>, i16
llvm.return
}