
Commit 6d3ed0b

[DIALECT] Rename triton_gpu to ttg and triton_nvidia_gpu to ttng (#5266)
This may require changes in downstream projects, but we think it is beneficial to shorten the dialect names and make them consistent with the existing convention of using `tt` for the `triton` dialect.
1 parent: 2003685

96 files changed (+4781, -4786 lines)
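For illustration, here is a hypothetical IR fragment showing how the textual IR changes (the blocked layout parameters are copied from the diff below; the tensor shape and the num-warps value are made up for this sketch). Ops and types in the TritonNvidiaGPU dialect change prefix the same way, e.g. triton_nvidia_gpu.warp_group_dot_wait becomes ttng.warp_group_dot_wait.

Before the rename:

  #blocked = #triton_gpu.blocked<{sizePerThread = [1, 8, 1], threadsPerWarp = [1, 16, 4], warpsPerCTA = [1, 1, 8], order = [0, 1, 2]}>
  module attributes {"triton_gpu.num-warps" = 8 : i32} {
    // ... ops on values such as tensor<32x128x128xf16, #blocked> ...
  }

After this commit:

  #blocked = #ttg.blocked<{sizePerThread = [1, 8, 1], threadsPerWarp = [1, 16, 4], warpsPerCTA = [1, 1, 8], order = [0, 1, 2]}>
  module attributes {"ttg.num-warps" = 8 : i32} {
    // ... ops on values such as tensor<32x128x128xf16, #blocked> ...
  }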


bin/triton-tensor-layout.cpp

Lines changed: 4 additions & 4 deletions
@@ -22,16 +22,16 @@ using namespace mlir;
 // clang-format off
 // Example usage:
 //
-// triton-tensor-layout -l "#triton_gpu.nvidia_mma<{versionMajor = 3, versionMinor = 0, warpsPerCTA = [8, 1], CTAsPerCGA = [1, 1], CTASplitNum = [1, 1], CTAOrder = [1, 0], instrShape = [16, 256, 32]}>" -t "tensor<128x256xf16>"
+// triton-tensor-layout -l "#ttg.nvidia_mma<{versionMajor = 3, versionMinor = 0, warpsPerCTA = [8, 1], CTAsPerCGA = [1, 1], CTASplitNum = [1, 1], CTAOrder = [1, 0], instrShape = [16, 256, 32]}>" -t "tensor<128x256xf16>"
 //
 // triton-tensor-layout -i input.mlir -t "tensor<1x128x128xf16>" -o output.txt
 //
 // triton-tensor-layout -i input.mlir -t "tensor<1x128x128xf16>" -o output.txt -alias-names="blocked,mma" -use-hw-view
 //
 // An input file usually looks like:
 // '''
-// #mma = #triton_gpu.amd_mfma<{versionMajor = 2, versionMinor = 0, warpsPerCTA = [1, 1, 8], instrShape = [32, 32], isTransposed = false}>
-// #blocked = #triton_gpu.blocked<{sizePerThread = [1, 8, 1], threadsPerWarp = [1, 16, 4], warpsPerCTA = [1, 1, 8], order = [0, 1, 2]}>
+// #mma = #ttg.amd_mfma<{versionMajor = 2, versionMinor = 0, warpsPerCTA = [1, 1, 8], instrShape = [32, 32], isTransposed = false}>
+// #blocked = #ttg.blocked<{sizePerThread = [1, 8, 1], threadsPerWarp = [1, 16, 4], warpsPerCTA = [1, 1, 8], order = [0, 1, 2]}>
 // '''
 // clang-format on

@@ -83,7 +83,7 @@ LogicalResult layoutPrint(RankedTensorType tensorType, raw_ostream &os) {
   StringRef dialectName = tensorType.getEncoding().getDialect().getNamespace();

   // Dispatch to the corresponding dialect helper function to print the layout.
-  if (dialectName == "triton_gpu") {
+  if (dialectName == "ttg") {
     os << triton::gpu::getLayoutStr(tensorType, UseHWPointOfView);
     return success();
   }

include/triton/Analysis/Allocation.h

Lines changed: 2 additions & 2 deletions
@@ -180,8 +180,8 @@ class Allocation {
 private:
   /// A class that represents a shared memory buffer
   struct BufferT {
-    /// Explicit: triton_gpu.local_alloc
-    /// Scratch: triton_gpu.convert_layout
+    /// Explicit: ttg.local_alloc
+    /// Scratch: ttg.convert_layout
     /// Virtual: triton.call
     enum class BufferKind { Explicit, Scratch, Virtual };
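As a rough sketch of the two renamed buffer kinds (hand-written, not part of this commit; shapes, layout names, and the exact memdesc type parameters are illustrative assumptions), an explicit buffer comes from ttg.local_alloc while a scratch buffer is implied by ttg.convert_layout:

  // Illustrative only: an explicit shared-memory buffer under the new dialect prefix.
  %buf = ttg.local_alloc %t : (tensor<128x64xf16, #blocked>) -> !ttg.memdesc<128x64xf16, #shared, #ttg.shared_memory>
  // Illustrative only: a layout conversion that may require scratch shared memory.
  %cvt = ttg.convert_layout %t : tensor<128x64xf16, #blocked> -> tensor<128x64xf16, #mma>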

include/triton/Conversion/TritonGPUToLLVM/Utility.h

Lines changed: 1 addition & 1 deletion
@@ -400,7 +400,7 @@ inline Value getGlobalScratchPtr(Location loc, RewriterBase &rewriter,

   ModuleOp mod = funcOp.getOperation()->getParentOfType<ModuleOp>();
   auto allocSizeAttr = mod.getOperation()->getAttrOfType<mlir::IntegerAttr>(
-      "triton_gpu.global_scratch_memory_size");
+      "ttg.global_scratch_memory_size");
   if (!allocSizeAttr) {
     return gmemBase;
   }
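The attribute read here is a module-level attribute; a hypothetical module header carrying it under the new name (the value 1024 is made up for this sketch) would look like:

  // Illustrative only: module attribute consumed by getGlobalScratchPtr.
  module attributes {"ttg.global_scratch_memory_size" = 1024 : i32} {
  }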

include/triton/Conversion/TritonToTritonGPU/TritonToTritonGPUPass.h

Lines changed: 4 additions & 4 deletions
@@ -12,11 +12,11 @@ template <typename T> class OperationPass;

 namespace triton {

-constexpr static char AttrNumWarpsName[] = "triton_gpu.num-warps";
-constexpr static char AttrNumCTAsName[] = "triton_gpu.num-ctas";
-constexpr static char AttrTargetName[] = "triton_gpu.target";
+constexpr static char AttrNumWarpsName[] = "ttg.num-warps";
+constexpr static char AttrNumCTAsName[] = "ttg.num-ctas";
+constexpr static char AttrTargetName[] = "ttg.target";

-constexpr static char AttrNumThreadsPerWarp[] = "triton_gpu.threads-per-warp";
+constexpr static char AttrNumThreadsPerWarp[] = "ttg.threads-per-warp";

 // Create the pass with numWarps passed from cl::opt.
 std::unique_ptr<OperationPass<ModuleOp>> createConvertTritonToTritonGPUPass();
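These constants name module-level attributes attached during the Triton-to-TritonGPU conversion; a hypothetical module header using the renamed attribute names (all values, including the target string, are illustrative assumptions) might read:

  // Illustrative only: attribute values are made up for this sketch.
  module attributes {"ttg.num-warps" = 4 : i32, "ttg.num-ctas" = 1 : i32,
                     "ttg.threads-per-warp" = 32 : i32, "ttg.target" = "cuda:90"} {
  }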

include/triton/Dialect/TritonGPU/IR/CMakeLists.txt

Lines changed: 4 additions & 4 deletions
@@ -1,12 +1,12 @@
 set(MLIR_BINARY_DIR ${CMAKE_BINARY_DIR})

 set(LLVM_TARGET_DEFINITIONS TritonGPUOps.td)
-mlir_tablegen(Dialect.h.inc -gen-dialect-decls -dialect=triton_gpu)
-mlir_tablegen(Dialect.cpp.inc -gen-dialect-defs -dialect=triton_gpu)
+mlir_tablegen(Dialect.h.inc -gen-dialect-decls -dialect=ttg)
+mlir_tablegen(Dialect.cpp.inc -gen-dialect-defs -dialect=ttg)
 mlir_tablegen(Ops.h.inc -gen-op-decls)
 mlir_tablegen(Ops.cpp.inc -gen-op-defs)
-mlir_tablegen(Types.h.inc -gen-typedef-decls -typedefs-dialect=triton_gpu)
-mlir_tablegen(Types.cpp.inc -gen-typedef-defs -typedefs-dialect=triton_gpu)
+mlir_tablegen(Types.h.inc -gen-typedef-decls -typedefs-dialect=ttg)
+mlir_tablegen(Types.cpp.inc -gen-typedef-defs -typedefs-dialect=ttg)
 add_mlir_doc(TritonGPUDialect TritonGPUDialect dialects/ -gen-dialect-doc)
 add_mlir_doc(TritonGPUOps TritonGPUOps dialects/ -gen-op-doc)
 add_public_tablegen_target(TritonGPUTableGen)

include/triton/Dialect/TritonGPU/IR/TritonGPUAttrDefs.td

Lines changed: 3 additions & 3 deletions
@@ -616,7 +616,7 @@ Example 1, a row-major coalesced layout may partition a 16x16 tensor over 2 warp

 for

-#triton_gpu.blocked_layout<{
+#ttg.blocked_layout<{
   sizePerThread = {2, 2}
   threadsPerWarp = {8, 4}
   warpsPerCTA = {1, 2}

@@ -642,7 +642,7 @@ Example 2, a row-major coalesced layout may partition a 32x32 tensor over 2 warp
 [ 28 28 29 29 30 30 31 31 ; 60 60 61 61 62 62 63 63 28 28 29 29 30 30 31 31 ; 60 60 61 61 62 62 63 63 ]
 for

-#triton_gpu.blocked_layout<{
+#ttg.blocked_layout<{
   sizePerThread = {2, 2}
   threadsPerWarp = {8, 4}
   warpsPerCTA = {1, 2}

@@ -672,7 +672,7 @@ CTA [1,0] CTA [1,1]
 [ 28 28 29 29 30 30 31 31 ; 60 60 61 61 62 62 63 63 ] [ 28 28 29 29 30 30 31 31 ; 60 60 61 61 62 62 63 63 ]
 for

-#triton_gpu.blocked_layout<{
+#ttg.blocked_layout<{
   sizePerThread = {2, 2}
   threadsPerWarp = {8, 4}
   warpsPerCTA = {1, 2}
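For reference, a tensor type carrying such an encoding is now written with the shortened prefix. A minimal sketch matching Example 1 above (the order field and the 16x16xf16 element type are assumptions added to complete the attribute; actual IR spells the mnemonic #ttg.blocked, as in the triton-tensor-layout example earlier):

  // Illustrative only: parameters taken from Example 1; order and element type assumed.
  #blocked = #ttg.blocked<{sizePerThread = [2, 2], threadsPerWarp = [8, 4], warpsPerCTA = [1, 2], order = [1, 0]}>
  // A value of type tensor<16x16xf16, #blocked> is then distributed as shown above.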

include/triton/Dialect/TritonGPU/IR/TritonGPUDialect.td

Lines changed: 9 additions & 9 deletions
@@ -4,7 +4,7 @@
 include "mlir/IR/OpBase.td"

 def TritonGPU_Dialect : Dialect {
-  let name = "triton_gpu";
+  let name = "ttg";

   let cppNamespace = "::mlir::triton::gpu";

@@ -21,24 +21,24 @@ def TritonGPU_Dialect : Dialect {
   ];

   let extraClassDeclaration = [{
-    static std::string getNumWarpsAttrName() { return "triton_gpu.num-warps"; }
+    static std::string getNumWarpsAttrName() { return "ttg.num-warps"; }
     static int getNumWarps(ModuleOp mod) {
-      if (!mod->hasAttr("triton_gpu.num-warps"))
+      if (!mod->hasAttr("ttg.num-warps"))
         llvm::report_fatal_error(
-            "TritonGPU module should contain a triton_gpu.num-warps attribute");
-      return cast<IntegerAttr>(mod->getAttr("triton_gpu.num-warps")).getInt();
+            "TritonGPU module should contain a ttg.num-warps attribute");
+      return cast<IntegerAttr>(mod->getAttr("ttg.num-warps")).getInt();
     }
     static int getNumCTAs(ModuleOp mod) {
-      if (!mod->hasAttr("triton_gpu.num-ctas"))
+      if (!mod->hasAttr("ttg.num-ctas"))
         return 1;
-      return cast<IntegerAttr>(mod->getAttr("triton_gpu.num-ctas")).getInt();
+      return cast<IntegerAttr>(mod->getAttr("ttg.num-ctas")).getInt();
     }
     void registerTypes();

-    static std::string getThreadsPerWarpAttrName() { return "triton_gpu.threads-per-warp"; }
+    static std::string getThreadsPerWarpAttrName() { return "ttg.threads-per-warp"; }

     static int getThreadsPerWarp(ModuleOp mod) {
-      Attribute threadsPerWarp = mod->getDiscardableAttr("triton_gpu.threads-per-warp");
+      Attribute threadsPerWarp = mod->getDiscardableAttr("ttg.threads-per-warp");
       if(!threadsPerWarp) {
         return 32;
       }

include/triton/Dialect/TritonGPU/IR/TritonGPUOps.td

Lines changed: 5 additions & 5 deletions
@@ -188,13 +188,13 @@ def TTG_LocalDeallocOp : TTG_Op<"local_dealloc", [MemoryEffects<[MemFree<SharedM

     Because we assume a memdesc is dead at the first point that post-dominates
     its uses, ops that wait for an async operation on a memdesc to complete
-    (such as triton_nvidia_gpu.warp_group_dot_wait) should also take the memdesc as an
+    (such as ttng.warp_group_dot_wait) should also take the memdesc as an
     operand.
   }];

   let arguments = (ins TTG_MemDescType:$src);

-  // Use qualified() otherwise "!triton_gpu.memdesc<X>" is printed as "<X>".
+  // Use qualified() otherwise "!ttg.memdesc<X>" is printed as "<X>".
   let assemblyFormat = [{$src attr-dict `:` qualified(type($src))}];
 }

@@ -215,7 +215,7 @@ def TTG_MemDescSubviewOp : TTG_Op<"memdesc_subview", [Pure]> {
   let arguments = (
     ins TTG_MemDescType:$src, Variadic<I32>:$offsets);

-  // Use qualified() otherwise "!triton_gpu.memdesc<X>" is printed as "<X>".
+  // Use qualified() otherwise "!ttg.memdesc<X>" is printed as "<X>".
   let assemblyFormat = [{$src `[` $offsets `]` attr-dict `:` qualified(type($src)) `->` qualified(type($result))}];

   let results = (outs TTG_MemDescType:$result);

@@ -262,7 +262,7 @@ def TTG_LocalLoadOp : TTG_Op<"local_load", [DeclareOpInterfaceMethods<MemoryEffe
     build($_builder, $_state, retType, src, /*token=*/static_cast<mlir::Value>(nullptr));
   }]>];

-  // Use qualified() otherwise "!triton_gpu.memdesc<X>" is printed as "<X>".
+  // Use qualified() otherwise "!ttg.memdesc<X>" is printed as "<X>".
   let assemblyFormat = [{$src (`token` $token^)? attr-dict `:` qualified(type($src)) `->` type($result)}];

   let results = (outs TT_Tensor:$result);

@@ -277,7 +277,7 @@ def TTG_LocalStoreOp : TTG_Op<"local_store", [DeclareOpInterfaceMethods<MemoryEf
   let arguments = (ins TT_Tensor:$src, TTG_MemDescType:$dst);

   let hasVerifier = 1;
-  // Use qualified() otherwise "!triton_gpu.memdesc<X>" is printed as "<X>".
+  // Use qualified() otherwise "!ttg.memdesc<X>" is printed as "<X>".
   let assemblyFormat = [{
     $src `,` $dst attr-dict `:` type($src) `->` qualified(type($dst))
   }];
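To show what qualified() changes in the printed form, here is a hand-written sketch of a ttg.local_load following the assembly format above (the shape and the #shared and #blocked names are illustrative assumptions); without qualified(), the source type would print as just "<128x64xf16, #shared>" rather than "!ttg.memdesc<128x64xf16, #shared>":

  // Illustrative only: memdesc type parameters are simplified for this sketch.
  %r = ttg.local_load %buf : !ttg.memdesc<128x64xf16, #shared> -> tensor<128x64xf16, #blocked>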

include/triton/Dialect/TritonNvidiaGPU/IR/CMakeLists.txt

Lines changed: 4 additions & 4 deletions
@@ -1,12 +1,12 @@
 set(MLIR_BINARY_DIR ${CMAKE_BINARY_DIR})

 set(LLVM_TARGET_DEFINITIONS TritonNvidiaGPUOps.td)
-mlir_tablegen(Dialect.h.inc -gen-dialect-decls -dialect=triton_nvidia_gpu)
-mlir_tablegen(Dialect.cpp.inc -gen-dialect-defs -dialect=triton_nvidia_gpu)
+mlir_tablegen(Dialect.h.inc -gen-dialect-decls -dialect=ttng)
+mlir_tablegen(Dialect.cpp.inc -gen-dialect-defs -dialect=ttng)
 mlir_tablegen(Ops.h.inc -gen-op-decls)
 mlir_tablegen(Ops.cpp.inc -gen-op-defs)
-mlir_tablegen(Types.h.inc -gen-typedef-decls -typedefs-dialect=triton_nvidia_gpu)
-mlir_tablegen(Types.cpp.inc -gen-typedef-defs -typedefs-dialect=triton_nvidia_gpu)
+mlir_tablegen(Types.h.inc -gen-typedef-decls -typedefs-dialect=ttng)
+mlir_tablegen(Types.cpp.inc -gen-typedef-defs -typedefs-dialect=ttng)
 add_mlir_doc(TritonNvidiaGPUDialect TritonNvidiaGPUDialect dialects/ -gen-dialect-doc)
 add_mlir_doc(TritonNvidiaGPUOps TritonNvidiaGPUOps dialects/ -gen-op-doc)
 add_public_tablegen_target(TritonNvidiaGPUTableGen)

include/triton/Dialect/TritonNvidiaGPU/IR/TritonNvidiaGPUDialect.td

Lines changed: 8 additions & 8 deletions
@@ -25,7 +25,7 @@
 include "mlir/IR/OpBase.td"

 def TritonNvidiaGPU_Dialect : Dialect {
-  let name = "triton_nvidia_gpu";
+  let name = "ttng";

   let cppNamespace = "::mlir::triton::nvidia_gpu";

@@ -43,18 +43,18 @@ def TritonNvidiaGPU_Dialect : Dialect {
   ];

   let extraClassDeclaration = [{
-    static std::string getNumWarpsAttrName() { return "triton_gpu.num-warps"; }
+    static std::string getNumWarpsAttrName() { return "ttg.num-warps"; }
     static int getNumWarps(ModuleOp mod) {
-      if(!mod->hasAttr("triton_gpu.num-warps"))
+      if(!mod->hasAttr("ttg.num-warps"))
         llvm::report_fatal_error(
-            "TritonGPU module should contain a triton_gpu.num-warps attribute");
-      return cast<IntegerAttr>(mod->getAttr("triton_gpu.num-warps")).getInt();
+            "TritonGPU module should contain a ttg.num-warps attribute");
+      return cast<IntegerAttr>(mod->getAttr("ttg.num-warps")).getInt();
     }
     static int getNumCTAs(ModuleOp mod) {
-      if(!mod->hasAttr("triton_gpu.num-ctas"))
+      if(!mod->hasAttr("ttg.num-ctas"))
         llvm::report_fatal_error(
-            "TritonGPU module should contain a triton_gpu.num-ctas attribute");
-      return cast<IntegerAttr>(mod->getAttr("triton_gpu.num-ctas")).getInt();
+            "TritonGPU module should contain a ttg.num-ctas attribute");
+      return cast<IntegerAttr>(mod->getAttr("ttg.num-ctas")).getInt();
     }
     void registerTypes();
   }];
