Skip to content

Commit 287802f

Browse files
Merge OpenAI Triton commit 2fafd63 (#5026)
This PR changes the Triton base from 83fbc0e to 2fafd63 (Aug 20). Pass rate: 98.85%->98.84%
2 parents 37866dc + faa26cd commit 287802f

File tree

43 files changed

+248
-492
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+248
-492
lines changed

bin/CMakeLists.txt

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
2-
get_property(conversion_libs GLOBAL PROPERTY MLIR_CONVERSION_LIBS)
31
get_property(triton_libs GLOBAL PROPERTY TRITON_LIBS)
42

53
add_llvm_executable(triton-opt triton-opt.cpp PARTIAL_SOURCES_INTENDED)
@@ -8,8 +6,6 @@ add_llvm_executable(triton-opt triton-opt.cpp PARTIAL_SOURCES_INTENDED)
86
llvm_update_compile_flags(triton-opt)
97
target_link_libraries(triton-opt PRIVATE
108
TritonIntelLLVMIR
11-
${dialect_libs}
12-
${conversion_libs}
139
${triton_libs}
1410
# tests
1511
TritonTestAnalysis
@@ -19,6 +15,8 @@ target_link_libraries(triton-opt PRIVATE
1915
# MLIR core
2016
MLIROptLib
2117
MLIRPass
18+
MLIRRegisterAllDialects
19+
MLIRRegisterAllPasses
2220
MLIRTransforms
2321
)
2422

@@ -29,8 +27,6 @@ mlir_check_all_link_libraries(triton-reduce)
2927

3028
llvm_update_compile_flags(triton-reduce)
3129
target_link_libraries(triton-reduce PRIVATE
32-
${dialect_libs}
33-
${conversion_libs}
3430
${triton_libs}
3531
# tests
3632
TritonTestAnalysis
@@ -40,6 +36,8 @@ target_link_libraries(triton-reduce PRIVATE
4036
# MLIR core
4137
MLIRReduceLib
4238
MLIRPass
39+
MLIRRegisterAllDialects
40+
MLIRRegisterAllPasses
4341
MLIRTransforms
4442
)
4543

@@ -49,8 +47,6 @@ add_llvm_executable(triton-lsp triton-lsp.cpp PARTIAL_SOURCES_INTENDED)
4947

5048
llvm_update_compile_flags(triton-lsp)
5149
target_link_libraries(triton-lsp PRIVATE
52-
${dialect_libs}
53-
${conversion_libs}
5450
${triton_libs}
5551
# tests
5652
TritonTestAnalysis
@@ -60,6 +56,8 @@ target_link_libraries(triton-lsp PRIVATE
6056
# MLIR core
6157
MLIRLspServerLib
6258
MLIRPass
59+
MLIRRegisterAllDialects
60+
MLIRRegisterAllPasses
6361
MLIRTransforms
6462
)
6563

@@ -91,10 +89,11 @@ export_executable_symbols_for_plugins(triton-llvm-opt)
9189
add_llvm_executable(triton-tensor-layout triton-tensor-layout.cpp PARTIAL_SOURCES_INTENDED)
9290
target_link_libraries(triton-tensor-layout PRIVATE
9391
${triton_libs}
94-
${conversion_libs}
95-
${dialect_libs}
9692
TritonTestAnalysis
9793
TritonTestDialect
9894
TritonTestProton
9995
TritonAMDGPUTestAnalysis
96+
MLIRRegisterAllDialects
97+
MLIRRegisterAllPasses
98+
MLIRTransforms
10099
)

cmake/llvm-hash.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
570885128351868c1308bb22e8ca351d318bc4a1
1+
bc773632355b3cebde350b0341624e88be40b744

include/triton/Dialect/Triton/Transforms/Passes.td

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -90,15 +90,4 @@ def TritonLoopAwareCSE : Pass<"triton-loop-aware-cse", "mlir::ModuleOp"> {
9090
}];
9191
}
9292

93-
def TritonSCFToCF : Pass</*cli-arg*/"triton-scf-to-cf", /*Op*/"mlir::ModuleOp"> {
94-
let summary = "MLIR's SCF To CF plus some extra attributes propagation.";
95-
let description = [{
96-
This pass uses MLIR's SCF To CF pass as base. Additionally, it propagates
97-
some extra attributes to the converted CFG.
98-
TODO: upstream the llvm loop attribute propagation and remove this pass.
99-
}];
100-
101-
let dependentDialects = [];
102-
}
103-
10493
#endif

include/triton/Dialect/TritonGPU/IR/TritonGPUOps.td

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -100,12 +100,6 @@ def TTG_AsyncCopyGlobalToLocalOp : TTG_Op<"async_copy_global_to_local", [
100100
DefaultValuedAttr<BoolAttr, "false">:$isVolatile
101101
);
102102

103-
let builders = [
104-
OpBuilder<(ins "Value":$src, "Value":$result,
105-
"triton::CacheModifier":$cache,
106-
"triton::EvictionPolicy":$evict, "bool":$isVolatile)>,
107-
];
108-
109103
let results = (outs TTG_AsyncToken:$token);
110104

111105
let extraClassDeclaration = [{
@@ -395,9 +389,6 @@ def TTG_MaskOp: TTG_Op<"mask",
395389
let arguments = (ins I1:$pred);
396390
let results = (outs Variadic<AnyType>:$result);
397391
let regions = (region SizedRegion<1>:$region);
398-
let builders = [
399-
OpBuilder<(ins "Value":$pred)>,
400-
];
401392
}
402393

403394
def TTG_MaskReturnOp: TTG_Op<"mask.return",

include/triton/Dialect/TritonInstrument/IR/TritonInstrumentOps.td

Lines changed: 29 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,25 @@ include "mlir/IR/OpBase.td"
88
include "mlir/Interfaces/SideEffectInterfaces.td"
99
include "triton/Dialect/TritonInstrument/IR/TritonInstrumentAttrDefs.td"
1010

11+
// Concurrency Sanitizer data structures:
12+
// ConSan keeps auxiliary data required for tracking memory accesses in tensors.
13+
// These tensors are stored as a distributed tensor or in global scratch memory.
14+
//
15+
// Tensor name | Storage | Type | Description
16+
// ------------- | ------- | -------- | -----------
17+
// buffers | tensor | <Bxi64> | List of base pointers of all the buffers and sub-buffers in the program
18+
// barriers | tensor | <Mxi64> | List of pointers to all individual mbarriers in the program
19+
// writeState | scratch | <Mxi8> | Marks which buffers are being written to.
20+
// | | | Entries in this tensor are set when write operation is issued. Entries are bitfields, where:
21+
// | | | - bit 0: 1 if the buffer is being written to
22+
// | | | - bit 1: 1 if the write is *not* hwPipelined
23+
// writeBars | scratch | <BxMxi8> | Which barriers track writes to which buffers.
24+
// | | | Entries in this tensor are set when commit with barrier is called.
25+
// readBars | scratch | <BxMxi8> | Which barriers track reads from which buffers.
26+
// | | | Entries in this tensor are set when read operation with barrier is issued.
27+
// asyncCpCommits | scratch | <Bxi8> | Tracks number of outstanding commits for buffers written with cp-async.
28+
// wgmmaCommits | scratch | <Bxi8> | Tracks number of outstanding commits for buffers being read by wgmma.
29+
1130
//
1231
// Interfaces
1332
//
@@ -52,10 +71,6 @@ def TTI_ExperimentalCheckWriteStateOp : TTI_Op<"experimental_check_write_state",
5271
let description = [{
5372
Check if the writeState tensor has non-zero value associated with the buffer.
5473

55-
`writeState` is a tensor of 8b bitfields, where:
56-
- bit 0: 1 if the buffer is being written to
57-
- bit 1: 1 if the write is *not* hwPipelined
58-
5974
If hwPipelined is true, shift the bitfield by 1 to check the second bit - this
6075
means that the error won't be triggered if another pipelined write is outstanding.
6176
}];
@@ -79,7 +94,7 @@ def TTI_ExperimentalCheckWriteStateOp : TTI_Op<"experimental_check_write_state",
7994
def TTI_ExperimentalCheckReadBarriersOp : TTI_Op<"experimental_check_read_barriers", [MemoryEffects<[MemWrite<GlobalMemory>]>]> {
8095
let summary = "check if there are outstanding reads from a buffer guarded by a mbar";
8196
let description = [{
82-
Check if there are outstanding reads from a buffer guarded by a mbar.
97+
Check if any of the entries in readBars in the row corresponding to the buffer is non-zero.
8398
}];
8499
let arguments = (ins
85100
TTG_MemDescType:$buf,
@@ -100,11 +115,7 @@ def TTI_ExperimentalSetWriteStateOp : TTI_Op<"experimental_set_write_state", [Me
100115
let description = [{
101116
Mark a buffer as being written to. It is not yet tracked by a barrier, until
102117
`commit_write_with_barrier` is called, at which point all the buffers being written
103-
to are marked as tracked by the barrier.
104-
105-
`writeState` is a tensor of 8b bitfields, where:
106-
- bit 0: 1 if the buffer is being written to
107-
- bit 1: 1 if the write is *not* hwPipelined
118+
to are marked as tracked by the barrier in writeBars tensor.
108119

109120
If hwPipelined is true, the write won't trigger an error if another pipelined
110121
write is executed later without waiting for the barrier.
@@ -149,7 +160,7 @@ def TTI_ExperimentalCommitWriteWithBarrierOp : TTI_Op<"experimental_commit_write
149160
def TTI_ExperimentalSetReadBarrierOp : TTI_Op<"experimental_set_read_barrier", [MemoryEffects<[MemWrite<GlobalMemory>]>]> {
150161
let summary = "mark a buffer as being read from using mbar as a guard";
151162
let description = [{
152-
Mark a buffer as being read from using mbar as a guard.
163+
Set the entry under [buffer, mbar] in readBars tensor to 1, marking the buffer as tracked by the barrier.
153164
}];
154165
let arguments = (ins
155166
TTG_MemDescType:$buf,
@@ -170,7 +181,8 @@ def TTI_ExperimentalSetReadBarrierOp : TTI_Op<"experimental_set_read_barrier", [
170181
def TTI_ExperimentalClearWriteBarrierOp : TTI_Op<"experimental_clear_write_barrier", [MemoryEffects<[MemWrite<GlobalMemory>]>]> {
171182
let summary = "clear the write state for buffers being guarded by an mbar";
172183
let description = [{
173-
Clear the write state for buffers being guarded by an mbar.
184+
For each buffer that has [buffer, mbar] entry in writeBars tensor, set the corresponding entry in writeState tensor to 0.
185+
Also, set the corresponding entry in writeBars tensor to 0.
174186
}];
175187
let arguments = (ins
176188
TTG_MemDescType:$mbar,
@@ -191,7 +203,7 @@ def TTI_ExperimentalClearWriteBarrierOp : TTI_Op<"experimental_clear_write_barri
191203
def TTI_ExperimentalClearReadBarrierOp : TTI_Op<"experimental_clear_read_barrier", [MemoryEffects<[MemWrite<GlobalMemory>]>]> {
192204
let summary = "clear the read state for buffers being guarded by an mbar";
193205
let description = [{
194-
Clear the read state for buffers being guarded by an mbar.
206+
Set all the entries in the column corresponding to the mbar in readBars tensor to 0.
195207
}];
196208
let arguments = (ins
197209
TTG_MemDescType:$mbar,
@@ -210,7 +222,7 @@ def TTI_ExperimentalClearReadBarrierOp : TTI_Op<"experimental_clear_read_barrier
210222
def TTI_ExperimentalCheckBarrierWritesClearedOp : TTI_Op<"experimental_check_barrier_writes_cleared", [MemoryEffects<[MemWrite<GlobalMemory>]>]> {
211223
let summary = "verify that the barrier is not used to track any writes";
212224
let description = [{
213-
Verify that the barrier is not used to track any writes.
225+
Verify that the column corresponding to the mbar in writeBars tensor is all 0.
214226
}];
215227
let arguments = (ins
216228
TTG_MemDescType:$mbar,
@@ -248,7 +260,8 @@ def TTI_ExperimentalStageAccessForCommitOp : TTI_Op<"experimental_stage_access_f
248260
def TTI_ExperimentalCommitAccessesOp : TTI_Op<"experimental_commit_accesses", [MemoryEffects<[MemWrite<GlobalMemory>]>]> {
249261
let summary = "Commit all the staged accesses for all the buffers.";
250262
let description = [{
251-
Commit all the staged accesses for all the buffers.
263+
Increment the value in outstandingCommits tensor for each entry greater than 0.
264+
Change all the `-1` entries in outstandingCommits tensor to 1, signifying 1 outstanding commit.
252265
}];
253266
let arguments = (ins
254267
TT_PtrLike:$outstandingCommits,
@@ -277,7 +290,7 @@ def TTI_ExperimentalClearOutstandingCommitsOp : TTI_Op<"experimental_clear_outst
277290
def TTI_ExperimentalCheckOutstandingCommitsOp : TTI_Op<"experimental_check_outstanding_commits", [MemoryEffects<[MemWrite<GlobalMemory>]>]> {
278291
let summary = "Check if the buffer has an outstanding commit.";
279292
let description = [{
280-
Check if the buffer has an outstanding commit.
293+
Verify that the entry corresponding to the buffer in outstandingCommits tensor is 0.
281294
}];
282295
let arguments = (ins
283296
TTG_MemDescType:$buf,

lib/Dialect/Triton/Transforms/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ add_triton_library(TritonTransforms
1313
RewriteTensorDescriptorToPointer.cpp
1414
ArithTypeConversion.cpp
1515
FunctionTypeConversion.cpp
16-
SCFToCF.cpp
1716

1817
DEPENDS
1918
TritonTransformsIncGen

lib/Dialect/Triton/Transforms/RewriteTensorDescriptorToPointer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ Value generateMaskFromOffsetRanges(OpBuilder &builder, const Location &loc,
166166

167167
// Compare with lower bound
168168
Value lowerBound = builder.create<mlir::arith::ConstantIntOp>(
169-
loc, 0, builder.getI64Type());
169+
loc, builder.getI64Type(), 0);
170170
Value splatLowerBound = builder.create<triton::SplatOp>(
171171
loc, offsetWithRange.getType(), lowerBound);
172172
Value cmpLower = builder.create<arith::CmpIOp>(

lib/Dialect/Triton/Transforms/RewriteTensorPointer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ struct RewritedInfo {
135135

136136
// Compare with lower bound
137137
Value lowerBound = builder.create<mlir::arith::ConstantIntOp>(
138-
loc, 0, builder.getI64Type());
138+
loc, builder.getI64Type(), 0);
139139
Value splatLowerBound = builder.create<triton::SplatOp>(
140140
loc, offsetWithRange.getType(), lowerBound);
141141
Value cmpLower = builder.create<arith::CmpIOp>(

0 commit comments

Comments
 (0)