Skip to content

Commit d457677

Browse files
authored
[Warp Specialization] Separate partitioning into a separate pass (#6876)
1 parent: 4593bcd; commit: d457677

File tree

7 files changed, with 541 additions (+541) and 483 deletions (-483).

7 files changed

+541
-483
lines changed

include/triton/Dialect/TritonGPU/Transforms/Passes.td

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -165,6 +165,16 @@ def TritonGPUOptimizePartitionWarps : Pass<"tritongpu-optimize-partition-warps",
165165
}];
166166
}
167167

168+
def TritonGPUPartitionScheduling : Pass<"tritongpu-partition-scheduling", "mlir::ModuleOp"> {
169+
let summary = "warp specialization partitioning pass";
170+
171+
let description = [{
172+
The `tritongpu-partition-scheduling` analyzes the loads, MMAs, and other
173+
operations in a loop that is meant to be warp specialized and determines
174+
which partitions to assign to each operation.
175+
}];
176+
}
177+
168178
def TritonGPULoadMMASpecialization : Pass<"tritongpu-load-mma-specialization", "mlir::ModuleOp"> {
169179
let summary = "load MMA specialization";
170180

lib/Dialect/TritonGPU/Transforms/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,7 @@ add_triton_library(TritonGPUTransforms
3434
WarpSpecialization/Partition.cpp
3535
WarpSpecialization/OptimizePartitionWarps.cpp
3636
WarpSpecialization/PartitionLoops.cpp
37+
WarpSpecialization/PartitionScheduling.cpp
3738
WarpSpecialization/RewritePartitionDependencies.cpp
3839

3940
DEPENDS

lib/Dialect/TritonGPU/Transforms/WarpSpecialization/AutomaticWarpSpecialization.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,7 @@ struct AutomaticWarpSpecialization
3333

3434
void AutomaticWarpSpecialization::runOnOperation() {
3535
OpPassManager pm;
36+
pm.addPass(createTritonGPUPartitionScheduling());
3637
pm.addPass(createTritonGPULoadMMASpecialization({numStages}));
3738
pm.addPass(createTritonGPURewritePartitionDependencies());
3839
// `int-range-optimizations` and SCCP are good at cleaning up loop arithmetic.

0 commit comments

Comments
 (0)