
Commit 6342021

Merge commit '815b2a46f633cb98459874c75bb74e79051a0f78'
2 parents: a0ce2d9 + 815b2a4

22 files changed: +731 −250 lines


.github/workflows/integration-tests-amd.yml

Lines changed: 6 additions & 3 deletions
@@ -31,7 +31,11 @@ jobs:
       CCACHE_COMPRESS: "true"
     container:
       image: ${{ matrix.image }}
-      options: --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --user root
+      # Cache save/restore is on the host machine at directory /home/runner/.triton, while in the docker
+      # container expect it at /github/home/.triton. So map here to make sure visible in docker.
+      options: >-
+        --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --user root
+        --volume /home/runner/.triton:/github/home/.triton
     steps:
     - name: Checkout
       uses: actions/checkout@v4
@@ -54,7 +58,6 @@ jobs:
         echo "llvm=$(cat $llvm_file | cut -c 1-8)" >> $GITHUB_OUTPUT
         echo "nvidia=$(sha256sum $nvidia_file | cut -d ' ' -f 1)" >> $GITHUB_OUTPUT
         echo "json=$(cat $json_file)" >> $GITHUB_OUTPUT
-        echo "datetime=$(date -u -Iseconds)" >> $GITHUB_OUTPUT
       shell: bash
     - name: Cache build dependencies
       uses: actions/cache@v4
@@ -162,5 +165,5 @@ jobs:
       # Always cleanup the worker, even if builds or tests failed
       if: always()
      run: |
-        rm -rf ~/.triton
+        rm -rf ~/.triton/cache
         rm -rf ~/.ccache
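For context: the added --volume flag bind-mounts the runner's host-side cache directory into the docker container, so a cache that is saved or restored on the host at /home/runner/.triton becomes visible to steps running inside the container at /github/home/.triton. A minimal sketch of the same pattern, assuming a hypothetical workflow with placeholder image and runner labels (none of these names come from this commit):

name: cache-mapping-example        # hypothetical workflow, for illustration only
on: workflow_dispatch
jobs:
  build:
    runs-on: [self-hosted, linux]  # placeholder runner labels
    container:
      image: ubuntu:24.04          # placeholder image
      # Bind-mount the host cache path to the path the containerized job expects.
      options: >-
        --user root
        --volume /home/runner/.triton:/github/home/.triton
    steps:
      - uses: actions/checkout@v4
      - name: Inspect the mounted cache
        run: ls -la /github/home/.triton || true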

include/triton/Dialect/TritonGPU/Transforms/Passes.h

Lines changed: 1 addition & 0 deletions
@@ -2,6 +2,7 @@
 #define TRITON_DIALECT_TRITONGPU_TRANSFORMS_PASSES_H_
 
 #include "mlir/Pass/Pass.h"
+#include "nvidia/include/Dialect/NVWS/IR/Dialect.h"
 #include "triton/Dialect/TritonNvidiaGPU/IR/Dialect.h"
 
 namespace mlir {

include/triton/Dialect/TritonGPU/Transforms/Passes.td

Lines changed: 6 additions & 2 deletions
@@ -106,7 +106,8 @@ def TritonGPUAutomaticWarpSpecialization : Pass<"tritongpu-automatic-warp-specia
     "mlir::triton::gpu::TritonGPUDialect",
     "mlir::scf::SCFDialect",
     "mlir::arith::ArithDialect",
-    "mlir::triton::nvidia_gpu::TritonNvidiaGPUDialect"
+    "mlir::triton::nvidia_gpu::TritonNvidiaGPUDialect",
+    "triton::nvws::NVWSDialect"
   ];
 
   let options = [
@@ -143,7 +144,10 @@ def TritonGPUPartitionLoops : Pass<"tritongpu-partition-loops", "mlir::ModuleOp"
     between any of the partitions.
   }];
 
-  let dependentDialects = ["mlir::triton::gpu::TritonGPUDialect"];
+  let dependentDialects = [
+    "mlir::triton::gpu::TritonGPUDialect",
+    "triton::nvws::NVWSDialect"
+  ];
 }
 
 def TritonGPUOptimizePartitionWarps : Pass<"tritongpu-optimize-partition-warps", "mlir::ModuleOp"> {

lib/Conversion/TritonGPUToLLVM/ConvertLayoutOpToLLVM.cpp

Lines changed: 2 additions & 3 deletions
@@ -278,9 +278,8 @@ struct ConvertLayoutOpUsingLinearLayoutsConversion
     assert(cvtNeedsSharedMemory(op.getSrc().getType(), op.getType()));
 
     // Try to use swizzling to implement the conversion
-    // HACK Remove once AMD tests pass for the swizzling path
-    if (targetInfo.isCuda() && succeeded(transferWithinBlockSwizzling(
-                                   op, adaptor.getSrc(), rewriter))) {
+    if (succeeded(
+            transferWithinBlockSwizzling(op, adaptor.getSrc(), rewriter))) {
       return success();
     }
 

lib/Dialect/Gluon/IR/Dialect.cpp

Lines changed: 2 additions & 1 deletion
@@ -1,7 +1,7 @@
 #include "triton/Dialect/Gluon/IR/Dialect.h"
 
-#include "mlir/IR/DialectImplementation.h"
 #include "mlir/Support/LLVM.h"
+#include "triton/Dialect/Triton/IR/Interfaces.h"
 #include "llvm/ADT/TypeSwitch.h"
 
 using namespace mlir;
@@ -111,6 +111,7 @@ void GluonDialect::initialize() {
 #define GET_OP_LIST
 #include "triton/Dialect/Gluon/IR/Ops.cpp.inc"
       >();
+  addInterfaces<TritonInlinerInterface>();
   addInterfaces<GluonInferLayoutInterface>();
 }
 

lib/Dialect/Gluon/Transforms/ResolveAutoEncodings.cpp

Lines changed: 9 additions & 5 deletions
@@ -66,12 +66,9 @@ LogicalResult inferAutoLayouts(FuncOp func) {
     return success();
   };
 
-  // 1. Set seed values from layout conversions
+  // 1. Set seed values from set_auto_layout ops
   auto res = func.walk([&](gluon::SetAutoLayoutOp op) -> WalkResult {
-    auto res = updateEncoding({op.getSrc()}, op.getType().getEncoding());
-    op.getResult().replaceAllUsesWith(op.getSrc());
-    op->erase();
-    return res;
+    return updateEncoding({op.getSrc()}, op.getType().getEncoding());
   });
 
   if (res.wasInterrupted())
@@ -153,6 +150,13 @@ LogicalResult inferAutoLayouts(FuncOp func) {
     }
   }
 
+  // 4. Cleanup set_auto_layout ops
+  func.walk([&](gluon::SetAutoLayoutOp op) {
+    assert(op.getSrc().getType() == op.getType());
+    op.getResult().replaceAllUsesWith(op.getSrc());
+    op->erase();
+  });
+
   return success();
 }
 

lib/Dialect/TritonGPU/Transforms/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
@@ -46,6 +46,8 @@ add_triton_library(TritonGPUTransforms
   TritonTransforms
   TritonGPUIR
   TritonNvidiaGPUIR
+  NVWSIR
+  NVWSTransforms
   TritonToTritonGPU
   TritonInstrumentIR
   MLIRTransformUtils
