Skip to content

Commit fad8c9b

Browse files
Merge OpenAI Triton commit b3b9931 (#4878)
This PR changes the Triton base from 6b70e71 to b3b9931 (Jul 29). Pass rate: 98.88%->98.83% (#4879)
2 parents 7e20e48 + 58b35fe commit fad8c9b

File tree

95 files changed

+2846
-1333
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

95 files changed

+2846
-1333
lines changed

.github/workflows/integration-tests-amd.yml

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,25 @@ jobs:
1818
runner: ${{ fromJson(inputs.matrix) }}
1919
include:
2020
- image: rocm/pytorch:rocm6.2.2_ubuntu22.04_py3.10_pytorch_2.5.1_asan
21+
runner: ["self-hosted", "gfx90a"]
22+
# Cache save/restore is on the host machine at directory /home/runner/.triton, while the docker
23+
# container expects it at /github/home/.triton. So map it here to make sure it is visible in docker.
24+
options: >-
25+
--device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --user root
26+
--volume /home/runner/.triton:/github/home/.triton
27+
- image: rocm/pytorch:rocm6.2.2_ubuntu22.04_py3.10_pytorch_2.5.1_asan
28+
runner: ["amd-gfx942"]
29+
# We add --env-file to pull in HIP_VISIBLE_DEVICES and ROCR_VISIBLE_DEVICES definition for GPU isolation.
30+
options: >-
31+
--device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --user root
32+
--env-file /etc/podinfo/gha-gpu-isolation-settings
33+
--volume /home/runner/.triton:/github/home/.triton
2134
- image: rocm/7.0-preview:rocm7.0_preview_ubuntu22.04_llama2_70b_training_mlperf_mi35X_prealpha
2235
runner: ["amd-gfx950"]
36+
options: >-
37+
--device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --user root
38+
--env-file /etc/podinfo/gha-gpu-isolation-settings
39+
--volume /home/runner/.triton:/github/home/.triton
2340
env:
2441
RUNNER_TYPE: ${{ matrix.runner[1] }}
2542
TRITON_BUILD_WITH_CCACHE: "true"
@@ -31,11 +48,7 @@ jobs:
3148
CCACHE_COMPRESS: "true"
3249
container:
3350
image: ${{ matrix.image }}
34-
# Cache save/restore is on the host machine at directory /home/runner/.triton, while in the docker
35-
# container expect it at /github/home/.triton. So map here to make sure visible in docker.
36-
options: >-
37-
--device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --user root
38-
--volume /home/runner/.triton:/github/home/.triton
51+
options: ${{ matrix.options }}
3952
steps:
4053
- name: Checkout
4154
uses: actions/checkout@v4
@@ -96,6 +109,8 @@ jobs:
96109
run: ccache --print-stats
97110
- name: Run lit tests
98111
run: make test-lit
112+
- name: Run C++ unittests
113+
run: make test-cpp
99114
- name: Run python tests on AMD
100115
run: |
101116
INSTRUMENTATION_LIB_DIR="${GITHUB_WORKSPACE}/python/triton/instrumentation"
@@ -147,13 +162,13 @@ jobs:
147162
python3 -m pytest -s -n 8 ./test_cast_matmul.py
148163
- name: Run Proton tests
149164
run: |
165+
unset HIP_VISIBLE_DEVICES
166+
unset ROCR_VISIBLE_DEVICES
150167
if [ "${{ matrix.runner[0] }}" = "amd-gfx950" ]; then
151168
python3 -m pytest -s -n 8 third_party/proton/test -k "not test_instrument_exec"
152169
else
153170
make test-proton
154171
fi
155-
- name: Run C++ unittests
156-
run: make test-cpp
157172
- name: Inspect cache directories
158173
run: |
159174
mkdir -p ~/.triton
@@ -162,7 +177,8 @@ jobs:
162177
mkdir -p ~/.ccache
163178
du -h -d 1 ~/.ccache
164179
- name: Clean up caches
165-
# Always cleanup the worker, even if builds or tests failed
180+
# Always clean up the worker, even if builds or tests failed, given that these directories are
181+
# mapped from the host and we write files as the root user in the docker container.
166182
if: always()
167183
run: |
168184
rm -rf ~/.triton/cache

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ PYTHON ?= python
66
BUILD_DIR := $(shell cd python; $(PYTHON) -c 'from build_helpers import get_cmake_dir; print(get_cmake_dir())')
77
TRITON_OPT := $(BUILD_DIR)/bin/triton-opt
88
PYTEST := $(PYTHON) -m pytest
9-
LLVM_BUILD_PATH ?= $(realpath .llvm-project/build)
9+
LLVM_BUILD_PATH ?= "$(shell dirname $(realpath $(lastword $(MAKEFILE_LIST))))/.llvm-project/build"
1010
NUM_PROCS ?= 8
1111

1212
# Incremental builds

include/triton/Analysis/Utility.h

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -252,13 +252,6 @@ bool cvtNeedsWarpShuffle(RankedTensorType srcTy, RankedTensorType dstTy);
252252
// warps, and possibly blocks.
253253
bool cvtNeedsSharedMemory(RankedTensorType srcTy, RankedTensorType dstTy);
254254

255-
bool atomicNeedsSharedMemory(Value result);
256-
257-
// Check if MFMA layout can be converted to the dot operand
258-
// layout using warp shuffle.
259-
bool matchMFMAAndDotOperandShuffleCase(RankedTensorType srcTy,
260-
RankedTensorType dstTy);
261-
262255
// TODO: Move utility functions that belong to ConvertLayoutOp to class
263256
// ConvertLayoutOpHelper in the future
264257
bool shouldUseDistSmem(Attribute srcLayout, Attribute dstLayout);

include/triton/Conversion/TritonGPUToLLVM/Utility.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -655,6 +655,13 @@ SmallVector<Value> inlineRegion(RewriterBase &rewriter, Region &region,
655655
mlir::TypeID::get<TerminatorOp>(), loc);
656656
}
657657

658+
void finalizeTensorAtomicResults(Operation *op, RankedTensorType tensorTy,
659+
ConversionPatternRewriter &rewriter,
660+
SmallVector<Value> &resultVals,
661+
Type valueElemTy, TritonLLVMOpBuilder &b,
662+
Value threadPred,
663+
const TargetInfoBase &targetInfo,
664+
const LLVMTypeConverter *typeConverter);
658665
} // namespace mlir
659666

660667
#endif

include/triton/Dialect/Triton/IR/TritonOps.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1275,7 +1275,7 @@ def ReturnOp : TT_Op<"return", [Pure, HasParent<"FuncOp">, /*MemRefsNormalizable
12751275
let arguments = (ins Variadic<AnyType>:$srcs);
12761276

12771277
let builders = [OpBuilder<(ins), [{
1278-
build($_builder, $_state, std::nullopt);
1278+
build($_builder, $_state, mlir::ValueRange());
12791279
}]>];
12801280

12811281
let assemblyFormat = "attr-dict ($srcs^ `:` type($srcs))?";

include/triton/Dialect/TritonGPU/IR/TritonGPUOps.td

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -214,8 +214,6 @@ def TTG_MemDescIndexOp : TTG_Op<"memdesc_index", [Pure, MemDescViewTrait]> {
214214
- the output shape is 4x16xf16, and
215215
- index = 1.
216216
Then the output descriptor is equivalent to input[1], where input is the logical tensor.
217-
218-
When the input is of rank 1 (i.e, shape=[k]), the output will have shape=[1].
219217
}];
220218

221219
let arguments = (ins TTG_MemDescType:$src, I32:$index);

include/triton/Dialect/TritonInstrument/IR/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ add_mlir_doc(TritonInstrumentDialect TritonInstrumentDialect dialects/ -gen-dial
88
set(LLVM_TARGET_DEFINITIONS TritonInstrumentOps.td)
99
mlir_tablegen(Ops.h.inc -gen-op-decls)
1010
mlir_tablegen(Ops.cpp.inc -gen-op-defs)
11+
mlir_tablegen(OpsEnums.h.inc -gen-enum-decls)
12+
mlir_tablegen(OpsEnums.cpp.inc -gen-enum-defs)
1113
add_mlir_doc(TritonInstrumentOps TritonInstrumentOps dialects/ -gen-op-doc)
1214

1315
add_public_tablegen_target(TritonInstrumentTableGen)

include/triton/Dialect/TritonInstrument/IR/Dialect.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
#include "triton/Dialect/Triton/IR/Dialect.h"
66
#include "triton/Dialect/TritonGPU/IR/Dialect.h"
77

8+
#include "triton/Dialect/TritonInstrument/IR/OpsEnums.h.inc"
9+
810
#define GET_OP_CLASSES
911
#include "triton/Dialect/TritonInstrument/IR/Dialect.h.inc"
1012
#include "triton/Dialect/TritonInstrument/IR/Ops.h.inc"
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
#ifndef TRITONINSTRUMENT_ATTR_DEFS
2+
#define TRITONINSTRUMENT_ATTR_DEFS
3+
4+
include "mlir/IR/EnumAttr.td"
5+
6+
def TT_MemTypeAttr : I32EnumAttr<
7+
"MemType", "",
8+
[
9+
I32EnumAttrCase<"SHARED", 0, "shared">,
10+
I32EnumAttrCase<"TENSOR", 1, "tensor">,
11+
]> {
12+
let cppNamespace = "::mlir::triton::instrument";
13+
}
14+
15+
#endif // TRITONINSTRUMENT_ATTR_DEFS

0 commit comments

Comments
 (0)