Skip to content

Commit 3b3a787

Browse files
Merge OpenAI Triton commit 3a93d6f (#5266)
This PR changes the Triton base from bea27e3 to 3a93d6f (Oct 1). Pass rate: 94.2%
2 parents efbbb06 + c2fcf65 commit 3b3a787

File tree

17 files changed

+41
-50
lines changed

17 files changed

+41
-50
lines changed

.github/workflows/llvm-build.yml

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ jobs:
106106
-DLLVM_BUILD_UTILS=ON
107107
-DLLVM_BUILD_TOOLS=ON
108108
-DLLVM_ENABLE_ASSERTIONS=ON
109-
-DMLIR_ENABLE_BINDINGS_PYTHON=ON
109+
-DMLIR_ENABLE_BINDINGS_PYTHON=OFF
110110
-DLLVM_ENABLE_PROJECTS="mlir;lld"
111111
-DLLVM_INSTALL_UTILS=ON
112112
-DLLVM_TARGETS_TO_BUILD="host;NVPTX;AMDGPU"
@@ -130,7 +130,7 @@ jobs:
130130
-DLLVM_BUILD_UTILS=ON
131131
-DLLVM_BUILD_TOOLS=ON
132132
-DLLVM_ENABLE_ASSERTIONS=ON
133-
-DMLIR_ENABLE_BINDINGS_PYTHON=ON
133+
-DMLIR_ENABLE_BINDINGS_PYTHON=OFF
134134
-DLLVM_ENABLE_PROJECTS="mlir;llvm;lld"
135135
-DLLVM_ENABLE_DIA_SDK=OFF
136136
-DLLVM_INSTALL_UTILS=ON
@@ -179,7 +179,7 @@ jobs:
179179
-DCLANG_TABLEGEN=$HOST_TOOLS/clang-tblgen \
180180
-DLLVM_ENABLE_ASSERTIONS=ON \
181181
-DCMAKE_LINKER=$LINKER \
182-
-DMLIR_ENABLE_BINDINGS_PYTHON=ON \
182+
-DMLIR_ENABLE_BINDINGS_PYTHON=OFF \
183183
-DLLVM_ENABLE_ZSTD=OFF \
184184
-DLLVM_ABI_BREAKING_CHECKS=FORCE_OFF \
185185
-DLLVM_INSTALL_UTILS=ON \
@@ -202,12 +202,6 @@ jobs:
202202
-DLLVM_ENABLE_TERMINFO=OFF \
203203
llvm-project/llvm
204204
ninja -C llvm-project/build install
205-
CURR_PWD="$(pwd)"
206-
cd "${{ env.llvm_install_dir }}/python_packages/mlir_core/mlir/_mlir_libs/"
207-
for file in *x86_64*; do
208-
mv "$file" "${file/x86_64/aarch64}"
209-
done
210-
cd $CURR_PWD
211205
tar czf "${{ env.llvm_install_dir }}.tar.gz" "${{ env.llvm_install_dir }}"
212206
213207
- name: Configure, Build, and Install LLVM (macOS arm64)
@@ -225,7 +219,7 @@ jobs:
225219
-DLLVM_BUILD_UTILS=ON
226220
-DLLVM_BUILD_TOOLS=ON
227221
-DLLVM_ENABLE_ASSERTIONS=ON
228-
-DMLIR_ENABLE_BINDINGS_PYTHON=ON
222+
-DMLIR_ENABLE_BINDINGS_PYTHON=OFF
229223
-DLLVM_ENABLE_PROJECTS="mlir;lld"
230224
-DLLVM_ENABLE_ZSTD=OFF
231225
-DLLVM_INSTALL_UTILS=ON

.github/workflows/llvm-build/almalinux.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ RUN cmake -GNinja -Bbuild \
2929
-DLLVM_BUILD_UTILS=ON \
3030
-DLLVM_BUILD_TOOLS=ON \
3131
-DLLVM_ENABLE_ASSERTIONS=ON \
32-
-DMLIR_ENABLE_BINDINGS_PYTHON=ON \
32+
-DMLIR_ENABLE_BINDINGS_PYTHON=OFF \
3333
-DLLVM_ENABLE_PROJECTS="mlir;lld" \
3434
-DLLVM_ENABLE_TERMINFO=OFF \
3535
-DLLVM_INSTALL_UTILS=ON \

.github/workflows/llvm-build/centos.Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ RUN cmake -GNinja -Bbuild \
4646
-DLLVM_BUILD_UTILS=ON \
4747
-DLLVM_BUILD_TOOLS=ON \
4848
-DLLVM_ENABLE_ASSERTIONS=ON \
49-
-DMLIR_ENABLE_BINDINGS_PYTHON=ON \
49+
-DMLIR_ENABLE_BINDINGS_PYTHON=OFF \
5050
-DLLVM_ENABLE_PROJECTS="mlir;lld" \
5151
-DLLVM_ENABLE_TERMINFO=OFF \
5252
-DLLVM_INSTALL_UTILS=ON \

cmake/llvm-hash.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
064f02dac0c81c19350a74415b3245f42fed09dc
1+
f6ded0be897e2878612dd903f7e8bb85448269e5

scripts/build-llvm-project.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ if [ -z "$CMAKE_ARGS" ]; then
2222
-DCMAKE_CXX_COMPILER=clang++
2323
-DLLVM_ENABLE_LLD=ON
2424
-DLLVM_OPTIMIZED_TABLEGEN=ON
25+
-DMLIR_ENABLE_BINDINGS_PYTHON=OFF
2526
-DLLVM_TARGETS_TO_BUILD="$LLVM_TARGETS"
2627
-DCMAKE_EXPORT_COMPILE_COMMANDS=1
2728
-DLLVM_ENABLE_PROJECTS="$LLVM_PROJECTS"

test/Conversion/amd/wmma-v2-shortcut.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: triton-opt %s --tritongpu-reduce-data-duplication --allocate-shared-memory --convert-triton-amdgpu-to-llvm=arch="gfx1200" -split-input-file | FileCheck %s
1+
// RUN: triton-opt %s --tritongpu-reduce-data-duplication --allocate-shared-memory --convert-triton-amdgpu-to-llvm=arch="gfx1200" -reconcile-unrealized-casts -split-input-file | FileCheck %s
22

33
#wmmaTv2 = #ttg.amd_wmma<{version = 2, warpsPerCTA = [1, 1], isTranspose = true}>
44
#dotop0v2 = #ttg.dot_op<{opIdx = 0, parent = #wmmaTv2, kWidth=8}>

test/Conversion/tritongpu_to_llvm.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: triton-opt %s -split-input-file --allocate-shared-memory-nv --convert-triton-gpu-to-llvm 2>/dev/null | FileCheck %s --dump-input-context 20
1+
// RUN: triton-opt %s -split-input-file --allocate-shared-memory-nv --convert-triton-gpu-to-llvm -reconcile-unrealized-casts 2>/dev/null | FileCheck %s --dump-input-context 20
22

33
module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32} {
44
// CHECK: llvm.func @test_empty_kernel(%arg0: i32, %arg1: !llvm.ptr<1>, %arg2: !llvm.ptr<1>, %arg3: !llvm.ptr<1>)

test/Conversion/tritonnvidiagpu_to_llvm.mlir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: triton-opt %s -split-input-file --convert-triton-gpu-to-llvm=compute-capability=90 | FileCheck %s
1+
// RUN: triton-opt %s -split-input-file --convert-triton-gpu-to-llvm=compute-capability=90 -reconcile-unrealized-casts | FileCheck %s
22

33
#shared0 = #ttg.swizzled_shared<{vec = 1, perPhase = 1, maxPhase = 1, order = [0], CTAsPerCGA = [1], CTASplitNum = [1], CTAOrder = [0]}>
44
#smem = #ttg.shared_memory

test/LLVMIR/convert-to-llvmir-with-dbg-info.mlir

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32} {
2929
%arg2: !llvm.ptr<1>, %arg3: i32, %arg4: !llvm.ptr<1>) {
3030
%constant_i32 = llvm.mlir.constant(9 : i32) : i32
3131
%constant_i16 = llvm.mlir.constant(0 : i16) : i16
32+
%constant_i64 = llvm.mlir.constant(9 : i64) : i64
3233

3334
// CHECK: !DILocalVariable(name: "pid", scope:
3435
%pid = rocdl.workgroup.id.x : i32 loc(#loc14)
@@ -49,14 +50,14 @@ module attributes {"ttg.num-ctas" = 1 : i32, "ttg.num-warps" = 4 : i32} {
4950

5051
// CHECK: !DILocalVariable(name: "x", scope:
5152
%x_ptr = llvm.getelementptr %arg0[%block_start] : (!llvm.ptr<1>, i32) -> !llvm.ptr<1>, f32
52-
%x_buffer_ptr = rocdl.make.buffer.rsrc %x_ptr, %constant_i16, %constant_i32, %constant_i32 : <1> to <8> loc(#loc18)
53+
%x_buffer_ptr = rocdl.make.buffer.rsrc %x_ptr, %constant_i16, %constant_i64, %constant_i32 : <1> to <8> loc(#loc18)
5354
llvm.intr.dbg.value #di_local_variable4 = %x_buffer_ptr : !llvm.ptr<8> loc(#loc8)
5455
%x_val = rocdl.raw.ptr.buffer.load %x_buffer_ptr, %mask_i1, %constant_i32, %constant_i32 : vector<4xf32> loc(#loc18)
5556
%x_scalar = llvm.extractelement %x_val[%constant_i32 : i32] : vector<4xf32> loc(#loc18)
5657

5758
// CHECK: !DILocalVariable(name: "y", scope:
5859
%y_ptr = llvm.getelementptr %arg1[%block_start] : (!llvm.ptr<1>, i32) -> !llvm.ptr<1>, f32
59-
%y_buffer_ptr = rocdl.make.buffer.rsrc %y_ptr, %constant_i16, %constant_i32, %constant_i32 : <1> to <8> loc(#loc19)
60+
%y_buffer_ptr = rocdl.make.buffer.rsrc %y_ptr, %constant_i16, %constant_i64, %constant_i32 : <1> to <8> loc(#loc19)
6061
llvm.intr.dbg.value #di_local_variable5 = %y_buffer_ptr : !llvm.ptr<8> loc(#loc10)
6162
%y_val = rocdl.raw.ptr.buffer.load %y_buffer_ptr, %mask_i1, %constant_i32, %constant_i32 : vector<4xf32> loc(#loc19)
6263
%y_scalar = llvm.extractelement %y_val[%constant_i32 : i32] : vector<4xf32> loc(#loc19)

third_party/amd/lib/TritonAMDGPUToLLVM/BufferOpsEmitter.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ Value BufferEmitter::createResourceDescriptor(Value basePtr,
8686

8787
Value flagsConst = b.int_val(32, flags);
8888
Type rsrcType = LLVM::LLVMPointerType::get(rewriter.getContext(), 8);
89-
Value numRecordsByte = b.int_val(32, std::numeric_limits<int>::max() - 1);
89+
Value numRecordsByte = b.int_val(64, std::numeric_limits<int>::max() - 1);
9090

9191
Value resource = rewriter.createOrFold<ROCDL::MakeBufferRsrcOp>(
9292
loc, rsrcType, basePtr, stride, numRecordsByte, flagsConst);

0 commit comments

Comments
 (0)