Commit be91d90

anmyachev authored and whitneywhtsang committed
Fix build and test failures after llvm/llvm-project@bc773632355b
Signed-off-by: Anatoly Myachev <[email protected]>
1 parent a0ea787 commit be91d90

5 files changed: +11 -10 lines changed


test/TritonIntelGPU/split-barrier.mlir

Lines changed: 4 additions & 4 deletions
@@ -19,9 +19,9 @@ module attributes {"ttg.num-warps" = 32 : i32, "ttg.threads-per-warp" = 16 : i32
 %22 = tt.make_tensor_ptr %arg1, [%c0_i64, %c0_i64], [%c0_i64, %c0_i64], [%c0_i32, %c0_i32] {order = array<i32: 1, 0>} : <tensor<64x256xf16, #dot1>>
 // CHECK: scf.for %[[V:.*]] = {{.*}} to {{.*}} step {{.*}} iter_args({{.*}}) -> (tensor<128x256xf32, #mma>, !tt.ptr<tensor<128x64xf16, #ttg.dot_op<{opIdx = 0, parent = #mma, kWidth = 1}>>>, !tt.ptr<tensor<64x256xf16, #ttg.dot_op<{opIdx = 1, parent = #mma, kWidth = 2}>>>)
 // CHECK: ttig.prefetch {{.*}} : !tt.ptr<tensor<128x64xf16, #ttg.dot_op<{opIdx = 0, parent = #[[$DPAS]], kWidth = 1}>>>
-// CHECK-NEXT: ttig.prefetch {{.*}} : !tt.ptr<tensor<64x256xf16, #ttg.dot_op<{opIdx = 1, parent = #[[$DPAS]], kWidth = 2}>>>
+// CHECK: ttig.prefetch {{.*}} : !tt.ptr<tensor<64x256xf16, #ttg.dot_op<{opIdx = 1, parent = #[[$DPAS]], kWidth = 2}>>>
 // CHECK: ttig.prefetch {{.*}} : !tt.ptr<tensor<128x64xf16, #ttg.dot_op<{opIdx = 0, parent = #[[$DPAS]], kWidth = 1}>>>
-// CHECK-NEXT: ttig.prefetch {{.*}} : !tt.ptr<tensor<64x256xf16, #ttg.dot_op<{opIdx = 1, parent = #[[$DPAS]], kWidth = 2}>>>
+// CHECK: ttig.prefetch {{.*}} : !tt.ptr<tensor<64x256xf16, #ttg.dot_op<{opIdx = 1, parent = #[[$DPAS]], kWidth = 2}>>>
 // CHECK: scf.for %[[IV:.*]] = {{.*}} to {{.*}} step {{.*}} iter_args({{.*}}) -> (tensor<128x256xf32, #mma>, !tt.ptr<tensor<128x64xf16, #ttg.dot_op<{opIdx = 0, parent = #mma, kWidth = 1}>>>, !tt.ptr<tensor<64x256xf16, #ttg.dot_op<{opIdx = 1, parent = #mma, kWidth = 2}>>>, !tt.ptr<tensor<128x64xf16, #ttg.dot_op<{opIdx = 0, parent = #mma, kWidth = 1}>>>, !tt.ptr<tensor<64x256xf16, #ttg.dot_op<{opIdx = 1, parent = #mma, kWidth = 2}>>>)
 // WORKGROUP_SCOPE-NEXT: triton_gen.split_barrier_arrive {execution_scope = WorkGroup, memory_scope = WorkGroup}
 // SUBGROUP_SCOPE-NEXT: triton_gen.split_barrier_arrive {execution_scope = SubGroup, memory_scope = SubGroup}
@@ -66,9 +66,9 @@ module attributes {"ttg.num-warps" = 32 : i32, "ttg.threads-per-warp" = 16 : i32
 %22 = tt.make_tensor_ptr %arg1, [%c0_i64, %c0_i64], [%c0_i64, %c0_i64], [%c0_i32, %c0_i32] {order = array<i32: 1, 0>} : <tensor<64x256xf16, #dot1>>

 // CHECK: ttig.prefetch {{.*}} : !tt.ptr<tensor<128x64xf16, #ttg.dot_op<{opIdx = 0, parent = #[[$DPAS]], kWidth = 1}>>>
-// CHECK-NEXT: ttig.prefetch {{.*}} : !tt.ptr<tensor<64x256xf16, #ttg.dot_op<{opIdx = 1, parent = #[[$DPAS]], kWidth = 2}>>>
+// CHECK: ttig.prefetch {{.*}} : !tt.ptr<tensor<64x256xf16, #ttg.dot_op<{opIdx = 1, parent = #[[$DPAS]], kWidth = 2}>>>
 // CHECK: ttig.prefetch {{.*}} : !tt.ptr<tensor<128x64xf16, #ttg.dot_op<{opIdx = 0, parent = #[[$DPAS]], kWidth = 1}>>>
-// CHECK-NEXT: ttig.prefetch {{.*}} : !tt.ptr<tensor<64x256xf16, #ttg.dot_op<{opIdx = 1, parent = #[[$DPAS]], kWidth = 2}>>>
+// CHECK: ttig.prefetch {{.*}} : !tt.ptr<tensor<64x256xf16, #ttg.dot_op<{opIdx = 1, parent = #[[$DPAS]], kWidth = 2}>>>
 // CHECK: scf.for %[[IV:.*]] = {{.*}} to {{.*}} step {{.*}} iter_args({{.*}}) -> (tensor<128x256xf32, #mma>, !tt.ptr<tensor<128x64xf16, #ttg.dot_op<{opIdx = 0, parent = #mma, kWidth = 1}>>>, !tt.ptr<tensor<64x256xf16, #ttg.dot_op<{opIdx = 1, parent = #mma, kWidth = 2}>>>, !tt.ptr<tensor<128x64xf16, #ttg.dot_op<{opIdx = 0, parent = #mma, kWidth = 1}>>>, !tt.ptr<tensor<64x256xf16, #ttg.dot_op<{opIdx = 1, parent = #mma, kWidth = 2}>>>)
 // WORKGROUP_SCOPE-NEXT: triton_gen.split_barrier_arrive {execution_scope = WorkGroup, memory_scope = WorkGroup}
 // SUBGROUP_SCOPE-NEXT: triton_gen.split_barrier_arrive {execution_scope = SubGroup, memory_scope = SubGroup}

third_party/intel/lib/Dialect/Triton/Transforms/RemoveMasks.cpp

Lines changed: 4 additions & 4 deletions
@@ -120,8 +120,8 @@ class CanonicalMaskValidator final : public MaskValidatorBase {
 cast<arith::ConstantIntOp>(maskInfo.N.getDefiningOp()).value();
 unsigned END = maskInfo.END;
 bool cond = UB == ((N - END) / END) + 1;
-return builder.create<arith::ConstantIntOp>(forOp.getLoc(), cond,
-builder.getI1Type());
+return builder.create<arith::ConstantIntOp>(forOp.getLoc(),
+builder.getI1Type(), cond);
 }

 auto divOp = cast<arith::DivSIOp>(defOp);
@@ -276,8 +276,8 @@ class InvariantMaskValidator final : public MaskValidatorBase {
 [[maybe_unused]] auto rangeOp = cast<tt::MakeRangeOp>(rhs);
 assert(rangeOp.getStart() < rangeOp.getEnd() && "Invalid range");
 unsigned start = rangeOp.getStart();
-auto cstOp = builder.createOrFold<arith::ConstantIntOp>(loc, start,
-lhsVal.getType());
+auto cstOp = builder.createOrFold<arith::ConstantIntOp>(
+loc, lhsVal.getType(), start);
 return builder.createOrFold<arith::CmpIOp>(loc, arith::CmpIPredicate::slt,
 lhsVal, cstOp);
 }

third_party/intel/lib/TritonGENToLLVM/Attributes.cpp

Lines changed: 1 addition & 0 deletions
@@ -10,6 +10,7 @@
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/BuiltinTypes.h"
+#include "llvm/IR/Value.h"
 #include "llvm/Support/Debug.h"

 #define DEBUG_TYPE "attributes"

third_party/intel/lib/TritonIntelGPUTransforms/DistributeToWarps.cpp

Lines changed: 1 addition & 1 deletion
@@ -238,7 +238,7 @@ void distributeMakeRangeOp(tt::MakeRangeOp op, Value warpId) {
 // case, we would need to determine a dimension-specific offset similar to
 // `tt.make_tensor_ptr`' distribution pattern.
 auto elemTy = convTy.getElementType();
-auto numElemsConst = b.create<arith::ConstantIntOp>(loc, numElems, elemTy);
+auto numElemsConst = b.create<arith::ConstantIntOp>(loc, elemTy, numElems);
 auto rangeOffset = b.create<arith::MulIOp>(loc, warpId, numElemsConst);
 auto splat = b.create<tt::SplatOp>(loc, convTy, rangeOffset);
 auto newRange = b.create<arith::AddIOp>(loc, subRange, splat);

third_party/intel/lib/TritonIntelGPUTransforms/MatchTargetSize.cpp

Lines changed: 1 addition & 1 deletion
@@ -1253,7 +1253,7 @@ void MatchTargetSizePass::transformMakeRangeOp(tt::MakeRangeOp op) {
 SmallVector<Value> subRanges;
 for (int i = 0; i < end / subgroupSize; ++i) {
 Value offset =
-b.create<arith::ConstantIntOp>(loc, i * subgroupSize, elemTy);
+b.create<arith::ConstantIntOp>(loc, elemTy, i * subgroupSize);
 Value offsetTensor = b.create<tt::SplatOp>(loc, subRangeTy, offset);
 subRanges.push_back(b.create<arith::AddIOp>(loc, subRange, offsetTensor));
 }
