Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmake/llvm-hash.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
e12cbd8339b89563059c2bb2a312579b652560d0
8957e64a20fc7f4277565c6cfe3e555c119783ce
2 changes: 1 addition & 1 deletion lib/Analysis/Utility.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -953,7 +953,7 @@ SetVector<Operation *> multiRootGetSlice(Operation *op,
BackwardSliceOptions opt;
opt.omitBlockArguments = true;
opt.filter = backwardFilter;
getBackwardSlice(currentOp, &backwardSlice, opt);
(void)getBackwardSlice(currentOp, &backwardSlice, opt);
slice.insert(backwardSlice.begin(), backwardSlice.end());

// Compute and insert the forwardSlice starting from currentOp.
Expand Down
2 changes: 1 addition & 1 deletion lib/Conversion/TritonGPUToLLVM/ElementwiseOpToLLVM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ struct ElementwiseInlineAsmOpConversion
/*asm_string=*/op.getAsmString(),
/*constraints=*/op.getConstraints(),
/*has_side_effects=*/!op.getPure(),
/*is_align_stack=*/false,
/*is_align_stack=*/false, LLVM::TailCallKind::None,
/*asm_dialect=*/
LLVM::AsmDialectAttr::get(rewriter.getContext(),
LLVM::AsmDialect::AD_ATT),
Expand Down
2 changes: 1 addition & 1 deletion lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ static int computeOrigBitWidth(Value x) {
mlir::BackwardSliceOptions opt;
opt.omitBlockArguments = true;
opt.filter = bwdFilter;
getBackwardSlice(x, &slice, opt);
(void)getBackwardSlice(x, &slice, opt);

// TODO: This heuristic may be a bit too coarse and may need improving
// If the chain contains a fp4 to fp16/bf16 conversion, then the original
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ CoarseSchedule::Cluster schedulePrologueAndEpilogue(scf::ForOp forOp,
BackwardSliceOptions opt;
opt.omitBlockArguments = true;
opt.omitUsesFromAbove = false;
getBackwardSlice((Operation *)op, &backwardSlice, opt);
(void)getBackwardSlice((Operation *)op, &backwardSlice, opt);

for (auto op : backwardSlice) {
if (auto ifOp = dyn_cast<scf::IfOp>(op)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ static void threadValuesThroughWait(ttng::WarpGroupDotWaitOp wait,
return op->getBlock() == wait->getBlock();
};
SetVector<Operation *> slice;
getBackwardSlice(v, &slice, options);
(void)getBackwardSlice(v, &slice, options);
}

for (ttng::WarpGroupDotOp dot : asyncDots) {
Expand Down
2 changes: 1 addition & 1 deletion python/test/unit/language/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -6891,7 +6891,7 @@ def test_tl_range_num_stages(device):
if capability[0] >= 8:
ptx = pgm.asm['ptx']
# check that the loop got pipelined with the right number of stages.
assert 'cp.async.wait_group 6' in ptx
assert 'cp.async.wait_group \t6' in ptx


def test_tl_range_fuse():
Expand Down
2 changes: 1 addition & 1 deletion test/Triton/reproducer.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@ module attributes {"ttg.target" = "cuda:90", "ttg.num-ctas" = 1 : i32, "ttg.num-
#-}

// CHECK: Pass Manager with
// CHECK-NEXT: convert-triton-gpu-to-llvm
// CHECK: convert-triton-gpu-to-llvm
36 changes: 15 additions & 21 deletions third_party/amd/lib/TritonAMDGPUToLLVM/ElementwiseOpToLLVM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,30 +37,25 @@ cvtScalePkUpcastFromFp8(Location loc, ConversionPatternRewriter &rewriter,
fp8x4Vec = b.insert_element(fp8x4VecTy, fp8x4Vec, v1, idx1);
auto i32v = b.bitcast(fp8x4Vec, i32_ty);

auto resType = i32_ty;
auto dstType = f32_ty;
Type resElemType;
if constexpr (std::is_same_v<ConvertOp, ROCDL::CvtScaleF32PkF32Fp8Op> ||
std::is_same_v<ConvertOp, ROCDL::CvtScaleF32PkF32Bf8Op>) {
resType = i64_ty;
dstType = f32_ty;
resElemType = f32_ty;
} else if constexpr (std::is_same_v<ConvertOp,
ROCDL::CvtScaleF32PkF16Fp8Op> ||
std::is_same_v<ConvertOp,
ROCDL::CvtScaleF32PkF16Bf8Op>) {
resType = i32_ty;
dstType = f16_ty;
resElemType = f16_ty;
} else {
resType = i32_ty;
dstType = bf16_ty;
resElemType = bf16_ty;
}
Type resType = vec_ty(resElemType, 2);
Value scale = b.f32_val(1);
Value select = b.false_val();
auto result = rewriter.create<ConvertOp>(loc, resType, i32v, scale, select);
auto retVecTy = vec_ty(dstType, 2);
auto retVec = b.bitcast(result, retVecTy);
auto result = rewriter.create<ConvertOp>(loc, resType, i32v, scale,
/*srcLoHiSel=*/false);
SmallVector<Value> ret(2);
ret[0] = b.extract_element(dstType, retVec, idx0);
ret[1] = b.extract_element(dstType, retVec, idx1);
ret[0] = b.extract_element(resElemType, result, idx0);
ret[1] = b.extract_element(resElemType, result, idx1);
return ret;
}

Expand All @@ -73,13 +68,12 @@ cvtScalePkDowncastToFp8(Location loc, ConversionPatternRewriter &rewriter,
Type v2I16Ty = vec_ty(i16_ty, 2);
Value v2I16Vec = b.undef(v2I16Ty);
Value scale = b.f32_val(1);
Value select = b.false_val();

Value result;
if constexpr (std::is_same_v<ConvertOp, ROCDL::CvtScaleF32PkFp8F32Op> ||
std::is_same_v<ConvertOp, ROCDL::CvtScaleF32PkBf8F32Op>) {
result = rewriter.create<ConvertOp>(loc, v2I16Ty, v2I16Vec, v0, v1, scale,
select);
/*dstLoHiSel=*/false);
} else {
Type v2F16Ty = vec_ty(v0.getType(), 2);
Value srcVec = b.undef(v2F16Ty);
Expand All @@ -88,7 +82,7 @@ cvtScalePkDowncastToFp8(Location loc, ConversionPatternRewriter &rewriter,
srcVec = b.insert_element(v2F16Ty, srcVec, v0, idx0);
srcVec = b.insert_element(v2F16Ty, srcVec, v1, idx1);
result = rewriter.create<ConvertOp>(loc, v2I16Ty, v2I16Vec, srcVec, scale,
select);
/*dstLoHiSel=*/false);
}
auto fp8x4VecTy = vec_ty(i8_ty, 4);
auto fp8x4Vec = b.bitcast(result, fp8x4VecTy);
Expand Down Expand Up @@ -312,8 +306,8 @@ static SmallVector<Value> cvtPkF8ToFp32(Location loc,
auto resType = i64_ty;
auto dstType = f32_ty;

Value select = b.false_val();
auto result = rewriter.create<ConvertOp>(loc, resType, i32v, select);
auto result =
rewriter.create<ConvertOp>(loc, resType, i32v, /*wordSel=*/false);
auto f32x2VecTy = vec_ty(dstType, 2);
auto retVec = b.bitcast(result, f32x2VecTy);
SmallVector<Value> ret(2);
Expand All @@ -330,10 +324,10 @@ static SmallVector<Value> cvtPkFp32ToF8(Location loc,
auto b = TritonLLVMOpBuilder(loc, rewriter);
Type v2I16Ty = vec_ty(i16_ty, 2);
Value old = b.undef(i32_ty);
Value select = b.false_val();

Value result;
result = rewriter.create<ConvertOp>(loc, v2I16Ty, v0, v1, old, select);
result =
rewriter.create<ConvertOp>(loc, v2I16Ty, v0, v1, old, /*wordSel=*/false);
auto fp8x4VecTy = vec_ty(i8_ty, 4);
auto fp8x4Vec = b.bitcast(result, fp8x4VecTy);
SmallVector<Value> ret(2);
Expand Down
1 change: 1 addition & 0 deletions third_party/amd/lib/TritonAMDGPUToLLVM/GCNAsmFormat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ mlir::Value GCNBuilder::launch(RewriterBase &rewriter, Location loc, Type resTy,
getConstraints(), // constraints
hasSideEffect, // has_side_effects
isAlignStack, // is_align_stack
LLVM::TailCallKind::None,
LLVM::AsmDialectAttr::get(ctx,
LLVM::AsmDialect::AD_ATT), // asm_dialect
ArrayAttr::get(ctx, attrs) // operand_attrs
Expand Down
2 changes: 1 addition & 1 deletion third_party/amd/lib/TritonAMDGPUToLLVM/Utility.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -691,7 +691,7 @@ bool isChainDotTail(tt::DotOpInterface dotOp) {
Operation *opA = dotOp.getA().getDefiningOp();
if (!opA)
return false;
getBackwardSlice(opA, &bwdSlices, bwdOpt);
(void)getBackwardSlice(opA, &bwdSlices, bwdOpt);
if (llvm::find_if(bwdSlices, [](Operation *op) {
return isa<tt::DotOpInterface>(op);
}) != bwdSlices.end())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ void Pingponger::moveOpAndPredecessorsUpSameBlock(Operation *op) {
return op->getBlock() == checkedOp->getBlock() &&
checkedOp->isBeforeInBlock(op);
};
getBackwardSlice(op, &backwardSlice, opt);
(void)getBackwardSlice(op, &backwardSlice, opt);
for (auto predOp : backwardSlice)
appendOp(predOp);
appendOp(op);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ static void moveUpGlobalLoadInPrologue(triton::FuncOp funcOp) {
// Only move ops residing in the same block.
return defBlock == block;
};
mlir::getBackwardSlice(op.getOperation(), &backwardSet, options);
(void)mlir::getBackwardSlice(op.getOperation(), &backwardSet, options);
backwardSet.insert(op);

auto ipoint = findEarlyInsertionPoint(block, op);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -479,7 +479,7 @@ void reorderProducerOps(SmallVector<Channel *> &channels) {
BackwardSliceOptions opt;
opt.omitBlockArguments = true;
SetVector<Operation *> backwardSlice;
getBackwardSlice(channel->getSrcOp(), &backwardSlice, opt);
(void)getBackwardSlice(channel->getSrcOp(), &backwardSlice, opt);
for (auto &op : backwardSlice) {
if (op->getBlock() == block)
op->moveBefore(channel->getSrcOp());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ static bool rematerializeOp(Operation *op, DataPartitionScheme &partitionScheme,
SetVector<Operation *> slice;
BackwardSliceOptions opt;
opt.omitBlockArguments = true;
getBackwardSlice(op, &slice);
(void)getBackwardSlice(op, &slice);
for (auto depOp : slice)
partitionScheme.undoPartition(depOp);
return true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,8 @@ void doTaskPartition(triton::FuncOp &funcOp, unsigned numWarpGroups) {
if (!dotOp)
continue;
SetVector<Operation *> backwardSlice;
getBackwardSlice(dotOp.getA(), &backwardSlice, opt);
getBackwardSlice(dotOp.getB(), &backwardSlice, opt);
(void)getBackwardSlice(dotOp.getA(), &backwardSlice, opt);
(void)getBackwardSlice(dotOp.getB(), &backwardSlice, opt);
for (auto depOp : backwardSlice) {
if (isa<tt::DescriptorLoadOp>(depOp)) {
producerOps.insert(depOp);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ mlir::Value PTXBuilder::launch(OpBuilder &rewriter, Location loc, Type resTy,
getConstraints(), // constraints
hasSideEffect, // has_side_effects
isAlignStack, // is_align_stack
LLVM::TailCallKind::None,
LLVM::AsmDialectAttr::get(ctx,
LLVM::AsmDialect::AD_ATT), // asm_dialect
ArrayAttr::get(ctx, attrs) // operand_attrs
Expand Down
Loading