Skip to content
2 changes: 1 addition & 1 deletion cmake/llvm-hash.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
e12cbd8339b89563059c2bb2a312579b652560d0
0a25b5022831c7465790cf99655afdcd0f91e34d
2 changes: 1 addition & 1 deletion lib/Analysis/Utility.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -953,7 +953,7 @@ SetVector<Operation *> multiRootGetSlice(Operation *op,
BackwardSliceOptions opt;
opt.omitBlockArguments = true;
opt.filter = backwardFilter;
getBackwardSlice(currentOp, &backwardSlice, opt);
(void)getBackwardSlice(currentOp, &backwardSlice, opt);
slice.insert(backwardSlice.begin(), backwardSlice.end());

// Compute and insert the forwardSlice starting from currentOp.
Expand Down
2 changes: 1 addition & 1 deletion lib/Conversion/TritonGPUToLLVM/ElementwiseOpToLLVM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ struct ElementwiseInlineAsmOpConversion
/*asm_string=*/op.getAsmString(),
/*constraints=*/op.getConstraints(),
/*has_side_effects=*/!op.getPure(),
/*is_align_stack=*/false,
/*is_align_stack=*/false, LLVM::TailCallKind::None,
/*asm_dialect=*/
LLVM::AsmDialectAttr::get(rewriter.getContext(),
LLVM::AsmDialect::AD_ATT),
Expand Down
2 changes: 1 addition & 1 deletion lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ static int computeOrigBitWidth(Value x) {
mlir::BackwardSliceOptions opt;
opt.omitBlockArguments = true;
opt.filter = bwdFilter;
getBackwardSlice(x, &slice, opt);
(void)getBackwardSlice(x, &slice, opt);

// TODO: This heuristic may be a bit too coarse and may need improving
// If the chain contains a fp4 to fp16/bf16 conversion, then the original
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ CoarseSchedule::Cluster schedulePrologueAndEpilogue(scf::ForOp forOp,
BackwardSliceOptions opt;
opt.omitBlockArguments = true;
opt.omitUsesFromAbove = false;
getBackwardSlice((Operation *)op, &backwardSlice, opt);
(void)getBackwardSlice((Operation *)op, &backwardSlice, opt);

for (auto op : backwardSlice) {
if (auto ifOp = dyn_cast<scf::IfOp>(op)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ static void threadValuesThroughWait(ttng::WarpGroupDotWaitOp wait,
return op->getBlock() == wait->getBlock();
};
SetVector<Operation *> slice;
getBackwardSlice(v, &slice, options);
(void)getBackwardSlice(v, &slice, options);
}

for (ttng::WarpGroupDotOp dot : asyncDots) {
Expand Down
2 changes: 1 addition & 1 deletion python/test/unit/language/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -6895,7 +6895,7 @@ def test_tl_range_num_stages(device):
if capability[0] >= 8:
ptx = pgm.asm['ptx']
# check that the loop got pipelined with the right number of stages.
assert 'cp.async.wait_group 6' in ptx
assert 'cp.async.wait_group \t6' in ptx


def test_tl_range_fuse():
Expand Down
36 changes: 15 additions & 21 deletions third_party/amd/lib/TritonAMDGPUToLLVM/ElementwiseOpToLLVM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,30 +37,25 @@ cvtScalePkUpcastFromFp8(Location loc, ConversionPatternRewriter &rewriter,
fp8x4Vec = b.insert_element(fp8x4VecTy, fp8x4Vec, v1, idx1);
auto i32v = b.bitcast(fp8x4Vec, i32_ty);

auto resType = i32_ty;
auto dstType = f32_ty;
Type resElemType;
if constexpr (std::is_same_v<ConvertOp, ROCDL::CvtScaleF32PkF32Fp8Op> ||
std::is_same_v<ConvertOp, ROCDL::CvtScaleF32PkF32Bf8Op>) {
resType = i64_ty;
dstType = f32_ty;
resElemType = f32_ty;
} else if constexpr (std::is_same_v<ConvertOp,
ROCDL::CvtScaleF32PkF16Fp8Op> ||
std::is_same_v<ConvertOp,
ROCDL::CvtScaleF32PkF16Bf8Op>) {
resType = i32_ty;
dstType = f16_ty;
resElemType = f16_ty;
} else {
resType = i32_ty;
dstType = bf16_ty;
resElemType = bf16_ty;
}
Type resType = vec_ty(resElemType, 2);
Value scale = b.f32_val(1);
Value select = b.false_val();
auto result = rewriter.create<ConvertOp>(loc, resType, i32v, scale, select);
auto retVecTy = vec_ty(dstType, 2);
auto retVec = b.bitcast(result, retVecTy);
auto result = rewriter.create<ConvertOp>(loc, resType, i32v, scale,
/*srcLoHiSel=*/false);
SmallVector<Value> ret(2);
ret[0] = b.extract_element(dstType, retVec, idx0);
ret[1] = b.extract_element(dstType, retVec, idx1);
ret[0] = b.extract_element(resElemType, result, idx0);
ret[1] = b.extract_element(resElemType, result, idx1);
return ret;
}

Expand All @@ -73,13 +68,12 @@ cvtScalePkDowncastToFp8(Location loc, ConversionPatternRewriter &rewriter,
Type v2I16Ty = vec_ty(i16_ty, 2);
Value v2I16Vec = b.undef(v2I16Ty);
Value scale = b.f32_val(1);
Value select = b.false_val();

Value result;
if constexpr (std::is_same_v<ConvertOp, ROCDL::CvtScaleF32PkFp8F32Op> ||
std::is_same_v<ConvertOp, ROCDL::CvtScaleF32PkBf8F32Op>) {
result = rewriter.create<ConvertOp>(loc, v2I16Ty, v2I16Vec, v0, v1, scale,
select);
/*dstLoHiSel=*/false);
} else {
Type v2F16Ty = vec_ty(v0.getType(), 2);
Value srcVec = b.undef(v2F16Ty);
Expand All @@ -88,7 +82,7 @@ cvtScalePkDowncastToFp8(Location loc, ConversionPatternRewriter &rewriter,
srcVec = b.insert_element(v2F16Ty, srcVec, v0, idx0);
srcVec = b.insert_element(v2F16Ty, srcVec, v1, idx1);
result = rewriter.create<ConvertOp>(loc, v2I16Ty, v2I16Vec, srcVec, scale,
select);
/*dstLoHiSel=*/false);
}
auto fp8x4VecTy = vec_ty(i8_ty, 4);
auto fp8x4Vec = b.bitcast(result, fp8x4VecTy);
Expand Down Expand Up @@ -312,8 +306,8 @@ static SmallVector<Value> cvtPkF8ToFp32(Location loc,
auto resType = i64_ty;
auto dstType = f32_ty;

Value select = b.false_val();
auto result = rewriter.create<ConvertOp>(loc, resType, i32v, select);
auto result =
rewriter.create<ConvertOp>(loc, resType, i32v, /*wordSel=*/false);
auto f32x2VecTy = vec_ty(dstType, 2);
auto retVec = b.bitcast(result, f32x2VecTy);
SmallVector<Value> ret(2);
Expand All @@ -330,10 +324,10 @@ static SmallVector<Value> cvtPkFp32ToF8(Location loc,
auto b = TritonLLVMOpBuilder(loc, rewriter);
Type v2I16Ty = vec_ty(i16_ty, 2);
Value old = b.undef(i32_ty);
Value select = b.false_val();

Value result;
result = rewriter.create<ConvertOp>(loc, v2I16Ty, v0, v1, old, select);
result =
rewriter.create<ConvertOp>(loc, v2I16Ty, v0, v1, old, /*wordSel=*/false);
auto fp8x4VecTy = vec_ty(i8_ty, 4);
auto fp8x4Vec = b.bitcast(result, fp8x4VecTy);
SmallVector<Value> ret(2);
Expand Down
1 change: 1 addition & 0 deletions third_party/amd/lib/TritonAMDGPUToLLVM/GCNAsmFormat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ mlir::Value GCNBuilder::launch(RewriterBase &rewriter, Location loc, Type resTy,
getConstraints(), // constraints
hasSideEffect, // has_side_effects
isAlignStack, // is_align_stack
LLVM::TailCallKind::None,
LLVM::AsmDialectAttr::get(ctx,
LLVM::AsmDialect::AD_ATT), // asm_dialect
ArrayAttr::get(ctx, attrs) // operand_attrs
Expand Down
2 changes: 1 addition & 1 deletion third_party/amd/lib/TritonAMDGPUToLLVM/Utility.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -691,7 +691,7 @@ bool isChainDotTail(tt::DotOpInterface dotOp) {
Operation *opA = dotOp.getA().getDefiningOp();
if (!opA)
return false;
getBackwardSlice(opA, &bwdSlices, bwdOpt);
(void)getBackwardSlice(opA, &bwdSlices, bwdOpt);
if (llvm::find_if(bwdSlices, [](Operation *op) {
return isa<tt::DotOpInterface>(op);
}) != bwdSlices.end())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ void Pingponger::moveOpAndPredecessorsUpSameBlock(Operation *op) {
return op->getBlock() == checkedOp->getBlock() &&
checkedOp->isBeforeInBlock(op);
};
getBackwardSlice(op, &backwardSlice, opt);
(void)getBackwardSlice(op, &backwardSlice, opt);
for (auto predOp : backwardSlice)
appendOp(predOp);
appendOp(op);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ static void moveUpGlobalLoadInPrologue(triton::FuncOp funcOp) {
// Only move ops residing in the same block.
return defBlock == block;
};
mlir::getBackwardSlice(op.getOperation(), &backwardSet, options);
(void)mlir::getBackwardSlice(op.getOperation(), &backwardSet, options);
backwardSet.insert(op);

auto ipoint = findEarlyInsertionPoint(block, op);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -479,7 +479,7 @@ void reorderProducerOps(SmallVector<Channel *> &channels) {
BackwardSliceOptions opt;
opt.omitBlockArguments = true;
SetVector<Operation *> backwardSlice;
getBackwardSlice(channel->getSrcOp(), &backwardSlice, opt);
(void)getBackwardSlice(channel->getSrcOp(), &backwardSlice, opt);
for (auto &op : backwardSlice) {
if (op->getBlock() == block)
op->moveBefore(channel->getSrcOp());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,7 @@ static bool rematerializeOp(Operation *op, DataPartitionScheme &partitionScheme,
SetVector<Operation *> slice;
BackwardSliceOptions opt;
opt.omitBlockArguments = true;
getBackwardSlice(op, &slice);
(void)getBackwardSlice(op, &slice);
for (auto depOp : slice)
partitionScheme.undoPartition(depOp);
return true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,8 @@ void doTaskPartition(triton::FuncOp &funcOp, unsigned numWarpGroups) {
if (!dotOp)
continue;
SetVector<Operation *> backwardSlice;
getBackwardSlice(dotOp.getA(), &backwardSlice, opt);
getBackwardSlice(dotOp.getB(), &backwardSlice, opt);
(void)getBackwardSlice(dotOp.getA(), &backwardSlice, opt);
(void)getBackwardSlice(dotOp.getB(), &backwardSlice, opt);
for (auto depOp : backwardSlice) {
if (isa<tt::DescriptorLoadOp>(depOp)) {
producerOps.insert(depOp);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ mlir::Value PTXBuilder::launch(OpBuilder &rewriter, Location loc, Type resTy,
getConstraints(), // constraints
hasSideEffect, // has_side_effects
isAlignStack, // is_align_stack
LLVM::TailCallKind::None,
LLVM::AsmDialectAttr::get(ctx,
LLVM::AsmDialect::AD_ATT), // asm_dialect
ArrayAttr::get(ctx, attrs) // operand_attrs
Expand Down
Loading