Skip to content

Commit e1f4e6d

Browse files
committed
Fix transpose
1 parent 3586668 commit e1f4e6d

File tree

1 file changed: +3 -2 lines changed

1 file changed: +3 -2 lines changed

third_party/intel/lib/TritonIntelGPUToLLVM/ConvertLayoutOpToLLVM.cpp

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -802,8 +802,6 @@ struct ConvertLayoutOpUsingLinearLayoutsConversion
802 802   int threadsPerWarp = triton::gpu::TritonGPUDialect::getThreadsPerWarp(mod);
803 803   int rowLength = threadsPerWarp + 1;
804 804   Type offsetType = getTypeConverter()->getIndexType();
805     - Value subGroupOffset =
806     - int_val(offsetType.getIntOrFloatBitWidth(), rowLength * numRows);
807 805   Value subGroupId = getValueOrCreateCastToIndexLike(
808 806   rewriter, loc, offsetType,
809 807   rewriter.create<mlir::gpu::SubgroupIdOp>(
@@ -812,6 +810,9 @@ struct ConvertLayoutOpUsingLinearLayoutsConversion
812 810   rewriter, loc, offsetType,
813 811   rewriter.create<mlir::gpu::LaneIdOp>(loc,
814 812   /*upper_bound=*/IntegerAttr{}));
    813 + Value subGroupOffset =
    814 + mul(subGroupId,
    815 + int_val(offsetType.getIntOrFloatBitWidth(), rowLength * numRows));
815 816   Value subGroupBasePtr = gep(ptrType, elementType, smemBase,
816 817   ValueRange{subGroupOffset}, /*inbounds=*/true);
817 818   Value base = subGroupBasePtr;

0 commit comments

Comments (0)