
Commit 4a80233

plognjen and oplavsic authored

[NFC] Change term kMajor to kContig (#5679)

This commit changes the terminology for matrices that are contiguous along the K dimension in memory. Previously, the term kMajor was used for such matrices. Since major/minor terminology can be ambiguous, this commit switches to the term kContig for K-contiguous matrices, which is hopefully clearer.

Co-authored-by: Ognjen Plavsic <[email protected]>

1 parent 76f0064 commit 4a80233

File tree: 6 files changed (+17, -17 lines)


include/triton/Dialect/TritonGPU/IR/Dialect.h

Lines changed: 2 additions & 2 deletions

@@ -178,11 +178,11 @@ unsigned getNumCTAs(Attribute layout);
 // len(shape) == rank.
 SmallVector<unsigned> getMatrixOrder(unsigned rank, bool rowMajor);
 
-// Return the order that represents that the dot operand is in kMajor
+// Return the order that represents that the dot operand is in kContig
 // (contiguous in the inner dimension) or it's contiguous on the outer
 // dimension.
 SmallVector<unsigned> getOrderForDotOperand(unsigned opIdx, unsigned rank,
-                                            bool kMajor);
+                                            bool kContig);
 
 bool isExpensiveCat(CatOp cat, Attribute targetEncoding);
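As context for the renamed flag (not part of the diff): a dot operand is "kContig" when the K dimension is the fastest-varying one in memory. A minimal standalone illustration for a hypothetical row-major A operand of shape [M, K]:

// Not from the Triton sources: shows why a row-major [M, K] operand is
// kContig. Element (m, k) lives at offset m * K + k, so consecutive k
// values are adjacent in memory.
#include <cassert>

int main() {
  const int M = 4, K = 8;
  auto offset = [&](int m, int k) { return m * K + k; };
  assert(offset(2, 5) - offset(2, 4) == 1);  // step in k moves 1 element
  assert(offset(3, 4) - offset(2, 4) == K);  // step in m moves K elements
  assert(offset(M - 1, K - 1) == M * K - 1); // last element of the buffer
}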

lib/Dialect/TritonGPU/IR/Dialect.cpp

Lines changed: 8 additions & 8 deletions

@@ -227,15 +227,15 @@ SmallVector<unsigned> getMatrixOrder(unsigned rank, bool rowMajor) {
 }
 
 SmallVector<unsigned> getOrderForDotOperand(unsigned opIdx, unsigned rank,
-                                            bool kMajor) {
-  // kMajor: if true, the matrix is fastest-running on k,
+                                            bool kContig) {
+  // kContig: if true, the matrix is fastest-running on k,
   // otherwise it is on m (resp. n)
   // opIdx=0: [batch, m, k] if rank == 3 else [m, k]
   // opIdx=1: [batch, k, n] if rank == 3 else [k, n]
   // batch (if rank == 3) is always the slowest running dimension
   assert(rank == 2 || rank == 3);
   assert(opIdx == 0 || opIdx == 1);
-  auto rowMajor = bool(opIdx) != kMajor;
+  auto rowMajor = bool(opIdx) != kContig;
   return getMatrixOrder(rank, rowMajor);
 }
 
@@ -268,7 +268,7 @@ SmallVector<unsigned> getOrder(Attribute layout) {
   }
   if (auto dotLayout = dyn_cast<DotOperandEncodingAttr>(layout)) {
     auto rank = dotLayout.getWarpsPerCTA().size();
-    return getOrderForDotOperand(dotLayout.getOpIdx(), rank, /*kMajor*/ true);
+    return getOrderForDotOperand(dotLayout.getOpIdx(), rank, /*kContig*/ true);
   }
   if (auto sliceLayout = dyn_cast<SliceEncodingAttr>(layout)) {
     SmallVector<unsigned> parentOrder = getOrder(sliceLayout.getParent());
@@ -987,7 +987,7 @@ SmallVector<unsigned> DotOperandEncodingAttr::getWarpOrder() const {
 }
 SmallVector<unsigned> DotOperandEncodingAttr::getThreadOrder() const {
   return getOrderForDotOperand(getOpIdx(), getWarpsPerCTA().size(),
-                               /*kMajor*/ true);
+                               /*kContig*/ true);
 }
 
 LogicalResult DotOperandEncodingAttr::verify(
@@ -1959,7 +1959,7 @@ SmallVector<unsigned> AMDMfmaEncodingAttr::getRepOrder() const {
 SmallVector<unsigned>
 AMDMfmaEncodingAttr::getRepOrderForOperand(int opIdx) const {
   auto rank = getWarpsPerCTA().size();
-  return getOrderForDotOperand(opIdx, rank, /*kMajor*/ true);
+  return getOrderForDotOperand(opIdx, rank, /*kContig*/ true);
 }
 
 SmallVector<unsigned>
@@ -2027,7 +2027,7 @@ SmallVector<unsigned> AMDWmmaEncodingAttr::getRepOrder() const {
 SmallVector<unsigned>
 AMDWmmaEncodingAttr::getRepOrderForOperand(int opIdx) const {
   auto rank = getWarpsPerCTA().size();
-  return getOrderForDotOperand(opIdx, rank, /*kMajor*/ true);
+  return getOrderForDotOperand(opIdx, rank, /*kContig*/ true);
 }
 
 SmallVector<unsigned>
@@ -2219,7 +2219,7 @@ SmallVector<unsigned> NvidiaMmaEncodingAttr::getSizePerThread() const {
 SmallVector<unsigned>
 NvidiaMmaEncodingAttr::getRepOrderForOperand(int opIdx) const {
   auto rank = getWarpsPerCTA().size();
-  return getOrderForDotOperand(opIdx, rank, /*kMajor*/ true);
+  return getOrderForDotOperand(opIdx, rank, /*kContig*/ true);
 }
 
 SmallVector<unsigned>
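To make the renamed flag concrete, the following standalone sketch (not part of this commit) mirrors getOrderForDotOperand. The body of getMatrixOrder is an assumption consistent with the comments in the diff: orders list dimensions fastest-first, with batch slowest.

#include <cstdio>
#include <numeric>
#include <vector>

// Assumed behavior of getMatrixOrder: [rank-1, ..., 0] for row-major,
// with the two fastest dimensions swapped for column-major.
std::vector<unsigned> getMatrixOrder(unsigned rank, bool rowMajor) {
  std::vector<unsigned> order(rank);
  std::iota(order.rbegin(), order.rend(), 0u);
  if (!rowMajor)
    std::swap(order[0], order[1]);
  return order;
}

// Same logic as the function in the diff: A ([m, k]) is kContig when
// row-major, B ([k, n]) is kContig when column-major.
std::vector<unsigned> getOrderForDotOperand(unsigned opIdx, unsigned rank,
                                            bool kContig) {
  bool rowMajor = bool(opIdx) != kContig;
  return getMatrixOrder(rank, rowMajor);
}

int main() {
  for (unsigned opIdx : {0u, 1u})
    for (bool kContig : {true, false}) {
      auto o = getOrderForDotOperand(opIdx, /*rank=*/2, kContig);
      std::printf("opIdx=%u kContig=%d -> [%u, %u]\n", opIdx, kContig, o[0],
                  o[1]);
    }
}

With kContig true this yields [1, 0] for A and [0, 1] for B, i.e. order[0] is the K dimension in both cases; with kContig false the orders flip.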

third_party/amd/lib/TritonAMDGPUToLLVM/ConvertLayoutOpToLLVM/SharedToDotOperandHelper.cpp

Lines changed: 3 additions & 3 deletions

@@ -74,7 +74,7 @@ Value computeBasePtr(ConversionPatternRewriter &rewriter, Location loc,
   return base;
 }
 
-bool isKMajor(llvm::ArrayRef<unsigned> order, int opIdx) {
+bool isKContig(llvm::ArrayRef<unsigned> order, int opIdx) {
   auto rank = order.size();
   int kdim = opIdx == 0 ? rank - 1 : rank - 2;
   return order[0] == kdim;
@@ -102,9 +102,9 @@ bool isSwizzlePatternFitsIntoBlock(const SharedEncodingAttr sharedLayout,
   const auto swizzleSlowDimSize =
       sharedLayout.getMaxPhase() * sharedLayout.getPerPhase();
   const auto swizzlePatternSizeK =
-      isKMajor(order, opIdx) ? swizzleFastDimSize : swizzleSlowDimSize;
+      isKContig(order, opIdx) ? swizzleFastDimSize : swizzleSlowDimSize;
   const auto swizzlePatternSizeNonK =
-      !isKMajor(order, opIdx) ? swizzleFastDimSize : swizzleSlowDimSize;
+      !isKContig(order, opIdx) ? swizzleFastDimSize : swizzleSlowDimSize;
 
   const auto blockSizeK = mfmaInstrK * reps[reps.size() - 1];
   const auto blockSizeNonK = mfmaInstrNonK * warpsPerBlockNonK;
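For reference (not part of the diff), the renamed check can be exercised standalone; this sketch substitutes std::vector for llvm::ArrayRef so it compiles without LLVM:

#include <cassert>
#include <vector>

// Same logic as the function above: k is the last dimension of operand A
// (opIdx == 0) and the second-to-last of operand B (opIdx == 1); the
// operand is kContig when the fastest dimension, order[0], is k.
bool isKContig(const std::vector<unsigned> &order, int opIdx) {
  int rank = static_cast<int>(order.size());
  int kdim = opIdx == 0 ? rank - 1 : rank - 2;
  return order[0] == static_cast<unsigned>(kdim);
}

int main() {
  assert(isKContig({1, 0}, 0));  // A [m, k]: k contiguous
  assert(!isKContig({1, 0}, 1)); // B [k, n]: n contiguous, not kContig
  assert(isKContig({0, 1}, 1));  // B [k, n]: k contiguous
}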

third_party/amd/lib/TritonAMDGPUToLLVM/ConvertLayoutOpToLLVM/SharedToDotOperandHelper.h

Lines changed: 1 addition & 1 deletion

@@ -36,7 +36,7 @@ Value computeBasePtr(ConversionPatternRewriter &rewriter, Location loc,
                      const SharedMemoryObject &smemObj,
                      ArrayRef<Value> strides);
 
-bool isKMajor(llvm::ArrayRef<unsigned> order, int opIdx);
+bool isKContig(llvm::ArrayRef<unsigned> order, int opIdx);
 
 using computeTensorElemMappingInBlockT =
     std::function<llvm::SmallVector<llvm::SmallVector<Value>>(

third_party/amd/lib/TritonAMDGPUToLLVM/ConvertLayoutOpToLLVM/SharedToDotOperandMFMA.cpp

Lines changed: 1 addition & 1 deletion

@@ -279,7 +279,7 @@ Value convertLayout(int opIdx, ConversionPatternRewriter &rewriter,
   Value smemBase;
   auto smemStrides = smemObj.getStrides(aTensorTy, loc, rewriter);
   bool isFastPath =
-      !AMD::isKMajor(order, opIdx) && !hasSwizzleEnabled(sharedLayout);
+      !AMD::isKContig(order, opIdx) && !hasSwizzleEnabled(sharedLayout);
   if (isFastPath) {
     // fast path handles tensors that are not k-major and have swizzling
     // disabled, in which case offsets computation can be simplified

third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/DotOpToLLVM/MMAv2.cpp

Lines changed: 2 additions & 2 deletions

@@ -498,13 +498,13 @@ LogicalResult convertDot(const LLVMTypeConverter *typeConverter,
   // getValuesFromDotOperandLayoutStruct as both a and b are K-major
   assert(dotOpA.getRepOrder() == getOrderForDotOperand(dotOpA.getOpIdx(),
                                                        aShapePerCTA.size(),
-                                                       /*kMajor=*/true));
+                                                       /*kContig=*/true));
   auto ha = getValuesFromDotOperandLayoutStruct(
       typeConverter, loc, rewriter, loadedA, repBatch, repM, repK, aTensorTy);
 
   assert(dotOpB.getRepOrder() == getOrderForDotOperand(dotOpB.getOpIdx(),
                                                        bShapePerCTA.size(),
-                                                       /*kMajor=*/true));
+                                                       /*kContig=*/true));
   auto hb = getValuesFromDotOperandLayoutStruct(
       typeConverter, loc, rewriter, loadedB, repBatch, repN, repK, bTensorTy);
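A note on what these assertions reduce to (derived from the getOrderForDotOperand logic shown earlier, not stated in the diff): for rank-2 operands the expected rep orders with kContig true are [1, 0] for A and [0, 1] for B; under the getMatrixOrder behavior assumed above, a batched (rank-3) dot gives [2, 1, 0] and [1, 2, 0], with batch slowest in both.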
