Skip to content

Commit 73a724f

Browse files
author
SJW
authored
[AMD] Update shared memory size for cdna4 (#5964)
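The new CDNA4 architecture has a larger shared memory size (160 KB instead of 64 KB). `canUseLDSTransLoad` now also checks for the CDNA4 ISA family instead of comparing against the `gfx950` arch string.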
1 parent 2da5674 commit 73a724f

File tree

1 file changed

+6
-2
lines changed

1 file changed

+6
-2
lines changed

third_party/amd/lib/TritonAMDGPUToLLVM/TargetInfo.cpp

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -64,7 +64,10 @@ llvm::AMDGPU::GPUKind TargetInfo::getGPUKind() const {
64 64
return llvm::AMDGPU::parseArchAMDGCN(arch);
65 65
}
66 66

67-
int TargetInfo::getSharedMemorySize() const { return 64 * 1024; }
67+
int TargetInfo::getSharedMemorySize() const {
68+
int kbytes = getISAFamily() == ISAFamily::CDNA4 ? 160 : 64;
69+
return kbytes * 1024;
70+
}
68 71

69 72
bool TargetInfo::supportMaximumMinimum() const { return false; }
70 73

@@ -102,7 +105,8 @@ bool TargetInfo::canUseStMatrix(RankedTensorType tensorTy,
102 105
}
103 106

104 107
bool TargetInfo::canUseLDSTransLoad(int bitwidth) const {
105-
return arch == "gfx950" && llvm::is_contained({16, 8, 4, 6}, bitwidth);
108+
return getISAFamily() == ISAFamily::CDNA4 &&
109+
llvm::is_contained({16, 8, 4, 6}, bitwidth);
106 110
}
107 111

108 112
void TargetInfo::storeMatrixShared(RewriterBase &rewriter, Location loc,

0 commit comments

Comments
 (0)