Skip to content

Commit 2c56a3e

Browse files
[AMDGPU] NFCI: Track AV Register Pressure separately (llvm#149863) (llvm#4095)
This patch fixes a performance regression in rocrand. See JIRA SWDEV-538318.
2 parents 64ee5b3 + 75432aa commit 2c56a3e

File tree

2 files changed

+32
-14
lines changed

2 files changed

+32
-14
lines changed

llvm/lib/Target/AMDGPU/GCNRegPressure.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,11 @@ bool llvm::isEqual(const GCNRPTracker::LiveRegSet &S1,
3838

3939
unsigned GCNRegPressure::getRegKind(const TargetRegisterClass *RC,
4040
const SIRegisterInfo *STI) {
41-
return STI->isSGPRClass(RC) ? SGPR : (STI->isAGPRClass(RC) ? AGPR : VGPR);
41+
return STI->isSGPRClass(RC)
42+
? SGPR
43+
: (STI->isAGPRClass(RC)
44+
? AGPR
45+
: (STI->isVectorSuperClass(RC) ? AVGPR : VGPR));
4246
}
4347

4448
void GCNRegPressure::inc(unsigned Reg,

llvm/lib/Target/AMDGPU/GCNRegPressure.h

Lines changed: 27 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -29,43 +29,57 @@ class raw_ostream;
2929
class SlotIndex;
3030

3131
struct GCNRegPressure {
32-
enum RegKind { SGPR, VGPR, AGPR, TOTAL_KINDS };
32+
enum RegKind { SGPR, VGPR, AGPR, AVGPR, TOTAL_KINDS };
3333

3434
GCNRegPressure() {
3535
clear();
3636
}
3737

38-
bool empty() const { return !Value[SGPR] && !Value[VGPR] && !Value[AGPR]; }
38+
bool empty() const {
39+
return !Value[SGPR] && !Value[VGPR] && !Value[AGPR] && !Value[AVGPR];
40+
}
3941

4042
void clear() { std::fill(&Value[0], &Value[ValueArraySize], 0); }
4143

4244
/// \returns the SGPR32 pressure
4345
unsigned getSGPRNum() const { return Value[SGPR]; }
44-
/// \returns the aggregated ArchVGPR32, AccVGPR32 pressure dependent upon \p
45-
/// UnifiedVGPRFile
46+
/// \returns the aggregated ArchVGPR32, AccVGPR32, and Pseudo AVGPR pressure
47+
/// dependent upon \p UnifiedVGPRFile
4648
unsigned getVGPRNum(bool UnifiedVGPRFile) const {
4749
if (UnifiedVGPRFile) {
48-
return Value[AGPR] ? getUnifiedVGPRNum(Value[VGPR], Value[AGPR])
49-
: Value[VGPR];
50+
return Value[AGPR]
51+
? getUnifiedVGPRNum(Value[VGPR], Value[AGPR], Value[AVGPR])
52+
: Value[VGPR] + Value[AVGPR];
5053
}
51-
return std::max(Value[VGPR], Value[AGPR]);
54+
// AVGPR assignment priority is based on the width of the register. Account
55+
// for AVGPR pressure as VGPR pressure.
56+
return std::max(Value[VGPR] + Value[AVGPR], Value[AGPR]);
5257
}
5358

5459
/// Returns the aggregated VGPR pressure, assuming \p NumArchVGPRs ArchVGPRs,
55-
/// and \p NumAGPRs AGPRS, for a target with a unified VGPR file.
60+
/// \p NumAGPRs AGPRs, and \p NumAVGPRs AVGPRs for a target with a unified
61+
/// VGPR file.
5662
inline static unsigned getUnifiedVGPRNum(unsigned NumArchVGPRs,
57-
unsigned NumAGPRs) {
58-
return alignTo(NumArchVGPRs, AMDGPU::IsaInfo::getArchVGPRAllocGranule()) +
63+
unsigned NumAGPRs,
64+
unsigned NumAVGPRs) {
65+
66+
// Assume AVGPRs will be assigned as VGPRs.
67+
return alignTo(NumArchVGPRs + NumAVGPRs,
68+
AMDGPU::IsaInfo::getArchVGPRAllocGranule()) +
5969
NumAGPRs;
6070
}
6171

62-
/// \returns the ArchVGPR32 pressure
63-
unsigned getArchVGPRNum() const { return Value[VGPR]; }
72+
/// \returns the ArchVGPR32 pressure, plus the AVGPRs which we assume will be
73+
/// allocated as VGPR
74+
unsigned getArchVGPRNum() const { return Value[VGPR] + Value[AVGPR]; }
6475
/// \returns the AccVGPR32 pressure
6576
unsigned getAGPRNum() const { return Value[AGPR]; }
77+
/// \returns the AVGPR32 pressure
78+
unsigned getAVGPRNum() const { return Value[AVGPR]; }
6679

6780
unsigned getVGPRTuplesWeight() const {
68-
return std::max(Value[TOTAL_KINDS + VGPR], Value[TOTAL_KINDS + AGPR]);
81+
return std::max(Value[TOTAL_KINDS + VGPR] + Value[TOTAL_KINDS + AVGPR],
82+
Value[TOTAL_KINDS + AGPR]);
6983
}
7084
unsigned getSGPRTuplesWeight() const { return Value[TOTAL_KINDS + SGPR]; }
7185

0 commit comments

Comments
 (0)