Skip to content

Commit 64ee5b3

Browse files
author
Salinas, David
authored
[AMDGPU][Attributor] Remove final update of waves-per-eu after the at… (llvm#4094)
2 parents e784a9a + 753bbd3 commit 64ee5b3

33 files changed

+216
-290
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 1 addition & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -1299,74 +1299,6 @@ struct AAAMDGPUNoAGPR
12991299

13001300
const char AAAMDGPUNoAGPR::ID = 0;
13011301

1302-
/// Performs the final check and updates the 'amdgpu-waves-per-eu' attribute
1303-
/// based on the finalized 'amdgpu-flat-work-group-size' attribute.
1304-
/// Both attributes start with narrow ranges that expand during iteration.
1305-
/// However, a narrower flat-workgroup-size leads to a wider waves-per-eu range,
1306-
/// preventing optimal updates later. Therefore, waves-per-eu can't be updated
1307-
/// with intermediate values during the attributor run. We defer the
1308-
/// finalization of waves-per-eu until after the flat-workgroup-size is
1309-
/// finalized.
1310-
/// TODO: Remove this and move similar logic back into the attributor run once
1311-
/// we have a better representation for waves-per-eu.
1312-
static bool updateWavesPerEU(Module &M, TargetMachine &TM) {
1313-
bool Changed = false;
1314-
1315-
LLVMContext &Ctx = M.getContext();
1316-
1317-
for (Function &F : M) {
1318-
if (F.isDeclaration())
1319-
continue;
1320-
1321-
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
1322-
1323-
std::optional<std::pair<unsigned, std::optional<unsigned>>>
1324-
FlatWgrpSizeAttr =
1325-
AMDGPU::getIntegerPairAttribute(F, "amdgpu-flat-work-group-size");
1326-
1327-
unsigned MinWavesPerEU = ST.getMinWavesPerEU();
1328-
unsigned MaxWavesPerEU = ST.getMaxWavesPerEU();
1329-
1330-
unsigned MinFlatWgrpSize = ST.getMinFlatWorkGroupSize();
1331-
unsigned MaxFlatWgrpSize = ST.getMaxFlatWorkGroupSize();
1332-
if (FlatWgrpSizeAttr.has_value()) {
1333-
MinFlatWgrpSize = FlatWgrpSizeAttr->first;
1334-
MaxFlatWgrpSize = *(FlatWgrpSizeAttr->second);
1335-
}
1336-
1337-
// Start with the "best" range.
1338-
unsigned Min = MinWavesPerEU;
1339-
unsigned Max = MinWavesPerEU;
1340-
1341-
// Compute the range from flat workgroup size. `getWavesPerEU` will also
1342-
// account for the 'amdgpu-waves-per-eu' attribute.
1343-
auto [MinFromFlatWgrpSize, MaxFromFlatWgrpSize] =
1344-
ST.getWavesPerEU(F, {MinFlatWgrpSize, MaxFlatWgrpSize});
1345-
1346-
// For the lower bound, we have to "tighten" it.
1347-
Min = std::max(Min, MinFromFlatWgrpSize);
1348-
// For the upper bound, we have to "extend" it.
1349-
Max = std::max(Max, MaxFromFlatWgrpSize);
1350-
1351-
// Clamp the range to the max range.
1352-
Min = std::max(Min, MinWavesPerEU);
1353-
Max = std::min(Max, MaxWavesPerEU);
1354-
1355-
// Update the attribute if it is not the max.
1356-
if (Min != MinWavesPerEU || Max != MaxWavesPerEU) {
1357-
SmallString<10> Buffer;
1358-
raw_svector_ostream OS(Buffer);
1359-
OS << Min << ',' << Max;
1360-
Attribute OldAttr = F.getFnAttribute("amdgpu-waves-per-eu");
1361-
Attribute NewAttr = Attribute::get(Ctx, "amdgpu-waves-per-eu", OS.str());
1362-
F.addFnAttr(NewAttr);
1363-
Changed |= OldAttr == NewAttr;
1364-
}
1365-
}
1366-
1367-
return Changed;
1368-
}
1369-
13701302
static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
13711303
AMDGPUAttributorOptions Options,
13721304
ThinOrFullLTOPhase LTOPhase) {
@@ -1447,11 +1379,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
14471379
}
14481380
}
14491381

1450-
bool Changed = A.run() == ChangeStatus::CHANGED;
1451-
1452-
Changed |= updateWavesPerEU(M, TM);
1453-
1454-
return Changed;
1382+
return A.run() == ChangeStatus::CHANGED;
14551383
}
14561384
} // namespace
14571385

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -216,15 +216,6 @@ AMDGPUSubtarget::getWavesPerEU(const Function &F) const {
216216
return getWavesPerEU(FlatWorkGroupSizes, LDSBytes, F);
217217
}
218218

219-
std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
220-
const Function &F, std::pair<unsigned, unsigned> FlatWorkGroupSizes) const {
221-
// Minimum number of bytes allocated in the LDS.
222-
unsigned LDSBytes = AMDGPU::getIntegerPairAttribute(F, "amdgpu-lds-size",
223-
{0, UINT32_MAX}, true)
224-
.first;
225-
return getWavesPerEU(FlatWorkGroupSizes, LDSBytes, F);
226-
}
227-
228219
std::pair<unsigned, unsigned>
229220
AMDGPUSubtarget::getWavesPerEU(std::pair<unsigned, unsigned> FlatWorkGroupSizes,
230221
unsigned LDSBytes, const Function &F) const {

llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,6 @@ attributes #1 = { nounwind }
169169

170170
;.
171171
; HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
172-
; HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
173-
; HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
172+
; HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
173+
; HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
174174
;.

llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ declare void @unknown()
105105

106106
define amdgpu_kernel void @kernel_calls_extern() {
107107
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern(
108-
; CHECK-SAME: ) #[[ATTR3:[0-9]+]] {
108+
; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
109109
; CHECK-NEXT: call void @unknown()
110110
; CHECK-NEXT: ret void
111111
;
@@ -115,8 +115,8 @@ define amdgpu_kernel void @kernel_calls_extern() {
115115

116116
define amdgpu_kernel void @kernel_calls_extern_marked_callsite() {
117117
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern_marked_callsite(
118-
; CHECK-SAME: ) #[[ATTR3]] {
119-
; CHECK-NEXT: call void @unknown() #[[ATTR7:[0-9]+]]
118+
; CHECK-SAME: ) #[[ATTR2]] {
119+
; CHECK-NEXT: call void @unknown() #[[ATTR6:[0-9]+]]
120120
; CHECK-NEXT: ret void
121121
;
122122
call void @unknown() #0
@@ -125,7 +125,7 @@ define amdgpu_kernel void @kernel_calls_extern_marked_callsite() {
125125

126126
define amdgpu_kernel void @kernel_calls_indirect(ptr %indirect) {
127127
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect(
128-
; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR3]] {
128+
; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR2]] {
129129
; CHECK-NEXT: call void [[INDIRECT]]()
130130
; CHECK-NEXT: ret void
131131
;
@@ -135,8 +135,8 @@ define amdgpu_kernel void @kernel_calls_indirect(ptr %indirect) {
135135

136136
define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(ptr %indirect) {
137137
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(
138-
; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR3]] {
139-
; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR7]]
138+
; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR2]] {
139+
; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR6]]
140140
; CHECK-NEXT: ret void
141141
;
142142
call void %indirect() #0
@@ -242,12 +242,11 @@ define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) {
242242

243243
attributes #0 = { "amdgpu-agpr-alloc"="0" }
244244
;.
245-
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
246-
; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
247-
; CHECK: attributes #[[ATTR2:[0-9]+]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
248-
; CHECK: attributes #[[ATTR3]] = { "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
249-
; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
250-
; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
251-
; CHECK: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
252-
; CHECK: attributes #[[ATTR7]] = { "amdgpu-agpr-alloc"="0" }
245+
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
246+
; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
247+
; CHECK: attributes #[[ATTR2]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
248+
; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
249+
; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
250+
; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
251+
; CHECK: attributes #[[ATTR6]] = { "amdgpu-agpr-alloc"="0" }
253252
;.

llvm/test/CodeGen/AMDGPU/annotate-existing-abi-attributes.ll

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -117,14 +117,14 @@ define void @call_no_dispatch_id() {
117117
ret void
118118
}
119119
;.
120-
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-workitem-id-x" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
121-
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-workitem-id-y" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
122-
; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
123-
; CHECK: attributes #[[ATTR3]] = { "amdgpu-no-workgroup-id-x" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
124-
; CHECK: attributes #[[ATTR4]] = { "amdgpu-no-workgroup-id-y" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
125-
; CHECK: attributes #[[ATTR5]] = { "amdgpu-no-workgroup-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
126-
; CHECK: attributes #[[ATTR6]] = { "amdgpu-no-dispatch-ptr" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
127-
; CHECK: attributes #[[ATTR7]] = { "amdgpu-no-queue-ptr" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
128-
; CHECK: attributes #[[ATTR8]] = { "amdgpu-no-implicitarg-ptr" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
129-
; CHECK: attributes #[[ATTR9]] = { "amdgpu-no-dispatch-id" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
120+
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
121+
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" }
122+
; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
123+
; CHECK: attributes #[[ATTR3]] = { "amdgpu-no-workgroup-id-x" "uniform-work-group-size"="false" }
124+
; CHECK: attributes #[[ATTR4]] = { "amdgpu-no-workgroup-id-y" "uniform-work-group-size"="false" }
125+
; CHECK: attributes #[[ATTR5]] = { "amdgpu-no-workgroup-id-z" "uniform-work-group-size"="false" }
126+
; CHECK: attributes #[[ATTR6]] = { "amdgpu-no-dispatch-ptr" "uniform-work-group-size"="false" }
127+
; CHECK: attributes #[[ATTR7]] = { "amdgpu-no-queue-ptr" "uniform-work-group-size"="false" }
128+
; CHECK: attributes #[[ATTR8]] = { "amdgpu-no-implicitarg-ptr" "uniform-work-group-size"="false" }
129+
; CHECK: attributes #[[ATTR9]] = { "amdgpu-no-dispatch-id" "uniform-work-group-size"="false" }
130130
;.

0 commit comments

Comments
 (0)