Skip to content

Commit 7f61756

Browse files
authored
[AMDGPU][Attributor] Rework update of AAAMDWavesPerEU (llvm#123995) (llvm#3199)
2 parents 7d26f23 + 734c629 commit 7f61756

32 files changed

+368
-279
lines changed

llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp

Lines changed: 94 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1115,47 +1115,25 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
11151115
Function *F = getAssociatedFunction();
11161116
auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
11171117

1118-
auto TakeRange = [&](std::pair<unsigned, unsigned> R) {
1119-
auto [Min, Max] = R;
1120-
ConstantRange Range(APInt(32, Min), APInt(32, Max + 1));
1121-
IntegerRangeState RangeState(Range);
1122-
clampStateAndIndicateChange(this->getState(), RangeState);
1123-
indicateOptimisticFixpoint();
1124-
};
1125-
1126-
std::pair<unsigned, unsigned> MaxWavesPerEURange{
1127-
1U, InfoCache.getMaxWavesPerEU(*F)};
1128-
11291118
// If the attribute exists, we will honor it if it is not the default.
11301119
if (auto Attr = InfoCache.getWavesPerEUAttr(*F)) {
1120+
std::pair<unsigned, unsigned> MaxWavesPerEURange{
1121+
1U, InfoCache.getMaxWavesPerEU(*F)};
11311122
if (*Attr != MaxWavesPerEURange) {
1132-
TakeRange(*Attr);
1123+
auto [Min, Max] = *Attr;
1124+
ConstantRange Range(APInt(32, Min), APInt(32, Max + 1));
1125+
IntegerRangeState RangeState(Range);
1126+
this->getState() = RangeState;
1127+
indicateOptimisticFixpoint();
11331128
return;
11341129
}
11351130
}
11361131

1137-
// Unlike AAAMDFlatWorkGroupSize, it's getting trickier here. Since the
1138-
// calculation of waves per EU involves flat work group size, we can't
1139-
// simply use an assumed flat work group size as a start point, because the
1140-
// update of flat work group size is in an inverse direction of waves per
1141-
// EU. However, we can still do something if it is an entry function. Since
1142-
// an entry function is a terminal node, and flat work group size either
1143-
// from attribute or default will be used anyway, we can take that value and
1144-
// calculate the waves per EU based on it. This result can't be updated by
1145-
// no means, but that could still allow us to propagate it.
1146-
if (AMDGPU::isEntryFunctionCC(F->getCallingConv())) {
1147-
std::pair<unsigned, unsigned> FlatWorkGroupSize;
1148-
if (auto Attr = InfoCache.getFlatWorkGroupSizeAttr(*F))
1149-
FlatWorkGroupSize = *Attr;
1150-
else
1151-
FlatWorkGroupSize = InfoCache.getDefaultFlatWorkGroupSize(*F);
1152-
TakeRange(InfoCache.getEffectiveWavesPerEU(*F, MaxWavesPerEURange,
1153-
FlatWorkGroupSize));
1154-
}
1132+
if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
1133+
indicatePessimisticFixpoint();
11551134
}
11561135

11571136
ChangeStatus updateImpl(Attributor &A) override {
1158-
auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
11591137
ChangeStatus Change = ChangeStatus::UNCHANGED;
11601138

11611139
auto CheckCallSite = [&](AbstractCallSite CS) {
@@ -1164,24 +1142,21 @@ struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
11641142
LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
11651143
<< "->" << Func->getName() << '\n');
11661144

1167-
const auto *CallerInfo = A.getAAFor<AAAMDWavesPerEU>(
1145+
const auto *CallerAA = A.getAAFor<AAAMDWavesPerEU>(
11681146
*this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
1169-
const auto *AssumedGroupSize = A.getAAFor<AAAMDFlatWorkGroupSize>(
1170-
*this, IRPosition::function(*Func), DepClassTy::REQUIRED);
1171-
if (!CallerInfo || !AssumedGroupSize || !CallerInfo->isValidState() ||
1172-
!AssumedGroupSize->isValidState())
1147+
if (!CallerAA || !CallerAA->isValidState())
11731148
return false;
11741149

1175-
unsigned Min, Max;
1176-
std::tie(Min, Max) = InfoCache.getEffectiveWavesPerEU(
1177-
*Caller,
1178-
{CallerInfo->getAssumed().getLower().getZExtValue(),
1179-
CallerInfo->getAssumed().getUpper().getZExtValue() - 1},
1180-
{AssumedGroupSize->getAssumed().getLower().getZExtValue(),
1181-
AssumedGroupSize->getAssumed().getUpper().getZExtValue() - 1});
1182-
ConstantRange CallerRange(APInt(32, Min), APInt(32, Max + 1));
1183-
IntegerRangeState CallerRangeState(CallerRange);
1184-
Change |= clampStateAndIndicateChange(this->getState(), CallerRangeState);
1150+
ConstantRange Assumed = getAssumed();
1151+
unsigned Min = std::max(Assumed.getLower().getZExtValue(),
1152+
CallerAA->getAssumed().getLower().getZExtValue());
1153+
unsigned Max = std::max(Assumed.getUpper().getZExtValue(),
1154+
CallerAA->getAssumed().getUpper().getZExtValue());
1155+
ConstantRange Range(APInt(32, Min), APInt(32, Max));
1156+
IntegerRangeState RangeState(Range);
1157+
getState() = RangeState;
1158+
Change |= getState() == Assumed ? ChangeStatus::UNCHANGED
1159+
: ChangeStatus::CHANGED;
11851160

11861161
return true;
11871162
};
@@ -1320,6 +1295,74 @@ struct AAAMDGPUNoAGPR
13201295

13211296
const char AAAMDGPUNoAGPR::ID = 0;
13221297

1298+
/// Performs the final check and updates the 'amdgpu-waves-per-eu' attribute
1299+
/// based on the finalized 'amdgpu-flat-work-group-size' attribute.
1300+
/// Both attributes start with narrow ranges that expand during iteration.
1301+
/// However, a narrower flat-workgroup-size leads to a wider waves-per-eu range,
1302+
/// preventing optimal updates later. Therefore, waves-per-eu can't be updated
1303+
/// with intermediate values during the attributor run. We defer the
1304+
/// finalization of waves-per-eu until after the flat-workgroup-size is
1305+
/// finalized.
1306+
/// TODO: Remove this and move similar logic back into the attributor run once
1307+
/// we have a better representation for waves-per-eu.
1308+
static bool updateWavesPerEU(Module &M, TargetMachine &TM) {
1309+
bool Changed = false;
1310+
1311+
LLVMContext &Ctx = M.getContext();
1312+
1313+
for (Function &F : M) {
1314+
if (F.isDeclaration())
1315+
continue;
1316+
1317+
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
1318+
1319+
std::optional<std::pair<unsigned, std::optional<unsigned>>>
1320+
FlatWgrpSizeAttr =
1321+
AMDGPU::getIntegerPairAttribute(F, "amdgpu-flat-work-group-size");
1322+
1323+
unsigned MinWavesPerEU = ST.getMinWavesPerEU();
1324+
unsigned MaxWavesPerEU = ST.getMaxWavesPerEU();
1325+
1326+
unsigned MinFlatWgrpSize = ST.getMinFlatWorkGroupSize();
1327+
unsigned MaxFlatWgrpSize = ST.getMaxFlatWorkGroupSize();
1328+
if (FlatWgrpSizeAttr.has_value()) {
1329+
MinFlatWgrpSize = FlatWgrpSizeAttr->first;
1330+
MaxFlatWgrpSize = *(FlatWgrpSizeAttr->second);
1331+
}
1332+
1333+
// Start with the "best" range.
1334+
unsigned Min = MinWavesPerEU;
1335+
unsigned Max = MinWavesPerEU;
1336+
1337+
// Compute the range from flat workgroup size. `getWavesPerEU` will also
1338+
// account for the 'amdgpu-waves-er-eu' attribute.
1339+
auto [MinFromFlatWgrpSize, MaxFromFlatWgrpSize] =
1340+
ST.getWavesPerEU(F, {MinFlatWgrpSize, MaxFlatWgrpSize});
1341+
1342+
// For the lower bound, we have to "tighten" it.
1343+
Min = std::max(Min, MinFromFlatWgrpSize);
1344+
// For the upper bound, we have to "extend" it.
1345+
Max = std::max(Max, MaxFromFlatWgrpSize);
1346+
1347+
// Clamp the range to the max range.
1348+
Min = std::max(Min, MinWavesPerEU);
1349+
Max = std::min(Max, MaxWavesPerEU);
1350+
1351+
// Update the attribute if it is not the max.
1352+
if (Min != MinWavesPerEU || Max != MaxWavesPerEU) {
1353+
SmallString<10> Buffer;
1354+
raw_svector_ostream OS(Buffer);
1355+
OS << Min << ',' << Max;
1356+
Attribute OldAttr = F.getFnAttribute("amdgpu-waves-per-eu");
1357+
Attribute NewAttr = Attribute::get(Ctx, "amdgpu-waves-per-eu", OS.str());
1358+
F.addFnAttr(NewAttr);
1359+
Changed |= OldAttr == NewAttr;
1360+
}
1361+
}
1362+
1363+
return Changed;
1364+
}
1365+
13231366
static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
13241367
AMDGPUAttributorOptions Options) {
13251368
SetVector<Function *> Functions;
@@ -1394,8 +1437,11 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
13941437
}
13951438
}
13961439

1397-
ChangeStatus Change = A.run();
1398-
return Change == ChangeStatus::CHANGED;
1440+
bool Changed = A.run() == ChangeStatus::CHANGED;
1441+
1442+
Changed |= updateWavesPerEU(M, TM);
1443+
1444+
return Changed;
13991445
}
14001446

14011447
class AMDGPUAttributorLegacy : public ModulePass {

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,15 @@ AMDGPUSubtarget::getWavesPerEU(const Function &F) const {
216216
return getWavesPerEU(FlatWorkGroupSizes, LDSBytes, F);
217217
}
218218

219+
std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
220+
const Function &F, std::pair<unsigned, unsigned> FlatWorkGroupSizes) const {
221+
// Minimum number of bytes allocated in the LDS.
222+
unsigned LDSBytes = AMDGPU::getIntegerPairAttribute(F, "amdgpu-lds-size",
223+
{0, UINT32_MAX}, true)
224+
.first;
225+
return getWavesPerEU(FlatWorkGroupSizes, LDSBytes, F);
226+
}
227+
219228
std::pair<unsigned, unsigned>
220229
AMDGPUSubtarget::getWavesPerEU(std::pair<unsigned, unsigned> FlatWorkGroupSizes,
221230
unsigned LDSBytes, const Function &F) const {

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,13 @@ class AMDGPUSubtarget {
108108
/// size, register usage, and/or lds usage.
109109
std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;
110110

111+
/// Overload which uses the specified values for the flat work group sizes,
112+
/// rather than querying the function itself. \p FlatWorkGroupSizes should
113+
/// correspond to the function's value for getFlatWorkGroupSizes.
114+
std::pair<unsigned, unsigned>
115+
getWavesPerEU(const Function &F,
116+
std::pair<unsigned, unsigned> FlatWorkGroupSizes) const;
117+
111118
/// Overload which uses the specified values for the flat workgroup sizes and
112119
/// LDS space rather than querying the function itself. \p FlatWorkGroupSizes
113120
/// should correspond to the function's value for getFlatWorkGroupSizes and \p

llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -233,8 +233,8 @@ attributes #1 = { nounwind }
233233
; AKF_HSA: attributes #[[ATTR1]] = { nounwind }
234234
;.
235235
; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
236-
; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
237-
; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
236+
; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
237+
; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
238238
;.
239239
; AKF_HSA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
240240
;.

llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ declare void @unknown()
105105

106106
define amdgpu_kernel void @kernel_calls_extern() {
107107
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern(
108-
; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
108+
; CHECK-SAME: ) #[[ATTR3:[0-9]+]] {
109109
; CHECK-NEXT: call void @unknown()
110110
; CHECK-NEXT: ret void
111111
;
@@ -115,8 +115,8 @@ define amdgpu_kernel void @kernel_calls_extern() {
115115

116116
define amdgpu_kernel void @kernel_calls_extern_marked_callsite() {
117117
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern_marked_callsite(
118-
; CHECK-SAME: ) #[[ATTR2]] {
119-
; CHECK-NEXT: call void @unknown() #[[ATTR6:[0-9]+]]
118+
; CHECK-SAME: ) #[[ATTR3]] {
119+
; CHECK-NEXT: call void @unknown() #[[ATTR7:[0-9]+]]
120120
; CHECK-NEXT: ret void
121121
;
122122
call void @unknown() #0
@@ -125,7 +125,7 @@ define amdgpu_kernel void @kernel_calls_extern_marked_callsite() {
125125

126126
define amdgpu_kernel void @kernel_calls_indirect(ptr %indirect) {
127127
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect(
128-
; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR2]] {
128+
; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR3]] {
129129
; CHECK-NEXT: call void [[INDIRECT]]()
130130
; CHECK-NEXT: ret void
131131
;
@@ -135,8 +135,8 @@ define amdgpu_kernel void @kernel_calls_indirect(ptr %indirect) {
135135

136136
define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(ptr %indirect) {
137137
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(
138-
; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR2]] {
139-
; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR6]]
138+
; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR3]] {
139+
; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR7]]
140140
; CHECK-NEXT: ret void
141141
;
142142
call void %indirect() #0
@@ -242,11 +242,12 @@ define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) {
242242

243243
attributes #0 = { "amdgpu-no-agpr" }
244244
;.
245-
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
246-
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
247-
; CHECK: attributes #[[ATTR2]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
248-
; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
249-
; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
250-
; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
251-
; CHECK: attributes #[[ATTR6]] = { "amdgpu-no-agpr" }
245+
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
246+
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
247+
; CHECK: attributes #[[ATTR2:[0-9]+]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
248+
; CHECK: attributes #[[ATTR3]] = { "amdgpu-waves-per-eu"="4,8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
249+
; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
250+
; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
251+
; CHECK: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
252+
; CHECK: attributes #[[ATTR7]] = { "amdgpu-no-agpr" }
252253
;.

llvm/test/CodeGen/AMDGPU/annotate-existing-abi-attributes.ll

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -117,14 +117,14 @@ define void @call_no_dispatch_id() {
117117
ret void
118118
}
119119
;.
120-
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
121-
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" }
122-
; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
123-
; CHECK: attributes #[[ATTR3]] = { "amdgpu-no-workgroup-id-x" "uniform-work-group-size"="false" }
124-
; CHECK: attributes #[[ATTR4]] = { "amdgpu-no-workgroup-id-y" "uniform-work-group-size"="false" }
125-
; CHECK: attributes #[[ATTR5]] = { "amdgpu-no-workgroup-id-z" "uniform-work-group-size"="false" }
126-
; CHECK: attributes #[[ATTR6]] = { "amdgpu-no-dispatch-ptr" "uniform-work-group-size"="false" }
127-
; CHECK: attributes #[[ATTR7]] = { "amdgpu-no-queue-ptr" "uniform-work-group-size"="false" }
128-
; CHECK: attributes #[[ATTR8]] = { "amdgpu-no-implicitarg-ptr" "uniform-work-group-size"="false" }
129-
; CHECK: attributes #[[ATTR9]] = { "amdgpu-no-dispatch-id" "uniform-work-group-size"="false" }
120+
; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-workitem-id-x" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
121+
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-workitem-id-y" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
122+
; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
123+
; CHECK: attributes #[[ATTR3]] = { "amdgpu-no-workgroup-id-x" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
124+
; CHECK: attributes #[[ATTR4]] = { "amdgpu-no-workgroup-id-y" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
125+
; CHECK: attributes #[[ATTR5]] = { "amdgpu-no-workgroup-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
126+
; CHECK: attributes #[[ATTR6]] = { "amdgpu-no-dispatch-ptr" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
127+
; CHECK: attributes #[[ATTR7]] = { "amdgpu-no-queue-ptr" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
128+
; CHECK: attributes #[[ATTR8]] = { "amdgpu-no-implicitarg-ptr" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
129+
; CHECK: attributes #[[ATTR9]] = { "amdgpu-no-dispatch-id" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
130130
;.

0 commit comments

Comments
 (0)