Skip to content

Commit dc0221e

Browse files
committed
Implement some of the review suggestions.
1 parent 420a19c commit dc0221e

File tree

2 files changed

+14
-16
lines changed

2 files changed

+14
-16
lines changed

llvm/lib/Target/AMDGPU/AMDGPUExpandFeaturePredicates.cpp

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,10 @@
1313
// (AMDGCNSPIRV). These placeholder globals are used to guide target specific
1414
// lowering, once the concrete target is known, by way of constant folding their
1515
// value all the way into a terminator (i.e. a controlled block) or into a no
16-
// live use scenario. The pass makes a best effort attempt to look through
17-
// calls, i.e. a constant evaluatable passthrough of a predicate value will
18-
// generally work, however we hard fail if the folding fails, to avoid obtuse
19-
// BE errors or opaque run time errors. This pass should run as early as
20-
// possible / immediately after Clang CodeGen, so that the optimisation pipeline
21-
// and the BE operate with concrete target data.
16+
// live use scenario. We hard fail if the folding fails, to avoid obtuse BE
17+
// errors or opaque run time errors. This pass should run as early as possible /
18+
// immediately after Clang CodeGen, so that the optimisation pipeline and the BE
19+
// operate with concrete target data.
2220
//===----------------------------------------------------------------------===//
2321

2422
#include "AMDGPU.h"
@@ -50,13 +48,13 @@ template <typename C> void collectUsers(Value *V, C &Container) {
5048
}
5149

5250
inline void setPredicate(const GCNSubtarget &ST, GlobalVariable *P) {
53-
const auto IsFeature = P->getName().starts_with("llvm.amdgcn.has");
54-
const auto Offset =
51+
const bool IsFeature = P->getName().starts_with("llvm.amdgcn.has");
52+
const size_t Offset =
5553
IsFeature ? sizeof("llvm.amdgcn.has") : sizeof("llvm.amdgcn.is");
5654

57-
auto PV = P->getName().substr(Offset).str();
55+
std::string PV = P->getName().substr(Offset).str();
5856
if (IsFeature) {
59-
auto Dx = PV.find(',');
57+
size_t Dx = PV.find(',');
6058
while (Dx != std::string::npos) {
6159
PV.insert(++Dx, {'+'});
6260

@@ -65,7 +63,7 @@ inline void setPredicate(const GCNSubtarget &ST, GlobalVariable *P) {
6563
PV.insert(PV.cbegin(), '+');
6664
}
6765

68-
auto *PTy = P->getValueType();
66+
Type *PTy = P->getValueType();
6967
P->setLinkage(GlobalValue::PrivateLinkage);
7068
P->setExternallyInitialized(false);
7169

@@ -103,15 +101,15 @@ std::pair<PreservedAnalyses, bool> handlePredicate(const GCNSubtarget &ST,
103101
auto *I = *ToFold.begin();
104102
ToFold.erase(I);
105103

104+
I->dropDroppableUses();
105+
106106
if (auto *C = ConstantFoldInstruction(I, P->getDataLayout())) {
107107
collectUsers(I, ToFold);
108108
I->replaceAllUsesWith(C);
109109
I->eraseFromParent();
110110
continue;
111111
} else if (I->isTerminator() && ConstantFoldTerminator(I->getParent())) {
112112
continue;
113-
} else if (I->users().empty()) {
114-
continue;
115113
}
116114

117115
return unfoldableFound(I->getParent()->getParent(), P, I);

llvm/test/CodeGen/AMDGPU/amdgpu-expand-feature-predicates.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %p.coerce, i32 %x) {
121121
; GFX1010-NEXT: br label %[[IF_END6]]
122122
; GFX1010: [[IF_END6]]:
123123
; GFX1010-NEXT: call void @llvm.assume(i1 true)
124-
; GFX1010-NEXT: call void @llvm.assume(i1 false)
124+
; GFX1010-NEXT: call void @llvm.assume(i1 true)
125125
; GFX1010-NEXT: br label %[[FOR_COND]]
126126
; GFX1010: [[FOR_COND]]:
127127
; GFX1010-NEXT: [[DOTPROMOTED:%.*]] = load i32, ptr [[TMP1]], align 4
@@ -167,7 +167,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %p.coerce, i32 %x) {
167167
; GFX1101-NEXT: call void @llvm.amdgcn.s.ttracedata.imm(i16 1)
168168
; GFX1101-NEXT: br label %[[IF_END6]]
169169
; GFX1101: [[IF_END6]]:
170-
; GFX1101-NEXT: call void @llvm.assume(i1 false)
170+
; GFX1101-NEXT: call void @llvm.assume(i1 true)
171171
; GFX1101-NEXT: call void @llvm.assume(i1 true)
172172
; GFX1101-NEXT: br label %[[FOR_COND:.*]]
173173
; GFX1101: [[FOR_COND]]:
@@ -278,7 +278,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %p.coerce, i32 %x) {
278278
; GFX1201-W64-NEXT: call void @llvm.amdgcn.s.ttracedata.imm(i16 1)
279279
; GFX1201-W64-NEXT: br label %[[IF_END11]]
280280
; GFX1201-W64: [[IF_END11]]:
281-
; GFX1201-W64-NEXT: call void @llvm.assume(i1 false)
281+
; GFX1201-W64-NEXT: call void @llvm.assume(i1 true)
282282
; GFX1201-W64-NEXT: [[DOTPROMOTED9:%.*]] = load i32, ptr [[TMP1]], align 4
283283
; GFX1201-W64-NEXT: [[SUB13_PEEL:%.*]] = sub nsw i32 [[DOTPROMOTED9]], [[X]]
284284
; GFX1201-W64-NEXT: store i32 [[SUB13_PEEL]], ptr [[TMP1]], align 4

0 commit comments

Comments
 (0)