Skip to content

Commit 7a8c0e0

Browse files
Added legacy (old) pass manager support and added the pass to the llc pipeline
1 parent b6aba3e commit 7a8c0e0

File tree

4 files changed

+100
-0
lines changed

4 files changed

+100
-0
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -562,6 +562,10 @@ class AMDGPURewriteAGPRCopyMFMAPass
562562
void initializeAMDGPURewriteAGPRCopyMFMALegacyPass(PassRegistry &);
563563
extern char &AMDGPURewriteAGPRCopyMFMALegacyID;
564564

565+
// Registers the legacy-PM uniform intrinsic combine pass with the given
// registry.
void initializeAMDGPUUniformIntrinsicCombineLegacyPass(PassRegistry &);
566+
// Reference to the pass ID of the legacy-PM uniform intrinsic combine pass.
extern char &AMDGPUUniformIntrinsicCombineLegacyPassID;
567+
// Factory returning a newly allocated legacy-PM uniform intrinsic combine
// module pass.
ModulePass *createAMDGPUUniformIntrinsicCombineLegacyPass();
568+
565569
struct AMDGPUUniformIntrinsicCombinePass
566570
: public PassInfoMixin<AMDGPUUniformIntrinsicCombinePass> {
567571
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -618,6 +618,7 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
618618
initializeAMDGPUPreloadKernArgPrologLegacyPass(*PR);
619619
initializeAMDGPUWaitSGPRHazardsLegacyPass(*PR);
620620
initializeAMDGPUPreloadKernelArgumentsLegacyPass(*PR);
621+
initializeAMDGPUUniformIntrinsicCombineLegacyPass(*PR);
621622
}
622623

623624
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -1310,6 +1311,9 @@ void AMDGPUPassConfig::addIRPasses() {
13101311
isPassEnabled(EnableImageIntrinsicOptimizer))
13111312
addPass(createAMDGPUImageIntrinsicOptimizerPass(&TM));
13121313

1314+
if (EnableUniformIntrinsicCombine)
1315+
addPass(createAMDGPUUniformIntrinsicCombineLegacyPass());
1316+
13131317
// This can be disabled by passing ::Disable here or on the command line
13141318
// with --expand-variadics-override=disable.
13151319
addPass(createExpandVariadicsPass(ExpandVariadicsMode::Lowering));

llvm/lib/Target/AMDGPU/AMDGPUUniformIntrinsicCombine.cpp

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,62 @@ static bool runUniformIntrinsicCombine(Module &M, ModuleAnalysisManager &AM) {
148148
return IsChanged;
149149
}
150150

151+
// Legacy PM version
152+
/// Legacy pass manager driver for the uniform intrinsic combine.
///
/// Scans the functions in \p M for the declarations of amdgcn.permlane64,
/// amdgcn.readfirstlane, amdgcn.readlane and amdgcn.ballot, and hands every
/// user of those declarations to optimizeUniformIntrinsic together with the
/// uniformity info of the function containing that user.
///
/// \param M Module whose intrinsic calls are examined.
/// \param P Pass used to query UniformityInfoWrapperPass per function.
/// \returns true if any call to optimizeUniformIntrinsic reported a change.
static bool runUniformIntrinsicCombine(Module &M, ModulePass &P) {
  ValueMap<const Value *, bool> Tracker;
  bool Modified = false;

  for (Function &Decl : M) {
    // Only the declarations of the handled intrinsics are of interest.
    const auto IID = Decl.getIntrinsicID();
    const bool IsCandidate = IID == Intrinsic::amdgcn_permlane64 ||
                             IID == Intrinsic::amdgcn_readfirstlane ||
                             IID == Intrinsic::amdgcn_readlane ||
                             IID == Intrinsic::amdgcn_ballot;
    if (!IsCandidate)
      continue;

    // Early-increment iteration: the loop advances before the body runs, so
    // the current user may be modified while it is being processed.
    for (User *Usr : make_early_inc_range(Decl.users())) {
      auto *Call = cast<IntrinsicInst>(Usr);
      Function &Caller = *Call->getFunction();
      // Uniformity info is requested for the function that contains the call,
      // once per call site.
      auto &UI =
          P.getAnalysis<UniformityInfoWrapperPass>(Caller).getUniformityInfo();
      if (optimizeUniformIntrinsic(*Call, UI, Tracker))
        Modified = true;
    }
  }

  return Modified;
}
177+
178+
namespace {
179+
class AMDGPUUniformIntrinsicCombineLegacy : public ModulePass {
180+
public:
181+
static char ID;
182+
AMDGPUUniformIntrinsicCombineLegacy() : ModulePass(ID) {
183+
initializeAMDGPUUniformIntrinsicCombineLegacyPass(
184+
*PassRegistry::getPassRegistry());
185+
}
186+
187+
private:
188+
bool runOnModule(Module &M) override;
189+
void getAnalysisUsage(AnalysisUsage &AU) const override {
190+
AU.setPreservesCFG();
191+
AU.addRequired<UniformityInfoWrapperPass>();
192+
AU.addRequired<TargetPassConfig>();
193+
}
194+
};
195+
} // namespace
196+
197+
// Storage for the pass ID that the AMDGPUUniformIntrinsicCombineLegacy
// constructor passes to ModulePass, followed by the llvm-namespace reference
// that aliases it for use outside this file.
char AMDGPUUniformIntrinsicCombineLegacy::ID = 0;
198+
char &llvm::AMDGPUUniformIntrinsicCombineLegacyPassID =
199+
AMDGPUUniformIntrinsicCombineLegacy::ID;
200+
201+
/// Runs the combine on \p M unless skipModule(M) asks for the module to be
/// skipped, in which case nothing is done and false is returned.
///
/// \returns the change flag reported by runUniformIntrinsicCombine.
bool AMDGPUUniformIntrinsicCombineLegacy::runOnModule(Module &M) {
  // Short-circuit evaluation keeps the combine from running on skipped
  // modules.
  return !skipModule(M) && runUniformIntrinsicCombine(M, *this);
}
206+
151207
PreservedAnalyses
152208
AMDGPUUniformIntrinsicCombinePass::run(Module &M, ModuleAnalysisManager &AM) {
153209
if (!runUniformIntrinsicCombine(M, AM))
@@ -157,3 +213,14 @@ AMDGPUUniformIntrinsicCombinePass::run(Module &M, ModuleAnalysisManager &AM) {
157213
PA.preserve<UniformityInfoAnalysis>();
158214
return PA;
159215
}
216+
217+
// Pass registration for AMDGPUUniformIntrinsicCombineLegacy: registered under
// DEBUG_TYPE with the description "AMDGPU Uniform Intrinsic Combine", with
// UniformityInfoWrapperPass and TargetPassConfig listed as dependencies.
INITIALIZE_PASS_BEGIN(AMDGPUUniformIntrinsicCombineLegacy, DEBUG_TYPE,
218+
"AMDGPU Uniform Intrinsic Combine", false, false)
219+
INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
220+
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
221+
INITIALIZE_PASS_END(AMDGPUUniformIntrinsicCombineLegacy, DEBUG_TYPE,
222+
"AMDGPU Uniform Intrinsic Combine", false, false)
223+
224+
/// Factory for the legacy pass manager version of the uniform intrinsic
/// combine.
///
/// \returns a newly allocated AMDGPUUniformIntrinsicCombineLegacy instance.
ModulePass *llvm::createAMDGPUUniformIntrinsicCombineLegacyPass() {
  ModulePass *Pass = new AMDGPUUniformIntrinsicCombineLegacy();
  return Pass;
}

llvm/test/CodeGen/AMDGPU/llc-pipeline.ll

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,11 @@
3131
; GCN-O0-NEXT: AMDGPU Remove Incompatible Functions
3232
; GCN-O0-NEXT: AMDGPU Printf lowering
3333
; GCN-O0-NEXT: Lower ctors and dtors for AMDGPU
34+
; GCN-O0-NEXT: AMDGPU Uniform Intrinsic Combine
35+
; GCN-O0-NEXT: FunctionPass Manager
36+
; GCN-O0-NEXT: Dominator Tree Construction
37+
; GCN-O0-NEXT: Cycle Info Analysis
38+
; GCN-O0-NEXT: Uniformity Analysis
3439
; GCN-O0-NEXT: Expand variadic functions
3540
; GCN-O0-NEXT: AMDGPU Inline All Functions
3641
; GCN-O0-NEXT: Inliner for always_inline functions
@@ -179,6 +184,11 @@
179184
; GCN-O1-NEXT: AMDGPU Remove Incompatible Functions
180185
; GCN-O1-NEXT: AMDGPU Printf lowering
181186
; GCN-O1-NEXT: Lower ctors and dtors for AMDGPU
187+
; GCN-O1-NEXT: AMDGPU Uniform Intrinsic Combine
188+
; GCN-O1-NEXT: FunctionPass Manager
189+
; GCN-O1-NEXT: Dominator Tree Construction
190+
; GCN-O1-NEXT: Cycle Info Analysis
191+
; GCN-O1-NEXT: Uniformity Analysis
182192
; GCN-O1-NEXT: Expand variadic functions
183193
; GCN-O1-NEXT: AMDGPU Inline All Functions
184194
; GCN-O1-NEXT: Inliner for always_inline functions
@@ -466,6 +476,11 @@
466476
; GCN-O1-OPTS-NEXT: AMDGPU Remove Incompatible Functions
467477
; GCN-O1-OPTS-NEXT: AMDGPU Printf lowering
468478
; GCN-O1-OPTS-NEXT: Lower ctors and dtors for AMDGPU
479+
; GCN-O1-OPTS-NEXT: AMDGPU Uniform Intrinsic Combine
480+
; GCN-O1-OPTS-NEXT: FunctionPass Manager
481+
; GCN-O1-OPTS-NEXT: Dominator Tree Construction
482+
; GCN-O1-OPTS-NEXT: Cycle Info Analysis
483+
; GCN-O1-OPTS-NEXT: Uniformity Analysis
469484
; GCN-O1-OPTS-NEXT: Expand variadic functions
470485
; GCN-O1-OPTS-NEXT: AMDGPU Inline All Functions
471486
; GCN-O1-OPTS-NEXT: Inliner for always_inline functions
@@ -783,6 +798,11 @@
783798
; GCN-O2-NEXT: Lower ctors and dtors for AMDGPU
784799
; GCN-O2-NEXT: FunctionPass Manager
785800
; GCN-O2-NEXT: AMDGPU Image Intrinsic Optimizer
801+
; GCN-O2-NEXT: AMDGPU Uniform Intrinsic Combine
802+
; GCN-O2-NEXT: FunctionPass Manager
803+
; GCN-O2-NEXT: Dominator Tree Construction
804+
; GCN-O2-NEXT: Cycle Info Analysis
805+
; GCN-O2-NEXT: Uniformity Analysis
786806
; GCN-O2-NEXT: Expand variadic functions
787807
; GCN-O2-NEXT: AMDGPU Inline All Functions
788808
; GCN-O2-NEXT: Inliner for always_inline functions
@@ -1104,6 +1124,11 @@
11041124
; GCN-O3-NEXT: Lower ctors and dtors for AMDGPU
11051125
; GCN-O3-NEXT: FunctionPass Manager
11061126
; GCN-O3-NEXT: AMDGPU Image Intrinsic Optimizer
1127+
; GCN-O3-NEXT: AMDGPU Uniform Intrinsic Combine
1128+
; GCN-O3-NEXT: FunctionPass Manager
1129+
; GCN-O3-NEXT: Dominator Tree Construction
1130+
; GCN-O3-NEXT: Cycle Info Analysis
1131+
; GCN-O3-NEXT: Uniformity Analysis
11071132
; GCN-O3-NEXT: Expand variadic functions
11081133
; GCN-O3-NEXT: AMDGPU Inline All Functions
11091134
; GCN-O3-NEXT: Inliner for always_inline functions

0 commit comments

Comments
 (0)