Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -562,6 +562,10 @@ class AMDGPURewriteAGPRCopyMFMAPass
void initializeAMDGPURewriteAGPRCopyMFMALegacyPass(PassRegistry &);
extern char &AMDGPURewriteAGPRCopyMFMALegacyID;

// Legacy pass manager entry points for the AMDGPU uniform intrinsic combine
// pass.
// Registers the legacy pass with the given PassRegistry.
void initializeAMDGPUUniformIntrinsicCombineLegacyPass(PassRegistry &);
// Reference to the legacy pass's ID storage.
extern char &AMDGPUUniformIntrinsicCombineLegacyPassID;
// Returns a new legacy ModulePass instance.
ModulePass *createAMDGPUUniformIntrinsicCombineLegacyPass();

struct AMDGPUUniformIntrinsicCombinePass
: public PassInfoMixin<AMDGPUUniformIntrinsicCombinePass> {
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -618,6 +618,7 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeAMDGPUPreloadKernArgPrologLegacyPass(*PR);
initializeAMDGPUWaitSGPRHazardsLegacyPass(*PR);
initializeAMDGPUPreloadKernelArgumentsLegacyPass(*PR);
initializeAMDGPUUniformIntrinsicCombineLegacyPass(*PR);
}

static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
Expand Down Expand Up @@ -1310,6 +1311,9 @@ void AMDGPUPassConfig::addIRPasses() {
isPassEnabled(EnableImageIntrinsicOptimizer))
addPass(createAMDGPUImageIntrinsicOptimizerPass(&TM));

if (EnableUniformIntrinsicCombine)
addPass(createAMDGPUUniformIntrinsicCombineLegacyPass());

// This can be disabled by passing ::Disable here or on the command line
// with --expand-variadics-override=disable.
addPass(createExpandVariadicsPass(ExpandVariadicsMode::Lowering));
Expand Down
67 changes: 67 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUUniformIntrinsicCombine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,62 @@ static bool runUniformIntrinsicCombine(Module &M, ModuleAnalysisManager &AM) {
return IsChanged;
}

// Legacy PM version
//
// Walks every function in \p M and picks out the declarations of
// amdgcn_permlane64, amdgcn_readfirstlane, amdgcn_readlane and amdgcn_ballot.
// For each user of such a declaration, fetches the UniformityInfo of the
// function containing that user through \p P and hands the call to
// optimizeUniformIntrinsic.
//
// \param M Module whose intrinsic uses are visited.
// \param P Legacy pass used to query UniformityInfoWrapperPass per function.
// \returns true if any optimizeUniformIntrinsic call returned true.
static bool runUniformIntrinsicCombine(Module &M, ModulePass &P) {
bool IsChanged = false;
// One tracker instance is shared by every optimizeUniformIntrinsic call made
// for this module. NOTE(review): its exact meaning is defined by
// optimizeUniformIntrinsic, which is not visible here.
ValueMap<const Value *, bool> Tracker;

for (Function &F : M) {
// Only the four intrinsics listed below are of interest; any other function
// (including non-intrinsics) is skipped.
switch (F.getIntrinsicID()) {
case Intrinsic::amdgcn_permlane64:
case Intrinsic::amdgcn_readfirstlane:
case Intrinsic::amdgcn_readlane:
case Intrinsic::amdgcn_ballot:
break;
default:
continue;
}

// Early-increment iteration: presumably optimizeUniformIntrinsic may remove
// or replace the current user while F's use list is being walked -- confirm
// against its definition.
for (User *U : make_early_inc_range(F.users())) {
// cast<> asserts that every user of the intrinsic declaration is an
// IntrinsicInst.
auto *II = cast<IntrinsicInst>(U);
Function *ParentF = II->getFunction();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is still running on optnone functions, another issue with doing this as a module pass

// The analysis is requested per parent function, once for every visited user.
auto &UI = P.getAnalysis<UniformityInfoWrapperPass>(*ParentF)
.getUniformityInfo();
IsChanged |= optimizeUniformIntrinsic(*II, UI, Tracker);
}
}
return IsChanged;
}

namespace {
class AMDGPUUniformIntrinsicCombineLegacy : public ModulePass {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This belongs with the base support PR?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's OK to have it separately. Initially, it was part of the base PR; after reviews, I dropped it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this should have gone there, and the IR test needs both new and old PM run lines

public:
static char ID;
AMDGPUUniformIntrinsicCombineLegacy() : ModulePass(ID) {
initializeAMDGPUUniformIntrinsicCombineLegacyPass(
*PassRegistry::getPassRegistry());
}

private:
bool runOnModule(Module &M) override;
// Declares this pass's analysis contract to the legacy pass manager: it marks
// the CFG as preserved and requires UniformityInfoWrapperPass and
// TargetPassConfig.
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<UniformityInfoWrapperPass>();
AU.addRequired<TargetPassConfig>();
}
};
} // namespace

// Storage for the legacy pass's ID; the pass constructor hands it to
// ModulePass.
char AMDGPUUniformIntrinsicCombineLegacy::ID = 0;
// Exported reference bound to the same ID storage, for code that cannot name
// the pass class (it lives in an anonymous namespace).
char &llvm::AMDGPUUniformIntrinsicCombineLegacyPassID =
AMDGPUUniformIntrinsicCombineLegacy::ID;

// Legacy PM entry point. Reports "no change" when skipModule(M) says the
// module should be skipped; otherwise forwards to the legacy-PM overload of
// runUniformIntrinsicCombine and returns its result.
bool AMDGPUUniformIntrinsicCombineLegacy::runOnModule(Module &M) {
  // Short-circuit keeps the original order: the combine only runs when the
  // module is not skipped.
  return !skipModule(M) && runUniformIntrinsicCombine(M, *this);
}

PreservedAnalyses
AMDGPUUniformIntrinsicCombinePass::run(Module &M, ModuleAnalysisManager &AM) {
if (!runUniformIntrinsicCombine(M, AM))
Expand All @@ -157,3 +213,14 @@ AMDGPUUniformIntrinsicCombinePass::run(Module &M, ModuleAnalysisManager &AM) {
PA.preserve<UniformityInfoAnalysis>();
return PA;
}

// Legacy pass registration: registers AMDGPUUniformIntrinsicCombineLegacy
// under the DEBUG_TYPE argument string with the description
// "AMDGPU Uniform Intrinsic Combine". The two trailing `false` arguments are
// the macro's CFG-only and is-analysis flags.
INITIALIZE_PASS_BEGIN(AMDGPUUniformIntrinsicCombineLegacy, DEBUG_TYPE,
"AMDGPU Uniform Intrinsic Combine", false, false)
// Passes that must be initialized before this one.
INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(AMDGPUUniformIntrinsicCombineLegacy, DEBUG_TYPE,
"AMDGPU Uniform Intrinsic Combine", false, false)

// Factory for the legacy pass manager: allocates a new
// AMDGPUUniformIntrinsicCombineLegacy and returns it as a ModulePass pointer.
ModulePass *llvm::createAMDGPUUniformIntrinsicCombineLegacyPass() {
  ModulePass *LegacyPass = new AMDGPUUniformIntrinsicCombineLegacy();
  return LegacyPass;
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
}
}

Loading
Loading