-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[X86][NewPM] Port lower-amx-intrinsics to NewPM #165113
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[X86][NewPM] Port lower-amx-intrinsics to NewPM #165113
Conversation
Created using spr 1.3.7 [skip ci]
Created using spr 1.3.7
|
@llvm/pr-subscribers-backend-x86 Author: Aiden Grossman (boomanaiden154) ChangesFull diff: https://github.com/llvm/llvm-project/pull/165113.diff 6 Files Affected:
diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h
index 51b540a7a51d0..bdb43cfb4adb4 100644
--- a/llvm/lib/Target/X86/X86.h
+++ b/llvm/lib/Target/X86/X86.h
@@ -179,7 +179,18 @@ FunctionPass *createX86LowerAMXTypeLegacyPass();
/// The pass transforms amx intrinsics to scalar operation if the function has
/// optnone attribute or it is O0.
-FunctionPass *createX86LowerAMXIntrinsicsPass();
+class X86LowerAMXIntrinsicsPass
+ : public PassInfoMixin<X86LowerAMXIntrinsicsPass> {
+private:
+ const TargetMachine *TM;
+
+public:
+ X86LowerAMXIntrinsicsPass(const TargetMachine *TM) : TM(TM) {}
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+ static bool isRequired() { return true; }
+};
+
+FunctionPass *createX86LowerAMXIntrinsicsLegacyPass();
InstructionSelector *createX86InstructionSelector(const X86TargetMachine &TM,
const X86Subtarget &,
diff --git a/llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp b/llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp
index 7f3393910da2c..38165736fde9a 100644
--- a/llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp
+++ b/llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp
@@ -23,12 +23,15 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Analysis.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsX86.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
@@ -40,7 +43,7 @@
using namespace llvm;
using namespace PatternMatch;
-#define DEBUG_TYPE "lower-amx-intrinsics"
+#define DEBUG_TYPE "x86-lower-amx-intrinsics"
#ifndef NDEBUG
static bool isV256I32Ty(Type *Ty) {
@@ -626,6 +629,41 @@ bool X86LowerAMXIntrinsics::visit() {
return C;
}
+namespace {
+bool shouldRunLowerAMXIntrinsics(Function &F, const TargetMachine *TM) {
+ if (!X86ScalarizeAMX)
+ return false;
+ if (!F.hasFnAttribute(Attribute::OptimizeNone) &&
+ TM->getOptLevel() != CodeGenOptLevel::None)
+ return false;
+ return true;
+}
+
+bool runLowerAMXIntrinsics(Function &F, DominatorTree *DT, LoopInfo *LI) {
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
+
+ X86LowerAMXIntrinsics LAT(F, DTU, LI);
+ return LAT.visit();
+}
+} // namespace
+
+PreservedAnalyses X86LowerAMXIntrinsicsPass::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ if (!shouldRunLowerAMXIntrinsics(F, TM))
+ return PreservedAnalyses::all();
+
+ DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
+ LoopInfo &LI = FAM.getResult<LoopAnalysis>(F);
+ bool Changed = runLowerAMXIntrinsics(F, &DT, &LI);
+ if (!Changed)
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA = PreservedAnalyses::none();
+ PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<LoopAnalysis>();
+ return PA;
+}
+
namespace {
class X86LowerAMXIntrinsicsLegacyPass : public FunctionPass {
public:
@@ -634,21 +672,15 @@ class X86LowerAMXIntrinsicsLegacyPass : public FunctionPass {
X86LowerAMXIntrinsicsLegacyPass() : FunctionPass(ID) {}
bool runOnFunction(Function &F) override {
- if (!X86ScalarizeAMX)
- return false;
TargetMachine *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
- if (!F.hasFnAttribute(Attribute::OptimizeNone) &&
- TM->getOptLevel() != CodeGenOptLevel::None)
+ if (!shouldRunLowerAMXIntrinsics(F, TM))
return false;
auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
-
- X86LowerAMXIntrinsics LAT(F, DTU, LI);
- return LAT.visit();
+ return runLowerAMXIntrinsics(F, DT, LI);
}
StringRef getPassName() const override { return "Lower AMX intrinsics"; }
@@ -668,6 +700,6 @@ INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(X86LowerAMXIntrinsicsLegacyPass, DEBUG_TYPE, PassName,
false, false)
-FunctionPass *llvm::createX86LowerAMXIntrinsicsPass() {
+FunctionPass *llvm::createX86LowerAMXIntrinsicsLegacyPass() {
return new X86LowerAMXIntrinsicsLegacyPass();
}
diff --git a/llvm/lib/Target/X86/X86PassRegistry.def b/llvm/lib/Target/X86/X86PassRegistry.def
index fc25d55d3059a..81c98febc4ba8 100644
--- a/llvm/lib/Target/X86/X86PassRegistry.def
+++ b/llvm/lib/Target/X86/X86PassRegistry.def
@@ -15,13 +15,13 @@
#ifndef FUNCTION_PASS
#define FUNCTION_PASS(NAME, CREATE_PASS)
#endif
+FUNCTION_PASS("x86-lower-amx-intrinsics", X86LowerAMXIntrinsicsPass(this))
FUNCTION_PASS("x86-lower-amx-type", X86LowerAMXTypePass(this))
#undef FUNCTION_PASS
#ifndef DUMMY_FUNCTION_PASS
#define DUMMY_FUNCTION_PASS(NAME, CREATE_PASS)
#endif
-DUMMY_FUNCTION_PASS("lower-amx-intrinsics", X86LowerAMXIntrinsics(*this))
DUMMY_FUNCTION_PASS("x86-partial-reduction", X86PartialReduction())
DUMMY_FUNCTION_PASS("x86-winehstate", WinEHStatePass())
#undef DUMMY_FUNCTION_PASS
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index 9a76abcd351bf..bf4dab0371b88 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -422,7 +422,7 @@ void X86PassConfig::addIRPasses() {
// We add both pass anyway and when these two passes run, we skip the pass
// based on the option level and option attribute.
- addPass(createX86LowerAMXIntrinsicsPass());
+ addPass(createX86LowerAMXIntrinsicsLegacyPass());
addPass(createX86LowerAMXTypeLegacyPass());
TargetPassConfig::addIRPasses();
diff --git a/llvm/test/CodeGen/X86/AMX/amx-low-intrinsics-no-amx-bitcast.ll b/llvm/test/CodeGen/X86/AMX/amx-low-intrinsics-no-amx-bitcast.ll
index 87059c5d474e6..6ae7b2260c15c 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-low-intrinsics-no-amx-bitcast.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-low-intrinsics-no-amx-bitcast.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -mtriple=x86_64 -lower-amx-intrinsics -enable-x86-scalar-amx=true %s -S | FileCheck %s
+; RUN: opt -mtriple=x86_64 -x86-lower-amx-intrinsics -enable-x86-scalar-amx=true %s -S | FileCheck %s
+; RUN: opt -mtriple=x86_64 -passes=x86-lower-amx-intrinsics -enable-x86-scalar-amx=true %s -S | FileCheck %s
define dso_local void @test_no_bitcast(ptr %A_mem, ptr %B_mem, ptr %C_mem) local_unnamed_addr #0 {
; CHECK-LABEL: @test_no_bitcast(
diff --git a/llvm/test/CodeGen/X86/AMX/amx-low-intrinsics.ll b/llvm/test/CodeGen/X86/AMX/amx-low-intrinsics.ll
index 5fb2dcdc1d621..ca7c3573a3294 100644
--- a/llvm/test/CodeGen/X86/AMX/amx-low-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/AMX/amx-low-intrinsics.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -mtriple=x86_64 -lower-amx-intrinsics -enable-x86-scalar-amx=true %s -S | FileCheck %s
+; RUN: opt -mtriple=x86_64 -x86-lower-amx-intrinsics -enable-x86-scalar-amx=true %s -S | FileCheck %s
+; RUN: opt -mtriple=x86_64 -passes=x86-lower-amx-intrinsics -enable-x86-scalar-amx=true %s -S | FileCheck %s
define dso_local void @test_amx_load_non_O0(i16 signext %row, i16 signext %col, ptr%ptr, i64 %stride, ptr %vptr) {
; CHECK-LABEL: @test_amx_load_non_O0(
|
Created using spr 1.3.7 [skip ci]
|
@arsenm Bump on this review when you get a chance. |
| public: | ||
| X86LowerAMXIntrinsicsPass(const TargetMachine *TM) : TM(TM) {} | ||
| PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); | ||
| static bool isRequired() { return true; } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't really understand the circumstances when this pass is supposed to run. It took all possible strategies for controlling a pass and used all of them. This "only run for different flavors of -O0, except not by default" seems very unsound
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is for -O0 or none O0 but has optnone attributes. It solves the redundant stack load/stores which eliminated in other cases.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure we should be using a backend pass enabled at O0 to remove redundant stack load/stores if those are supposed to be cleaned up in the middle end.
Either way, this patch just intends to port the pass, not fix any latent issues or clean up any latent tech debt of this sort.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sounds like a solution to a problem that shouldn't be solved?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes. The comment at the top of the file:
/// To decouple the dependency of the shape, we transform amx intrinsics
/// to scalar operation, so that compiling doesn't fail. In long term, we
/// should improve fast register allocation to allocate amx register.
makes it seem like this might require some effort though.
Created using spr 1.3.7 [skip ci]
Created using spr 1.3.7
|
@arsenm Is this good to go or do you want more thought put into when this is supposed to run (which looks like it might require a lot of work on AMX lowering that I would prefer not to do)? |
| public: | ||
| X86LowerAMXIntrinsicsPass(const TargetMachine *TM) : TM(TM) {} | ||
| PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); | ||
| static bool isRequired() { return true; } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sounds like a solution to a problem that shouldn't be solved?
Reviewers: paperchalice, phoebewang, arsenm Reviewed By: arsenm Pull Request: llvm/llvm-project#165113
No description provided.