Skip to content

Commit b1cf8a2

Browse files
[𝘀𝗽𝗿] initial version
Created using spr 1.3.7
2 parents 0b939de + 54ac616 commit b1cf8a2

File tree

7 files changed

+117
-46
lines changed

7 files changed

+117
-46
lines changed

llvm/lib/Target/X86/X86.h

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,16 @@ FunctionPass *createX86InsertX87waitPass();
158158
/// This pass optimizes arithmetic based on knowledge that is only used by
159159
/// a reduction sequence and is therefore safe to reassociate in interesting
160160
/// ways.
161-
FunctionPass *createX86PartialReductionPass();
161+
/// Pass-manager (PassInfoMixin) form of the X86 partial reduction pass. The
/// legacy FunctionPass form is created by
/// createX86PartialReductionLegacyPass().
class X86PartialReductionPass : public PassInfoMixin<X86PartialReductionPass> {
162+
private:
163+
// Target machine handed in at construction time.
const TargetMachine *TM;
164+
165+
public:
166+
// NOTE(review): this single-argument constructor is not `explicit`, so a
// `const TargetMachine *` converts implicitly to the pass type - confirm
// that is intended.
X86PartialReductionPass(const TargetMachine *TM) : TM(TM) {}
167+
/// Runs the transform on \p F and returns the set of analyses that are still
/// valid afterwards.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
168+
};
169+
170+
FunctionPass *createX86PartialReductionLegacyPass();
162171

163172
/// Analyzes and emits pseudos to support Win x64 Unwind V2.
164173
FunctionPass *createX86WinEHUnwindV2Pass();
@@ -179,7 +188,18 @@ FunctionPass *createX86LowerAMXTypeLegacyPass();
179188

180189
/// The pass transforms AMX intrinsics to scalar operations if the function has
181190
/// the optnone attribute or is compiled at O0.
182-
FunctionPass *createX86LowerAMXIntrinsicsPass();
191+
/// Pass-manager (PassInfoMixin) form of the AMX intrinsic lowering. The legacy
/// FunctionPass form is created by createX86LowerAMXIntrinsicsLegacyPass().
class X86LowerAMXIntrinsicsPass
192+
: public PassInfoMixin<X86LowerAMXIntrinsicsPass> {
193+
private:
194+
// Target machine handed in at construction time; run() consults its
// optimization level to decide whether the lowering applies.
const TargetMachine *TM;
195+
196+
public:
197+
// NOTE(review): this single-argument constructor is not `explicit` - confirm
// the implicit conversion from `const TargetMachine *` is intended.
X86LowerAMXIntrinsicsPass(const TargetMachine *TM) : TM(TM) {}
198+
/// Runs the lowering on \p F and returns the set of analyses that are still
/// valid afterwards.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
199+
/// Reports the pass as required. Presumably needed because the lowering
/// specifically targets optnone/O0 functions, which the pass manager would
/// otherwise skip for non-required passes - confirm.
static bool isRequired() { return true; }
200+
};
201+
202+
FunctionPass *createX86LowerAMXIntrinsicsLegacyPass();
183203

184204
InstructionSelector *createX86InstructionSelector(const X86TargetMachine &TM,
185205
const X86Subtarget &,
@@ -220,7 +240,7 @@ void initializeX86LowerAMXIntrinsicsLegacyPassPass(PassRegistry &);
220240
void initializeX86LowerAMXTypeLegacyPassPass(PassRegistry &);
221241
void initializeX86LowerTileCopyPass(PassRegistry &);
222242
void initializeX86OptimizeLEAPassPass(PassRegistry &);
223-
void initializeX86PartialReductionPass(PassRegistry &);
243+
void initializeX86PartialReductionLegacyPass(PassRegistry &);
224244
void initializeX86PreTileConfigPass(PassRegistry &);
225245
void initializeX86ReturnThunksPass(PassRegistry &);
226246
void initializeX86SpeculativeExecutionSideEffectSuppressionPass(PassRegistry &);

llvm/lib/Target/X86/X86LowerAMXIntrinsics.cpp

Lines changed: 38 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,15 @@
2323
#include "llvm/CodeGen/Passes.h"
2424
#include "llvm/CodeGen/TargetPassConfig.h"
2525
#include "llvm/CodeGen/ValueTypes.h"
26+
#include "llvm/IR/Analysis.h"
2627
#include "llvm/IR/DataLayout.h"
28+
#include "llvm/IR/Dominators.h"
2729
#include "llvm/IR/Function.h"
2830
#include "llvm/IR/IRBuilder.h"
2931
#include "llvm/IR/Instructions.h"
3032
#include "llvm/IR/IntrinsicInst.h"
3133
#include "llvm/IR/IntrinsicsX86.h"
34+
#include "llvm/IR/PassManager.h"
3235
#include "llvm/IR/PatternMatch.h"
3336
#include "llvm/InitializePasses.h"
3437
#include "llvm/Pass.h"
@@ -40,7 +43,7 @@
4043
using namespace llvm;
4144
using namespace PatternMatch;
4245

43-
#define DEBUG_TYPE "lower-amx-intrinsics"
46+
#define DEBUG_TYPE "x86-lower-amx-intrinsics"
4447

4548
#ifndef NDEBUG
4649
static bool isV256I32Ty(Type *Ty) {
@@ -626,6 +629,37 @@ bool X86LowerAMXIntrinsics::visit() {
626629
return C;
627630
}
628631

632+
namespace {
633+
/// Decides whether the AMX intrinsic lowering applies to \p F.
///
/// The lowering runs only when X86ScalarizeAMX (declared outside this view)
/// is set AND either \p F carries the optnone attribute or \p TM is at
/// CodeGenOptLevel::None.
///
/// NOTE(review): \p TM is dereferenced without a null check whenever
/// X86ScalarizeAMX is set and \p F is not optnone, so callers must pass a
/// non-null target machine in that case.
bool shouldRunLowerAMXIntrinsics(const Function &F, const TargetMachine *TM) {
634+
return X86ScalarizeAMX && (F.hasFnAttribute(Attribute::OptimizeNone) ||
635+
TM->getOptLevel() == CodeGenOptLevel::None);
636+
}
637+
638+
/// Shared implementation used by both the legacy pass and the pass-manager
/// pass: runs X86LowerAMXIntrinsics over \p F.
///
/// \p DT and \p LI may be null; the legacy pass forwards nullptr when the
/// corresponding analysis is not available.
/// \returns the result of X86LowerAMXIntrinsics::visit(), which the callers
/// treat as "the function was changed".
bool runLowerAMXIntrinsics(Function &F, DominatorTree *DT, LoopInfo *LI) {
639+
// Wrap the (possibly null) dominator tree in an updater using the Lazy
// update strategy; the updater lives only for the duration of this call.
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
640+
641+
X86LowerAMXIntrinsics LAT(F, DTU, LI);
642+
return LAT.visit();
643+
}
644+
} // namespace
645+
646+
/// Pass-manager entry point for the AMX intrinsic lowering.
///
/// Returns PreservedAnalyses::all() when the lowering does not apply to \p F
/// or made no change; otherwise everything is invalidated except the
/// dominator tree and loop info.
PreservedAnalyses X86LowerAMXIntrinsicsPass::run(Function &F,
647+
FunctionAnalysisManager &FAM) {
648+
// Same gating as the legacy pass: nothing to do unless scalarization is
// enabled and the function is optnone or compiled at O0.
if (!shouldRunLowerAMXIntrinsics(F, TM))
649+
return PreservedAnalyses::all();
650+
651+
// NOTE(review): getResult requests both analyses unconditionally, whereas
// the legacy pass only uses them if they happen to be available - confirm
// the extra analysis computation is intended.
DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
652+
LoopInfo &LI = FAM.getResult<LoopAnalysis>(F);
653+
bool Changed = runLowerAMXIntrinsics(F, &DT, &LI);
654+
if (!Changed)
655+
return PreservedAnalyses::all();
656+
657+
// The IR changed: start from "nothing preserved" and re-add the two analyses
// that were handed to the lowering. Presumably the lowering keeps them up to
// date through the DomTreeUpdater/LoopInfo it receives - confirm.
PreservedAnalyses PA = PreservedAnalyses::none();
658+
PA.preserve<DominatorTreeAnalysis>();
659+
PA.preserve<LoopAnalysis>();
660+
return PA;
661+
}
662+
629663
namespace {
630664
class X86LowerAMXIntrinsicsLegacyPass : public FunctionPass {
631665
public:
@@ -634,21 +668,15 @@ class X86LowerAMXIntrinsicsLegacyPass : public FunctionPass {
634668
X86LowerAMXIntrinsicsLegacyPass() : FunctionPass(ID) {}
635669

636670
bool runOnFunction(Function &F) override {
637-
if (!X86ScalarizeAMX)
638-
return false;
639671
TargetMachine *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
640-
if (!F.hasFnAttribute(Attribute::OptimizeNone) &&
641-
TM->getOptLevel() != CodeGenOptLevel::None)
672+
if (!shouldRunLowerAMXIntrinsics(F, TM))
642673
return false;
643674

644675
auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
645676
auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
646677
auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
647678
auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
648-
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
649-
650-
X86LowerAMXIntrinsics LAT(F, DTU, LI);
651-
return LAT.visit();
679+
return runLowerAMXIntrinsics(F, DT, LI);
652680
}
653681
StringRef getPassName() const override { return "Lower AMX intrinsics"; }
654682

@@ -668,6 +696,6 @@ INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
668696
INITIALIZE_PASS_END(X86LowerAMXIntrinsicsLegacyPass, DEBUG_TYPE, PassName,
669697
false, false)
670698

671-
FunctionPass *llvm::createX86LowerAMXIntrinsicsPass() {
699+
/// Factory for the legacy FunctionPass form of the AMX intrinsic lowering.
/// Returns a freshly heap-allocated X86LowerAMXIntrinsicsLegacyPass.
FunctionPass *llvm::createX86LowerAMXIntrinsicsLegacyPass() {
672700
return new X86LowerAMXIntrinsicsLegacyPass();
673701
}

llvm/lib/Target/X86/X86PartialReduction.cpp

Lines changed: 47 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,12 @@
1616
#include "X86TargetMachine.h"
1717
#include "llvm/Analysis/ValueTracking.h"
1818
#include "llvm/CodeGen/TargetPassConfig.h"
19+
#include "llvm/IR/Analysis.h"
1920
#include "llvm/IR/Constants.h"
2021
#include "llvm/IR/IRBuilder.h"
2122
#include "llvm/IR/Instructions.h"
2223
#include "llvm/IR/IntrinsicsX86.h"
24+
#include "llvm/IR/PassManager.h"
2325
#include "llvm/IR/PatternMatch.h"
2426
#include "llvm/Pass.h"
2527
#include "llvm/Support/KnownBits.h"
@@ -30,39 +32,44 @@ using namespace llvm;
3032

3133
namespace {
3234

33-
class X86PartialReduction : public FunctionPass {
35+
/// Pass-manager-independent implementation of the partial reduction transform.
/// Both X86PartialReductionLegacy::runOnFunction and
/// X86PartialReductionPass::run construct a temporary instance and call run().
class X86PartialReduction {
36+
// Target machine supplied by the caller; used in run() to look up the
// subtarget for the function being processed.
const X86TargetMachine *TM;
3437
// Data layout of the current function; assigned at the start of run().
const DataLayout *DL = nullptr;
3538
// Subtarget of the current function; assigned at the start of run().
const X86Subtarget *ST = nullptr;
3639

40+
public:
41+
// NOTE(review): single-argument constructor is not `explicit` - confirm the
// implicit conversion from `const X86TargetMachine *` is intended.
X86PartialReduction(const X86TargetMachine *TM) : TM(TM) {}
42+
/// Runs the transform on \p F; the callers treat the result as "the function
/// was changed".
bool run(Function &F);
43+
44+
private:
45+
// Helpers defined later in the file (bodies not visible in this view).
bool tryMAddReplacement(Instruction *Op, bool ReduceInOneBB);
46+
bool trySADReplacement(Instruction *Op);
47+
};
48+
49+
class X86PartialReductionLegacy : public FunctionPass {
3750
public:
3851
static char ID; // Pass identification, replacement for typeid.
3952

40-
X86PartialReduction() : FunctionPass(ID) { }
53+
X86PartialReductionLegacy() : FunctionPass(ID) {}
4154

42-
bool runOnFunction(Function &Fn) override;
55+
bool runOnFunction(Function &F) override;
4356

4457
void getAnalysisUsage(AnalysisUsage &AU) const override {
4558
AU.setPreservesCFG();
4659
}
4760

48-
StringRef getPassName() const override {
49-
return "X86 Partial Reduction";
50-
}
51-
52-
private:
53-
bool tryMAddReplacement(Instruction *Op, bool ReduceInOneBB);
54-
bool trySADReplacement(Instruction *Op);
61+
StringRef getPassName() const override { return "X86 Partial Reduction"; }
5562
};
5663
}
5764

58-
FunctionPass *llvm::createX86PartialReductionPass() {
59-
return new X86PartialReduction();
65+
/// Factory for the legacy FunctionPass form of the partial reduction pass.
/// Returns a freshly heap-allocated X86PartialReductionLegacy.
FunctionPass *llvm::createX86PartialReductionLegacyPass() {
66+
return new X86PartialReductionLegacy();
6067
}
6168

62-
char X86PartialReduction::ID = 0;
69+
char X86PartialReductionLegacy::ID = 0;
6370

64-
INITIALIZE_PASS(X86PartialReduction, DEBUG_TYPE,
65-
"X86 Partial Reduction", false, false)
71+
INITIALIZE_PASS(X86PartialReductionLegacy, DEBUG_TYPE, "X86 Partial Reduction",
72+
false, false)
6673

6774
// This function should be aligned with detectExtMul() in X86ISelLowering.cpp.
6875
static bool matchVPDPBUSDPattern(const X86Subtarget *ST, BinaryOperator *Mul,
@@ -494,17 +501,8 @@ static void collectLeaves(Value *Root, SmallVectorImpl<Instruction *> &Leaves) {
494501
}
495502
}
496503

497-
bool X86PartialReduction::runOnFunction(Function &F) {
498-
if (skipFunction(F))
499-
return false;
500-
501-
auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
502-
if (!TPC)
503-
return false;
504-
505-
auto &TM = TPC->getTM<X86TargetMachine>();
506-
ST = TM.getSubtargetImpl(F);
507-
504+
bool X86PartialReduction::run(Function &F) {
505+
ST = TM->getSubtargetImpl(F);
508506
DL = &F.getDataLayout();
509507

510508
bool MadeChange = false;
@@ -540,3 +538,26 @@ bool X86PartialReduction::runOnFunction(Function &F) {
540538

541539
return MadeChange;
542540
}
541+
542+
/// Legacy pass entry point: performs the legacy-only checks and then delegates
/// to the shared X86PartialReduction implementation.
/// \returns true if the function was changed, false if the pass was skipped or
/// made no change.
bool X86PartialReductionLegacy::runOnFunction(Function &F) {
543+
// Bail out if the legacy pass infrastructure asks to skip this function.
if (skipFunction(F))
544+
return false;
545+
546+
// The target machine is only reachable through TargetPassConfig; without it
// there is no subtarget to query, so do nothing.
auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
547+
if (!TPC)
548+
return false;
549+
550+
return X86PartialReduction(&TPC->getTM<X86TargetMachine>()).run(F);
551+
}
552+
553+
/// Pass-manager entry point: delegates to the shared X86PartialReduction
/// implementation and translates its "changed" result into PreservedAnalyses.
/// \p FAM is not used by this function.
PreservedAnalyses X86PartialReductionPass::run(Function &F,
554+
FunctionAnalysisManager &FAM) {
555+
// NOTE(review): unchecked static_cast - assumes this pass is only ever
// constructed with an X86TargetMachine; confirm against the registration
// site.
bool Changed =
556+
X86PartialReduction(static_cast<const X86TargetMachine *>(TM)).run(F);
557+
if (!Changed)
558+
return PreservedAnalyses::all();
559+
560+
// Counterpart of the legacy pass's AU.setPreservesCFG(): only the CFG
// analyses are kept when the function was changed.
PreservedAnalyses PA = PreservedAnalyses::none();
561+
PA.preserveSet<CFGAnalyses>();
562+
return PA;
563+
}

llvm/lib/Target/X86/X86PassRegistry.def

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,14 @@
1515
#ifndef FUNCTION_PASS
1616
#define FUNCTION_PASS(NAME, CREATE_PASS)
1717
#endif
18+
FUNCTION_PASS("x86-lower-amx-intrinsics", X86LowerAMXIntrinsicsPass(this))
1819
FUNCTION_PASS("x86-lower-amx-type", X86LowerAMXTypePass(this))
20+
FUNCTION_PASS("x86-partial-reduction", X86PartialReductionPass(this))
1921
#undef FUNCTION_PASS
2022

2123
#ifndef DUMMY_FUNCTION_PASS
2224
#define DUMMY_FUNCTION_PASS(NAME, CREATE_PASS)
2325
#endif
24-
DUMMY_FUNCTION_PASS("lower-amx-intrinsics", X86LowerAMXIntrinsics(*this))
25-
DUMMY_FUNCTION_PASS("x86-partial-reduction", X86PartialReduction())
2626
DUMMY_FUNCTION_PASS("x86-winehstate", WinEHStatePass())
2727
#undef DUMMY_FUNCTION_PASS
2828

llvm/lib/Target/X86/X86TargetMachine.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ extern "C" LLVM_C_ABI void LLVMInitializeX86Target() {
9797
initializeX86LoadValueInjectionLoadHardeningPassPass(PR);
9898
initializeX86LoadValueInjectionRetHardeningPassPass(PR);
9999
initializeX86OptimizeLEAPassPass(PR);
100-
initializeX86PartialReductionPass(PR);
100+
initializeX86PartialReductionLegacyPass(PR);
101101
initializePseudoProbeInserterPass(PR);
102102
initializeX86ReturnThunksPass(PR);
103103
initializeX86DAGToDAGISelLegacyPass(PR);
@@ -422,14 +422,14 @@ void X86PassConfig::addIRPasses() {
422422

423423
// We add both passes anyway; when these two passes run, each pass is skipped
424424
// based on the optimization level and the optnone attribute.
425-
addPass(createX86LowerAMXIntrinsicsPass());
425+
addPass(createX86LowerAMXIntrinsicsLegacyPass());
426426
addPass(createX86LowerAMXTypeLegacyPass());
427427

428428
TargetPassConfig::addIRPasses();
429429

430430
if (TM->getOptLevel() != CodeGenOptLevel::None) {
431431
addPass(createInterleavedAccessPass());
432-
addPass(createX86PartialReductionPass());
432+
addPass(createX86PartialReductionLegacyPass());
433433
}
434434

435435
// Add passes that handle indirect branch removal and insertion of a retpoline

llvm/test/CodeGen/X86/AMX/amx-low-intrinsics-no-amx-bitcast.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2-
; RUN: opt -mtriple=x86_64 -lower-amx-intrinsics -enable-x86-scalar-amx=true %s -S | FileCheck %s
2+
; RUN: opt -mtriple=x86_64 -x86-lower-amx-intrinsics -enable-x86-scalar-amx=true %s -S | FileCheck %s
3+
; RUN: opt -mtriple=x86_64 -passes=x86-lower-amx-intrinsics -enable-x86-scalar-amx=true %s -S | FileCheck %s
34

45
define dso_local void @test_no_bitcast(ptr %A_mem, ptr %B_mem, ptr %C_mem) local_unnamed_addr #0 {
56
; CHECK-LABEL: @test_no_bitcast(

llvm/test/CodeGen/X86/AMX/amx-low-intrinsics.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2-
; RUN: opt -mtriple=x86_64 -lower-amx-intrinsics -enable-x86-scalar-amx=true %s -S | FileCheck %s
2+
; RUN: opt -mtriple=x86_64 -x86-lower-amx-intrinsics -enable-x86-scalar-amx=true %s -S | FileCheck %s
3+
; RUN: opt -mtriple=x86_64 -passes=x86-lower-amx-intrinsics -enable-x86-scalar-amx=true %s -S | FileCheck %s
34

45
define dso_local void @test_amx_load_non_O0(i16 signext %row, i16 signext %col, ptr%ptr, i64 %stride, ptr %vptr) {
56
; CHECK-LABEL: @test_amx_load_non_O0(

0 commit comments

Comments
 (0)