 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Analysis.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/IntrinsicsX86.h"
+#include "llvm/IR/PassManager.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
@@ -64,7 +66,7 @@
 using namespace llvm;
 using namespace PatternMatch;
 
-#define DEBUG_TYPE "lower-amx-type"
+#define DEBUG_TYPE "x86-lower-amx-type"
 
 static bool isAMXCast(Instruction *II) {
   return match(II,
@@ -137,15 +139,15 @@ static Instruction *getFirstNonAllocaInTheEntryBlock(Function &F) {
 
 class ShapeCalculator {
 private:
-  TargetMachine *TM = nullptr;
+  const TargetMachine *TM = nullptr;
 
   // In AMX intrinsics we let Shape = {Row, Col}, but the
   // RealCol = Col / ElementSize. We may use the RealCol
   // as a new Row for other new created AMX intrinsics.
   std::map<Value *, Value *> Col2Row, Row2Col;
 
 public:
-  ShapeCalculator(TargetMachine *TargetM) : TM(TargetM) {}
+  ShapeCalculator(const TargetMachine *TargetM) : TM(TargetM) {}
   std::pair<Value *, Value *> getShape(IntrinsicInst *II, unsigned OpNo);
   std::pair<Value *, Value *> getShape(PHINode *Phi);
   Value *getRowFromCol(Instruction *II, Value *V, unsigned Granularity);
@@ -1432,8 +1434,58 @@ bool X86LowerAMXCast::transformAllAMXCast() {
   return Change;
 }
 
+bool lowerAmxType(Function &F, const TargetMachine *TM,
+                  TargetLibraryInfo *TLI) {
+  // Performance optimization: most code doesn't use AMX, so return early if
+  // there are no instructions that produce AMX values. This is sufficient, as
+  // AMX arguments and constants are not allowed -- so any producer of an AMX
+  // value must be an instruction.
+  // TODO: find a cheaper way for this, without looking at all instructions.
+  if (!containsAMXCode(F))
+    return false;
+
+  bool C = false;
+  ShapeCalculator SC(TM);
+  X86LowerAMXCast LAC(F, &SC);
+  C |= LAC.combineAMXcast(TLI);
+  // There might be remaining AMXcast after combineAMXcast and they should be
+  // handled elegantly.
+  C |= LAC.transformAllAMXCast();
+
+  X86LowerAMXType LAT(F, &SC);
+  C |= LAT.visit();
+
+  // Prepare for fast register allocation at O0.
+  // Todo: May better check the volatile model of AMX code, not just
+  // by checking Attribute::OptimizeNone and CodeGenOptLevel::None.
+  if (TM->getOptLevel() == CodeGenOptLevel::None) {
+    // If Front End not use O0 but the Mid/Back end use O0, (e.g.
+    // "Clang -O2 -S -emit-llvm t.c" + "llc t.ll") we should make
+    // sure the amx data is volatile, that is necessary for AMX fast
+    // register allocation.
+    if (!F.hasFnAttribute(Attribute::OptimizeNone)) {
+      X86VolatileTileData VTD(F);
+      C = VTD.volatileTileData() || C;
+    }
+  }
+
+  return C;
+}
+
 } // anonymous namespace
 
+PreservedAnalyses X86LowerAMXTypePass::run(Function &F,
+                                           FunctionAnalysisManager &FAM) {
+  TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
+  bool Changed = lowerAmxType(F, TM, &TLI);
+  if (!Changed)
+    return PreservedAnalyses::all();
+
+  PreservedAnalyses PA = PreservedAnalyses::none();
+  PA.preserveSet<CFGAnalyses>();
+  return PA;
+}
+
 namespace {
 
 class X86LowerAMXTypeLegacyPass : public FunctionPass {
@@ -1443,44 +1495,10 @@ class X86LowerAMXTypeLegacyPass : public FunctionPass {
   X86LowerAMXTypeLegacyPass() : FunctionPass(ID) {}
 
   bool runOnFunction(Function &F) override {
-    // Performance optimization: most code doesn't use AMX, so return early if
-    // there are no instructions that produce AMX values. This is sufficient, as
-    // AMX arguments and constants are not allowed -- so any producer of an AMX
-    // value must be an instruction.
-    // TODO: find a cheaper way for this, without looking at all instructions.
-    if (!containsAMXCode(F))
-      return false;
-
-    bool C = false;
     TargetMachine *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
     TargetLibraryInfo *TLI =
         &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
-
-    ShapeCalculator SC(TM);
-    X86LowerAMXCast LAC(F, &SC);
-    C |= LAC.combineAMXcast(TLI);
-    // There might be remaining AMXcast after combineAMXcast and they should be
-    // handled elegantly.
-    C |= LAC.transformAllAMXCast();
-
-    X86LowerAMXType LAT(F, &SC);
-    C |= LAT.visit();
-
-    // Prepare for fast register allocation at O0.
-    // Todo: May better check the volatile model of AMX code, not just
-    // by checking Attribute::OptimizeNone and CodeGenOptLevel::None.
-    if (TM->getOptLevel() == CodeGenOptLevel::None) {
-      // If Front End not use O0 but the Mid/Back end use O0, (e.g.
-      // "Clang -O2 -S -emit-llvm t.c" + "llc t.ll") we should make
-      // sure the amx data is volatile, that is nessary for AMX fast
-      // register allocation.
-      if (!F.hasFnAttribute(Attribute::OptimizeNone)) {
-        X86VolatileTileData VTD(F);
-        C = VTD.volatileTileData() || C;
-      }
-    }
-
-    return C;
+    return lowerAmxType(F, TM, TLI);
   }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -1501,6 +1519,6 @@ INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
 INITIALIZE_PASS_END(X86LowerAMXTypeLegacyPass, DEBUG_TYPE, PassName, false,
                     false)
 
-FunctionPass *llvm::createX86LowerAMXTypePass() {
+FunctionPass *llvm::createX86LowerAMXTypeLegacyPass() {
   return new X86LowerAMXTypeLegacyPass();
 }
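
For reference, a minimal sketch of how the newly added new-pass-manager entry point could be exercised on its own, e.g. from a target unit test. It assumes X86LowerAMXTypePass is declared in the target-private X86.h header and is constructed from the TargetMachine (mirroring the TM member used by run() above); both are assumptions to verify against the actual declaration and the X86 pass registry.

// Sketch only: the "X86.h" include and the X86LowerAMXTypePass(TM) constructor
// are assumptions, not taken from this diff.
#include "X86.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

// Run the ported pass on a single function and report whether it changed the IR.
static bool runLowerAMXTypeOn(Function &F, const TargetMachine *TM) {
  FunctionAnalysisManager FAM;
  PassBuilder PB;
  PB.registerFunctionAnalyses(FAM); // makes TargetLibraryAnalysis available to run()

  FunctionPassManager FPM;
  FPM.addPass(X86LowerAMXTypePass(TM)); // assumed constructor argument
  PreservedAnalyses PA = FPM.run(F, FAM);
  return !PA.areAllPreserved(); // run() returns all() only when nothing changed
}

With the pass presumably registered under a name matching the new DEBUG_TYPE, the same lowering should also be reachable via opt -passes=x86-lower-amx-type, and out-of-tree callers of createX86LowerAMXTypePass() will need to move to the renamed createX86LowerAMXTypeLegacyPass().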