Skip to content

Commit f80b273

Browse files
[X86][NewPM] Port X86LowerAMXType to NewPM
This port enables the eventual migration of everything to the NewPM. Reviewers: RKSimon, phoebewang, paperchalice, arsenm, topperc. Reviewed By: arsenm. Pull Request: #165084
1 parent 6658933 commit f80b273

13 files changed

+95
-50
lines changed

llvm/lib/Target/X86/X86.h

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,10 @@
1414
#ifndef LLVM_LIB_TARGET_X86_X86_H
1515
#define LLVM_LIB_TARGET_X86_X86_H
1616

17+
#include "llvm/IR/Analysis.h"
18+
#include "llvm/IR/PassManager.h"
1719
#include "llvm/Support/CodeGen.h"
20+
#include "llvm/Target/TargetMachine.h"
1821

1922
namespace llvm {
2023

@@ -162,7 +165,17 @@ FunctionPass *createX86WinEHUnwindV2Pass();
162165

163166
/// The pass transforms load/store <256 x i32> to AMX load/store intrinsics
164167
/// or splits the data into two <128 x i32>.
165-
FunctionPass *createX86LowerAMXTypePass();
168+
// New pass manager form of the AMX type lowering: a function pass built on
// PassInfoMixin that carries the TargetMachine it was constructed with.
class X86LowerAMXTypePass : public PassInfoMixin<X86LowerAMXTypePass> {
169+
private:
170+
// Target machine supplied at construction; stored as-is (not owned here) and
// handed to the lowering when run() executes.
const TargetMachine *TM;
171+
172+
public:
173+
// Stores the given TargetMachine pointer; no other setup is performed.
X86LowerAMXTypePass(const TargetMachine *TM) : TM(TM) {}
174+
// Pass entry point for function F; defined out of line in
// X86LowerAMXType.cpp.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
175+
// Always returns true. NOTE(review): in the new pass manager this is the hook
// that marks a pass as required (not skippable) -- confirm against LLVM docs.
static bool isRequired() { return true; }
176+
};
177+
178+
FunctionPass *createX86LowerAMXTypeLegacyPass();
166179

167180
/// The pass transforms amx intrinsics to scalar operation if the function has
168181
/// optnone attribute or it is O0.

llvm/lib/Target/X86/X86CodeGenPassBuilder.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
/// TODO: Port CodeGen passes to new pass manager.
1111
//===----------------------------------------------------------------------===//
1212

13+
#include "X86.h"
1314
#include "X86ISelDAGToDAG.h"
1415
#include "X86TargetMachine.h"
1516

llvm/lib/Target/X86/X86LowerAMXType.cpp

Lines changed: 57 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -46,12 +46,14 @@
4646
#include "llvm/CodeGen/Passes.h"
4747
#include "llvm/CodeGen/TargetPassConfig.h"
4848
#include "llvm/CodeGen/ValueTypes.h"
49+
#include "llvm/IR/Analysis.h"
4950
#include "llvm/IR/DataLayout.h"
5051
#include "llvm/IR/Function.h"
5152
#include "llvm/IR/IRBuilder.h"
5253
#include "llvm/IR/Instructions.h"
5354
#include "llvm/IR/IntrinsicInst.h"
5455
#include "llvm/IR/IntrinsicsX86.h"
56+
#include "llvm/IR/PassManager.h"
5557
#include "llvm/IR/PatternMatch.h"
5658
#include "llvm/InitializePasses.h"
5759
#include "llvm/Pass.h"
@@ -64,7 +66,7 @@
6466
using namespace llvm;
6567
using namespace PatternMatch;
6668

67-
#define DEBUG_TYPE "lower-amx-type"
69+
#define DEBUG_TYPE "x86-lower-amx-type"
6870

6971
static bool isAMXCast(Instruction *II) {
7072
return match(II,
@@ -137,15 +139,15 @@ static Instruction *getFirstNonAllocaInTheEntryBlock(Function &F) {
137139

138140
class ShapeCalculator {
139141
private:
140-
TargetMachine *TM = nullptr;
142+
const TargetMachine *TM = nullptr;
141143

142144
// In AMX intrinsics we let Shape = {Row, Col}, but the
143145
// RealCol = Col / ElementSize. We may use the RealCol
144146
// as a new Row for other new created AMX intrinsics.
145147
std::map<Value *, Value *> Col2Row, Row2Col;
146148

147149
public:
148-
ShapeCalculator(TargetMachine *TargetM) : TM(TargetM) {}
150+
ShapeCalculator(const TargetMachine *TargetM) : TM(TargetM) {}
149151
std::pair<Value *, Value *> getShape(IntrinsicInst *II, unsigned OpNo);
150152
std::pair<Value *, Value *> getShape(PHINode *Phi);
151153
Value *getRowFromCol(Instruction *II, Value *V, unsigned Granularity);
@@ -1432,8 +1434,58 @@ bool X86LowerAMXCast::transformAllAMXCast() {
14321434
return Change;
14331435
}
14341436

1437+
/// Common implementation of the AMX type lowering for function \p F, called
/// by both the new pass manager pass and the legacy pass.
///
/// Returns false right away when containsAMXCode(F) is false. Otherwise it
/// combines and transforms the AMX casts, runs X86LowerAMXType::visit(), and,
/// when \p TM reports CodeGenOptLevel::None and \p F is not optnone, runs
/// X86VolatileTileData. \p TM is dereferenced unconditionally on that path,
/// so it must not be null. \p TLI is forwarded to combineAMXcast().
///
/// \returns true if any of the steps reported a change.
bool lowerAmxType(Function &F, const TargetMachine *TM,
1438+
TargetLibraryInfo *TLI) {
1439+
// Performance optimization: most code doesn't use AMX, so return early if
1440+
// there are no instructions that produce AMX values. This is sufficient, as
1441+
// AMX arguments and constants are not allowed -- so any producer of an AMX
1442+
// value must be an instruction.
1443+
// TODO: find a cheaper way for this, without looking at all instructions.
1444+
if (!containsAMXCode(F))
1445+
return false;
1446+
1447+
bool C = false;
1448+
ShapeCalculator SC(TM);
1449+
X86LowerAMXCast LAC(F, &SC);
1450+
C |= LAC.combineAMXcast(TLI);
1451+
// Some AMX casts may remain after combineAMXcast; they still have to be
1452+
// handled, which transformAllAMXCast takes care of.
1453+
C |= LAC.transformAllAMXCast();
1454+
1455+
X86LowerAMXType LAT(F, &SC);
1456+
C |= LAT.visit();
1457+
1458+
// Prepare for fast register allocation at O0.
1459+
// TODO: It would be better to check the volatile model of the AMX code
1460+
// itself rather than only Attribute::OptimizeNone and CodeGenOptLevel::None.
1461+
if (TM->getOptLevel() == CodeGenOptLevel::None) {
1462+
// If the front end does not use O0 but the mid/back end does (e.g.
1463+
// "Clang -O2 -S -emit-llvm t.c" + "llc t.ll"), we must make
1464+
// sure the AMX data is volatile, which is necessary for AMX fast
1465+
// register allocation.
1466+
if (!F.hasFnAttribute(Attribute::OptimizeNone)) {
1467+
X86VolatileTileData VTD(F);
1468+
C = VTD.volatileTileData() || C;
1469+
}
1470+
}
1471+
1472+
return C;
1473+
}
1474+
14351475
} // anonymous namespace
14361476

1477+
/// New pass manager entry point: fetches TargetLibraryInfo for \p F from
/// \p FAM and delegates to lowerAmxType() with the stored TargetMachine.
///
/// \returns PreservedAnalyses::all() when nothing changed; otherwise a set
/// that preserves only the CFG analyses.
PreservedAnalyses X86LowerAMXTypePass::run(Function &F,
1478+
FunctionAnalysisManager &FAM) {
1479+
TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
1480+
bool Changed = lowerAmxType(F, TM, &TLI);
1481+
// Nothing was rewritten, so every cached analysis is still valid.
if (!Changed)
1482+
return PreservedAnalyses::all();
1483+
1484+
// Start from "nothing preserved" and add back the CFG analyses set.
// NOTE(review): this presumes the lowering never alters the CFG -- confirm.
PreservedAnalyses PA = PreservedAnalyses::none();
1485+
PA.preserveSet<CFGAnalyses>();
1486+
return PA;
1487+
}
1488+
14371489
namespace {
14381490

14391491
class X86LowerAMXTypeLegacyPass : public FunctionPass {
@@ -1443,44 +1495,10 @@ class X86LowerAMXTypeLegacyPass : public FunctionPass {
14431495
X86LowerAMXTypeLegacyPass() : FunctionPass(ID) {}
14441496

14451497
bool runOnFunction(Function &F) override {
1446-
// Performance optimization: most code doesn't use AMX, so return early if
1447-
// there are no instructions that produce AMX values. This is sufficient, as
1448-
// AMX arguments and constants are not allowed -- so any producer of an AMX
1449-
// value must be an instruction.
1450-
// TODO: find a cheaper way for this, without looking at all instructions.
1451-
if (!containsAMXCode(F))
1452-
return false;
1453-
1454-
bool C = false;
14551498
TargetMachine *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
14561499
TargetLibraryInfo *TLI =
14571500
&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1458-
1459-
ShapeCalculator SC(TM);
1460-
X86LowerAMXCast LAC(F, &SC);
1461-
C |= LAC.combineAMXcast(TLI);
1462-
// There might be remaining AMXcast after combineAMXcast and they should be
1463-
// handled elegantly.
1464-
C |= LAC.transformAllAMXCast();
1465-
1466-
X86LowerAMXType LAT(F, &SC);
1467-
C |= LAT.visit();
1468-
1469-
// Prepare for fast register allocation at O0.
1470-
// Todo: May better check the volatile model of AMX code, not just
1471-
// by checking Attribute::OptimizeNone and CodeGenOptLevel::None.
1472-
if (TM->getOptLevel() == CodeGenOptLevel::None) {
1473-
// If Front End not use O0 but the Mid/Back end use O0, (e.g.
1474-
// "Clang -O2 -S -emit-llvm t.c" + "llc t.ll") we should make
1475-
// sure the amx data is volatile, that is nessary for AMX fast
1476-
// register allocation.
1477-
if (!F.hasFnAttribute(Attribute::OptimizeNone)) {
1478-
X86VolatileTileData VTD(F);
1479-
C = VTD.volatileTileData() || C;
1480-
}
1481-
}
1482-
1483-
return C;
1501+
return lowerAmxType(F, TM, TLI);
14841502
}
14851503

14861504
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -1501,6 +1519,6 @@ INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
15011519
INITIALIZE_PASS_END(X86LowerAMXTypeLegacyPass, DEBUG_TYPE, PassName, false,
15021520
false)
15031521

1504-
FunctionPass *llvm::createX86LowerAMXTypePass() {
1522+
/// Factory for the legacy pass manager: allocates and returns a new
/// X86LowerAMXTypeLegacyPass instance.
FunctionPass *llvm::createX86LowerAMXTypeLegacyPass() {
15051523
return new X86LowerAMXTypeLegacyPass();
15061524
}

llvm/lib/Target/X86/X86PassRegistry.def

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,16 @@
1212

1313
// NOTE: NO INCLUDE GUARD DESIRED!
1414

15+
#ifndef FUNCTION_PASS
16+
#define FUNCTION_PASS(NAME, CREATE_PASS)
17+
#endif
18+
FUNCTION_PASS("x86-lower-amx-type", X86LowerAMXTypePass(this))
19+
#undef FUNCTION_PASS
20+
1521
#ifndef DUMMY_FUNCTION_PASS
1622
#define DUMMY_FUNCTION_PASS(NAME, CREATE_PASS)
1723
#endif
1824
DUMMY_FUNCTION_PASS("lower-amx-intrinsics", X86LowerAMXIntrinsics(*this))
19-
DUMMY_FUNCTION_PASS("lower-amx-type", X86LowerAMXTypePass(*this))
2025
DUMMY_FUNCTION_PASS("x86-partial-reduction", X86PartialReduction())
2126
DUMMY_FUNCTION_PASS("x86-winehstate", WinEHStatePass())
2227
#undef DUMMY_FUNCTION_PASS

llvm/lib/Target/X86/X86TargetMachine.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -423,7 +423,7 @@ void X86PassConfig::addIRPasses() {
423423
// We add both passes anyway, and when these two passes run, we skip the pass
424424
// based on the option level and option attribute.
425425
addPass(createX86LowerAMXIntrinsicsPass());
426-
addPass(createX86LowerAMXTypePass());
426+
addPass(createX86LowerAMXTypeLegacyPass());
427427

428428
TargetPassConfig::addIRPasses();
429429

llvm/test/CodeGen/X86/AMX/amx-combine-undef.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2-
; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -lower-amx-type %s -S | FileCheck %s
2+
; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -x86-lower-amx-type %s -S | FileCheck %s
3+
; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -passes=x86-lower-amx-type %s -S | FileCheck %s
34

45
define void @undef_2phi(ptr%buf) {
56
; CHECK-LABEL: @undef_2phi(

llvm/test/CodeGen/X86/AMX/amx-combine.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2-
; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -lower-amx-type %s -S | FileCheck %s
2+
; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -x86-lower-amx-type %s -S | FileCheck %s
3+
; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -passes=x86-lower-amx-type %s -S | FileCheck %s
34

45
define void @combine_store(ptr%p) {
56
; CHECK-LABEL: @combine_store(

llvm/test/CodeGen/X86/AMX/amx-configO2toO0-lower.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
; RUN: opt < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx512f -lower-amx-type -S | FileCheck %s
1+
; RUN: opt < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx512f -x86-lower-amx-type -S | FileCheck %s
2+
; RUN: opt < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx512f -passes=x86-lower-amx-type -S | FileCheck %s
23

34
@buf = dso_local global [1024 x i8] zeroinitializer, align 16
45
@buf2 = dso_local global [1024 x i8] zeroinitializer, align 16

llvm/test/CodeGen/X86/AMX/amx-type.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2-
; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -lower-amx-type %s -S | FileCheck %s
2+
; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -x86-lower-amx-type %s -S | FileCheck %s
3+
; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -passes=x86-lower-amx-type %s -S | FileCheck %s
34

45
%struct.__tile_str = type { i16, i16, <256 x i32> }
56

llvm/test/CodeGen/X86/AMX/lat-combine-amx-bitcast.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2-
; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -lower-amx-type %s -S | FileCheck %s
2+
; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -x86-lower-amx-type %s -S | FileCheck %s
3+
; RUN: opt --codegen-opt-level=2 -mtriple=x86_64 -passes=x86-lower-amx-type %s -S | FileCheck %s
34

45
define void @combine_amx_cast_inside_bb() {
56
; CHECK-LABEL: @combine_amx_cast_inside_bb(

0 commit comments

Comments
 (0)