Skip to content

Commit f309fea

Browse files
authored
merge main into amd-staging (llvm#1505)
2 parents 181b30e + 7520acc commit f309fea

File tree

11 files changed

+272
-132
lines changed

11 files changed

+272
-132
lines changed

llvm/docs/LangRef.rst

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3109,8 +3109,7 @@ as follows:
31093109
``S<size>``
31103110
Specifies the natural alignment of the stack in bits. Alignment
31113111
promotion of stack variables is limited to the natural stack
3112-
alignment to avoid dynamic stack realignment. The stack alignment
3113-
must be a multiple of 8-bits. If omitted, the natural stack
3112+
alignment to avoid dynamic stack realignment. If omitted, the natural stack
31143113
alignment defaults to "unspecified", which does not prevent any
31153114
alignment promotions.
31163115
``P<address space>``
@@ -3136,8 +3135,8 @@ as follows:
31363135
Defaults to the default address space of 0.
31373136
``p[n]:<size>:<abi>[:<pref>][:<idx>]``
31383137
This specifies the *size* of a pointer and its ``<abi>`` and
3139-
``<pref>``\erred alignments for address space ``n``. ``<pref>`` is optional
3140-
and defaults to ``<abi>``. The fourth parameter ``<idx>`` is the size of the
3138+
``<pref>``\erred alignments for address space ``n``.
3139+
The fourth parameter ``<idx>`` is the size of the
31413140
index that is used for address calculation, which must be less than or equal
31423141
to the pointer size. If not
31433142
specified, the default index size is equal to the pointer size. All sizes
@@ -3147,23 +3146,21 @@ as follows:
31473146
``i<size>:<abi>[:<pref>]``
31483147
This specifies the alignment for an integer type of a given bit
31493148
``<size>``. The value of ``<size>`` must be in the range [1,2^24).
3150-
``<pref>`` is optional and defaults to ``<abi>``.
31513149
For ``i8``, the ``<abi>`` value must equal 8,
31523150
that is, ``i8`` must be naturally aligned.
31533151
``v<size>:<abi>[:<pref>]``
31543152
This specifies the alignment for a vector type of a given bit
31553153
``<size>``. The value of ``<size>`` must be in the range [1,2^24).
3156-
``<pref>`` is optional and defaults to ``<abi>``.
31573154
``f<size>:<abi>[:<pref>]``
31583155
This specifies the alignment for a floating-point type of a given bit
31593156
``<size>``. Only values of ``<size>`` that are supported by the target
31603157
will work. 32 (float) and 64 (double) are supported on all targets; 80
31613158
or 128 (different flavors of long double) are also supported on some
31623159
targets. The value of ``<size>`` must be in the range [1,2^24).
3163-
``<pref>`` is optional and defaults to ``<abi>``.
31643160
``a:<abi>[:<pref>]``
31653161
This specifies the alignment for an object of aggregate type.
3166-
``<pref>`` is optional and defaults to ``<abi>``.
3162+
In addition to the usual requirements for alignment values,
3163+
the value of ``<abi>`` can also be zero, which means one-byte alignment.
31673164
``F<type><abi>``
31683165
This specifies the alignment for function pointers.
31693166
The options for ``<type>`` are:
@@ -3202,6 +3199,9 @@ as follows:
32023199
as :ref:`Non-Integral Pointer Type <nointptrtype>` s. The ``0``
32033200
address space cannot be specified as non-integral.
32043201

3202+
Unless explicitly stated otherwise, on every specification that specifies
3203+
an alignment, the value of the alignment must be in the range [1,2^16)
3204+
and must be a power of two times the width of a byte.
32053205
On every specification that takes a ``<abi>:<pref>``, specifying the
32063206
``<pref>`` alignment is optional. If omitted, the preceding ``:``
32073207
should be omitted too and ``<pref>`` will be equal to ``<abi>``.

llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ bool AArch64SelectionDAGInfo::isTargetStrictFPOpcode(unsigned Opcode) const {
3535

3636
void AArch64SelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG,
3737
const SDNode *N) const {
38+
#ifndef NDEBUG
3839
switch (N->getOpcode()) {
3940
default:
4041
break;
@@ -110,6 +111,7 @@ void AArch64SelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG,
110111
break;
111112
}
112113
}
114+
#endif
113115
}
114116

115117
SDValue AArch64SelectionDAGInfo::EmitMOPS(unsigned Opcode, SelectionDAG &DAG,

llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -792,7 +792,8 @@ class LowerMatrixIntrinsics {
792792
/// This creates and erases instructions as needed, and returns the newly
793793
/// created instruction while updating the iterator to avoid invalidation. If
794794
/// this returns nullptr, no new instruction was created.
795-
Instruction *sinkTranspose(Instruction &I, BasicBlock::reverse_iterator &II) {
795+
Instruction *sinkTranspose(Instruction &I, BasicBlock::reverse_iterator &II,
796+
bool &Changed) {
796797
BasicBlock &BB = *I.getParent();
797798
IRBuilder<> IB(&I);
798799
MatrixBuilder Builder(IB);
@@ -809,13 +810,15 @@ class LowerMatrixIntrinsics {
809810
updateShapeAndReplaceAllUsesWith(I, TATA);
810811
eraseFromParentAndMove(&I, II, BB);
811812
eraseFromParentAndMove(TA, II, BB);
813+
Changed = true;
812814
return nullptr;
813815
}
814816

815817
// k^T -> k
816818
if (isSplat(TA)) {
817819
updateShapeAndReplaceAllUsesWith(I, TA);
818820
eraseFromParentAndMove(&I, II, BB);
821+
Changed = true;
819822
return nullptr;
820823
}
821824

@@ -834,6 +837,7 @@ class LowerMatrixIntrinsics {
834837
updateShapeAndReplaceAllUsesWith(I, NewInst);
835838
eraseFromParentAndMove(&I, II, BB);
836839
eraseFromParentAndMove(TA, II, BB);
840+
Changed = true;
837841
return NewInst;
838842
}
839843

@@ -859,6 +863,7 @@ class LowerMatrixIntrinsics {
859863
updateShapeAndReplaceAllUsesWith(I, NewInst);
860864
eraseFromParentAndMove(&I, II, BB);
861865
eraseFromParentAndMove(TA, II, BB);
866+
Changed = true;
862867
return NewInst;
863868
}
864869

@@ -880,13 +885,14 @@ class LowerMatrixIntrinsics {
880885
updateShapeAndReplaceAllUsesWith(I, NewInst);
881886
eraseFromParentAndMove(&I, II, BB);
882887
eraseFromParentAndMove(TA, II, BB);
888+
Changed = true;
883889
return NewInst;
884890
}
885891

886892
return nullptr;
887893
}
888894

889-
void liftTranspose(Instruction &I) {
895+
bool liftTranspose(Instruction &I) {
890896
// Erase dead Instructions after lifting transposes from binops.
891897
auto CleanupBinOp = [this](Instruction &T, Value *A, Value *B) {
892898
if (T.use_empty())
@@ -914,6 +920,7 @@ class LowerMatrixIntrinsics {
914920
R->getZExtValue());
915921
updateShapeAndReplaceAllUsesWith(I, NewInst);
916922
CleanupBinOp(I, A, B);
923+
return true;
917924
}
918925
// A^t + B ^t -> (A + B)^t. Pick rows and columns from first transpose. If
919926
// the shape of the second transpose is different, there's a shape conflict
@@ -940,19 +947,22 @@ class LowerMatrixIntrinsics {
940947
ShapeMap[AddI] &&
941948
"Shape of updated addition doesn't match cached shape.");
942949
}
950+
return true;
943951
}
952+
return false;
944953
}
945954

946955
/// Try moving transposes in order to fold them away or into multiplies.
947-
void optimizeTransposes() {
956+
bool optimizeTransposes() {
957+
bool Changed = false;
948958
// First sink all transposes inside matmuls and adds, hoping that we end up
949959
// with NN, NT or TN variants.
950960
for (BasicBlock &BB : reverse(Func)) {
951961
for (auto II = BB.rbegin(); II != BB.rend();) {
952962
Instruction &I = *II;
953963
// We may remove II. By default continue on the next/prev instruction.
954964
++II;
955-
if (Instruction *NewInst = sinkTranspose(I, II))
965+
if (Instruction *NewInst = sinkTranspose(I, II, Changed))
956966
II = std::next(BasicBlock::reverse_iterator(NewInst));
957967
}
958968
}
@@ -961,9 +971,10 @@ class LowerMatrixIntrinsics {
961971
// to fold into consuming multiply or add.
962972
for (BasicBlock &BB : Func) {
963973
for (Instruction &I : llvm::make_early_inc_range(BB)) {
964-
liftTranspose(I);
974+
Changed |= liftTranspose(I);
965975
}
966976
}
977+
return Changed;
967978
}
968979

969980
bool Visit() {
@@ -1006,15 +1017,15 @@ class LowerMatrixIntrinsics {
10061017
WorkList = propagateShapeBackward(WorkList);
10071018
}
10081019

1020+
bool Changed = false;
10091021
if (!isMinimal()) {
1010-
optimizeTransposes();
1022+
Changed |= optimizeTransposes();
10111023
if (PrintAfterTransposeOpt) {
10121024
dbgs() << "Dump after matrix transpose optimization:\n";
10131025
Func.print(dbgs());
10141026
}
10151027
}
10161028

1017-
bool Changed = false;
10181029
SmallVector<CallInst *, 16> MaybeFusableInsts;
10191030
SmallVector<Instruction *, 16> MatrixInsts;
10201031
SmallVector<IntrinsicInst *, 16> LifetimeEnds;
@@ -1043,7 +1054,7 @@ class LowerMatrixIntrinsics {
10431054
if (!FusedInsts.contains(CI))
10441055
LowerMatrixMultiplyFused(CI, FusedInsts, LifetimeEnds);
10451056

1046-
Changed = !FusedInsts.empty();
1057+
Changed |= !FusedInsts.empty();
10471058

10481059
// Fourth, lower remaining instructions with shape information.
10491060
for (Instruction *Inst : MatrixInsts) {

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 5 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -2082,18 +2082,12 @@ class GeneratedRTChecks {
20822082

20832083
auto *Pred = LoopVectorPreHeader->getSinglePredecessor();
20842084
BranchInst::Create(LoopVectorPreHeader, SCEVCheckBlock);
2085-
// Create new preheader for vector loop.
2086-
if (OuterLoop)
2087-
OuterLoop->addBasicBlockToLoop(SCEVCheckBlock, *LI);
20882085

20892086
SCEVCheckBlock->getTerminator()->eraseFromParent();
20902087
SCEVCheckBlock->moveBefore(LoopVectorPreHeader);
20912088
Pred->getTerminator()->replaceSuccessorWith(LoopVectorPreHeader,
20922089
SCEVCheckBlock);
20932090

2094-
DT->addNewBlock(SCEVCheckBlock, Pred);
2095-
DT->changeImmediateDominator(LoopVectorPreHeader, SCEVCheckBlock);
2096-
20972091
BranchInst &BI =
20982092
*BranchInst::Create(Bypass, LoopVectorPreHeader, SCEVCheckCond);
20992093
if (AddBranchWeights)
@@ -2118,13 +2112,8 @@ class GeneratedRTChecks {
21182112
Pred->getTerminator()->replaceSuccessorWith(LoopVectorPreHeader,
21192113
MemCheckBlock);
21202114

2121-
DT->addNewBlock(MemCheckBlock, Pred);
2122-
DT->changeImmediateDominator(LoopVectorPreHeader, MemCheckBlock);
21232115
MemCheckBlock->moveBefore(LoopVectorPreHeader);
21242116

2125-
if (OuterLoop)
2126-
OuterLoop->addBasicBlockToLoop(MemCheckBlock, *LI);
2127-
21282117
BranchInst &BI =
21292118
*BranchInst::Create(Bypass, LoopVectorPreHeader, MemRuntimeCheckCond);
21302119
if (AddBranchWeights) {
@@ -2562,13 +2551,9 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
25622551
}
25632552

25642553
// Create new preheader for vector loop.
2565-
LoopVectorPreHeader =
2566-
SplitBlock(TCCheckBlock, TCCheckBlock->getTerminator(), DT, LI, nullptr,
2567-
"vector.ph");
2568-
2569-
assert(DT->properlyDominates(DT->getNode(TCCheckBlock),
2570-
DT->getNode(Bypass)->getIDom()) &&
2571-
"TC check is expected to dominate Bypass");
2554+
LoopVectorPreHeader = SplitBlock(TCCheckBlock, TCCheckBlock->getTerminator(),
2555+
static_cast<DominatorTree *>(nullptr), LI,
2556+
nullptr, "vector.ph");
25722557

25732558
BranchInst &BI =
25742559
*BranchInst::Create(Bypass, LoopVectorPreHeader, CheckMinIters);
@@ -7963,13 +7948,10 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
79637948

79647949
// Create new preheader for vector loop.
79657950
LoopVectorPreHeader = SplitBlock(TCCheckBlock, TCCheckBlock->getTerminator(),
7966-
DT, LI, nullptr, "vector.ph");
7951+
static_cast<DominatorTree *>(nullptr), LI,
7952+
nullptr, "vector.ph");
79677953

79687954
if (ForEpilogue) {
7969-
assert(DT->properlyDominates(DT->getNode(TCCheckBlock),
7970-
DT->getNode(Bypass)->getIDom()) &&
7971-
"TC check is expected to dominate Bypass");
7972-
79737955
LoopBypassBlocks.push_back(TCCheckBlock);
79747956

79757957
// Save the trip count so we don't have to regenerate it in the
@@ -8073,10 +8055,6 @@ EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck(
80738055

80748056
assert(EPI.TripCount &&
80758057
"Expected trip count to have been saved in the first pass.");
8076-
assert(
8077-
(!isa<Instruction>(EPI.TripCount) ||
8078-
DT->dominates(cast<Instruction>(EPI.TripCount)->getParent(), Insert)) &&
8079-
"saved trip count does not dominate insertion point.");
80808058
Value *TC = EPI.TripCount;
80818059
IRBuilder<> Builder(Insert->getTerminator());
80828060
Value *Count = Builder.CreateSub(TC, EPI.VectorTripCount, "n.vec.remaining");

llvm/lib/Transforms/Vectorize/VPlan.cpp

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -432,6 +432,19 @@ BasicBlock *VPBasicBlock::createEmptyBasicBlock(VPTransformState &State) {
432432
void VPBasicBlock::connectToPredecessors(VPTransformState &State) {
433433
auto &CFG = State.CFG;
434434
BasicBlock *NewBB = CFG.VPBB2IRBB[this];
435+
436+
// Register NewBB in its loop. In innermost loops it's the same for all
437+
// BB's.
438+
Loop *ParentLoop = State.CurrentParentLoop;
439+
// If this block has a sole successor that is an exit block then it needs
440+
// adding to the same parent loop as the exit block.
441+
VPBlockBase *SuccVPBB = getSingleSuccessor();
442+
if (SuccVPBB && State.Plan->isExitBlock(SuccVPBB))
443+
ParentLoop =
444+
State.LI->getLoopFor(cast<VPIRBasicBlock>(SuccVPBB)->getIRBasicBlock());
445+
if (ParentLoop && !State.LI->getLoopFor(NewBB))
446+
ParentLoop->addBasicBlockToLoop(NewBB, *State.LI);
447+
435448
// Hook up the new basic block to its predecessors.
436449
for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
437450
VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
@@ -517,17 +530,6 @@ void VPBasicBlock::execute(VPTransformState *State) {
517530
State->Builder.SetInsertPoint(NewBB);
518531
// Temporarily terminate with unreachable until CFG is rewired.
519532
UnreachableInst *Terminator = State->Builder.CreateUnreachable();
520-
// Register NewBB in its loop. In innermost loops its the same for all
521-
// BB's.
522-
Loop *ParentLoop = State->CurrentParentLoop;
523-
// If this block has a sole successor that is an exit block then it needs
524-
// adding to the same parent loop as the exit block.
525-
VPBlockBase *SuccVPBB = getSingleSuccessor();
526-
if (SuccVPBB && State->Plan->isExitBlock(SuccVPBB))
527-
ParentLoop = State->LI->getLoopFor(
528-
cast<VPIRBasicBlock>(SuccVPBB)->getIRBasicBlock());
529-
if (ParentLoop)
530-
ParentLoop->addBasicBlockToLoop(NewBB, *State->LI);
531533
State->Builder.SetInsertPoint(Terminator);
532534

533535
State->CFG.PrevBB = NewBB;

0 commit comments

Comments
 (0)