Skip to content

Commit 6ecd10b

Browse files
sys-igc and igcbot
authored and committed
[Autobackout][FunctionalRegression] Revert of change: 5e0b327: Match and fold whole region override chains in VC
When the vector is overwritten by a series of wrregion instructions, and then inserted into a larger vector, VC can replace the initial chain of wrregion instructions with an alternative chain that writes the data directly into the larger vector. This transformation eliminates false data dependencies and unnecessary MOV instructions.
1 parent f0aa7db commit 6ecd10b

File tree

2 files changed

+3
-200
lines changed

2 files changed

+3
-200
lines changed

IGC/VectorCompiler/lib/GenXCodeGen/GenXPatternMatch.cpp

Lines changed: 3 additions & 156 deletions
Original file line number | Diff line number | Diff line change
@@ -49,8 +49,6 @@ SPDX-License-Identifier: MIT
4949
#include "GenXTargetMachine.h"
5050
#include "GenXUtil.h"
5151
#include "GenXVectorDecomposer.h"
52-
53-
#include "llvm/ADT/IntervalMap.h"
5452
#include "llvm/ADT/PostOrderIterator.h"
5553
#include "llvm/ADT/Statistic.h"
5654
#include "llvm/Analysis/ConstantFolding.h"
@@ -64,6 +62,7 @@ SPDX-License-Identifier: MIT
6462
#include "llvm/IR/IRBuilder.h"
6563
#include "llvm/IR/InstIterator.h"
6664
#include "llvm/IR/InstVisitor.h"
65+
#include "llvmWrapper/IR/Instructions.h"
6766
#include "llvm/IR/Intrinsics.h"
6867
#include "llvm/IR/Module.h"
6968
#include "llvm/IR/PatternMatch.h"
@@ -78,10 +77,9 @@ SPDX-License-Identifier: MIT
7877
#include "llvmWrapper/ADT/APInt.h"
7978
#include "llvmWrapper/IR/Constants.h"
8079
#include "llvmWrapper/IR/DerivedTypes.h"
81-
#include "llvmWrapper/IR/Instructions.h"
8280
#include "llvmWrapper/Support/TypeSize.h"
83-
#include "llvmWrapper/Transforms/Utils/BasicBlockUtils.h"
8481
#include "llvmWrapper/Transforms/Utils/Local.h"
82+
#include "llvmWrapper/Transforms/Utils/BasicBlockUtils.h"
8583

8684
#include "vc/Utils/GenX/Intrinsics.h"
8785
#include "vc/Utils/General/InstRebuilder.h"
@@ -186,7 +184,7 @@ class GenXPatternMatch : public FunctionPass,
186184
bool foldBoolAnd(Instruction *Inst);
187185
bool simplifyPredRegion(CallInst *Inst);
188186
bool simplifyWrRegion(CallInst *Inst);
189-
bool simplifyRdRegion(CallInst *Inst);
187+
bool simplifyRdRegion(CallInst* Inst);
190188
bool simplifyTruncSat(CallInst *Inst);
191189
bool simplifySelect(Function *F);
192190
bool mergeLscLoad(Function *F);
@@ -208,15 +206,6 @@ class GenXPatternMatch : public FunctionPass,
208206
bool matchPredAny(CallInst *Inst) const;
209207

210208
bool matchBFN(Function &F);
211-
212-
bool foldWriteRegion(Function &F);
213-
bool propagateWriteRegionChain(CallInst *Inst, ArrayRef<CallInst *> Chain);
214-
SmallVector<CallInst *, 4> getFullOverrideChain(CallInst *Inst);
215-
216-
using IntervalMapTy =
217-
IntervalMap<unsigned, bool, 8, IntervalMapHalfOpenInfo<unsigned>>;
218-
IntervalMapTy::Allocator Alloc;
219-
SmallPtrSet<Instruction *, 8> VisitedWrRegions;
220209
};
221210

222211
} // namespace
@@ -252,7 +241,6 @@ bool GenXPatternMatch::runOnFunction(Function &F) {
252241
loadPhiConstants(F, DT, *ST, *DL, true);
253242
Changed |= distributeIntegerMul(&F);
254243
Changed |= propagateFoldableRegion(&F);
255-
Changed |= foldWriteRegion(F);
256244
Changed |= reassociateIntegerMad(&F);
257245
}
258246

@@ -2949,147 +2937,6 @@ bool GenXPatternMatch::simplifyWrRegion(CallInst *Inst) {
29492937
return false;
29502938
}
29512939

2952-
bool GenXPatternMatch::foldWriteRegion(Function &F) {
2953-
using namespace GenXIntrinsic::GenXRegion;
2954-
if (vc::requiresStackCall(&F) || ST->hasFusedEU())
2955-
return false;
2956-
2957-
bool Changed = false;
2958-
SmallVector<Instruction *> ToErase;
2959-
2960-
for (auto &BB : F) {
2961-
for (auto &I : reverse(BB)) {
2962-
if (!GenXIntrinsic::isWrRegion(&I) || I.use_empty())
2963-
continue;
2964-
2965-
auto *InsertTo = I.getOperand(OldValueOperandNum);
2966-
while (auto *Cast = dyn_cast<BitCastInst>(InsertTo))
2967-
InsertTo = Cast->getOperand(0);
2968-
2969-
// If the wrregion destination is a load instruction, then it's an access
2970-
// to a _GENX_VOLATILE_ object. We shouldn't optimize it.
2971-
if (isa_and_nonnull<LoadInst>(InsertTo))
2972-
continue;
2973-
2974-
auto *Inst = cast<CallInst>(&I);
2975-
auto WholeChain = getFullOverrideChain(Inst);
2976-
if (WholeChain.empty())
2977-
continue;
2978-
2979-
Changed = true;
2980-
2981-
auto *DropDep = WholeChain.back();
2982-
DropDep->setArgOperand(OldValueOperandNum,
2983-
UndefValue::get(DropDep->getType()));
2984-
2985-
// Check if the whole chain is inserted into some other region
2986-
if (propagateWriteRegionChain(Inst, WholeChain)) {
2987-
ToErase.push_back(Inst);
2988-
append_range(ToErase, WholeChain);
2989-
}
2990-
}
2991-
}
2992-
2993-
for (auto *I : ToErase) {
2994-
if (I->use_empty())
2995-
I->eraseFromParent();
2996-
}
2997-
2998-
VisitedWrRegions.clear();
2999-
return Changed;
3000-
}
3001-
3002-
bool GenXPatternMatch::propagateWriteRegionChain(CallInst *Inst,
3003-
ArrayRef<CallInst *> Chain) {
3004-
using namespace GenXIntrinsic::GenXRegion;
3005-
IGC_ASSERT(GenXIntrinsic::isWrRegion(Inst));
3006-
3007-
auto R = genx::makeRegionWithOffset(Inst);
3008-
if (R.Indirect || !R.isContiguous())
3009-
return false;
3010-
3011-
auto *Pred = dyn_cast<Constant>(Inst->getOperand(PredicateOperandNum));
3012-
if (!Pred || !Pred->isAllOnesValue())
3013-
return false;
3014-
3015-
auto *Insert = Inst->getOperand(OldValueOperandNum);
3016-
3017-
for (auto *I : reverse(Chain)) {
3018-
auto *Src = I->getOperand(NewValueOperandNum);
3019-
auto Rgn = genx::makeRegionWithOffset(I);
3020-
Rgn.Offset += R.Offset;
3021-
3022-
Insert = Rgn.createWrRegion(Insert, Src, "", Inst, Inst->getDebugLoc());
3023-
VisitedWrRegions.insert(cast<CallInst>(Insert));
3024-
LLVM_DEBUG(dbgs() << "Propagating wrregion instruction: " << *Insert
3025-
<< "\n");
3026-
}
3027-
3028-
Inst->replaceAllUsesWith(Insert);
3029-
return true;
3030-
}
3031-
3032-
SmallVector<CallInst *, 4>
3033-
GenXPatternMatch::getFullOverrideChain(CallInst *Inst) {
3034-
using namespace GenXIntrinsic::GenXRegion;
3035-
IGC_ASSERT(GenXIntrinsic::isWrRegion(Inst));
3036-
3037-
// Stores the intervals that are overwritten by each particular wrregion
3038-
IntervalMapTy Intervals(Alloc);
3039-
SmallVector<CallInst *, 4> Chain;
3040-
3041-
auto *Curr = dyn_cast<CallInst>(Inst->getArgOperand(NewValueOperandNum));
3042-
if (!Curr)
3043-
return {};
3044-
3045-
auto *Ty = Curr->getType();
3046-
auto Size = DL->getTypeStoreSize(Ty);
3047-
3048-
bool IsOverwrite = false;
3049-
while (!IsOverwrite && Curr && GenXIntrinsic::isWrRegion(Curr)) {
3050-
if (VisitedWrRegions.contains(Curr))
3051-
break;
3052-
3053-
LLVM_DEBUG(dbgs() << "Looking for wrregion chain: " << *Curr << "\n");
3054-
auto R = genx::makeRegionWithOffset(Curr);
3055-
if (R.Indirect || !R.isContiguous())
3056-
break;
3057-
3058-
auto *Cond = dyn_cast<Constant>(Curr->getOperand(PredicateOperandNum));
3059-
if (!Cond || !Cond->isAllOnesValue())
3060-
break;
3061-
3062-
unsigned Start = R.Offset;
3063-
unsigned End = Start + R.NumElements * R.ElementBytes;
3064-
3065-
// Do not allow overlapping regions
3066-
if (Intervals.overlaps(Start, End))
3067-
break;
3068-
3069-
LLVM_DEBUG(dbgs() << " Overwriting [" << Start << ", " << End << ")\n");
3070-
Intervals.insert(Start, End, true);
3071-
3072-
VisitedWrRegions.insert(Curr);
3073-
Chain.push_back(Curr);
3074-
3075-
auto *Next = Curr->getOperand(OldValueOperandNum);
3076-
3077-
// Whole region is covered, no need to continue
3078-
auto FirstIt = Intervals.begin();
3079-
IsOverwrite = isa<UndefValue>(Next) ||
3080-
(FirstIt.start() == 0 && FirstIt.stop() == Size);
3081-
3082-
Curr = dyn_cast<CallInst>(Next);
3083-
if (!Next->hasOneUse())
3084-
break;
3085-
}
3086-
3087-
if (!IsOverwrite)
3088-
return {};
3089-
3090-
return Chain;
3091-
}
3092-
30932940
// Simplify (trunc.sat (ext V)) to (trunc.sat V). Even if the source and
30942941
// destination have the same type, it's incorrect to fold them into V directly
30952942
// as the saturation is necessary.

IGC/VectorCompiler/test/PatternMatch/simplify-write-region.ll

Lines changed: 0 additions & 44 deletions
This file was deleted.

0 commit comments

Comments
 (0)