@@ -49,8 +49,6 @@ SPDX-License-Identifier: MIT
4949#include " GenXTargetMachine.h"
5050#include " GenXUtil.h"
5151#include " GenXVectorDecomposer.h"
52-
53- #include " llvm/ADT/IntervalMap.h"
5452#include " llvm/ADT/PostOrderIterator.h"
5553#include " llvm/ADT/Statistic.h"
5654#include " llvm/Analysis/ConstantFolding.h"
@@ -64,6 +62,7 @@ SPDX-License-Identifier: MIT
6462#include " llvm/IR/IRBuilder.h"
6563#include " llvm/IR/InstIterator.h"
6664#include " llvm/IR/InstVisitor.h"
65+ #include " llvmWrapper/IR/Instructions.h"
6766#include " llvm/IR/Intrinsics.h"
6867#include " llvm/IR/Module.h"
6968#include " llvm/IR/PatternMatch.h"
@@ -78,10 +77,9 @@ SPDX-License-Identifier: MIT
7877#include " llvmWrapper/ADT/APInt.h"
7978#include " llvmWrapper/IR/Constants.h"
8079#include " llvmWrapper/IR/DerivedTypes.h"
81- #include " llvmWrapper/IR/Instructions.h"
8280#include " llvmWrapper/Support/TypeSize.h"
83- #include " llvmWrapper/Transforms/Utils/BasicBlockUtils.h"
8481#include " llvmWrapper/Transforms/Utils/Local.h"
82+ #include " llvmWrapper/Transforms/Utils/BasicBlockUtils.h"
8583
8684#include " vc/Utils/GenX/Intrinsics.h"
8785#include " vc/Utils/General/InstRebuilder.h"
@@ -186,7 +184,7 @@ class GenXPatternMatch : public FunctionPass,
186184 bool foldBoolAnd (Instruction *Inst);
187185 bool simplifyPredRegion (CallInst *Inst);
188186 bool simplifyWrRegion (CallInst *Inst);
189- bool simplifyRdRegion (CallInst * Inst);
187+ bool simplifyRdRegion (CallInst* Inst);
190188 bool simplifyTruncSat (CallInst *Inst);
191189 bool simplifySelect (Function *F);
192190 bool mergeLscLoad (Function *F);
@@ -208,15 +206,6 @@ class GenXPatternMatch : public FunctionPass,
208206 bool matchPredAny (CallInst *Inst) const ;
209207
210208 bool matchBFN (Function &F);
211-
212- bool foldWriteRegion (Function &F);
213- bool propagateWriteRegionChain (CallInst *Inst, ArrayRef<CallInst *> Chain);
214- SmallVector<CallInst *, 4 > getFullOverrideChain (CallInst *Inst);
215-
216- using IntervalMapTy =
217- IntervalMap<unsigned , bool , 8 , IntervalMapHalfOpenInfo<unsigned >>;
218- IntervalMapTy::Allocator Alloc;
219- SmallPtrSet<Instruction *, 8 > VisitedWrRegions;
220209};
221210
222211} // namespace
@@ -252,7 +241,6 @@ bool GenXPatternMatch::runOnFunction(Function &F) {
252241 loadPhiConstants (F, DT, *ST, *DL, true );
253242 Changed |= distributeIntegerMul (&F);
254243 Changed |= propagateFoldableRegion (&F);
255- Changed |= foldWriteRegion (F);
256244 Changed |= reassociateIntegerMad (&F);
257245 }
258246
@@ -2949,147 +2937,6 @@ bool GenXPatternMatch::simplifyWrRegion(CallInst *Inst) {
29492937 return false ;
29502938}
29512939
// Folds chains of wrregion instructions in F whose combined writes fully
// override their destination. Returns true if at least one chain was found
// (and therefore the IR was modified), false otherwise. Does nothing and
// returns false when vc::requiresStackCall(&F) holds or the subtarget reports
// hasFusedEU().
2952- bool GenXPatternMatch::foldWriteRegion (Function &F) {
2953- using namespace GenXIntrinsic ::GenXRegion;
2954- if (vc::requiresStackCall (&F) || ST->hasFusedEU ())
2955- return false ;
2956-
2957- bool Changed = false ;
// Instructions that became candidates for removal; they are erased only after
// the traversal is finished, and only if they really have no uses left.
2958- SmallVector<Instruction *> ToErase;
2959-
2960- for (auto &BB : F) {
// Each basic block is visited from its last instruction to its first.
2961- for (auto &I : reverse (BB)) {
// Only wrregion instructions that still have users are of interest.
2962- if (!GenXIntrinsic::isWrRegion (&I) || I.use_empty ())
2963- continue ;
2964-
// Look through any sequence of bitcasts to find the value the wrregion
// actually writes into.
2965- auto *InsertTo = I.getOperand (OldValueOperandNum);
2966- while (auto *Cast = dyn_cast<BitCastInst>(InsertTo))
2967- InsertTo = Cast->getOperand (0 );
2968-
2969- // If the wrregion destination is a load instruction, then it's the access
2970- // to the _GENX_VOLATILE_ object. We shouldn't optimize it.
2971- if (isa_and_nonnull<LoadInst>(InsertTo))
2972- continue ;
2973-
2974- auto *Inst = cast<CallInst>(&I);
// An empty result means no fully-overriding chain feeds this wrregion.
2975- auto WholeChain = getFullOverrideChain (Inst);
2976- if (WholeChain.empty ())
2977- continue ;
2978-
2979- Changed = true ;
2980-
// The last element of the chain is the wrregion that was collected last by
// getFullOverrideChain. Its old-value operand is replaced with undef, which
// drops the dependency on whatever it was writing into.
2981- auto *DropDep = WholeChain.back ();
2982- DropDep->setArgOperand (OldValueOperandNum,
2983- UndefValue::get (DropDep->getType ()));
2984-
2985- // Check if the whole chain is inserted into some other region
2986- if (propagateWriteRegionChain (Inst, WholeChain)) {
2987- ToErase.push_back (Inst);
2988- append_range (ToErase, WholeChain);
2989- }
2990- }
2991- }
2992-
// Deferred cleanup: erase only the queued instructions that ended up unused.
2993- for (auto *I : ToErase) {
2994- if (I->use_empty ())
2995- I->eraseFromParent ();
2996- }
2997-
// The visited set is reset here, after the whole function has been processed.
2998- VisitedWrRegions.clear ();
2999- return Changed;
3000- }
3001-
// Re-creates the writes of Chain directly on top of Inst's old-value operand
// and redirects all users of Inst to the result.
//
// Inst  - a wrregion instruction (asserted).
// Chain - wrregion instructions to replay; they are processed in reverse
//         order of the array.
//
// Returns false without changing anything when Inst's region is indirect or
// not contiguous, or when its predicate operand is not a constant all-ones
// value. Returns true after all uses of Inst have been replaced.
3002- bool GenXPatternMatch::propagateWriteRegionChain (CallInst *Inst,
3003- ArrayRef<CallInst *> Chain) {
3004- using namespace GenXIntrinsic ::GenXRegion;
3005- IGC_ASSERT (GenXIntrinsic::isWrRegion (Inst));
3006-
3007- auto R = genx::makeRegionWithOffset (Inst);
3008- if (R.Indirect || !R.isContiguous ())
3009- return false ;
3010-
// Only a constant all-ones predicate is accepted.
3011- auto *Pred = dyn_cast<Constant>(Inst->getOperand (PredicateOperandNum));
3012- if (!Pred || !Pred->isAllOnesValue ())
3013- return false ;
3014-
// Insert starts as the value Inst writes into and is updated to each newly
// created wrregion, so the new instructions form a chain of their own.
3015- auto *Insert = Inst->getOperand (OldValueOperandNum);
3016-
3017- for (auto *I : reverse (Chain)) {
3018- auto *Src = I->getOperand (NewValueOperandNum);
// The chain element's region is shifted by the offset of Inst's region.
3019- auto Rgn = genx::makeRegionWithOffset (I);
3020- Rgn.Offset += R.Offset ;
3021-
// NOTE(review): Inst is passed to createWrRegion, presumably as the insertion
// point for the new instruction - confirm against the Region API.
3022- Insert = Rgn.createWrRegion (Insert, Src, " " , Inst, Inst->getDebugLoc ());
// Newly created wrregions are recorded in VisitedWrRegions;
// getFullOverrideChain stops its walk at any instruction found in that set.
3023- VisitedWrRegions.insert (cast<CallInst>(Insert));
3024- LLVM_DEBUG (dbgs () << " Propagating wrregion instruction: " << *Insert
3025- << " \n " );
3026- }
3027-
3028- Inst->replaceAllUsesWith (Insert);
3029- return true ;
3030- }
3031-
// Collects the chain of wrregion instructions that produces the new-value
// operand of Inst, following old-value operands from one wrregion to the next.
//
// Inst - a wrregion instruction (asserted).
//
// Returns the collected chain, in the order the instructions were visited,
// only if the walk established a full overwrite: either a visited wrregion has
// an undef old value, or the first recorded interval spans [0, Size). In every
// other case an empty vector is returned.
3032- SmallVector<CallInst *, 4 >
3033- GenXPatternMatch::getFullOverrideChain (CallInst *Inst) {
3034- using namespace GenXIntrinsic ::GenXRegion;
3035- IGC_ASSERT (GenXIntrinsic::isWrRegion (Inst));
3036-
3037- // Stores the intervals that are overwritten by each particular wrregion
3038- IntervalMapTy Intervals (Alloc);
3039- SmallVector<CallInst *, 4 > Chain;
3040-
// The walk starts at the value being written by Inst; it must be a call.
3041- auto *Curr = dyn_cast<CallInst>(Inst->getArgOperand (NewValueOperandNum));
3042- if (!Curr)
3043- return {};
3044-
// Size is the store size of the starting value's type, as reported by the
// data layout.
3045- auto *Ty = Curr->getType ();
3046- auto Size = DL->getTypeStoreSize (Ty);
3047-
3048- bool IsOverwrite = false ;
3049- while (!IsOverwrite && Curr && GenXIntrinsic::isWrRegion (Curr)) {
// Stop at a wrregion that has already been recorded in VisitedWrRegions.
3050- if (VisitedWrRegions.contains (Curr))
3051- break ;
3052-
3053- LLVM_DEBUG (dbgs () << " Looking for wrregion chain: " << *Curr << " \n " );
// Only direct, contiguous regions are accepted.
3054- auto R = genx::makeRegionWithOffset (Curr);
3055- if (R.Indirect || !R.isContiguous ())
3056- break ;
3057-
// Only a constant all-ones predicate is accepted.
3058- auto *Cond = dyn_cast<Constant>(Curr->getOperand (PredicateOperandNum));
3059- if (!Cond || !Cond->isAllOnesValue ())
3060- break ;
3061-
// Half-open interval [Start, End) written by this wrregion.
// NOTE(review): presumably R.Offset is in bytes, matching
// NumElements * ElementBytes and the store size above - confirm.
3062- unsigned Start = R.Offset ;
3063- unsigned End = Start + R.NumElements * R.ElementBytes ;
3064-
3065- // Do not allow overlapping regions
3066- if (Intervals.overlaps (Start, End))
3067- break ;
3068-
3069- LLVM_DEBUG (dbgs () << " Overwriting [" << Start << " , " << End << " )\n " );
3070- Intervals.insert (Start, End, true );
3071-
3072- VisitedWrRegions.insert (Curr);
3073- Chain.push_back (Curr);
3074-
3075- auto *Next = Curr->getOperand (OldValueOperandNum);
3076-
3077- // Whole region is covered, no need to continue
// NOTE(review): this relies on the interval map presenting adjacent intervals
// with the same value as a single entry - confirm against IntervalMap docs.
3078- auto FirstIt = Intervals.begin ();
3079- IsOverwrite = isa<UndefValue>(Next) ||
3080- (FirstIt.start () == 0 && FirstIt.stop () == Size);
3081-
// Advance to the old value; the walk ends if it has any number of uses other
// than exactly one.
3082- Curr = dyn_cast<CallInst>(Next);
3083- if (!Next->hasOneUse ())
3084- break ;
3085- }
3086-
// A partially covering chain is of no use to the caller.
3087- if (!IsOverwrite)
3088- return {};
3089-
3090- return Chain;
3091- }
3092-
30932940// Simplify (trunc.sat (ext V)) to (trunc.sat V). Even if the source and
30942941// destination have the same type, it's incorrect to fold them into V directly
30952942// as the saturation is necessary.
0 commit comments