@@ -145,7 +145,8 @@ class MVEGatherScatterLowering : public FunctionPass {
   // Optimise the base and offsets of the given address
   bool optimiseAddress(Value *Address, BasicBlock *BB, LoopInfo *LI);
   // Try to fold consecutive geps together into one
-  Value *foldGEP(GetElementPtrInst *GEP, Value *&Offsets, IRBuilder<> &Builder);
+  Value *foldGEP(GetElementPtrInst *GEP, Value *&Offsets, unsigned &Scale,
+                 IRBuilder<> &Builder);
   // Check whether these offsets could be moved out of the loop they're in
   bool optimiseOffsets(Value *Offsets, BasicBlock *BB, LoopInfo *LI);
   // Pushes the given add out of the loop
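
To see what the new Scale out-parameter is for, here is a minimal standalone sketch (hypothetical names, not the pass's own code) of folding a chain of GEPs into a single byte offset: each index is scaled by the allocation size of that GEP's source element type, and the scaled offsets simply add up.

#include <cstdint>

// Hypothetical model of a GEP chain: each node indexes its base pointer by
// Index elements of ElemSizeInBytes bytes; Base == nullptr marks the root.
struct GepNode {
  const GepNode *Base = nullptr;
  int64_t Index = 0;
  unsigned ElemSizeInBytes = 1;
};

// Collapse the chain into one byte offset, mirroring how foldGEP threads a
// Scale through the recursion and ends up with Scale == 1 (i.e. bytes).
int64_t foldToByteOffset(const GepNode &G) {
  int64_t Bytes = G.Index * int64_t(G.ElemSizeInBytes);
  if (G.Base)
    Bytes += foldToByteOffset(*G.Base);
  return Bytes;
}

For example, an inner GEP over i32 with index 3 followed by an outer GEP over i16 with index 5 folds to 3*4 + 5*2 = 22 bytes, which is why the merged GEP created further down can use i8 as its source element type.
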
@@ -1103,8 +1104,8 @@ bool MVEGatherScatterLowering::optimiseOffsets(Value *Offsets, BasicBlock *BB,
   return true;
 }
 
-static Value *CheckAndCreateOffsetAdd(Value *X, Value *Y, Value *GEP,
-                                      IRBuilder<> &Builder) {
+static Value *CheckAndCreateOffsetAdd(Value *X, unsigned ScaleX, Value *Y,
+                                      unsigned ScaleY, IRBuilder<> &Builder) {
   // Splat the non-vector value to a vector of the given type - if the value is
   // a constant (and its value isn't too big), we can even use this opportunity
   // to scale it to the size of the vector elements
@@ -1156,40 +1157,49 @@ static Value *CheckAndCreateOffsetAdd(Value *X, Value *Y, Value *GEP,
       ConstantInt *ConstYEl =
           dyn_cast<ConstantInt>(ConstY->getAggregateElement(i));
       if (!ConstXEl || !ConstYEl ||
-          ConstXEl->getZExtValue() + ConstYEl->getZExtValue() >=
+          ConstXEl->getZExtValue() * ScaleX +
+                  ConstYEl->getZExtValue() * ScaleY >=
               (unsigned)(1 << (TargetElemSize - 1)))
         return nullptr;
     }
   }
 
-  Value *Add = Builder.CreateAdd(X, Y);
+  Value *XScale = Builder.CreateVectorSplat(
+      XElType->getNumElements(),
+      Builder.getIntN(XElType->getScalarSizeInBits(), ScaleX));
+  Value *YScale = Builder.CreateVectorSplat(
+      YElType->getNumElements(),
+      Builder.getIntN(YElType->getScalarSizeInBits(), ScaleY));
+  Value *Add = Builder.CreateAdd(Builder.CreateMul(X, XScale),
+                                 Builder.CreateMul(Y, YScale));
 
-  FixedVectorType *GEPType = cast<FixedVectorType>(GEP->getType());
-  if (checkOffsetSize(Add, GEPType->getNumElements()))
+  if (checkOffsetSize(Add, XElType->getNumElements()))
     return Add;
   else
     return nullptr;
 }
 
 Value *MVEGatherScatterLowering::foldGEP(GetElementPtrInst *GEP,
-                                         Value *&Offsets,
+                                         Value *&Offsets, unsigned &Scale,
                                          IRBuilder<> &Builder) {
   Value *GEPPtr = GEP->getPointerOperand();
   Offsets = GEP->getOperand(1);
+  Scale = DL->getTypeAllocSize(GEP->getSourceElementType());
   // We only merge geps with constant offsets, because only for those
   // we can make sure that we do not cause an overflow
-  if (!isa<Constant>(Offsets))
+  if (GEP->getNumIndices() != 1 || !isa<Constant>(Offsets))
     return nullptr;
-  GetElementPtrInst *BaseGEP;
-  if ((BaseGEP = dyn_cast<GetElementPtrInst>(GEPPtr))) {
+  if (GetElementPtrInst *BaseGEP = dyn_cast<GetElementPtrInst>(GEPPtr)) {
     // Merge the two geps into one
-    Value *BaseBasePtr = foldGEP(BaseGEP, Offsets, Builder);
+    Value *BaseBasePtr = foldGEP(BaseGEP, Offsets, Scale, Builder);
     if (!BaseBasePtr)
       return nullptr;
-    Offsets =
-        CheckAndCreateOffsetAdd(Offsets, GEP->getOperand(1), GEP, Builder);
+    Offsets = CheckAndCreateOffsetAdd(
+        Offsets, Scale, GEP->getOperand(1),
+        DL->getTypeAllocSize(GEP->getSourceElementType()), Builder);
     if (Offsets == nullptr)
       return nullptr;
+    Scale = 1; // Scale is always an i8 at this point.
     return BaseBasePtr;
   }
   return GEPPtr;
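
The new overflow check in CheckAndCreateOffsetAdd is easier to read with the scales spelled out. Below is a standalone sketch of the per-lane arithmetic (a hypothetical helper, not LLVM API): both constant lane offsets are scaled to bytes before being added, and the sum has to stay below 2^(TargetElemSize - 1), the same bound the code above compares against.

#include <cstdint>
#include <optional>

// Per-lane version of the check: scale both constant offsets to bytes, add
// them, and reject the fold if the result no longer fits below
// 2^(TargetElemSize - 1).
std::optional<uint64_t> combineLaneOffsets(uint64_t X, unsigned ScaleX,
                                           uint64_t Y, unsigned ScaleY,
                                           unsigned TargetElemSize) {
  uint64_t Sum = X * ScaleX + Y * ScaleY;
  if (Sum >= (uint64_t{1} << (TargetElemSize - 1)))
    return std::nullopt; // combined byte offset would be out of range
  return Sum;
}

Without the ScaleX/ScaleY factors, the old comparison bounded the raw indices rather than the byte offsets the merged GEP actually ends up using.
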
@@ -1206,15 +1216,24 @@ bool MVEGatherScatterLowering::optimiseAddress(Value *Address, BasicBlock *BB,
     Builder.SetInsertPoint(GEP);
     Builder.SetCurrentDebugLocation(GEP->getDebugLoc());
     Value *Offsets;
-    Value *Base = foldGEP(GEP, Offsets, Builder);
+    unsigned Scale;
+    Value *Base = foldGEP(GEP, Offsets, Scale, Builder);
     // We only want to merge the geps if there is a real chance that they can be
     // used by an MVE gather; thus the offset has to have the correct size
     // (always i32 if it is not of vector type) and the base has to be a
     // pointer.
     if (Offsets && Base && Base != GEP) {
+      assert(Scale == 1 && "Expected to fold GEP to a scale of 1");
+      Type *BaseTy = Builder.getInt8PtrTy();
+      if (auto *VecTy = dyn_cast<FixedVectorType>(Base->getType()))
+        BaseTy = FixedVectorType::get(BaseTy, VecTy);
       GetElementPtrInst *NewAddress = GetElementPtrInst::Create(
-          GEP->getSourceElementType(), Base, Offsets, "gep.merged", GEP);
-      GEP->replaceAllUsesWith(NewAddress);
+          Builder.getInt8Ty(), Builder.CreateBitCast(Base, BaseTy), Offsets,
+          "gep.merged", GEP);
+      LLVM_DEBUG(dbgs() << "Folded GEP: " << *GEP
+                        << "\n     new : " << *NewAddress << "\n");
+      GEP->replaceAllUsesWith(
+          Builder.CreateBitCast(NewAddress, GEP->getType()));
       GEP = NewAddress;
       Changed = true;
     }
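
As a small worked example (made-up values) of what the rewritten address computation produces: with an inner GEP over i32 and an outer GEP over i16, the merged "gep.merged" is typed as i8 and its per-lane offsets are already byte offsets, which is exactly why Scale == 1 is asserted above.

#include <array>
#include <cstdio>

int main() {
  // Lane indices of the inner (i32, 4-byte) and outer (i16, 2-byte) GEPs.
  std::array<unsigned, 4> InnerIdx = {0, 1, 2, 3};
  std::array<unsigned, 4> OuterIdx = {1, 1, 1, 1};
  // Per-lane byte offsets of the single merged i8 GEP.
  for (unsigned Lane = 0; Lane < 4; ++Lane)
    std::printf("lane %u: %u bytes\n", Lane,
                InnerIdx[Lane] * 4 + OuterIdx[Lane] * 2);
  // Prints 2, 6, 10 and 14: the offsets the folded gather address would use.
}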