@@ -62,6 +62,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
6262#define DEBUG_TYPE " GENX_PATTERN_MATCH"
6363#include " GenX.h"
6464#include " GenXConstants.h"
65+ #include " GenXLowering.h"
6566#include " GenXModule.h"
6667#include " GenXRegion.h"
6768#include " GenXSubtarget.h"
@@ -79,6 +80,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
7980#include " llvm/IR/Dominators.h"
8081#include " llvm/IR/Function.h"
8182#include " llvm/IR/IRBuilder.h"
83+ #include " llvm/IR/InstIterator.h"
8284#include " llvm/IR/InstVisitor.h"
8385#include " llvm/IR/Instructions.h"
8486#include " llvm/IR/Intrinsics.h"
@@ -175,6 +177,8 @@ class GenXPatternMatch : public FunctionPass,
175177 bool simplifyNullDst (CallInst *Inst);
176178 // Transform logic operation with a mask from <N x iM> to <N/(32/M) x i32>
177179 bool extendMask (BinaryOperator *BO);
180+
181+ bool decomposeSdiv (Function *F);
178182};
179183
180184} // namespace
@@ -219,6 +223,8 @@ bool GenXPatternMatch::runOnFunction(Function &F) {
219223 // Break big predicate variables and run after min/max pattern match.
220224 Changed |= decomposeSelect (&F);
221225
226+ Changed |= decomposeSdiv (&F);
227+
222228 return Changed;
223229}
224230
@@ -2123,6 +2129,190 @@ bool GenXPatternMatch::simplifyVolatileGlobals(Function *F) {
21232129 return Changed;
21242130}
21252131
2132+ // a helper routine for decomposeSdivPow2
2133+ // return a new ConstantVector with the same type as input vector, that consists
2134+ // of log2 of original vector;
2135+ // input vector consists of only positive integer
2136+ static Constant *getLog2Vector (const ConstantDataVector &C) {
2137+ VectorType *Ty = C.getType ();
2138+ SmallVector<Constant *, 4 > Elts;
2139+ for (int V = 0 ; V != C.getNumElements (); ++V) {
2140+ ConstantInt *Elt = dyn_cast<ConstantInt>(C.getElementAsConstant (V));
2141+ Constant *Log2 =
2142+ ConstantInt::get (Ty->getScalarType (), Elt->getValue ().logBase2 ());
2143+ Elts.push_back (Log2);
2144+ }
2145+ return ConstantVector::get (Elts);
2146+ }
2147+
2148+ // optimization path if second operand of sdiv is power of 2
2149+ // input:
2150+ // Sdiv - only sdiv binary operator, second operand of which is ConstantVector
2151+ // Optimization for positive y:
2152+ // x / y = ashr( x + lshr( ashr(x, 31), 32 - log2(y)), log2(y))
2153+ static void decomposeSdivPow2 (Instruction &Sdiv,
2154+ llvm::SmallVectorImpl<Instruction *> &ToErase) {
2155+ const llvm::Twine Name = " genxSdivOpt" ;
2156+ Value *Op0 = Sdiv.getOperand (0 );
2157+ ConstantDataVector *Op1 = dyn_cast<ConstantDataVector>(Sdiv.getOperand (1 ));
2158+ IGC_ASSERT (Op1 != nullptr && " Error: Sdiv operand not const" );
2159+ IGC_ASSERT (!PatternMatch::match (Op1, PatternMatch::m_Negative ()) &&
2160+ " Error: Sdiv operand non-positive" );
2161+ IGC_ASSERT (Sdiv.getType ()->isVectorTy () && " Error: Sdiv operand not vector" );
2162+ IGC_ASSERT (Sdiv.getType ()->getVectorElementType ()->isIntegerTy () &&
2163+ " Error: Sdiv operand not vector of int" );
2164+ IGC_ASSERT (Op1->getType ()->getVectorElementType ()->getIntegerBitWidth () ==
2165+ 32 &&
2166+ " Error: Sdiv vector element width not 32, may be wrong" );
2167+ IGC_ASSERT (PatternMatch::match (Op1, PatternMatch::m_Power2 ()) &&
2168+ " Error: Sdiv operand not power of 2" );
2169+
2170+ IRBuilder<> Builder (&Sdiv);
2171+ Builder.SetCurrentDebugLocation (Sdiv.getDebugLoc ());
2172+ unsigned OperandWidth = Op1->getType ()->getVectorNumElements ();
2173+ Constant *VecSignBit = ConstantDataVector::getSplat (
2174+ OperandWidth,
2175+ ConstantInt::get (
2176+ Op1->getType ()->getVectorElementType (),
2177+ Op0->getType ()->getVectorElementType ()->getIntegerBitWidth () - 1 ));
2178+ IGC_ASSERT (VecSignBit != nullptr && " Creating ConstantVector error" );
2179+ Constant *VecBitWidth = ConstantVector::getSplat (
2180+ OperandWidth,
2181+ ConstantInt::get (
2182+ Op1->getType ()->getVectorElementType (),
2183+ Op0->getType ()->getVectorElementType ()->getIntegerBitWidth ()));
2184+ IGC_ASSERT (VecBitWidth != nullptr && " Creating ConstantVector error" );
2185+ Constant *Log2Op1 = getLog2Vector (*Op1);
2186+ IGC_ASSERT (Log2Op1 != nullptr && " getLog2Vector return null" );
2187+
2188+ Value *ShiftSize = Builder.CreateSub (VecBitWidth, Log2Op1, Name);
2189+ // if op0 is negative, Signdetect all ones, else all zeros
2190+ Value *SignDetect = Builder.CreateAShr (Op0, VecSignBit, Name);
2191+ Value *Addition = Builder.CreateLShr (SignDetect, ShiftSize, Name);
2192+ Value *NewRhs = Builder.CreateAdd (Op0, Addition, Name);
2193+ Value *Answer = Builder.CreateAShr (NewRhs, Log2Op1, Name);
2194+ Sdiv.replaceAllUsesWith (Answer);
2195+ ToErase.push_back (&Sdiv);
2196+ }
2197+
2198+ // optimization path if second operand of sdiv is not power of 2
2199+ // Warning: earlier must check that machine support int64 type
2200+ // input:
2201+ // Sdiv - only sdiv binary operator, second operand of which is ConstantVector
2202+ // Optimization for positive y and positive x:
2203+ // x / y = (x *(0xFFFFFFFF / y + 1))>>32
2204+ // if positive y and negative x:
2205+ // x / y = (x * (0xFFFFFFFF / y + 1))>>32 + 1
2206+ // 0xFFFFFFFF = 2^32 -1
2207+ // The optimization can be found in Hackers Delight, chapter 10
2208+ static void
2209+ decomposeSdivNotPow2 (Instruction &Sdiv,
2210+ llvm::SmallVectorImpl<Instruction *> &ToErase) {
2211+ const llvm::Twine Name = " genxSdivOpt" ;
2212+
2213+ Value *Op0 = Sdiv.getOperand (0 );
2214+ ConstantDataVector *Op1 = dyn_cast<ConstantDataVector>(Sdiv.getOperand (1 ));
2215+ IGC_ASSERT (Op1 != nullptr && " Error: Sdiv operand not const" );
2216+ IGC_ASSERT (Sdiv.getType ()->isVectorTy () && " Error: Sdiv operand not vector" );
2217+ IGC_ASSERT (Sdiv.getType ()->getVectorElementType ()->isIntegerTy () &&
2218+ " Error: Sdiv operand not vector of int" );
2219+ IGC_ASSERT (Op1->getType ()->getVectorElementType ()->getIntegerBitWidth () ==
2220+ 32 &&
2221+ " Error: Sdiv vector element width not 32, optimization error" );
2222+ IGC_ASSERT (!PatternMatch::match (Op1, PatternMatch::m_Negative ()) &&
2223+ " Error: Sdiv operand non-positive" );
2224+ IGC_ASSERT (!PatternMatch::match (Op1, PatternMatch::m_Power2 ()) &&
2225+ " Error: Sdiv operand wrong optimization path" );
2226+ IRBuilder<> Builder (&Sdiv);
2227+ Builder.SetCurrentDebugLocation (Sdiv.getDebugLoc ());
2228+ unsigned OperandWidth = Op1->getType ()->getVectorNumElements ();
2229+
2230+ VectorType *Vec64ty = VectorType::get (Builder.getInt64Ty (), OperandWidth);
2231+ VectorType *Vec32ty = VectorType::get (Builder.getInt32Ty (), OperandWidth);
2232+ Value *Op0Wide = Builder.CreateSExt (Op0, Vec64ty, Name);
2233+ Value *Op1Wide = Builder.CreateSExt (Op1, Vec64ty, Name);
2234+
2235+ Constant *Vec1 = ConstantVector::getSplat (
2236+ OperandWidth, ConstantInt::get (Builder.getInt64Ty (), 1 ));
2237+ // max uint32 value
2238+ Constant *Vecmax32u = ConstantVector::getSplat (
2239+ OperandWidth, ConstantInt::get (Builder.getInt64Ty (), (1ull << 32 ) - 1 ));
2240+ Constant *Vec32 = ConstantVector::getSplat (
2241+ OperandWidth, ConstantInt::get (Builder.getInt64Ty (), 32 ));
2242+ Constant *Vec31 = ConstantVector::getSplat (
2243+ OperandWidth, ConstantInt::get (Builder.getInt32Ty (), 31 ));
2244+ // calculations
2245+ // should be a constant
2246+ Constant *Quotient =
2247+ dyn_cast<Constant>(Builder.CreateSDiv (Vecmax32u, Op1Wide, Name));
2248+ IGC_ASSERT (Quotient != nullptr && " Error: non-constant result" );
2249+ Value *SecondMultiplier = Builder.CreateAdd (Quotient, Vec1, Name);
2250+ Instruction *MulResult =
2251+ dyn_cast<Instruction>(Builder.CreateMul (Op0Wide, SecondMultiplier, Name));
2252+ Value *PositiveAnswer = Builder.CreateAShr (MulResult, Vec32, Name);
2253+ // narror back to 32 bits
2254+ Instruction *Narrow =
2255+ dyn_cast<Instruction>(Builder.CreateTrunc (PositiveAnswer, Vec32ty, Name));
2256+ // if the value is negative, we need to add 1
2257+ Value *Sign = Builder.CreateLShr (Op0, Vec31, Name);
2258+ Value *Answer = Builder.CreateAdd (Narrow, Sign, Name);
2259+ // genx is not support trunc and mul64 from GenXLowering,
2260+ // need to call GenXLowering functions that work with mul65 and trunc
2261+ genx::lowerMul64Impl (MulResult, ToErase);
2262+ genx::lowerTruncImpl (Narrow, ToErase);
2263+ Sdiv.replaceAllUsesWith (Answer);
2264+ ToErase.push_back (&Sdiv);
2265+ }
2266+
2267+ static bool
2268+ decomposeSdivInstruction (Instruction &Inst, const GenXSubtarget &ST,
2269+ llvm::SmallVectorImpl<Instruction *> &ToErase) {
2270+ if (!isa<SDivOperator>(Inst))
2271+ return false ; // not interesting
2272+ // from this point operands are signed
2273+ Value *Op1 = Inst.getOperand (1 );
2274+ if (!isa<Constant>(Op1))
2275+ return false ;
2276+ if (PatternMatch::match (Op1, PatternMatch::m_Negative ())) {
2277+ return false ; // the second operand is negative
2278+ }
2279+ if (!Inst.getType ()->isVectorTy ()) // not vector
2280+ return false ;
2281+ if (!Inst.getType ()
2282+ ->getVectorElementType ()
2283+ ->isIntegerTy ()) // not vector of int
2284+ return false ;
2285+ // wrong BitWidth, no ability to optimize
2286+ if (Inst.getType ()->getVectorElementType ()->getIntegerBitWidth () != 32 )
2287+ return false ;
2288+ if (PatternMatch::match (Op1, PatternMatch::m_Power2 ())) {
2289+ decomposeSdivPow2 (Inst, ToErase);
2290+ return true ;
2291+ }
2292+ // no support long long, need for optimization
2293+ if (!ST.hasLongLong ())
2294+ return false ;
2295+ decomposeSdivNotPow2 (Inst, ToErase);
2296+ return true ;
2297+ }
2298+
2299+ bool GenXPatternMatch::decomposeSdiv (Function *F) {
2300+ bool changed = false ;
2301+ const GenXSubtarget &ST = getAnalysis<TargetPassConfig>()
2302+ .getTM <GenXTargetMachine>()
2303+ .getGenXSubtarget ();
2304+
2305+ llvm::SmallVector<Instruction *, 8 > ToErase;
2306+ for (auto &I : llvm::instructions (F)) {
2307+ changed |= decomposeSdivInstruction (I, ST, ToErase);
2308+ }
2309+ // remove all ToErase inst
2310+ for (auto &Deleted : ToErase) {
2311+ Deleted->eraseFromParent ();
2312+ }
2313+ return changed;
2314+ }
2315+
21262316// Decompose predicate operand for large vector selects.
21272317bool GenXPatternMatch::decomposeSelect (Function *F) {
21282318 const GenXSubtarget *ST = &getAnalysis<TargetPassConfig>()
0 commit comments