@@ -45,12 +45,6 @@ static cl::opt<bool> WidenLoads(
     cl::ReallyHidden,
     cl::init(false));
 
-static cl::opt<bool> Widen16BitOps(
-    "amdgpu-codegenprepare-widen-16-bit-ops",
-    cl::desc(
-        "Widen uniform 16-bit instructions to 32-bit in AMDGPUCodeGenPrepare"),
-    cl::ReallyHidden, cl::init(false));
-
 static cl::opt<bool>
     BreakLargePHIs("amdgpu-codegenprepare-break-large-phis",
                    cl::desc("Break large PHI nodes for DAGISel"),
@@ -150,18 +144,6 @@ class AMDGPUCodeGenPrepareImpl
 
   bool canBreakPHINode(const PHINode &I);
 
-  /// Copies exact/nsw/nuw flags (if any) from binary operation \p I to
-  /// binary operation \p V.
-  ///
-  /// \returns Binary operation \p V.
-  /// \returns \p T's base element bit width.
-  unsigned getBaseElementBitWidth(const Type *T) const;
-
-  /// \returns Equivalent 32 bit integer type for given type \p T. For example,
-  /// if \p T is i7, then i32 is returned; if \p T is <3 x i12>, then <3 x i32>
-  /// is returned.
-  Type *getI32Ty(IRBuilder<> &B, const Type *T) const;
-
   /// \returns True if binary operation \p I is a signed binary operation, false
   /// otherwise.
   bool isSigned(const BinaryOperator &I) const;
@@ -170,10 +152,6 @@ class AMDGPUCodeGenPrepareImpl
   /// signed 'icmp' operation, false otherwise.
   bool isSigned(const SelectInst &I) const;
 
-  /// \returns True if type \p T needs to be promoted to 32 bit integer type,
-  /// false otherwise.
-  bool needsPromotionToI32(const Type *T) const;
-
   /// Return true if \p T is a legal scalar floating point type.
   bool isLegalFloatingTy(const Type *T) const;
 
@@ -188,52 +166,6 @@ class AMDGPUCodeGenPrepareImpl
            computeKnownFPClass(V, fcSubnormal, CtxI).isKnownNeverSubnormal();
   }
 
-  /// Promotes uniform binary operation \p I to equivalent 32 bit binary
-  /// operation.
-  ///
-  /// \details \p I's base element bit width must be greater than 1 and less
-  /// than or equal 16. Promotion is done by sign or zero extending operands to
-  /// 32 bits, replacing \p I with equivalent 32 bit binary operation, and
-  /// truncating the result of 32 bit binary operation back to \p I's original
-  /// type. Division operation is not promoted.
-  ///
-  /// \returns True if \p I is promoted to equivalent 32 bit binary operation,
-  /// false otherwise.
-  bool promoteUniformOpToI32(BinaryOperator &I) const;
-
-  /// Promotes uniform 'icmp' operation \p I to 32 bit 'icmp' operation.
-  ///
-  /// \details \p I's base element bit width must be greater than 1 and less
-  /// than or equal 16. Promotion is done by sign or zero extending operands to
-  /// 32 bits, and replacing \p I with 32 bit 'icmp' operation.
-  ///
-  /// \returns True.
-  bool promoteUniformOpToI32(ICmpInst &I) const;
-
-  /// Promotes uniform 'select' operation \p I to 32 bit 'select'
-  /// operation.
-  ///
-  /// \details \p I's base element bit width must be greater than 1 and less
-  /// than or equal 16. Promotion is done by sign or zero extending operands to
-  /// 32 bits, replacing \p I with 32 bit 'select' operation, and truncating the
-  /// result of 32 bit 'select' operation back to \p I's original type.
-  ///
-  /// \returns True.
-  bool promoteUniformOpToI32(SelectInst &I) const;
-
-  /// Promotes uniform 'bitreverse' intrinsic \p I to 32 bit 'bitreverse'
-  /// intrinsic.
-  ///
-  /// \details \p I's base element bit width must be greater than 1 and less
-  /// than or equal 16. Promotion is done by zero extending the operand to 32
-  /// bits, replacing \p I with 32 bit 'bitreverse' intrinsic, shifting the
-  /// result of 32 bit 'bitreverse' intrinsic to the right with zero fill (the
-  /// shift amount is 32 minus \p I's base element bit width), and truncating
-  /// the result of the shift operation back to \p I's original type.
-  ///
-  /// \returns True.
-  bool promoteUniformBitreverseToI32(IntrinsicInst &I) const;
-
   /// \returns The minimum number of bits needed to store the value of \Op as an
   /// unsigned integer. Truncating to this size and then zero-extending to
   /// the original will not change the value.
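
The bitreverse promotion described in the deleted comment above is easy to sanity-check outside the pass: zero-extend to 32 bits, reverse all 32 bits, then shift right by 32 minus the source width so the interesting bits land back at the bottom. A minimal standalone sketch of that arithmetic in plain C++ (the helper names are invented for illustration and are not part of the pass):

    #include <cassert>
    #include <cstdint>

    // Software stand-in for llvm.bitreverse.i32.
    static uint32_t bitreverse32(uint32_t V) {
      uint32_t R = 0;
      for (int B = 0; B < 32; ++B)
        R |= ((V >> B) & 1u) << (31 - B);
      return R;
    }

    // i16 bitreverse via the promoted i32 form:
    // zext -> bitreverse.i32 -> lshr (32 - 16) -> trunc.
    static uint16_t bitreverse16ViaI32(uint16_t V) {
      uint32_t Rev = bitreverse32(V); // zext is implicit in C++
      return static_cast<uint16_t>(Rev >> (32 - 16));
    }

    int main() {
      assert(bitreverse16ViaI32(0x0001) == 0x8000);
      assert(bitreverse16ViaI32(0x00F0) == 0x0F00);
      return 0;
    }
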
@@ -320,13 +252,11 @@ class AMDGPUCodeGenPrepareImpl
   bool visitInstruction(Instruction &I) { return false; }
   bool visitBinaryOperator(BinaryOperator &I);
   bool visitLoadInst(LoadInst &I);
-  bool visitICmpInst(ICmpInst &I);
   bool visitSelectInst(SelectInst &I);
   bool visitPHINode(PHINode &I);
   bool visitAddrSpaceCastInst(AddrSpaceCastInst &I);
 
   bool visitIntrinsicInst(IntrinsicInst &I);
-  bool visitBitreverseIntrinsicInst(IntrinsicInst &I);
   bool visitFMinLike(IntrinsicInst &I);
   bool visitSqrt(IntrinsicInst &I);
   bool run();
@@ -380,22 +310,6 @@ bool AMDGPUCodeGenPrepareImpl::run() {
   return MadeChange;
 }
 
-unsigned AMDGPUCodeGenPrepareImpl::getBaseElementBitWidth(const Type *T) const {
-  assert(needsPromotionToI32(T) && "T does not need promotion to i32");
-
-  if (T->isIntegerTy())
-    return T->getIntegerBitWidth();
-  return cast<VectorType>(T)->getElementType()->getIntegerBitWidth();
-}
-
-Type *AMDGPUCodeGenPrepareImpl::getI32Ty(IRBuilder<> &B, const Type *T) const {
-  assert(needsPromotionToI32(T) && "T does not need promotion to i32");
-
-  if (T->isIntegerTy())
-    return B.getInt32Ty();
-  return FixedVectorType::get(B.getInt32Ty(), cast<FixedVectorType>(T));
-}
-
 bool AMDGPUCodeGenPrepareImpl::isSigned(const BinaryOperator &I) const {
   return I.getOpcode() == Instruction::AShr ||
          I.getOpcode() == Instruction::SDiv || I.getOpcode() == Instruction::SRem;
@@ -406,59 +320,11 @@ bool AMDGPUCodeGenPrepareImpl::isSigned(const SelectInst &I) const {
          cast<ICmpInst>(I.getOperand(0))->isSigned();
 }
 
-bool AMDGPUCodeGenPrepareImpl::needsPromotionToI32(const Type *T) const {
-  if (!Widen16BitOps)
-    return false;
-
-  const IntegerType *IntTy = dyn_cast<IntegerType>(T);
-  if (IntTy && IntTy->getBitWidth() > 1 && IntTy->getBitWidth() <= 16)
-    return true;
-
-  if (const VectorType *VT = dyn_cast<VectorType>(T)) {
-    // TODO: The set of packed operations is more limited, so may want to
-    // promote some anyway.
-    if (ST.hasVOP3PInsts())
-      return false;
-
-    return needsPromotionToI32(VT->getElementType());
-  }
-
-  return false;
-}
-
 bool AMDGPUCodeGenPrepareImpl::isLegalFloatingTy(const Type *Ty) const {
   return Ty->isFloatTy() || Ty->isDoubleTy() ||
          (Ty->isHalfTy() && ST.has16BitInsts());
 }
 
-// Return true if the op promoted to i32 should have nsw set.
-static bool promotedOpIsNSW(const Instruction &I) {
-  switch (I.getOpcode()) {
-  case Instruction::Shl:
-  case Instruction::Add:
-  case Instruction::Sub:
-    return true;
-  case Instruction::Mul:
-    return I.hasNoUnsignedWrap();
-  default:
-    return false;
-  }
-}
-
-// Return true if the op promoted to i32 should have nuw set.
-static bool promotedOpIsNUW(const Instruction &I) {
-  switch (I.getOpcode()) {
-  case Instruction::Shl:
-  case Instruction::Add:
-  case Instruction::Mul:
-    return true;
-  case Instruction::Sub:
-    return I.hasNoUnsignedWrap();
-  default:
-    return false;
-  }
-}
-
 bool AMDGPUCodeGenPrepareImpl::canWidenScalarExtLoad(LoadInst &I) const {
   Type *Ty = I.getType();
   int TySize = DL.getTypeSizeInBits(Ty);
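
The promotedOpIsNSW/promotedOpIsNUW predicates deleted above encode small range facts about 16-bit operands widened to 32 bits: add, sub, and shl can never cross the 32-bit signed boundary, while a widened mul can exceed 2^31 and so only earns nsw when the original operation carried nuw. A quick numeric check of those bounds, as a hedged plain-C++ illustration (not code from the pass):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint64_t Max16 = 0xFFFF; // largest zero-extended 16-bit operand

      // add: 0xFFFF + 0xFFFF needs only 17 bits, so the widened 32-bit add
      // can wrap neither unsigned (nuw) nor signed (nsw).
      assert(Max16 + Max16 < (1ull << 31));

      // shl: a 16-bit value shifted by at most 15 stays below 2^31, so the
      // widened shift is both nsw and nuw.
      assert((Max16 << 15) < (1ull << 31));

      // mul: 0xFFFF * 0xFFFF = 0xFFFE0001 fits in 32 bits (nuw is fine) but
      // exceeds 2^31 - 1, so nsw only holds if the original mul had nuw.
      assert(Max16 * Max16 < (1ull << 32));
      assert(Max16 * Max16 > (1ull << 31));
      return 0;
    }
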
@@ -467,134 +333,6 @@ bool AMDGPUCodeGenPrepareImpl::canWidenScalarExtLoad(LoadInst &I) const {
   return I.isSimple() && TySize < 32 && Alignment >= 4 && UA.isUniform(&I);
 }
 
-bool AMDGPUCodeGenPrepareImpl::promoteUniformOpToI32(BinaryOperator &I) const {
-  assert(needsPromotionToI32(I.getType()) &&
-         "I does not need promotion to i32");
-
-  if (I.getOpcode() == Instruction::SDiv ||
-      I.getOpcode() == Instruction::UDiv ||
-      I.getOpcode() == Instruction::SRem ||
-      I.getOpcode() == Instruction::URem)
-    return false;
-
-  IRBuilder<> Builder(&I);
-  Builder.SetCurrentDebugLocation(I.getDebugLoc());
-
-  Type *I32Ty = getI32Ty(Builder, I.getType());
-  Value *ExtOp0 = nullptr;
-  Value *ExtOp1 = nullptr;
-  Value *ExtRes = nullptr;
-  Value *TruncRes = nullptr;
-
-  if (isSigned(I)) {
-    ExtOp0 = Builder.CreateSExt(I.getOperand(0), I32Ty);
-    ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32Ty);
-  } else {
-    ExtOp0 = Builder.CreateZExt(I.getOperand(0), I32Ty);
-    ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
-  }
-
-  ExtRes = Builder.CreateBinOp(I.getOpcode(), ExtOp0, ExtOp1);
-  if (Instruction *Inst = dyn_cast<Instruction>(ExtRes)) {
-    if (promotedOpIsNSW(cast<Instruction>(I)))
-      Inst->setHasNoSignedWrap();
-
-    if (promotedOpIsNUW(cast<Instruction>(I)))
-      Inst->setHasNoUnsignedWrap();
-
-    if (const auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I))
-      Inst->setIsExact(ExactOp->isExact());
-  }
-
-  TruncRes = Builder.CreateTrunc(ExtRes, I.getType());
-
-  I.replaceAllUsesWith(TruncRes);
-  I.eraseFromParent();
-
-  return true;
-}
-
-bool AMDGPUCodeGenPrepareImpl::promoteUniformOpToI32(ICmpInst &I) const {
-  assert(needsPromotionToI32(I.getOperand(0)->getType()) &&
-         "I does not need promotion to i32");
-
-  IRBuilder<> Builder(&I);
-  Builder.SetCurrentDebugLocation(I.getDebugLoc());
-
-  Type *I32Ty = getI32Ty(Builder, I.getOperand(0)->getType());
-  Value *ExtOp0 = nullptr;
-  Value *ExtOp1 = nullptr;
-  Value *NewICmp = nullptr;
-
-  if (I.isSigned()) {
-    ExtOp0 = Builder.CreateSExt(I.getOperand(0), I32Ty);
-    ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32Ty);
-  } else {
-    ExtOp0 = Builder.CreateZExt(I.getOperand(0), I32Ty);
-    ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
-  }
-  NewICmp = Builder.CreateICmp(I.getPredicate(), ExtOp0, ExtOp1);
-
-  I.replaceAllUsesWith(NewICmp);
-  I.eraseFromParent();
-
-  return true;
-}
-
-bool AMDGPUCodeGenPrepareImpl::promoteUniformOpToI32(SelectInst &I) const {
-  assert(needsPromotionToI32(I.getType()) &&
-         "I does not need promotion to i32");
-
-  IRBuilder<> Builder(&I);
-  Builder.SetCurrentDebugLocation(I.getDebugLoc());
-
-  Type *I32Ty = getI32Ty(Builder, I.getType());
-  Value *ExtOp1 = nullptr;
-  Value *ExtOp2 = nullptr;
-  Value *ExtRes = nullptr;
-  Value *TruncRes = nullptr;
-
-  if (isSigned(I)) {
-    ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32Ty);
-    ExtOp2 = Builder.CreateSExt(I.getOperand(2), I32Ty);
-  } else {
-    ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
-    ExtOp2 = Builder.CreateZExt(I.getOperand(2), I32Ty);
-  }
-  ExtRes = Builder.CreateSelect(I.getOperand(0), ExtOp1, ExtOp2);
-  TruncRes = Builder.CreateTrunc(ExtRes, I.getType());
-
-  I.replaceAllUsesWith(TruncRes);
-  I.eraseFromParent();
-
-  return true;
-}
-
-bool AMDGPUCodeGenPrepareImpl::promoteUniformBitreverseToI32(
-    IntrinsicInst &I) const {
-  assert(I.getIntrinsicID() == Intrinsic::bitreverse &&
-         "I must be bitreverse intrinsic");
-  assert(needsPromotionToI32(I.getType()) &&
-         "I does not need promotion to i32");
-
-  IRBuilder<> Builder(&I);
-  Builder.SetCurrentDebugLocation(I.getDebugLoc());
-
-  Type *I32Ty = getI32Ty(Builder, I.getType());
-  Value *ExtOp = Builder.CreateZExt(I.getOperand(0), I32Ty);
-  Value *ExtRes =
-      Builder.CreateIntrinsic(Intrinsic::bitreverse, {I32Ty}, {ExtOp});
-  Value *LShrOp =
-      Builder.CreateLShr(ExtRes, 32 - getBaseElementBitWidth(I.getType()));
-  Value *TruncRes =
-      Builder.CreateTrunc(LShrOp, I.getType());
-
-  I.replaceAllUsesWith(TruncRes);
-  I.eraseFromParent();
-
-  return true;
-}
-
 unsigned AMDGPUCodeGenPrepareImpl::numBitsUnsigned(Value *Op) const {
   return computeKnownBits(Op, DL, AC).countMaxActiveBits();
 }
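
The removed promoteUniformOpToI32 picked sign or zero extension based on isSigned; the choice is observable whenever the sign bit matters, e.g. for ashr. A small hedged sketch in plain C++ of why a signed 16-bit shift must be widened with sext (illustrative only; assumes the usual arithmetic behavior of >> on negative signed values, guaranteed since C++20):

    #include <cassert>
    #include <cstdint>

    // Correct widening for a signed op: sext i16 -> i32, shift, trunc.
    static int16_t ashr16ViaSExt(int16_t V, unsigned Amt) {
      int32_t Ext = V; // sign-extends
      return static_cast<int16_t>(Ext >> Amt);
    }

    // Wrong widening: zext loses the sign before the shift.
    static int16_t ashr16ViaZExt(int16_t V, unsigned Amt) {
      uint32_t Ext = static_cast<uint16_t>(V); // zero-extends
      return static_cast<int16_t>(Ext >> Amt);
    }

    int main() {
      assert(ashr16ViaSExt(-4, 1) == -2);     // sign bit preserved
      assert(ashr16ViaZExt(-4, 1) == 0x7FFE); // 0xFFFC >> 1, sign lost
      return 0;
    }
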
@@ -1635,10 +1373,6 @@ bool AMDGPUCodeGenPrepareImpl::visitBinaryOperator(BinaryOperator &I) {
   if (foldBinOpIntoSelect(I))
     return true;
 
-  if (ST.has16BitInsts() && needsPromotionToI32(I.getType()) &&
-      UA.isUniform(&I) && promoteUniformOpToI32(I))
-    return true;
-
   if (UseMul24Intrin && replaceMulWithMul24(I))
     return true;
   if (tryNarrowMathIfNoOverflow(&I, ST.getTargetLowering(),
@@ -1770,29 +1504,13 @@ bool AMDGPUCodeGenPrepareImpl::visitLoadInst(LoadInst &I) {
   return false;
 }
 
-bool AMDGPUCodeGenPrepareImpl::visitICmpInst(ICmpInst &I) {
-  bool Changed = false;
-
-  if (ST.has16BitInsts() && needsPromotionToI32(I.getOperand(0)->getType()) &&
-      UA.isUniform(&I))
-    Changed |= promoteUniformOpToI32(I);
-
-  return Changed;
-}
-
 bool AMDGPUCodeGenPrepareImpl::visitSelectInst(SelectInst &I) {
   Value *Cond = I.getCondition();
   Value *TrueVal = I.getTrueValue();
   Value *FalseVal = I.getFalseValue();
   Value *CmpVal;
   CmpPredicate Pred;
 
-  if (ST.has16BitInsts() && needsPromotionToI32(I.getType())) {
-    if (UA.isUniform(&I))
-      return promoteUniformOpToI32(I);
-    return false;
-  }
-
   // Match fract pattern with nan check.
   if (!match(Cond, m_FCmp(Pred, m_Value(CmpVal), m_NonNaN())))
     return false;
@@ -2196,8 +1914,6 @@ bool AMDGPUCodeGenPrepareImpl::visitAddrSpaceCastInst(AddrSpaceCastInst &I) {
 
 bool AMDGPUCodeGenPrepareImpl::visitIntrinsicInst(IntrinsicInst &I) {
   switch (I.getIntrinsicID()) {
-  case Intrinsic::bitreverse:
-    return visitBitreverseIntrinsicInst(I);
   case Intrinsic::minnum:
   case Intrinsic::minimumnum:
   case Intrinsic::minimum:
@@ -2209,16 +1925,6 @@ bool AMDGPUCodeGenPrepareImpl::visitIntrinsicInst(IntrinsicInst &I) {
   }
 }
 
-bool AMDGPUCodeGenPrepareImpl::visitBitreverseIntrinsicInst(IntrinsicInst &I) {
-  bool Changed = false;
-
-  if (ST.has16BitInsts() && needsPromotionToI32(I.getType()) &&
-      UA.isUniform(&I))
-    Changed |= promoteUniformBitreverseToI32(I);
-
-  return Changed;
-}
-
 /// Match non-nan fract pattern.
 /// minnum(fsub(x, floor(x)), nextafter(1.0, -1.0))
 /// minimumnum(fsub(x, floor(x)), nextafter(1.0, -1.0))
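
The fract pattern named in the surviving comment above clamps x - floor(x) to the largest double below 1.0, keeping the result inside [0, 1) even when rounding pushes the subtraction to exactly 1.0. A short plain-C++ illustration of the matched computation (a sketch, not the pass's code):

    #include <cassert>
    #include <cmath>

    // fract(x) as the matched pattern computes it:
    // minnum(x - floor(x), nextafter(1.0, -1.0)); fmin has minnum semantics.
    static double fractPattern(double X) {
      return std::fmin(X - std::floor(X), std::nextafter(1.0, -1.0));
    }

    int main() {
      assert(fractPattern(2.75) == 0.75);
      assert(fractPattern(-0.25) == 0.75); // -0.25 - (-1.0)
      assert(fractPattern(8.0) == 0.0);
      // For a tiny negative input, x - floor(x) rounds to exactly 1.0;
      // the clamp keeps the result strictly below 1.0.
      assert(fractPattern(std::nextafter(0.0, -1.0)) < 1.0);
      return 0;
    }
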