@@ -136,7 +136,6 @@ class IRPromoter {
136136
137137class TypePromotionImpl {
138138 unsigned TypeSize = 0 ;
139- const TargetLowering *TLI = nullptr ;
140139 LLVMContext *Ctx = nullptr ;
141140 unsigned RegisterBitWidth = 0 ;
142141 SmallPtrSet<Value *, 16 > AllVisited;
@@ -273,58 +272,64 @@ bool TypePromotionImpl::isSink(Value *V) {
273272
274273// / Return whether this instruction can safely wrap.
275274bool TypePromotionImpl::isSafeWrap (Instruction *I) {
276- // We can support a potentially wrapping Add/Sub instruction (I) if:
275+ // We can support a potentially wrapping instruction (I) if:
277276 // - It is only used by an unsigned icmp.
278277 // - The icmp uses a constant.
278+ // - The wrapping value (I) is decreasing, i.e. would underflow - wrapping
279+ // around zero to become a larger number than before.
279280 // - The wrapping instruction (I) also uses a constant.
280281 //
281- // This a common pattern emitted to check if a value is within a range.
282+ // We can then use the two constants to calculate whether the result would
283+ // wrap with respect to itself in the original bitwidth. If it doesn't wrap,
284+ // just underflows the range, the icmp would give the same result whether the
285+ // result has been truncated or not. We calculate this by:
286+ // - Zero extending both constants, if needed, to RegisterBitWidth.
287+ // - Take the absolute value of I's constant, adding this to the icmp const.
288+ // - Check that this value is not out of range for small type. If it is, it
289+ // means that it has underflowed enough to wrap around the icmp constant.
282290 //
283291 // For example:
284292 //
285- // %sub = sub i8 %a, C1
286- // %cmp = icmp ule i8 %sub, C2
287- //
288- // or
289- //
290- // %add = add i8 %a, C1
291- // %cmp = icmp ule i8 %add, C2.
292- //
293- // We will treat an add as though it were a subtract by -C1. To promote
294- // the Add/Sub we will zero extend the LHS and the subtracted amount. For Add,
295- // this means we need to negate the constant, zero extend to RegisterBitWidth,
296- // and negate in the larger type.
293+ // %sub = sub i8 %a, 2
294+ // %cmp = icmp ule i8 %sub, 254
297295 //
298- // This will produce a value in the range [-zext(C1), zext(X)-zext(C1)] where
299- // C1 is the subtracted amount. This is either a small unsigned number or a
300- // large unsigned number in the promoted type.
296+ // If %a = 0, %sub = -2 == FE == 254
297+ // But if this is evaluated as an i32
298+ // %sub = -2 == FF FF FF FE == 4294967294
299+ // So the unsigned compares (i8 and i32) would not yield the same result.
301300 //
302- // Now we need to correct the compare constant C2. Values >= C1 in the
303- // original add result range have been remapped to large values in the
304- // promoted range. If the compare constant fell into this range we need to
305- // remap it as well. We can do this as -(zext(-C2)).
301+ // Another way to look at it is:
302+ // %a - 2 <= 254
303+ // %a + 2 <= 254 + 2
304+ // %a <= 256
305+ // And we can't represent 256 in the i8 format, so we don't support it.
306306 //
307- // For example :
307+ // Whereas :
308308 //
309- // %sub = sub i8 %a, 2
309+ // %sub = sub i8 %a, 1
310310 // %cmp = icmp ule i8 %sub, 254
311311 //
312- // becomes
312+ // If %a = 0, %sub = -1 == FF == 255
313+ // As i32:
314+ // %sub = -1 == FF FF FF FF == 4294967295
313315 //
314- // %zext = zext %a to i32
315- // %sub = sub i32 %zext, 2
316- // %cmp = icmp ule i32 %sub, 4294967294
316+ // In this case, the unsigned compare results would be the same and this
317+ // would also be true for ult, uge and ugt:
318+ // - (255 < 254) == (0xFFFFFFFF < 254) == false
319+ // - (255 <= 254) == (0xFFFFFFFF <= 254) == false
320+ // - (255 > 254) == (0xFFFFFFFF > 254) == true
321+ // - (255 >= 254) == (0xFFFFFFFF >= 254) == true
317322 //
318- // Another example :
323+ // To demonstrate why we can't handle increasing values :
319324 //
320- // %sub = sub i8 %a, 1
321- // %cmp = icmp ule i8 %sub, 254
325+ // %add = add i8 %a, 2
326+ // %cmp = icmp ult i8 %add, 127
322327 //
323- // becomes
328+ // If %a = 254, %add = 256 == (i8 0)
329+ // As i32:
330+ // %add = 256
324331 //
325- // %zext = zext %a to i32
326- // %sub = sub i32 %zext, 1
327- // %cmp = icmp ule i32 %sub, 254
332+ // (0 < 127) != (256 < 127)
328333
329334 unsigned Opc = I->getOpcode ();
330335 if (Opc != Instruction::Add && Opc != Instruction::Sub)
@@ -351,23 +356,15 @@ bool TypePromotionImpl::isSafeWrap(Instruction *I) {
351356 APInt OverflowConst = cast<ConstantInt>(I->getOperand (1 ))->getValue ();
352357 if (Opc == Instruction::Sub)
353358 OverflowConst = -OverflowConst;
354-
355- // If the constant is positive, we will end up filling the promoted bits with
356- // all 1s. Make sure that results in a cheap add constant.
357- if (!OverflowConst.isNonPositive ()) {
358- // We don't have the true promoted width, just use 64 so we can create an
359- // int64_t for the isLegalAddImmediate call.
360- if (OverflowConst.getBitWidth () >= 64 )
361- return false ;
362-
363- APInt NewConst = -((-OverflowConst).zext (64 ));
364- if (!TLI->isLegalAddImmediate (NewConst.getSExtValue ()))
365- return false ;
366- }
359+ if (!OverflowConst.isNonPositive ())
360+ return false ;
367361
368362 SafeWrap.insert (I);
369363
370- if (OverflowConst.ugt (ICmpConst)) {
364+ // Using C1 = OverflowConst and C2 = ICmpConst, we can either prove that:
365+ // zext(x) + sext(C1) <u zext(C2) if C1 < 0 and C1 >s C2
366+ // zext(x) + sext(C1) <u sext(C2) if C1 < 0 and C1 <=s C2
367+ if (OverflowConst.sgt (ICmpConst)) {
371368 LLVM_DEBUG (dbgs () << " IR Promotion: Allowing safe overflow for sext "
372369 << " const of " << *I << " \n " );
373370 return true ;
@@ -490,24 +487,18 @@ void IRPromoter::PromoteTree() {
490487 continue ;
491488
492489 if (auto *Const = dyn_cast<ConstantInt>(Op)) {
493- // For subtract, we only need to zext the constant. We only put it in
490+ // For subtract, we don't need to sext the constant. We only put it in
494491 // SafeWrap because SafeWrap.size() is used elsewhere.
495- // For Add and ICmp we need to find how far the constant is from the
496- // top of its original unsigned range and place it the same distance
497- // from the top of its new unsigned range. We can do this by negating
498- // the constant, zero extending it, then negating in the new type.
499- APInt NewConst;
500- if (SafeWrap.contains (I)) {
501- if (I->getOpcode () == Instruction::ICmp)
502- NewConst = -((-Const->getValue ()).zext (PromotedWidth));
503- else if (I->getOpcode () == Instruction::Add && i == 1 )
504- NewConst = -((-Const->getValue ()).zext (PromotedWidth));
505- else
506- NewConst = Const->getValue ().zext (PromotedWidth);
507- } else
508- NewConst = Const->getValue ().zext (PromotedWidth);
509-
510- I->setOperand (i, ConstantInt::get (Const->getContext (), NewConst));
492+ // For cmp, we need to sign extend a constant appearing in either
493+ // operand. For add, we should only sign extend the RHS.
494+ Constant *NewConst =
495+ ConstantInt::get (Const->getContext (),
496+ (SafeWrap.contains (I) &&
497+ (I->getOpcode () == Instruction::ICmp || i == 1 ) &&
498+ I->getOpcode () != Instruction::Sub)
499+ ? Const->getValue ().sext (PromotedWidth)
500+ : Const->getValue ().zext (PromotedWidth));
501+ I->setOperand (i, NewConst);
511502 } else if (isa<UndefValue>(Op))
512503 I->setOperand (i, ConstantInt::get (ExtTy, 0 ));
513504 }
@@ -926,7 +917,7 @@ bool TypePromotionImpl::run(Function &F, const TargetMachine *TM,
926917 bool MadeChange = false ;
927918 const DataLayout &DL = F.getParent ()->getDataLayout ();
928919 const TargetSubtargetInfo *SubtargetInfo = TM->getSubtargetImpl (F);
929- TLI = SubtargetInfo->getTargetLowering ();
920+ const TargetLowering * TLI = SubtargetInfo->getTargetLowering ();
930921 RegisterBitWidth =
931922 TTI.getRegisterBitWidth (TargetTransformInfo::RGK_Scalar).getFixedValue ();
932923 Ctx = &F.getParent ()->getContext ();
0 commit comments