Skip to content

Commit 7d1df2f

Browse files
committed
Reduce the number of cmp instructions.
Use multiple PHIs of scalar types instead of a single PHI of aggregate type. Change-Id: Ie6bc78eda41f454e9edeea7b3bf2c21da1a89693
1 parent cd23298 commit 7d1df2f

File tree

4 files changed

+136
-125
lines changed

4 files changed

+136
-125
lines changed

llvm/lib/CodeGen/CodeGenPrepare.cpp

Lines changed: 55 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -6417,7 +6417,7 @@ bool CodeGenPrepare::optimizeUMulWithOverflow(Instruction *I) {
64176417
unsigned VTHalfBitWidth = VTBitWidth / 2;
64186418
auto *LegalTy = IntegerType::getIntNTy(I->getContext(), VTHalfBitWidth);
64196419

6420-
// Skip the optimizaiton if the type with HalfBitWidth is not legal for the
6420+
// Skip the optimization if the type with HalfBitWidth is not legal for the
64216421
// target.
64226422
if (TLI->getTypeAction(I->getContext(), TLI->getValueType(*DL, LegalTy)) !=
64236423
TargetLowering::TypeLegal)
@@ -6464,32 +6464,40 @@ bool CodeGenPrepare::optimizeUMulWithOverflow(Instruction *I) {
64646464
auto *ExtLoLHS = Builder.CreateZExt(LoLHS, Ty, "lo.lhs.ext");
64656465
auto *ExtLoRHS = Builder.CreateZExt(LoRHS, Ty, "lo.rhs.ext");
64666466
auto *Mul = Builder.CreateMul(ExtLoLHS, ExtLoRHS, "mul.no.overflow");
6467-
StructType *STy = StructType::get(
6468-
I->getContext(), {Ty, IntegerType::getInt1Ty(I->getContext())});
6469-
Value *StructValNoOverflow = PoisonValue::get(STy);
6470-
StructValNoOverflow =
6471-
Builder.CreateInsertValue(StructValNoOverflow, Mul, {0});
6472-
StructValNoOverflow = Builder.CreateInsertValue(
6473-
StructValNoOverflow, ConstantInt::getFalse(I->getContext()), {1});
64746467
Builder.CreateBr(OverflowResBB);
64756468

64766469
//------------------------------------------------------------------------------
64776470
// BB overflow.res:
64786471
Builder.SetInsertPoint(OverflowResBB, OverflowResBB->getFirstInsertionPt());
6479-
auto *PHINode = Builder.CreatePHI(STy, 2);
6480-
PHINode->addIncoming(StructValNoOverflow, NoOverflowBB);
6472+
auto *PHINode1 = Builder.CreatePHI(Ty, 2);
6473+
PHINode1->addIncoming(Mul, NoOverflowBB);
6474+
auto *PHINode2 =
6475+
Builder.CreatePHI(IntegerType::getInt1Ty(I->getContext()), 2);
6476+
PHINode2->addIncoming(ConstantInt::getFalse(I->getContext()), NoOverflowBB);
64816477

6478+
StructType *STy = StructType::get(
6479+
I->getContext(), {Ty, IntegerType::getInt1Ty(I->getContext())});
6480+
Value *StructValOverflowRes = PoisonValue::get(STy);
6481+
StructValOverflowRes =
6482+
Builder.CreateInsertValue(StructValOverflowRes, PHINode1, {0});
6483+
StructValOverflowRes =
6484+
Builder.CreateInsertValue(StructValOverflowRes, PHINode2, {1});
64826485
// Before moving the mul.overflow intrinsic to the overflowBB, replace all its
6483-
// uses by PHINode.
6484-
I->replaceAllUsesWith(PHINode);
6486+
// uses by StructValOverflowRes.
6487+
I->replaceAllUsesWith(StructValOverflowRes);
6488+
I->removeFromParent();
64856489

64866490
// BB overflow:
6487-
PHINode->addIncoming(I, OverflowBB);
6488-
I->removeFromParent();
64896491
I->insertInto(OverflowBB, OverflowBB->end());
64906492
Builder.SetInsertPoint(OverflowBB, OverflowBB->end());
6493+
auto *MulOverflow = Builder.CreateExtractValue(I, {0}, "mul.overflow");
6494+
auto *OverflowFlag = Builder.CreateExtractValue(I, {1}, "overflow.flag");
64916495
Builder.CreateBr(OverflowResBB);
64926496

6497+
// Add the extracted values to the PHI nodes in the overflow.res block.
6498+
PHINode1->addIncoming(MulOverflow, OverflowBB);
6499+
PHINode2->addIncoming(OverflowFlag, OverflowBB);
6500+
64936501
// return false to stop reprocessing the function.
64946502
return false;
64956503
}
@@ -6516,7 +6524,7 @@ bool CodeGenPrepare::optimizeSMulWithOverflow(Instruction *I) {
65166524
unsigned VTHalfBitWidth = VTBitWidth / 2;
65176525
auto *LegalTy = IntegerType::getIntNTy(I->getContext(), VTHalfBitWidth);
65186526

6519-
// Skip the optimizaiton if the type with HalfBitWidth is not legal for the
6527+
// Skip the optimization if the type with HalfBitWidth is not legal for the
65206528
// target.
65216529
if (TLI->getTypeAction(I->getContext(), TLI->getValueType(*DL, LegalTy)) !=
65226530
TargetLowering::TypeLegal)
@@ -6553,11 +6561,17 @@ bool CodeGenPrepare::optimizeSMulWithOverflow(Instruction *I) {
65536561
Builder.CreateAShr(LoLHS, VTHalfBitWidth - 1, "sign.lo.lhs");
65546562
auto *HiLHS = Builder.CreateLShr(LHS, VTHalfBitWidth, "lhs.lsr");
65556563
HiLHS = Builder.CreateTrunc(HiLHS, LegalTy, "hi.lhs");
6556-
6557-
auto *CmpLHS = Builder.CreateCmp(ICmpInst::ICMP_NE, HiLHS, SignLoLHS);
6558-
auto *CmpRHS = Builder.CreateCmp(ICmpInst::ICMP_NE, HiRHS, SignLoRHS);
6559-
auto *Or = Builder.CreateOr(CmpLHS, CmpRHS, "or.lhs.rhs");
6560-
Builder.CreateCondBr(Or, OverflowBB, NoOverflowBB);
6564+
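// xor(HiLHS, SignLoLHS) == 0 -> LHS is the sign-extension of its low half (no overflow)
6565+
// xor(HiRHS, SignLoRHS) == 0 -> RHS is the sign-extension of its low half (no overflow)
6566+
// If either xor is non-zero, the overflow path must be taken. NOTE(review): the compare below tests `Or == 1` rather than `Or != 0`, which does not match the ICMP_NE checks it replaces - confirm.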
6567+
// auto *CmpLHS = Builder.CreateCmp(ICmpInst::ICMP_NE, HiLHS, SignLoLHS);
6568+
auto *XorLHS = Builder.CreateXor(HiLHS, SignLoLHS);
6569+
auto *XorRHS = Builder.CreateXor(HiRHS, SignLoRHS);
6570+
// auto *CmpRHS = Builder.CreateCmp(ICmpInst::ICMP_NE, HiRHS, SignLoRHS);
6571+
auto *Or = Builder.CreateOr(XorLHS, XorRHS, "or.lhs.rhs");
6572+
auto *Cmp = Builder.CreateCmp(ICmpInst::ICMP_EQ, Or,
6573+
ConstantInt::get(Or->getType(), 1));
6574+
Builder.CreateCondBr(Cmp, OverflowBB, NoOverflowBB);
65616575
OverflowoEntryBB->getTerminator()->eraseFromParent();
65626576

65636577
//------------------------------------------------------------------------------
@@ -6566,32 +6580,40 @@ bool CodeGenPrepare::optimizeSMulWithOverflow(Instruction *I) {
65666580
auto *ExtLoLHS = Builder.CreateSExt(LoLHS, Ty, "lo.lhs.ext");
65676581
auto *ExtLoRHS = Builder.CreateSExt(LoRHS, Ty, "lo.rhs.ext");
65686582
auto *Mul = Builder.CreateMul(ExtLoLHS, ExtLoRHS, "mul.no.overflow");
6569-
StructType *STy = StructType::get(
6570-
I->getContext(), {Ty, IntegerType::getInt1Ty(I->getContext())});
6571-
Value *StructValNoOverflow = PoisonValue::get(STy);
6572-
StructValNoOverflow =
6573-
Builder.CreateInsertValue(StructValNoOverflow, Mul, {0});
6574-
StructValNoOverflow = Builder.CreateInsertValue(
6575-
StructValNoOverflow, ConstantInt::getFalse(I->getContext()), {1});
65766583
Builder.CreateBr(OverflowResBB);
65776584

65786585
//------------------------------------------------------------------------------
65796586
// BB overflow.res:
65806587
Builder.SetInsertPoint(OverflowResBB, OverflowResBB->getFirstInsertionPt());
6581-
auto *PHINode = Builder.CreatePHI(STy, 2);
6582-
PHINode->addIncoming(StructValNoOverflow, NoOverflowBB);
6588+
auto *PHINode1 = Builder.CreatePHI(Ty, 2);
6589+
PHINode1->addIncoming(Mul, NoOverflowBB);
6590+
auto *PHINode2 =
6591+
Builder.CreatePHI(IntegerType::getInt1Ty(I->getContext()), 2);
6592+
PHINode2->addIncoming(ConstantInt::getFalse(I->getContext()), NoOverflowBB);
65836593

6594+
StructType *STy = StructType::get(
6595+
I->getContext(), {Ty, IntegerType::getInt1Ty(I->getContext())});
6596+
Value *StructValOverflowRes = PoisonValue::get(STy);
6597+
StructValOverflowRes =
6598+
Builder.CreateInsertValue(StructValOverflowRes, PHINode1, {0});
6599+
StructValOverflowRes =
6600+
Builder.CreateInsertValue(StructValOverflowRes, PHINode2, {1});
65846601
// Before moving the mul.overflow intrinsic to the overflowBB, replace all its
6585-
// uses by PHINode.
6586-
I->replaceAllUsesWith(PHINode);
6602+
// uses by StructValOverflowRes.
6603+
I->replaceAllUsesWith(StructValOverflowRes);
6604+
I->removeFromParent();
65876605

65886606
// BB overflow:
6589-
PHINode->addIncoming(I, OverflowBB);
6590-
I->removeFromParent();
65916607
I->insertInto(OverflowBB, OverflowBB->end());
65926608
Builder.SetInsertPoint(OverflowBB, OverflowBB->end());
6609+
auto *MulOverflow = Builder.CreateExtractValue(I, {0}, "mul.overflow");
6610+
auto *OverflowFlag = Builder.CreateExtractValue(I, {1}, "overflow.flag");
65936611
Builder.CreateBr(OverflowResBB);
65946612

6613+
// Add the extracted values to the PHI nodes in the overflow.res block.
6614+
PHINode1->addIncoming(MulOverflow, OverflowBB);
6615+
PHINode2->addIncoming(OverflowFlag, OverflowBB);
6616+
65956617
// return false to stop reprocessing the function.
65966618
return false;
65976619
}

llvm/test/CodeGen/AArch64/i128-math.ll

Lines changed: 43 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -278,13 +278,12 @@ define { i128, i8 } @u128_checked_mul(i128 %x, i128 %y) {
278278
; CHECK-NEXT: cset w8, ne
279279
; CHECK-NEXT: adds x1, x11, x9
280280
; CHECK-NEXT: csinc w8, w8, wzr, lo
281-
; CHECK-NEXT: b .LBB17_3
281+
; CHECK-NEXT: eor w2, w8, #0x1
282+
; CHECK-NEXT: ret
282283
; CHECK-NEXT: .LBB17_2: // %overflow.no
283284
; CHECK-NEXT: umulh x1, x0, x2
284285
; CHECK-NEXT: mul x0, x0, x2
285-
; CHECK-NEXT: .LBB17_3: // %overflow.res
286-
; CHECK-NEXT: mov w9, #1 // =0x1
287-
; CHECK-NEXT: bic w2, w9, w8
286+
; CHECK-NEXT: eor w2, w8, #0x1
288287
; CHECK-NEXT: ret
289288
%1 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %x, i128 %y)
290289
%2 = extractvalue { i128, i1 } %1, 0
@@ -314,13 +313,12 @@ define { i128, i8 } @u128_overflowing_mul(i128 %x, i128 %y) {
314313
; CHECK-NEXT: mul x0, x0, x2
315314
; CHECK-NEXT: cset w8, ne
316315
; CHECK-NEXT: adds x1, x11, x9
317-
; CHECK-NEXT: csinc w8, w8, wzr, lo
318-
; CHECK-NEXT: and w2, w8, #0x1
316+
; CHECK-NEXT: csinc w2, w8, wzr, lo
319317
; CHECK-NEXT: ret
320318
; CHECK-NEXT: .LBB18_2: // %overflow.no
321319
; CHECK-NEXT: umulh x1, x0, x2
322320
; CHECK-NEXT: mul x0, x0, x2
323-
; CHECK-NEXT: and w2, w8, #0x1
321+
; CHECK-NEXT: mov w2, wzr
324322
; CHECK-NEXT: ret
325323
%1 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %x, i128 %y)
326324
%2 = extractvalue { i128, i1 } %1, 0
@@ -356,7 +354,7 @@ define i128 @u128_saturating_mul(i128 %x, i128 %y) {
356354
; CHECK-NEXT: mov w10, wzr
357355
; CHECK-NEXT: mul x8, x0, x2
358356
; CHECK-NEXT: .LBB19_3: // %overflow.res
359-
; CHECK-NEXT: tst w10, #0x1
357+
; CHECK-NEXT: cmp w10, #0
360358
; CHECK-NEXT: csinv x0, x8, xzr, eq
361359
; CHECK-NEXT: csinv x1, x9, xzr, eq
362360
; CHECK-NEXT: ret
@@ -382,18 +380,12 @@ define i128 @i128_mul(i128 %x, i128 %y) {
382380
define { i128, i8 } @i128_checked_mul(i128 %x, i128 %y) {
383381
; CHECK-LABEL: i128_checked_mul:
384382
; CHECK: // %bb.0: // %overflow.entry
385-
; CHECK-NEXT: cmp x1, x0, asr #63
386-
; CHECK-NEXT: b.ne .LBB21_3
387-
; CHECK-NEXT: // %bb.1: // %overflow.entry
388-
; CHECK-NEXT: asr x8, x2, #63
389-
; CHECK-NEXT: cmp x3, x8
390-
; CHECK-NEXT: b.ne .LBB21_3
391-
; CHECK-NEXT: // %bb.2: // %overflow.no
392-
; CHECK-NEXT: smulh x1, x0, x2
393-
; CHECK-NEXT: mov w8, wzr
394-
; CHECK-NEXT: mul x0, x0, x2
395-
; CHECK-NEXT: b .LBB21_4
396-
; CHECK-NEXT: .LBB21_3: // %overflow
383+
; CHECK-NEXT: eor x8, x3, x2, asr #63
384+
; CHECK-NEXT: eor x9, x1, x0, asr #63
385+
; CHECK-NEXT: orr x8, x9, x8
386+
; CHECK-NEXT: cmp x8, #1
387+
; CHECK-NEXT: b.ne .LBB21_2
388+
; CHECK-NEXT: // %bb.1: // %overflow
397389
; CHECK-NEXT: asr x9, x1, #63
398390
; CHECK-NEXT: umulh x10, x0, x2
399391
; CHECK-NEXT: asr x13, x3, #63
@@ -421,9 +413,12 @@ define { i128, i8 } @i128_checked_mul(i128 %x, i128 %y) {
421413
; CHECK-NEXT: cmp x8, x10
422414
; CHECK-NEXT: ccmp x9, x10, #0, eq
423415
; CHECK-NEXT: cset w8, ne
424-
; CHECK-NEXT: .LBB21_4: // %overflow.res
425-
; CHECK-NEXT: mov w9, #1 // =0x1
426-
; CHECK-NEXT: bic w2, w9, w8
416+
; CHECK-NEXT: eor w2, w8, #0x1
417+
; CHECK-NEXT: ret
418+
; CHECK-NEXT: .LBB21_2: // %overflow.no
419+
; CHECK-NEXT: smulh x1, x0, x2
420+
; CHECK-NEXT: mul x0, x0, x2
421+
; CHECK-NEXT: eor w2, wzr, #0x1
427422
; CHECK-NEXT: ret
428423
%1 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y)
429424
%2 = extractvalue { i128, i1 } %1, 0
@@ -438,18 +433,12 @@ define { i128, i8 } @i128_checked_mul(i128 %x, i128 %y) {
438433
define { i128, i8 } @i128_overflowing_mul(i128 %x, i128 %y) {
439434
; CHECK-LABEL: i128_overflowing_mul:
440435
; CHECK: // %bb.0: // %overflow.entry
441-
; CHECK-NEXT: cmp x1, x0, asr #63
442-
; CHECK-NEXT: b.ne .LBB22_3
443-
; CHECK-NEXT: // %bb.1: // %overflow.entry
444-
; CHECK-NEXT: asr x8, x2, #63
445-
; CHECK-NEXT: cmp x3, x8
446-
; CHECK-NEXT: b.ne .LBB22_3
447-
; CHECK-NEXT: // %bb.2: // %overflow.no
448-
; CHECK-NEXT: smulh x1, x0, x2
449-
; CHECK-NEXT: mul x0, x0, x2
450-
; CHECK-NEXT: and w2, wzr, #0x1
451-
; CHECK-NEXT: ret
452-
; CHECK-NEXT: .LBB22_3: // %overflow
436+
; CHECK-NEXT: eor x8, x3, x2, asr #63
437+
; CHECK-NEXT: eor x9, x1, x0, asr #63
438+
; CHECK-NEXT: orr x8, x9, x8
439+
; CHECK-NEXT: cmp x8, #1
440+
; CHECK-NEXT: b.ne .LBB22_2
441+
; CHECK-NEXT: // %bb.1: // %overflow
453442
; CHECK-NEXT: asr x9, x1, #63
454443
; CHECK-NEXT: umulh x10, x0, x2
455444
; CHECK-NEXT: asr x13, x3, #63
@@ -476,8 +465,12 @@ define { i128, i8 } @i128_overflowing_mul(i128 %x, i128 %y) {
476465
; CHECK-NEXT: adc x9, x9, x11
477466
; CHECK-NEXT: cmp x8, x10
478467
; CHECK-NEXT: ccmp x9, x10, #0, eq
479-
; CHECK-NEXT: cset w8, ne
480-
; CHECK-NEXT: and w2, w8, #0x1
468+
; CHECK-NEXT: cset w2, ne
469+
; CHECK-NEXT: ret
470+
; CHECK-NEXT: .LBB22_2: // %overflow.no
471+
; CHECK-NEXT: smulh x1, x0, x2
472+
; CHECK-NEXT: mul x0, x0, x2
473+
; CHECK-NEXT: mov w2, wzr
481474
; CHECK-NEXT: ret
482475
%1 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y)
483476
%2 = extractvalue { i128, i1 } %1, 0
@@ -491,18 +484,12 @@ define { i128, i8 } @i128_overflowing_mul(i128 %x, i128 %y) {
491484
define i128 @i128_saturating_mul(i128 %x, i128 %y) {
492485
; CHECK-LABEL: i128_saturating_mul:
493486
; CHECK: // %bb.0: // %overflow.entry
494-
; CHECK-NEXT: cmp x1, x0, asr #63
495-
; CHECK-NEXT: b.ne .LBB23_3
496-
; CHECK-NEXT: // %bb.1: // %overflow.entry
497-
; CHECK-NEXT: asr x8, x2, #63
498-
; CHECK-NEXT: cmp x3, x8
499-
; CHECK-NEXT: b.ne .LBB23_3
500-
; CHECK-NEXT: // %bb.2: // %overflow.no
501-
; CHECK-NEXT: smulh x8, x0, x2
502-
; CHECK-NEXT: mov w10, wzr
503-
; CHECK-NEXT: mul x9, x0, x2
504-
; CHECK-NEXT: b .LBB23_4
505-
; CHECK-NEXT: .LBB23_3: // %overflow
487+
; CHECK-NEXT: eor x8, x3, x2, asr #63
488+
; CHECK-NEXT: eor x9, x1, x0, asr #63
489+
; CHECK-NEXT: orr x8, x9, x8
490+
; CHECK-NEXT: cmp x8, #1
491+
; CHECK-NEXT: b.ne .LBB23_2
492+
; CHECK-NEXT: // %bb.1: // %overflow
506493
; CHECK-NEXT: asr x9, x1, #63
507494
; CHECK-NEXT: umulh x10, x0, x2
508495
; CHECK-NEXT: asr x13, x3, #63
@@ -529,9 +516,14 @@ define i128 @i128_saturating_mul(i128 %x, i128 %y) {
529516
; CHECK-NEXT: cmp x11, x14
530517
; CHECK-NEXT: ccmp x10, x14, #0, eq
531518
; CHECK-NEXT: cset w10, ne
532-
; CHECK-NEXT: .LBB23_4: // %overflow.res
519+
; CHECK-NEXT: b .LBB23_3
520+
; CHECK-NEXT: .LBB23_2: // %overflow.no
521+
; CHECK-NEXT: smulh x8, x0, x2
522+
; CHECK-NEXT: mov w10, wzr
523+
; CHECK-NEXT: mul x9, x0, x2
524+
; CHECK-NEXT: .LBB23_3: // %overflow.res
533525
; CHECK-NEXT: eor x11, x3, x1
534-
; CHECK-NEXT: tst w10, #0x1
526+
; CHECK-NEXT: cmp w10, #0
535527
; CHECK-NEXT: asr x11, x11, #63
536528
; CHECK-NEXT: eor x12, x11, #0x7fffffffffffffff
537529
; CHECK-NEXT: csinv x0, x9, x11, eq

llvm/test/CodeGen/AArch64/i128_with_overflow.ll

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -240,12 +240,12 @@ define i128 @test_umul_i128(i128 noundef %x, i128 noundef %y) {
240240
; CHECK-NEXT: cset w8, ne
241241
; CHECK-NEXT: adds x1, x11, x9
242242
; CHECK-NEXT: csinc w8, w8, wzr, lo
243-
; CHECK-NEXT: tbnz w8, #0, .LBB4_3
243+
; CHECK-NEXT: cbnz w8, .LBB4_3
244244
; CHECK-NEXT: b .LBB4_4
245245
; CHECK-NEXT: .LBB4_2: // %overflow.no
246246
; CHECK-NEXT: umulh x1, x0, x2
247247
; CHECK-NEXT: mul x0, x0, x2
248-
; CHECK-NEXT: tbz w8, #0, .LBB4_4
248+
; CHECK-NEXT: cbz w8, .LBB4_4
249249
; CHECK-NEXT: .LBB4_3: // %if.then
250250
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
251251
; CHECK-NEXT: .cfi_def_cfa_offset 16
@@ -279,19 +279,12 @@ cleanup:
279279
define i128 @test_smul_i128(i128 noundef %x, i128 noundef %y) {
280280
; CHECK-LABEL: test_smul_i128:
281281
; CHECK: // %bb.0: // %overflow.entry
282-
; CHECK-NEXT: cmp x1, x0, asr #63
283-
; CHECK-NEXT: b.ne .LBB5_3
284-
; CHECK-NEXT: // %bb.1: // %overflow.entry
285-
; CHECK-NEXT: asr x8, x2, #63
286-
; CHECK-NEXT: cmp x3, x8
287-
; CHECK-NEXT: b.ne .LBB5_3
288-
; CHECK-NEXT: // %bb.2: // %overflow.no
289-
; CHECK-NEXT: smulh x1, x0, x2
290-
; CHECK-NEXT: mov w8, wzr
291-
; CHECK-NEXT: mul x0, x0, x2
292-
; CHECK-NEXT: tbnz w8, #0, .LBB5_4
293-
; CHECK-NEXT: b .LBB5_5
294-
; CHECK-NEXT: .LBB5_3: // %overflow
282+
; CHECK-NEXT: eor x8, x3, x2, asr #63
283+
; CHECK-NEXT: eor x9, x1, x0, asr #63
284+
; CHECK-NEXT: orr x8, x9, x8
285+
; CHECK-NEXT: cmp x8, #1
286+
; CHECK-NEXT: b.ne .LBB5_2
287+
; CHECK-NEXT: // %bb.1: // %overflow
295288
; CHECK-NEXT: asr x9, x1, #63
296289
; CHECK-NEXT: umulh x10, x0, x2
297290
; CHECK-NEXT: asr x13, x3, #63
@@ -319,8 +312,14 @@ define i128 @test_smul_i128(i128 noundef %x, i128 noundef %y) {
319312
; CHECK-NEXT: cmp x8, x10
320313
; CHECK-NEXT: ccmp x9, x10, #0, eq
321314
; CHECK-NEXT: cset w8, ne
322-
; CHECK-NEXT: tbz w8, #0, .LBB5_5
323-
; CHECK-NEXT: .LBB5_4: // %if.then
315+
; CHECK-NEXT: cbnz w8, .LBB5_3
316+
; CHECK-NEXT: b .LBB5_4
317+
; CHECK-NEXT: .LBB5_2: // %overflow.no
318+
; CHECK-NEXT: smulh x1, x0, x2
319+
; CHECK-NEXT: mov w8, wzr
320+
; CHECK-NEXT: mul x0, x0, x2
321+
; CHECK-NEXT: cbz w8, .LBB5_4
322+
; CHECK-NEXT: .LBB5_3: // %if.then
324323
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
325324
; CHECK-NEXT: .cfi_def_cfa_offset 16
326325
; CHECK-NEXT: .cfi_offset w30, -16
@@ -329,7 +328,7 @@ define i128 @test_smul_i128(i128 noundef %x, i128 noundef %y) {
329328
; CHECK-NEXT: sxtw x0, w0
330329
; CHECK-NEXT: asr x1, x0, #63
331330
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
332-
; CHECK-NEXT: .LBB5_5: // %cleanup
331+
; CHECK-NEXT: .LBB5_4: // %cleanup
333332
; CHECK-NEXT: ret
334333
entry:
335334
%0 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y)

0 commit comments

Comments
 (0)