Skip to content

Commit 08c498c

Browse files
committed
detect optimization pattern to stop reprocessing
1 parent 9fa4927 commit 08c498c

File tree

2 files changed

+162
-8
lines changed

2 files changed

+162
-8
lines changed

llvm/lib/CodeGen/CodeGenPrepare.cpp

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -338,10 +338,6 @@ class CodeGenPrepare {
338338
/// Keep track of instructions removed during promotion.
339339
SetOfInstrs RemovedInsts;
340340

341-
/// Keep track of seen mul_with_overflow intrinsics to avoid
342-
// reprocessing them.
343-
DenseMap<Instruction *, bool> SeenMulWithOverflowInstrs;
344-
345341
/// Keep track of sext chains based on their initial value.
346342
DenseMap<Value *, Instruction *> SeenChainsForSExt;
347343

@@ -778,7 +774,6 @@ bool CodeGenPrepare::_run(Function &F) {
778774
verifyBFIUpdates(F);
779775
#endif
780776

781-
SeenMulWithOverflowInstrs.clear();
782777
return EverMadeChange;
783778
}
784779

@@ -6409,9 +6404,36 @@ bool CodeGenPrepare::optimizeMulWithOverflow(Instruction *I, bool IsSigned,
64096404
ModifyDT &ModifiedDT) {
64106405
if (!TLI->shouldOptimizeMulOverflowIntrinsic())
64116406
return false;
6412-
// If we have already seen this instruction, don't process it again.
6413-
if (!SeenMulWithOverflowInstrs.insert(std::make_pair(I, true)).second)
6414-
return false;
6407+
6408+
// Check whether we have already optimized this intrinsic by detecting the pattern of the changes we made:
6409+
// Check if we are testing the high bits of the operands:
6410+
if (BasicBlock *BrBB = I->getParent()->getSinglePredecessor()) {
6411+
if (BranchInst *Br = dyn_cast<BranchInst>(BrBB->getTerminator()); Br && Br->isConditional()) {
6412+
if (IsSigned) {
6413+
// Check: cmp(or(xor(trunc(lshr(x))), xor(trunc(lshr(y)))))
6414+
if (match(Br->getCondition(),
6415+
m_Cmp(m_Or(m_Xor(m_Trunc(m_LShr(m_Specific(I->getOperand(0)), m_Value())), m_Value()),
6416+
m_Xor(m_Trunc(m_LShr(m_Specific(I->getOperand(1)), m_Value())), m_Value())),
6417+
m_Value()))) {
6418+
LLVM_DEBUG(dbgs() << "CGP: pattern detected - bail out\n");
6419+
// Pattern detected, bail out.
6420+
return false;
6421+
}
6422+
}
6423+
else
6424+
{
6425+
// Check: or(cmp(trunc(lshr(x))), cmp(trunc(lshr(y))))
6426+
if (match(Br->getCondition(),
6427+
m_Or(m_Cmp(m_Trunc(m_LShr(m_Specific(I->getOperand(0)), m_Value())), m_Value()),
6428+
m_Cmp(m_Trunc(m_LShr(m_Specific(I->getOperand(1)), m_Value())), m_Value())))) {
6429+
LLVM_DEBUG(dbgs() << "CGP: pattern detected - bail out\n");
6430+
// Pattern detected, bail out.
6431+
return false;
6432+
}
6433+
}
6434+
6435+
}
6436+
}
64156437

64166438
if (TLI->getTypeAction(
64176439
I->getContext(),
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; REQUIRES: asserts
3+
; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -debug-only=codegenprepare 2>%t | FileCheck %s --check-prefixes=CHECK
4+
; RUN: cat %t | FileCheck %s --check-prefix=DEBUG
5+
6+
; DEBUG: CGP: pattern detected - bail out
7+
8+
define i128 @test_umul_i128(i128 noundef %x, i128 noundef %y) {
9+
; CHECK-LABEL: test_umul_i128:
10+
; CHECK: // %bb.0: // %entry
11+
; CHECK-NEXT: orr x8, x1, x3
12+
; CHECK-NEXT: cbz x8, .LBB0_3
13+
; CHECK-NEXT: // %bb.1: // %overflow
14+
; CHECK-NEXT: mul x9, x3, x0
15+
; CHECK-NEXT: cmp x1, #0
16+
; CHECK-NEXT: ccmp x3, #0, #4, ne
17+
; CHECK-NEXT: umulh x10, x1, x2
18+
; CHECK-NEXT: umulh x8, x3, x0
19+
; CHECK-NEXT: madd x9, x1, x2, x9
20+
; CHECK-NEXT: ccmp xzr, x10, #0, eq
21+
; CHECK-NEXT: umulh x11, x0, x2
22+
; CHECK-NEXT: ccmp xzr, x8, #0, eq
23+
; CHECK-NEXT: cset w8, ne
24+
; CHECK-NEXT: adds x1, x11, x9
25+
; CHECK-NEXT: csinc w8, w8, wzr, lo
26+
; CHECK-NEXT: cmp w8, #1
27+
; CHECK-NEXT: b.ne .LBB0_4
28+
; CHECK-NEXT: // %bb.2: // %if.then
29+
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
30+
; CHECK-NEXT: .cfi_def_cfa_offset 16
31+
; CHECK-NEXT: .cfi_offset w30, -16
32+
; CHECK-NEXT: bl error
33+
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
34+
; CHECK-NEXT: sxtw x0, w0
35+
; CHECK-NEXT: asr x1, x0, #63
36+
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
37+
; CHECK-NEXT: ret
38+
; CHECK-NEXT: .LBB0_3: // %overflow.no
39+
; CHECK-NEXT: umulh x1, x0, x2
40+
; CHECK-NEXT: .LBB0_4:
41+
; CHECK-NEXT: mul x0, x0, x2
42+
; CHECK-NEXT: ret
43+
entry:
44+
%0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %x, i128 %y)
45+
%1 = extractvalue { i128, i1 } %0, 1
46+
br i1 %1, label %if.then, label %if.end
47+
48+
if.then:
49+
%call = tail call i32 @error()
50+
%conv1 = sext i32 %call to i128
51+
br label %cleanup
52+
53+
if.end:
54+
%2 = extractvalue { i128, i1 } %0, 0
55+
br label %cleanup
56+
57+
cleanup:
58+
%retval.0 = phi i128 [ %conv1, %if.then ], [ %2, %if.end ]
59+
ret i128 %retval.0
60+
}
61+
62+
; DEBUG: CGP: pattern detected - bail out
63+
64+
define i128 @test_smul_i128(i128 noundef %x, i128 noundef %y) {
65+
; CHECK-LABEL: test_smul_i128:
66+
; CHECK: // %bb.0: // %entry
67+
; CHECK-NEXT: eor x8, x3, x2, asr #63
68+
; CHECK-NEXT: eor x9, x1, x0, asr #63
69+
; CHECK-NEXT: orr x8, x9, x8
70+
; CHECK-NEXT: cbz x8, .LBB1_3
71+
; CHECK-NEXT: // %bb.1: // %overflow
72+
; CHECK-NEXT: asr x9, x1, #63
73+
; CHECK-NEXT: umulh x10, x0, x2
74+
; CHECK-NEXT: asr x13, x3, #63
75+
; CHECK-NEXT: mul x11, x1, x2
76+
; CHECK-NEXT: umulh x8, x1, x2
77+
; CHECK-NEXT: mul x9, x9, x2
78+
; CHECK-NEXT: adds x10, x11, x10
79+
; CHECK-NEXT: mul x14, x0, x3
80+
; CHECK-NEXT: umulh x12, x0, x3
81+
; CHECK-NEXT: adc x8, x8, x9
82+
; CHECK-NEXT: mov x9, x1
83+
; CHECK-NEXT: mul x13, x0, x13
84+
; CHECK-NEXT: asr x11, x8, #63
85+
; CHECK-NEXT: mul x15, x1, x3
86+
; CHECK-NEXT: adds x1, x14, x10
87+
; CHECK-NEXT: smulh x9, x9, x3
88+
; CHECK-NEXT: adc x10, x12, x13
89+
; CHECK-NEXT: asr x12, x10, #63
90+
; CHECK-NEXT: adds x8, x8, x10
91+
; CHECK-NEXT: adc x10, x11, x12
92+
; CHECK-NEXT: adds x8, x15, x8
93+
; CHECK-NEXT: asr x11, x1, #63
94+
; CHECK-NEXT: adc x9, x9, x10
95+
; CHECK-NEXT: cmp x9, x11
96+
; CHECK-NEXT: ccmp x8, x11, #0, eq
97+
; CHECK-NEXT: b.eq .LBB1_4
98+
; CHECK-NEXT: // %bb.2: // %if.then
99+
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
100+
; CHECK-NEXT: .cfi_def_cfa_offset 16
101+
; CHECK-NEXT: .cfi_offset w30, -16
102+
; CHECK-NEXT: bl error
103+
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
104+
; CHECK-NEXT: sxtw x0, w0
105+
; CHECK-NEXT: asr x1, x0, #63
106+
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
107+
; CHECK-NEXT: ret
108+
; CHECK-NEXT: .LBB1_3: // %overflow.no
109+
; CHECK-NEXT: smulh x1, x0, x2
110+
; CHECK-NEXT: .LBB1_4:
111+
; CHECK-NEXT: mul x0, x0, x2
112+
; CHECK-NEXT: ret
113+
entry:
114+
%0 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y)
115+
%1 = extractvalue { i128, i1 } %0, 1
116+
br i1 %1, label %if.then, label %if.end
117+
118+
if.then:
119+
%call = tail call i32 @error()
120+
%conv1 = sext i32 %call to i128
121+
br label %cleanup
122+
123+
if.end:
124+
%2 = extractvalue { i128, i1 } %0, 0
125+
br label %cleanup
126+
127+
cleanup:
128+
%retval.0 = phi i128 [ %conv1, %if.then ], [ %2, %if.end ]
129+
ret i128 %retval.0
130+
}
131+
132+
declare i32 @error()

0 commit comments

Comments
 (0)