Skip to content

Commit 543a476

Browse files
dtcxzyw authored and mahesh-attarde committed
[SCCP] Simplify [us]cmp(X, Y) into X - Y (llvm#144717)
If the operands of [us]cmp are known to differ by at most 1, we can simplify the call into `X - Y`.
Alive2: https://alive2.llvm.org/ce/z/JS55so
llvm-opt-benchmark diff: https://github.com/dtcxzyw/llvm-opt-benchmark/pull/2464/files
1 parent 380e10a commit 543a476

File tree

2 files changed

+220
-1
lines changed

2 files changed

+220
-1
lines changed

llvm/lib/Transforms/Utils/SCCPSolver.cpp

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,9 @@
1919
#include "llvm/Analysis/ValueLattice.h"
2020
#include "llvm/Analysis/ValueLatticeUtils.h"
2121
#include "llvm/Analysis/ValueTracking.h"
22+
#include "llvm/IR/IRBuilder.h"
2223
#include "llvm/IR/InstVisitor.h"
24+
#include "llvm/IR/NoFolder.h"
2325
#include "llvm/IR/PatternMatch.h"
2426
#include "llvm/Support/Casting.h"
2527
#include "llvm/Support/Debug.h"
@@ -245,11 +247,43 @@ static Value *simplifyInstruction(SCCPSolver &Solver,
245247
const APInt *RHSC;
246248
// Remove masking operations.
247249
if (match(&Inst, m_And(m_Value(X), m_LowBitMask(RHSC)))) {
248-
ConstantRange LRange = GetRange(Inst.getOperand(0));
250+
ConstantRange LRange = GetRange(X);
249251
if (LRange.getUnsignedMax().ule(*RHSC))
250252
return X;
251253
}
252254

255+
// Check if we can simplify [us]cmp(X, Y) to X - Y.
256+
if (auto *Cmp = dyn_cast<CmpIntrinsic>(&Inst)) {
257+
Value *LHS = Cmp->getOperand(0);
258+
Value *RHS = Cmp->getOperand(1);
259+
unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
260+
// Bail out on 1-bit comparisons.
261+
if (BitWidth == 1)
262+
return nullptr;
263+
ConstantRange LRange = GetRange(LHS);
264+
if (LRange.isSizeLargerThan(3))
265+
return nullptr;
266+
ConstantRange RRange = GetRange(RHS);
267+
if (RRange.isSizeLargerThan(3))
268+
return nullptr;
269+
ConstantRange RHSLower = RRange.sub(APInt(BitWidth, 1));
270+
ConstantRange RHSUpper = RRange.add(APInt(BitWidth, 1));
271+
ICmpInst::Predicate Pred =
272+
Cmp->isSigned() ? CmpInst::ICMP_SLE : CmpInst::ICMP_ULE;
273+
if (!RHSLower.icmp(Pred, LRange) || !LRange.icmp(Pred, RHSUpper))
274+
return nullptr;
275+
276+
IRBuilder<NoFolder> Builder(&Inst);
277+
Value *Sub = Builder.CreateSub(LHS, RHS, Inst.getName(), /*HasNUW=*/false,
278+
/*HasNSW=*/Cmp->isSigned());
279+
InsertedValues.insert(Sub);
280+
if (Sub->getType() != Inst.getType()) {
281+
Sub = Builder.CreateSExtOrTrunc(Sub, Inst.getType());
282+
InsertedValues.insert(Sub);
283+
}
284+
return Sub;
285+
}
286+
253287
return nullptr;
254288
}
255289

llvm/test/Transforms/SCCP/uscmp.ll

Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -passes=sccp -S < %s | FileCheck %s
3+
4+
define i32 @scmp_to_sub(i32 range(i32 -1, 2) %a) {
5+
; CHECK-LABEL: define i32 @scmp_to_sub(
6+
; CHECK-SAME: i32 range(i32 -1, 2) [[A:%.*]]) {
7+
; CHECK-NEXT: [[SCMP:%.*]] = sub nsw i32 [[A]], 0
8+
; CHECK-NEXT: ret i32 [[SCMP]]
9+
;
10+
%scmp = call i32 @llvm.scmp(i32 %a, i32 0)
11+
ret i32 %scmp
12+
}
13+
14+
define i32 @scmp_zext_to_sub(i1 %a, i1 %b) {
15+
; CHECK-LABEL: define i32 @scmp_zext_to_sub(
16+
; CHECK-SAME: i1 [[A:%.*]], i1 [[B:%.*]]) {
17+
; CHECK-NEXT: [[ZEXT_A:%.*]] = zext i1 [[A]] to i32
18+
; CHECK-NEXT: [[ZEXT_B:%.*]] = zext i1 [[B]] to i32
19+
; CHECK-NEXT: [[SCMP:%.*]] = sub nsw i32 [[ZEXT_A]], [[ZEXT_B]]
20+
; CHECK-NEXT: ret i32 [[SCMP]]
21+
;
22+
%zext_a = zext i1 %a to i32
23+
%zext_b = zext i1 %b to i32
24+
%scmp = call i32 @llvm.scmp(i32 %zext_a, i32 %zext_b)
25+
ret i32 %scmp
26+
}
27+
28+
define i8 @scmp_to_sub_trunc(i32 range(i32 -1, 2) %a) {
29+
; CHECK-LABEL: define i8 @scmp_to_sub_trunc(
30+
; CHECK-SAME: i32 range(i32 -1, 2) [[A:%.*]]) {
31+
; CHECK-NEXT: [[SCMP1:%.*]] = sub nsw i32 [[A]], 0
32+
; CHECK-NEXT: [[SCMP:%.*]] = trunc i32 [[SCMP1]] to i8
33+
; CHECK-NEXT: ret i8 [[SCMP]]
34+
;
35+
%scmp = call i8 @llvm.scmp(i32 %a, i32 0)
36+
ret i8 %scmp
37+
}
38+
39+
define i64 @scmp_to_sub_sext(i32 range(i32 -1, 2) %a) {
40+
; CHECK-LABEL: define i64 @scmp_to_sub_sext(
41+
; CHECK-SAME: i32 range(i32 -1, 2) [[A:%.*]]) {
42+
; CHECK-NEXT: [[SCMP1:%.*]] = sub nsw i32 [[A]], 0
43+
; CHECK-NEXT: [[SCMP:%.*]] = sext i32 [[SCMP1]] to i64
44+
; CHECK-NEXT: ret i64 [[SCMP]]
45+
;
46+
%scmp = call i64 @llvm.scmp(i32 %a, i32 0)
47+
ret i64 %scmp
48+
}
49+
50+
define i32 @scmp_to_sub_small_range(i32 range(i32 -1, 1) %a) {
51+
; CHECK-LABEL: define i32 @scmp_to_sub_small_range(
52+
; CHECK-SAME: i32 range(i32 -1, 1) [[A:%.*]]) {
53+
; CHECK-NEXT: [[SCMP:%.*]] = sub nsw i32 [[A]], 0
54+
; CHECK-NEXT: ret i32 [[SCMP]]
55+
;
56+
%scmp = call i32 @llvm.scmp(i32 %a, i32 0)
57+
ret i32 %scmp
58+
}
59+
60+
define i32 @ucmp_to_sub(i32 range(i32 0, 3) %a) {
61+
; CHECK-LABEL: define i32 @ucmp_to_sub(
62+
; CHECK-SAME: i32 range(i32 0, 3) [[A:%.*]]) {
63+
; CHECK-NEXT: [[SCMP:%.*]] = sub i32 [[A]], 1
64+
; CHECK-NEXT: ret i32 [[SCMP]]
65+
;
66+
%ucmp = call i32 @llvm.ucmp(i32 %a, i32 1)
67+
ret i32 %ucmp
68+
}
69+
70+
define i8 @ucmp_to_sub_trunc(i32 range(i32 0, 3) %a) {
71+
; CHECK-LABEL: define i8 @ucmp_to_sub_trunc(
72+
; CHECK-SAME: i32 range(i32 0, 3) [[A:%.*]]) {
73+
; CHECK-NEXT: [[UCMP1:%.*]] = sub i32 [[A]], 1
74+
; CHECK-NEXT: [[UCMP:%.*]] = trunc i32 [[UCMP1]] to i8
75+
; CHECK-NEXT: ret i8 [[UCMP]]
76+
;
77+
%ucmp = call i8 @llvm.ucmp(i32 %a, i32 1)
78+
ret i8 %ucmp
79+
}
80+
81+
define i64 @ucmp_to_sub_sext(i32 range(i32 0, 3) %a) {
82+
; CHECK-LABEL: define i64 @ucmp_to_sub_sext(
83+
; CHECK-SAME: i32 range(i32 0, 3) [[A:%.*]]) {
84+
; CHECK-NEXT: [[UCMP1:%.*]] = sub i32 [[A]], 1
85+
; CHECK-NEXT: [[UCMP:%.*]] = sext i32 [[UCMP1]] to i64
86+
; CHECK-NEXT: ret i64 [[UCMP]]
87+
;
88+
%ucmp = call i64 @llvm.ucmp(i32 %a, i32 1)
89+
ret i64 %ucmp
90+
}
91+
92+
; TODO: we can fold this into %a.
93+
define i32 @ucmp_to_sub_small_range(i32 range(i32 0, 2) %a) {
94+
; CHECK-LABEL: define i32 @ucmp_to_sub_small_range(
95+
; CHECK-SAME: i32 range(i32 0, 2) [[A:%.*]]) {
96+
; CHECK-NEXT: [[UCMP:%.*]] = call i32 @llvm.ucmp.i32.i32(i32 [[A]], i32 0)
97+
; CHECK-NEXT: ret i32 [[UCMP]]
98+
;
99+
%ucmp = call i32 @llvm.ucmp(i32 %a, i32 0)
100+
ret i32 %ucmp
101+
}
102+
103+
define i32 @scmp_to_sub_large_range(i32 range(i32 -1, 3) %a) {
104+
; CHECK-LABEL: define i32 @scmp_to_sub_large_range(
105+
; CHECK-SAME: i32 range(i32 -1, 3) [[A:%.*]]) {
106+
; CHECK-NEXT: [[SCMP:%.*]] = call i32 @llvm.scmp.i32.i32(i32 [[A]], i32 0)
107+
; CHECK-NEXT: ret i32 [[SCMP]]
108+
;
109+
%scmp = call i32 @llvm.scmp(i32 %a, i32 0)
110+
ret i32 %scmp
111+
}
112+
113+
define i32 @ucmp_to_sub_large_range(i32 range(i32 -1, 3) %a) {
114+
; CHECK-LABEL: define i32 @ucmp_to_sub_large_range(
115+
; CHECK-SAME: i32 range(i32 -1, 3) [[A:%.*]]) {
116+
; CHECK-NEXT: [[SCMP:%.*]] = call i32 @llvm.ucmp.i32.i32(i32 [[A]], i32 0)
117+
; CHECK-NEXT: ret i32 [[SCMP]]
118+
;
119+
%ucmp = call i32 @llvm.ucmp(i32 %a, i32 0)
120+
ret i32 %ucmp
121+
}
122+
123+
define i32 @scmp_to_sub_wrap(i8 range(i8 127, -126) %a) {
124+
; CHECK-LABEL: define i32 @scmp_to_sub_wrap(
125+
; CHECK-SAME: i8 range(i8 127, -126) [[A:%.*]]) {
126+
; CHECK-NEXT: [[SCMP:%.*]] = call i32 @llvm.scmp.i32.i8(i8 [[A]], i8 -128)
127+
; CHECK-NEXT: ret i32 [[SCMP]]
128+
;
129+
%scmp = call i32 @llvm.scmp(i8 %a, i8 -128)
130+
ret i32 %scmp
131+
}
132+
133+
define i32 @ucmp_to_sub_wrap(i8 range(i8 -1, 2) %a) {
134+
; CHECK-LABEL: define i32 @ucmp_to_sub_wrap(
135+
; CHECK-SAME: i8 range(i8 -1, 2) [[A:%.*]]) {
136+
; CHECK-NEXT: [[UCMP:%.*]] = call i32 @llvm.ucmp.i32.i8(i8 [[A]], i8 0)
137+
; CHECK-NEXT: ret i32 [[UCMP]]
138+
;
139+
%ucmp = call i32 @llvm.ucmp(i8 %a, i8 0)
140+
ret i32 %ucmp
141+
}
142+
143+
; It is incorrect to convert a ucmp into sub when the input type is i1: in 1 bit, -1 and 1 are the same value, so X - Y cannot tell less-than from greater-than.
144+
define i32 @ucmp_to_sub_i1_rhs_const(i1 %a) {
145+
; CHECK-LABEL: define i32 @ucmp_to_sub_i1_rhs_const(
146+
; CHECK-SAME: i1 [[A:%.*]]) {
147+
; CHECK-NEXT: [[SCMP:%.*]] = call i32 @llvm.ucmp.i32.i1(i1 [[A]], i1 false)
148+
; CHECK-NEXT: ret i32 [[SCMP]]
149+
;
150+
%ucmp = call i32 @llvm.ucmp(i1 %a, i1 false)
151+
ret i32 %ucmp
152+
}
153+
154+
; It is incorrect to convert a ucmp into sub when the input type is i1: in 1 bit, -1 and 1 are the same value, so X - Y cannot tell less-than from greater-than.
155+
define i32 @ucmp_to_sub_i1_lhs_const(i1 %a) {
156+
; CHECK-LABEL: define i32 @ucmp_to_sub_i1_lhs_const(
157+
; CHECK-SAME: i1 [[A:%.*]]) {
158+
; CHECK-NEXT: [[SCMP:%.*]] = call i32 @llvm.ucmp.i32.i1(i1 false, i1 [[A]])
159+
; CHECK-NEXT: ret i32 [[SCMP]]
160+
;
161+
%ucmp = call i32 @llvm.ucmp(i1 false, i1 %a)
162+
ret i32 %ucmp
163+
}
164+
165+
; It is incorrect to convert a ucmp into sub when the input type is i1: in 1 bit, -1 and 1 are the same value, so X - Y cannot tell less-than from greater-than.
166+
define i32 @ucmp_to_sub_i1(i1 %a, i1 %b) {
167+
; CHECK-LABEL: define i32 @ucmp_to_sub_i1(
168+
; CHECK-SAME: i1 [[A:%.*]], i1 [[B:%.*]]) {
169+
; CHECK-NEXT: [[SCMP:%.*]] = call i32 @llvm.ucmp.i32.i1(i1 [[A]], i1 [[B]])
170+
; CHECK-NEXT: ret i32 [[SCMP]]
171+
;
172+
%ucmp = call i32 @llvm.ucmp(i1 %a, i1 %b)
173+
ret i32 %ucmp
174+
}
175+
176+
; It is incorrect to convert a scmp into sub when the input type is i1: in 1 bit, -1 and 1 are the same value, so X - Y cannot tell less-than from greater-than.
177+
define i32 @scmp_to_sub_i1_rhs_const(i1 %a) {
178+
; CHECK-LABEL: define i32 @scmp_to_sub_i1_rhs_const(
179+
; CHECK-SAME: i1 [[A:%.*]]) {
180+
; CHECK-NEXT: [[SCMP:%.*]] = call i32 @llvm.scmp.i32.i1(i1 [[A]], i1 false)
181+
; CHECK-NEXT: ret i32 [[SCMP]]
182+
;
183+
%scmp = call i32 @llvm.scmp(i1 %a, i1 false)
184+
ret i32 %scmp
185+
}

0 commit comments

Comments
 (0)