Skip to content

Commit 2508851

Browse files
authored
[InstCombine] Fold min(X+1, Y) - min(X, Y) --> zext X < Y (#157782)
This PR closes #157524.

alive2: https://alive2.llvm.org/ce/z/xe_vb2
godbolt: https://alive2.llvm.org/ce/z/7A8PxK

This fold is invalid for `@llvm.smin.i1`, since `smin(-1, 0) == -1`. I also avoided i1 in general because the fold uses zext, but it seems like those width checks might not be necessary, since other folds get to it first.

The alive2 proof in #157524 used a select for the fold, but it seems like `select X < Y, 1, 0` should be canonicalized to `zext X < Y` if the bit width is correct.
1 parent af82c1a commit 2508851

File tree

2 files changed

+168
-0
lines changed

2 files changed

+168
-0
lines changed

llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2731,6 +2731,24 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
27312731
return BinaryOperator::CreateSub(X, Not);
27322732
}
27332733

2734+
// min(X+1, Y) - min(X, Y) --> zext (X < Y), where X+1 is known not to wrap
2735+
// Replacing a sub and at least one min with an icmp
2736+
// and a zext is a potential improvement.
2737+
if (match(Op0, m_c_SMin(m_NSWAddLike(m_Value(X), m_One()), m_Value(Y))) &&
2738+
match(Op1, m_c_SMin(m_Specific(X), m_Specific(Y))) &&
2739+
I.getType()->getScalarSizeInBits() != 1 &&
2740+
(Op0->hasOneUse() || Op1->hasOneUse())) {
2741+
Value *Cond = Builder.CreateICmpSLT(X, Y);
2742+
return new ZExtInst(Cond, I.getType());
2743+
}
2744+
if (match(Op0, m_c_UMin(m_NUWAddLike(m_Value(X), m_One()), m_Value(Y))) &&
2745+
match(Op1, m_c_UMin(m_Specific(X), m_Specific(Y))) &&
2746+
I.getType()->getScalarSizeInBits() != 1 &&
2747+
(Op0->hasOneUse() || Op1->hasOneUse())) {
2748+
Value *Cond = Builder.CreateICmpULT(X, Y);
2749+
return new ZExtInst(Cond, I.getType());
2750+
}
2751+
27342752
// Optimize pointer differences into the same array into a size. Consider:
27352753
// &A[10] - &A[0]: we should compile this to "10".
27362754
Value *LHSOp, *RHSOp;
Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
2+
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
3+
4+
; Positive test: smin(%arg0 + 1, %arg1) - smin(%arg0, %arg1), with an nsw add,
; is expected to fold to zext(icmp slt %arg0, %arg1).
define i32 @test_smin(i32 %arg0, i32 %arg1) {
5+
; CHECK-LABEL: define i32 @test_smin(
6+
; CHECK-SAME: i32 [[ARG0:%.*]], i32 [[ARG1:%.*]]) {
7+
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[ARG0]], [[ARG1]]
8+
; CHECK-NEXT: [[V3:%.*]] = zext i1 [[TMP1]] to i32
9+
; CHECK-NEXT: ret i32 [[V3]]
10+
;
11+
%v0 = tail call i32 @llvm.smin.i32(i32 %arg0, i32 %arg1)
12+
%v1 = add nsw i32 %arg0, 1
13+
%v2 = tail call i32 @llvm.smin.i32(i32 %v1, i32 %arg1)
14+
%v3 = sub i32 %v2, %v0
15+
ret i32 %v3
16+
}
17+
18+
; Positive test: umin(%arg0 + 1, %arg1) - umin(%arg0, %arg1), with an nuw add,
; is expected to fold to zext(icmp ult %arg0, %arg1).
define i32 @test_umin(i32 %arg0, i32 %arg1) {
19+
; CHECK-LABEL: define i32 @test_umin(
20+
; CHECK-SAME: i32 [[ARG0:%.*]], i32 [[ARG1:%.*]]) {
21+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[ARG0]], [[ARG1]]
22+
; CHECK-NEXT: [[V3:%.*]] = zext i1 [[TMP1]] to i32
23+
; CHECK-NEXT: ret i32 [[V3]]
24+
;
25+
%v0 = tail call i32 @llvm.umin.i32(i32 %arg0, i32 %arg1)
26+
%v1 = add nuw i32 %arg0, 1
27+
%v2 = tail call i32 @llvm.umin.i32(i32 %v1, i32 %arg1)
28+
%v3 = sub i32 %v2, %v0
29+
ret i32 %v3
30+
}
31+
32+
; i1 case: the expected output is an or/xor sequence rather than icmp + zext,
; i.e. the min-difference fold is not applied at this bit width.
define i1 @test_smin_i1(i1 %arg0, i1 %arg1) {
33+
; CHECK-LABEL: define i1 @test_smin_i1(
34+
; CHECK-SAME: i1 [[ARG0:%.*]], i1 [[ARG1:%.*]]) {
35+
; CHECK-NEXT: [[V0:%.*]] = or i1 [[ARG0]], [[ARG1]]
36+
; CHECK-NEXT: [[V3:%.*]] = xor i1 [[V0]], true
37+
; CHECK-NEXT: ret i1 [[V3]]
38+
;
39+
%v0 = tail call i1 @llvm.smin.i1(i1 %arg0, i1 %arg1)
40+
%v1 = add nsw i1 %arg0, 1
41+
%v2 = tail call i1 @llvm.smin.i1(i1 %v1, i1 %arg1)
42+
%v3 = sub i1 %v2, %v0
43+
ret i1 %v3
44+
}
45+
46+
declare void @use(i2)
47+
48+
; Negative test: both smin results (%v0 and %v2) have an extra use via @use,
; so the sub is expected to be left unchanged.
define i2 @test_smin_use_operands(i2 %arg0, i2 %arg1) {
49+
; CHECK-LABEL: define i2 @test_smin_use_operands(
50+
; CHECK-SAME: i2 [[ARG0:%.*]], i2 [[ARG1:%.*]]) {
51+
; CHECK-NEXT: [[V0:%.*]] = tail call i2 @llvm.smin.i2(i2 [[ARG0]], i2 [[ARG1]])
52+
; CHECK-NEXT: [[V1:%.*]] = add nsw i2 [[ARG0]], 1
53+
; CHECK-NEXT: [[V2:%.*]] = tail call i2 @llvm.smin.i2(i2 [[V1]], i2 [[ARG1]])
54+
; CHECK-NEXT: [[V3:%.*]] = sub i2 [[V2]], [[V0]]
55+
; CHECK-NEXT: call void @use(i2 [[V2]])
56+
; CHECK-NEXT: call void @use(i2 [[V0]])
57+
; CHECK-NEXT: ret i2 [[V3]]
58+
;
59+
%v0 = tail call i2 @llvm.smin.i2(i2 %arg0, i2 %arg1)
60+
%v1 = add nsw i2 %arg0, 1
61+
%v2 = tail call i2 @llvm.smin.i2(i2 %v1, i2 %arg1)
62+
%v3 = sub i2 %v2, %v0
63+
call void @use(i2 %v2)
64+
call void @use(i2 %v0)
65+
ret i2 %v3
66+
}
67+
68+
; Only %v2 has an extra use; %v0 is single-use. The fold is expected to apply:
; the sub becomes icmp slt + zext, %v0 disappears, and %v2 stays for @use.
define i2 @test_smin_use_operand(i2 %arg0, i2 %arg1) {
69+
; CHECK-LABEL: define i2 @test_smin_use_operand(
70+
; CHECK-SAME: i2 [[ARG0:%.*]], i2 [[ARG1:%.*]]) {
71+
; CHECK-NEXT: [[V1:%.*]] = add nsw i2 [[ARG0]], 1
72+
; CHECK-NEXT: [[V2:%.*]] = tail call i2 @llvm.smin.i2(i2 [[V1]], i2 [[ARG1]])
73+
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i2 [[ARG0]], [[ARG1]]
74+
; CHECK-NEXT: [[V3:%.*]] = zext i1 [[TMP1]] to i2
75+
; CHECK-NEXT: call void @use(i2 [[V2]])
76+
; CHECK-NEXT: ret i2 [[V3]]
77+
;
78+
%v0 = tail call i2 @llvm.smin.i2(i2 %arg0, i2 %arg1)
79+
%v1 = add nsw i2 %arg0, 1
80+
%v2 = tail call i2 @llvm.smin.i2(i2 %v1, i2 %arg1)
81+
%v3 = sub i2 %v2, %v0
82+
call void @use(i2 %v2)
83+
ret i2 %v3
84+
}
85+
86+
; Negative test: the add feeding the smin has no nsw flag, so the sub is
; expected to be left unchanged.
define i32 @test_smin_missing_nsw(i32 %arg0, i32 %arg1) {
87+
; CHECK-LABEL: define i32 @test_smin_missing_nsw(
88+
; CHECK-SAME: i32 [[ARG0:%.*]], i32 [[ARG1:%.*]]) {
89+
; CHECK-NEXT: [[V0:%.*]] = tail call i32 @llvm.smin.i32(i32 [[ARG0]], i32 [[ARG1]])
90+
; CHECK-NEXT: [[V1:%.*]] = add i32 [[ARG0]], 1
91+
; CHECK-NEXT: [[V2:%.*]] = tail call i32 @llvm.smin.i32(i32 [[V1]], i32 [[ARG1]])
92+
; CHECK-NEXT: [[V3:%.*]] = sub i32 [[V2]], [[V0]]
93+
; CHECK-NEXT: ret i32 [[V3]]
94+
;
95+
%v0 = tail call i32 @llvm.smin.i32(i32 %arg0, i32 %arg1)
96+
%v1 = add i32 %arg0, 1
97+
%v2 = tail call i32 @llvm.smin.i32(i32 %v1, i32 %arg1)
98+
%v3 = sub i32 %v2, %v0
99+
ret i32 %v3
100+
}
101+
102+
; Negative test: the add feeding the umin has no nuw flag, so the sub is
; expected to be left unchanged.
define i32 @test_umin_missing_nuw(i32 %arg0, i32 %arg1) {
103+
; CHECK-LABEL: define i32 @test_umin_missing_nuw(
104+
; CHECK-SAME: i32 [[ARG0:%.*]], i32 [[ARG1:%.*]]) {
105+
; CHECK-NEXT: [[V0:%.*]] = tail call i32 @llvm.umin.i32(i32 [[ARG0]], i32 [[ARG1]])
106+
; CHECK-NEXT: [[V1:%.*]] = add i32 [[ARG0]], 1
107+
; CHECK-NEXT: [[V2:%.*]] = tail call i32 @llvm.umin.i32(i32 [[V1]], i32 [[ARG1]])
108+
; CHECK-NEXT: [[V3:%.*]] = sub i32 [[V2]], [[V0]]
109+
; CHECK-NEXT: ret i32 [[V3]]
110+
;
111+
%v0 = tail call i32 @llvm.umin.i32(i32 %arg0, i32 %arg1)
112+
%v1 = add i32 %arg0, 1
113+
%v2 = tail call i32 @llvm.umin.i32(i32 %v1, i32 %arg1)
114+
%v3 = sub i32 %v2, %v0
115+
ret i32 %v3
116+
}
117+
118+
@tmp = external global i32
119+
120+
; Negative test: the two smin calls do not share their second operand (one
; uses a value loaded from @tmp, the other %arg1), so the sub is expected to
; be left unchanged.
define i32 @test_mismatched_operands(i32 %arg0, i32 %arg1) {
121+
; CHECK-LABEL: define i32 @test_mismatched_operands(
122+
; CHECK-SAME: i32 [[ARG0:%.*]], i32 [[ARG1:%.*]]) {
123+
; CHECK-NEXT: [[TMP:%.*]] = load i32, ptr @tmp, align 4
124+
; CHECK-NEXT: [[V0:%.*]] = tail call i32 @llvm.smin.i32(i32 [[ARG0]], i32 [[TMP]])
125+
; CHECK-NEXT: [[V1:%.*]] = add nsw i32 [[ARG0]], 1
126+
; CHECK-NEXT: [[V2:%.*]] = tail call i32 @llvm.smin.i32(i32 [[V1]], i32 [[ARG1]])
127+
; CHECK-NEXT: [[V3:%.*]] = sub i32 [[V2]], [[V0]]
128+
; CHECK-NEXT: ret i32 [[V3]]
129+
;
130+
%tmp = load i32, ptr @tmp, align 4
131+
%v0 = tail call i32 @llvm.smin.i32(i32 %arg0, i32 %tmp)
132+
%v1 = add nsw i32 %arg0, 1
133+
%v2 = tail call i32 @llvm.smin.i32(i32 %v1, i32 %arg1)
134+
%v3 = sub i32 %v2, %v0
135+
ret i32 %v3
136+
}
137+
138+
; Positive test: `or disjoint %arg0, 1` takes the place of `add nsw %arg0, 1`;
; the sub is still expected to fold to zext(icmp slt %arg0, %arg1).
define i32 @test_disjoint_or(i32 %arg0, i32 %arg1) {
139+
; CHECK-LABEL: define i32 @test_disjoint_or(
140+
; CHECK-SAME: i32 [[ARG0:%.*]], i32 [[ARG1:%.*]]) {
141+
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[ARG0]], [[ARG1]]
142+
; CHECK-NEXT: [[V3:%.*]] = zext i1 [[TMP1]] to i32
143+
; CHECK-NEXT: ret i32 [[V3]]
144+
;
145+
%v0 = tail call i32 @llvm.smin.i32(i32 %arg0, i32 %arg1)
146+
%v1 = or disjoint i32 %arg0, 1
147+
%v2 = tail call i32 @llvm.smin.i32(i32 %v1, i32 %arg1)
148+
%v3 = sub i32 %v2, %v0
149+
ret i32 %v3
150+
}

0 commit comments

Comments
 (0)