Skip to content

Commit b8d16ac

Browse files
committed
[InstCombine] Fold min(X+1, Y) - min(X, Y) --> zext X < Y (#157524)
This fold is invalid for @llvm.smin.i1, since smin(-1, 0) == -1 (take X = Y = 0, where X+1 is -1 as a signed i1). Otherwise, if X+1 has the appropriate nsw or nuw flag, this transform replaces a sub and at least one min with an icmp and a zext. The fold is also invalid for i1 in general, but it seems that other folds take care of i1. In #157524, this expression was folded to a select, but it seems that select (X < Y), 1, 0 can be canonicalized to zext (X < Y).
1 parent 72eb14a commit b8d16ac

File tree

2 files changed

+62
-40
lines changed

2 files changed

+62
-40
lines changed

llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2719,6 +2719,24 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
27192719
return BinaryOperator::CreateSub(X, Not);
27202720
}
27212721

2722+
// min(X+1, Y) - min(X, Y) --> zext X < Y
2723+
// Replacing a sub and at least one min with an icmp
2724+
// and a zext is a potential improvement.
2725+
if (match(Op0, m_c_SMin(m_c_NSWAdd(m_Value(X), m_One()), m_Value(Y))) &&
2726+
match(Op1, m_c_SMin(m_Value(X), m_Value(Y))) &&
2727+
I.getType()->getScalarSizeInBits() != 1 &&
2728+
(Op0->hasOneUse() || Op1->hasOneUse())) {
2729+
Value *Cond = Builder.CreateICmpSLT(X, Y);
2730+
return new ZExtInst(Cond, I.getType());
2731+
}
2732+
if (match(Op0, m_c_UMin(m_c_NUWAdd(m_Value(X), m_One()), m_Value(Y))) &&
2733+
match(Op1, m_c_UMin(m_Value(X), m_Value(Y))) &&
2734+
I.getType()->getScalarSizeInBits() != 1 &&
2735+
(Op0->hasOneUse() || Op1->hasOneUse())) {
2736+
Value *Cond = Builder.CreateICmpULT(X, Y);
2737+
return new ZExtInst(Cond, I.getType());
2738+
}
2739+
27222740
// Optimize pointer differences into the same array into a size. Consider:
27232741
// &A[10] - &A[0]: we should compile this to "10".
27242742
Value *LHSOp, *RHSOp;

llvm/test/Transforms/InstCombine/min-zext.ll

Lines changed: 44 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
12
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
23

34
define i32 @test_smin(i32 %arg0, i32 %arg1) {
45
; CHECK-LABEL: define i32 @test_smin(
5-
; CHECK-NEXT: %v0 = tail call i32 @llvm.smin.i32(i32 %arg0, i32 %arg1)
6-
; CHECK-NEXT: %v1 = add nsw i32 %arg0, 1
7-
; CHECK-NEXT: %v2 = tail call i32 @llvm.smin.i32(i32 %v1, i32 %arg1)
8-
; CHECK-NEXT: %v3 = sub i32 %v2, %v0
9-
; CHECK-NEXT: ret i32 %v3
6+
; CHECK-SAME: i32 [[ARG0:%.*]], i32 [[ARG1:%.*]]) {
7+
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[ARG0]], [[ARG1]]
8+
; CHECK-NEXT: [[V3:%.*]] = zext i1 [[TMP1]] to i32
9+
; CHECK-NEXT: ret i32 [[V3]]
1010
;
1111
%v0 = tail call i32 @llvm.smin.i32(i32 %arg0, i32 %arg1)
1212
%v1 = add nsw i32 %arg0, 1
@@ -17,11 +17,10 @@ define i32 @test_smin(i32 %arg0, i32 %arg1) {
1717

1818
define i32 @test_umin(i32 %arg0, i32 %arg1) {
1919
; CHECK-LABEL: define i32 @test_umin(
20-
; CHECK-NEXT: %v0 = tail call i32 @llvm.umin.i32(i32 %arg0, i32 %arg1)
21-
; CHECK-NEXT: %v1 = add nuw i32 %arg0, 1
22-
; CHECK-NEXT: %v2 = tail call i32 @llvm.umin.i32(i32 %v1, i32 %arg1)
23-
; CHECK-NEXT: %v3 = sub i32 %v2, %v0
24-
; CHECK-NEXT: ret i32 %v3
20+
; CHECK-SAME: i32 [[ARG0:%.*]], i32 [[ARG1:%.*]]) {
21+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[ARG0]], [[ARG1]]
22+
; CHECK-NEXT: [[V3:%.*]] = zext i1 [[TMP1]] to i32
23+
; CHECK-NEXT: ret i32 [[V3]]
2524
;
2625
%v0 = tail call i32 @llvm.umin.i32(i32 %arg0, i32 %arg1)
2726
%v1 = add nuw i32 %arg0, 1
@@ -32,9 +31,10 @@ define i32 @test_umin(i32 %arg0, i32 %arg1) {
3231

3332
define i1 @test_smin_i1(i1 %arg0, i1 %arg1) {
3433
; CHECK-LABEL: define i1 @test_smin_i1(
35-
; CHECK-NEXT: %v0 = or i1 %arg0, %arg1
36-
; CHECK-NEXT: %v3 = xor i1 %v0, true
37-
; CHECK-NEXT: ret i1 %v3
34+
; CHECK-SAME: i1 [[ARG0:%.*]], i1 [[ARG1:%.*]]) {
35+
; CHECK-NEXT: [[V0:%.*]] = or i1 [[ARG0]], [[ARG1]]
36+
; CHECK-NEXT: [[V3:%.*]] = xor i1 [[V0]], true
37+
; CHECK-NEXT: ret i1 [[V3]]
3838
;
3939
%v0 = tail call i1 @llvm.smin.i1(i1 %arg0, i1 %arg1)
4040
%v1 = add nsw i1 %arg0, 1
@@ -47,47 +47,50 @@ declare void @use(i2)
4747

4848
define i2 @test_smin_use_operands(i2 %arg0, i2 %arg1) {
4949
; CHECK-LABEL: define i2 @test_smin_use_operands(
50-
; CHECK-NEXT: %v0 = tail call i2 @llvm.smin.i2(i2 %arg0, i2 %arg1)
51-
; CHECK-NEXT: %v1 = add nsw i2 %arg0, 1
52-
; CHECK-NEXT: %v2 = tail call i2 @llvm.smin.i2(i2 %v1, i2 %arg1)
53-
; CHECK-NEXT: %v3 = sub i2 %v2, %v0
54-
; CHECK-NEXT: call void @use(i2 %v2)
55-
; CHECK-NEXT: call void @use(i2 %v0)
56-
; CHECK-NEXT: ret i2 %v3
50+
; CHECK-SAME: i2 [[ARG0:%.*]], i2 [[ARG1:%.*]]) {
51+
; CHECK-NEXT: [[V0:%.*]] = tail call i2 @llvm.smin.i2(i2 [[ARG0]], i2 [[ARG1]])
52+
; CHECK-NEXT: [[V1:%.*]] = add nsw i2 [[ARG0]], 1
53+
; CHECK-NEXT: [[V2:%.*]] = tail call i2 @llvm.smin.i2(i2 [[V1]], i2 [[ARG1]])
54+
; CHECK-NEXT: [[V3:%.*]] = sub i2 [[V2]], [[V0]]
55+
; CHECK-NEXT: call void @use(i2 [[V2]])
56+
; CHECK-NEXT: call void @use(i2 [[V0]])
57+
; CHECK-NEXT: ret i2 [[V3]]
5758
;
5859
%v0 = tail call i2 @llvm.smin.i2(i2 %arg0, i2 %arg1)
5960
%v1 = add nsw i2 %arg0, 1
6061
%v2 = tail call i2 @llvm.smin.i2(i2 %v1, i2 %arg1)
61-
%v3 = sub i2 %v2, %v0
62+
%v3 = sub i2 %v2, %v0
6263
call void @use(i2 %v2)
6364
call void @use(i2 %v0)
64-
ret i2 %v3
65+
ret i2 %v3
6566
}
6667

6768
define i2 @test_smin_use_operand(i2 %arg0, i2 %arg1) {
6869
; CHECK-LABEL: define i2 @test_smin_use_operand(
69-
; CHECK-NEXT: %v0 = tail call i2 @llvm.smin.i2(i2 %arg0, i2 %arg1)
70-
; CHECK-NEXT: %v1 = add nsw i2 %arg0, 1
71-
; CHECK-NEXT: %v2 = tail call i2 @llvm.smin.i2(i2 %v1, i2 %arg1)
72-
; CHECK-NEXT: %v3 = sub i2 %v2, %v0
73-
; CHECK-NEXT: call void @use(i2 %v2)
74-
; CHECK-NEXT: ret i2 %v3
70+
; CHECK-SAME: i2 [[ARG0:%.*]], i2 [[ARG1:%.*]]) {
71+
; CHECK-NEXT: [[V1:%.*]] = add nsw i2 [[ARG0]], 1
72+
; CHECK-NEXT: [[V2:%.*]] = tail call i2 @llvm.smin.i2(i2 [[V1]], i2 [[ARG1]])
73+
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i2 [[ARG0]], [[ARG1]]
74+
; CHECK-NEXT: [[V3:%.*]] = zext i1 [[TMP1]] to i2
75+
; CHECK-NEXT: call void @use(i2 [[V2]])
76+
; CHECK-NEXT: ret i2 [[V3]]
7577
;
7678
%v0 = tail call i2 @llvm.smin.i2(i2 %arg0, i2 %arg1)
7779
%v1 = add nsw i2 %arg0, 1
7880
%v2 = tail call i2 @llvm.smin.i2(i2 %v1, i2 %arg1)
79-
%v3 = sub i2 %v2, %v0
81+
%v3 = sub i2 %v2, %v0
8082
call void @use(i2 %v2)
81-
ret i2 %v3
83+
ret i2 %v3
8284
}
8385

8486
define i32 @test_smin_missing_nsw(i32 %arg0, i32 %arg1) {
8587
; CHECK-LABEL: define i32 @test_smin_missing_nsw(
86-
; CHECK-NEXT: %v0 = tail call i32 @llvm.smin.i32(i32 %arg0, i32 %arg1)
87-
; CHECK-NEXT: %v1 = add i32 %arg0, 1
88-
; CHECK-NEXT: %v2 = tail call i32 @llvm.smin.i32(i32 %v1, i32 %arg1)
89-
; CHECK-NEXT: %v3 = sub i32 %v2, %v0
90-
; CHECK-NEXT: ret i32 %v3
88+
; CHECK-SAME: i32 [[ARG0:%.*]], i32 [[ARG1:%.*]]) {
89+
; CHECK-NEXT: [[V0:%.*]] = tail call i32 @llvm.smin.i32(i32 [[ARG0]], i32 [[ARG1]])
90+
; CHECK-NEXT: [[V1:%.*]] = add i32 [[ARG0]], 1
91+
; CHECK-NEXT: [[V2:%.*]] = tail call i32 @llvm.smin.i32(i32 [[V1]], i32 [[ARG1]])
92+
; CHECK-NEXT: [[V3:%.*]] = sub i32 [[V2]], [[V0]]
93+
; CHECK-NEXT: ret i32 [[V3]]
9194
;
9295
%v0 = tail call i32 @llvm.smin.i32(i32 %arg0, i32 %arg1)
9396
%v1 = add i32 %arg0, 1
@@ -98,11 +101,12 @@ define i32 @test_smin_missing_nsw(i32 %arg0, i32 %arg1) {
98101

99102
define i32 @test_umin_missing_nuw(i32 %arg0, i32 %arg1) {
100103
; CHECK-LABEL: define i32 @test_umin_missing_nuw(
101-
; CHECK-NEXT: %v0 = tail call i32 @llvm.umin.i32(i32 %arg0, i32 %arg1)
102-
; CHECK-NEXT: %v1 = add i32 %arg0, 1
103-
; CHECK-NEXT: %v2 = tail call i32 @llvm.umin.i32(i32 %v1, i32 %arg1)
104-
; CHECK-NEXT: %v3 = sub i32 %v2, %v0
105-
; CHECK-NEXT: ret i32 %v3
104+
; CHECK-SAME: i32 [[ARG0:%.*]], i32 [[ARG1:%.*]]) {
105+
; CHECK-NEXT: [[V0:%.*]] = tail call i32 @llvm.umin.i32(i32 [[ARG0]], i32 [[ARG1]])
106+
; CHECK-NEXT: [[V1:%.*]] = add i32 [[ARG0]], 1
107+
; CHECK-NEXT: [[V2:%.*]] = tail call i32 @llvm.umin.i32(i32 [[V1]], i32 [[ARG1]])
108+
; CHECK-NEXT: [[V3:%.*]] = sub i32 [[V2]], [[V0]]
109+
; CHECK-NEXT: ret i32 [[V3]]
106110
;
107111
%v0 = tail call i32 @llvm.umin.i32(i32 %arg0, i32 %arg1)
108112
%v1 = add i32 %arg0, 1

0 commit comments

Comments
 (0)