diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index f9155cc660317..71c53e37c7757 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -2731,6 +2731,24 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
     return BinaryOperator::CreateSub(X, Not);
   }
 
+  // min(X+1, Y) - min(X, Y) --> zext X < Y
+  // Replacing a sub and at least one min with an icmp
+  // and a zext is a potential improvement.
+  if (match(Op0, m_c_SMin(m_NSWAddLike(m_Value(X), m_One()), m_Value(Y))) &&
+      match(Op1, m_c_SMin(m_Specific(X), m_Specific(Y))) &&
+      I.getType()->getScalarSizeInBits() != 1 &&
+      (Op0->hasOneUse() || Op1->hasOneUse())) {
+    Value *Cond = Builder.CreateICmpSLT(X, Y);
+    return new ZExtInst(Cond, I.getType());
+  }
+  if (match(Op0, m_c_UMin(m_NUWAddLike(m_Value(X), m_One()), m_Value(Y))) &&
+      match(Op1, m_c_UMin(m_Specific(X), m_Specific(Y))) &&
+      I.getType()->getScalarSizeInBits() != 1 &&
+      (Op0->hasOneUse() || Op1->hasOneUse())) {
+    Value *Cond = Builder.CreateICmpULT(X, Y);
+    return new ZExtInst(Cond, I.getType());
+  }
+
   // Optimize pointer differences into the same array into a size. Consider:
   //   &A[10] - &A[0]: we should compile this to "10".
   Value *LHSOp, *RHSOp;
diff --git a/llvm/test/Transforms/InstCombine/min-zext.ll b/llvm/test/Transforms/InstCombine/min-zext.ll
new file mode 100644
index 0000000000000..f016d1a8de524
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/min-zext.ll
@@ -0,0 +1,150 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+define i32 @test_smin(i32 %arg0, i32 %arg1) {
+; CHECK-LABEL: define i32 @test_smin(
+; CHECK-SAME: i32 [[ARG0:%.*]], i32 [[ARG1:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[ARG0]], [[ARG1]]
+; CHECK-NEXT:    [[V3:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT:    ret i32 [[V3]]
+;
+  %v0 = tail call i32 @llvm.smin.i32(i32 %arg0, i32 %arg1)
+  %v1 = add nsw i32 %arg0, 1
+  %v2 = tail call i32 @llvm.smin.i32(i32 %v1, i32 %arg1)
+  %v3 = sub i32 %v2, %v0
+  ret i32 %v3
+}
+
+define i32 @test_umin(i32 %arg0, i32 %arg1) {
+; CHECK-LABEL: define i32 @test_umin(
+; CHECK-SAME: i32 [[ARG0:%.*]], i32 [[ARG1:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[ARG0]], [[ARG1]]
+; CHECK-NEXT:    [[V3:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT:    ret i32 [[V3]]
+;
+  %v0 = tail call i32 @llvm.umin.i32(i32 %arg0, i32 %arg1)
+  %v1 = add nuw i32 %arg0, 1
+  %v2 = tail call i32 @llvm.umin.i32(i32 %v1, i32 %arg1)
+  %v3 = sub i32 %v2, %v0
+  ret i32 %v3
+}
+
+define i1 @test_smin_i1(i1 %arg0, i1 %arg1) {
+; CHECK-LABEL: define i1 @test_smin_i1(
+; CHECK-SAME: i1 [[ARG0:%.*]], i1 [[ARG1:%.*]]) {
+; CHECK-NEXT:    [[V0:%.*]] = or i1 [[ARG0]], [[ARG1]]
+; CHECK-NEXT:    [[V3:%.*]] = xor i1 [[V0]], true
+; CHECK-NEXT:    ret i1 [[V3]]
+;
+  %v0 = tail call i1 @llvm.smin.i1(i1 %arg0, i1 %arg1)
+  %v1 = add nsw i1 %arg0, 1
+  %v2 = tail call i1 @llvm.smin.i1(i1 %v1, i1 %arg1)
+  %v3 = sub i1 %v2, %v0
+  ret i1 %v3
+}
+
+declare void @use(i2)
+
+define i2 @test_smin_use_operands(i2 %arg0, i2 %arg1) {
+; CHECK-LABEL: define i2 @test_smin_use_operands(
+; CHECK-SAME: i2 [[ARG0:%.*]], i2 [[ARG1:%.*]]) {
+; CHECK-NEXT:    [[V0:%.*]] = tail call i2 @llvm.smin.i2(i2 [[ARG0]], i2 [[ARG1]])
+; CHECK-NEXT:    [[V1:%.*]] = add nsw i2 [[ARG0]], 1
+; CHECK-NEXT:    [[V2:%.*]] = tail call i2 @llvm.smin.i2(i2 [[V1]], i2 [[ARG1]])
+; CHECK-NEXT:    [[V3:%.*]] = sub i2 [[V2]], [[V0]]
+; CHECK-NEXT:    call void @use(i2 [[V2]])
+; CHECK-NEXT:    call void @use(i2 [[V0]])
+; CHECK-NEXT:    ret i2 [[V3]]
+;
+  %v0 = tail call i2 @llvm.smin.i2(i2 %arg0, i2 %arg1)
+  %v1 = add nsw i2 %arg0, 1
+  %v2 = tail call i2 @llvm.smin.i2(i2 %v1, i2 %arg1)
+  %v3 = sub i2 %v2, %v0
+  call void @use(i2 %v2)
+  call void @use(i2 %v0)
+  ret i2 %v3
+}
+
+define i2 @test_smin_use_operand(i2 %arg0, i2 %arg1) {
+; CHECK-LABEL: define i2 @test_smin_use_operand(
+; CHECK-SAME: i2 [[ARG0:%.*]], i2 [[ARG1:%.*]]) {
+; CHECK-NEXT:    [[V1:%.*]] = add nsw i2 [[ARG0]], 1
+; CHECK-NEXT:    [[V2:%.*]] = tail call i2 @llvm.smin.i2(i2 [[V1]], i2 [[ARG1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i2 [[ARG0]], [[ARG1]]
+; CHECK-NEXT:    [[V3:%.*]] = zext i1 [[TMP1]] to i2
+; CHECK-NEXT:    call void @use(i2 [[V2]])
+; CHECK-NEXT:    ret i2 [[V3]]
+;
+  %v0 = tail call i2 @llvm.smin.i2(i2 %arg0, i2 %arg1)
+  %v1 = add nsw i2 %arg0, 1
+  %v2 = tail call i2 @llvm.smin.i2(i2 %v1, i2 %arg1)
+  %v3 = sub i2 %v2, %v0
+  call void @use(i2 %v2)
+  ret i2 %v3
+}
+
+define i32 @test_smin_missing_nsw(i32 %arg0, i32 %arg1) {
+; CHECK-LABEL: define i32 @test_smin_missing_nsw(
+; CHECK-SAME: i32 [[ARG0:%.*]], i32 [[ARG1:%.*]]) {
+; CHECK-NEXT:    [[V0:%.*]] = tail call i32 @llvm.smin.i32(i32 [[ARG0]], i32 [[ARG1]])
+; CHECK-NEXT:    [[V1:%.*]] = add i32 [[ARG0]], 1
+; CHECK-NEXT:    [[V2:%.*]] = tail call i32 @llvm.smin.i32(i32 [[V1]], i32 [[ARG1]])
+; CHECK-NEXT:    [[V3:%.*]] = sub i32 [[V2]], [[V0]]
+; CHECK-NEXT:    ret i32 [[V3]]
+;
+  %v0 = tail call i32 @llvm.smin.i32(i32 %arg0, i32 %arg1)
+  %v1 = add i32 %arg0, 1
+  %v2 = tail call i32 @llvm.smin.i32(i32 %v1, i32 %arg1)
+  %v3 = sub i32 %v2, %v0
+  ret i32 %v3
+}
+
+define i32 @test_umin_missing_nuw(i32 %arg0, i32 %arg1) {
+; CHECK-LABEL: define i32 @test_umin_missing_nuw(
+; CHECK-SAME: i32 [[ARG0:%.*]], i32 [[ARG1:%.*]]) {
+; CHECK-NEXT:    [[V0:%.*]] = tail call i32 @llvm.umin.i32(i32 [[ARG0]], i32 [[ARG1]])
+; CHECK-NEXT:    [[V1:%.*]] = add i32 [[ARG0]], 1
+; CHECK-NEXT:    [[V2:%.*]] = tail call i32 @llvm.umin.i32(i32 [[V1]], i32 [[ARG1]])
+; CHECK-NEXT:    [[V3:%.*]] = sub i32 [[V2]], [[V0]]
+; CHECK-NEXT:    ret i32 [[V3]]
+;
+  %v0 = tail call i32 @llvm.umin.i32(i32 %arg0, i32 %arg1)
+  %v1 = add i32 %arg0, 1
+  %v2 = tail call i32 @llvm.umin.i32(i32 %v1, i32 %arg1)
+  %v3 = sub i32 %v2, %v0
+  ret i32 %v3
+}
+
+@tmp = external global i32
+
+define i32 @test_mismatched_operands(i32 %arg0, i32 %arg1) {
+; CHECK-LABEL: define i32 @test_mismatched_operands(
+; CHECK-SAME: i32 [[ARG0:%.*]], i32 [[ARG1:%.*]]) {
+; CHECK-NEXT:    [[TMP:%.*]] = load i32, ptr @tmp, align 4
+; CHECK-NEXT:    [[V0:%.*]] = tail call i32 @llvm.smin.i32(i32 [[ARG0]], i32 [[TMP]])
+; CHECK-NEXT:    [[V1:%.*]] = add nsw i32 [[ARG0]], 1
+; CHECK-NEXT:    [[V2:%.*]] = tail call i32 @llvm.smin.i32(i32 [[V1]], i32 [[ARG1]])
+; CHECK-NEXT:    [[V3:%.*]] = sub i32 [[V2]], [[V0]]
+; CHECK-NEXT:    ret i32 [[V3]]
+;
+  %tmp = load i32, ptr @tmp, align 4
+  %v0 = tail call i32 @llvm.smin.i32(i32 %arg0, i32 %tmp)
+  %v1 = add nsw i32 %arg0, 1
+  %v2 = tail call i32 @llvm.smin.i32(i32 %v1, i32 %arg1)
+  %v3 = sub i32 %v2, %v0
+  ret i32 %v3
+}
+
+define i32 @test_disjoint_or(i32 %arg0, i32 %arg1) {
+; CHECK-LABEL: define i32 @test_disjoint_or(
+; CHECK-SAME: i32 [[ARG0:%.*]], i32 [[ARG1:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[ARG0]], [[ARG1]]
+; CHECK-NEXT:    [[V3:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT:    ret i32 [[V3]]
+;
+  %v0 = tail call i32 @llvm.smin.i32(i32 %arg0, i32 %arg1)
+  %v1 = or disjoint i32 %arg0, 1
+  %v2 = tail call i32 @llvm.smin.i32(i32 %v1, i32 %arg1)
+  %v3 = sub i32 %v2, %v0
+  ret i32 %v3
+}
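
Note on correctness (not part of the patch itself): the fold has exactly two cases. If X < Y, then min(X+1, Y) = X+1 and min(X, Y) = X, so the difference is 1. If X >= Y, the nsw/nuw flag on the add guarantees X+1 > X >= Y, so both mins evaluate to Y and the difference is 0. Either way the result equals zext(X < Y). The getScalarSizeInBits() != 1 guard excludes i1 because a zext from i1 to i1 would be invalid IR; as test_smin_i1 shows, other folds already reduce that pattern. Below is a minimal standalone C++ sketch that exhaustively checks the signed identity at 8 bits; the names and the bit width are chosen purely for illustration.

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  // Check smin(X+1, Y) - smin(X, Y) == zext(X < Y) over every i8 pair
  // where X+1 does not overflow, mirroring the nsw precondition.
  // X == INT8_MAX is skipped: there X+1 would wrap at i8 width.
  for (int X = INT8_MIN; X < INT8_MAX; ++X) {
    for (int Y = INT8_MIN; Y <= INT8_MAX; ++Y) {
      int Sub = std::min(X + 1, Y) - std::min(X, Y);
      int ZExt = (X < Y) ? 1 : 0; // models zext i1 %cond to i8
      assert(Sub == ZExt);
    }
  }
  return 0;
}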