diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
index f4b378b82daec..32170e3705b81 100644
--- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp
+++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
@@ -19,7 +19,9 @@
 #include "llvm/Analysis/ValueLattice.h"
 #include "llvm/Analysis/ValueLatticeUtils.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/NoFolder.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Debug.h"
@@ -245,11 +247,50 @@ static Value *simplifyInstruction(SCCPSolver &Solver,
   const APInt *RHSC;
   // Remove masking operations.
   if (match(&Inst, m_And(m_Value(X), m_LowBitMask(RHSC)))) {
-    ConstantRange LRange = GetRange(Inst.getOperand(0));
+    ConstantRange LRange = GetRange(X);
     if (LRange.getUnsignedMax().ule(*RHSC))
       return X;
   }
 
+  // Check if we can simplify [us]cmp(X, Y) to X - Y. This is valid when
+  // Y - 1 <= X <= Y + 1 holds for all X, Y in their ranges, so that X - Y is
+  // always one of -1, 0 or 1.
+  if (auto *Cmp = dyn_cast<CmpIntrinsic>(&Inst)) {
+    Value *LHS = Cmp->getOperand(0);
+    Value *RHS = Cmp->getOperand(1);
+    unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
+    // Bail out on 1-bit comparisons: in i1, +1 and -1 are the same value.
+    if (BitWidth == 1)
+      return nullptr;
+    // Cheap pre-filter: the check below cannot succeed if either range holds
+    // more than 3 values.
+    ConstantRange LRange = GetRange(LHS);
+    if (LRange.isSizeLargerThan(3))
+      return nullptr;
+    ConstantRange RRange = GetRange(RHS);
+    if (RRange.isSizeLargerThan(3))
+      return nullptr;
+    ConstantRange RHSLower = RRange.sub(APInt(BitWidth, 1));
+    ConstantRange RHSUpper = RRange.add(APInt(BitWidth, 1));
+    ICmpInst::Predicate Pred =
+        Cmp->isSigned() ? CmpInst::ICMP_SLE : CmpInst::ICMP_ULE;
+    if (!RHSLower.icmp(Pred, LRange) || !LRange.icmp(Pred, RHSUpper))
+      return nullptr;
+
+    // NoFolder keeps the subtraction as an instruction even when it would
+    // otherwise fold away (e.g. sub X, 0).
+    IRBuilder<NoFolder> Builder(&Inst);
+    Value *Sub = Builder.CreateSub(LHS, RHS, Inst.getName(), /*HasNUW=*/false,
+                                   /*HasNSW=*/Cmp->isSigned());
+    InsertedValues.insert(Sub);
+    // The result type may differ from the operand type; adjust the width.
+    if (Sub->getType() != Inst.getType()) {
+      Sub = Builder.CreateSExtOrTrunc(Sub, Inst.getType());
+      InsertedValues.insert(Sub);
+    }
+    return Sub;
+  }
+
   return nullptr;
 }
 
diff --git a/llvm/test/Transforms/SCCP/uscmp.ll b/llvm/test/Transforms/SCCP/uscmp.ll
new file mode 100644
index 0000000000000..d010c0632c809
--- /dev/null
+++ b/llvm/test/Transforms/SCCP/uscmp.ll
@@ -0,0 +1,185 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=sccp -S < %s | FileCheck %s
+
+define i32 @scmp_to_sub(i32 range(i32 -1, 2) %a) {
+; CHECK-LABEL: define i32 @scmp_to_sub(
+; CHECK-SAME: i32 range(i32 -1, 2) [[A:%.*]]) {
+; CHECK-NEXT:    [[SCMP:%.*]] = sub nsw i32 [[A]], 0
+; CHECK-NEXT:    ret i32 [[SCMP]]
+;
+  %scmp = call i32 @llvm.scmp(i32 %a, i32 0)
+  ret i32 %scmp
+}
+
+define i32 @scmp_zext_to_sub(i1 %a, i1 %b) {
+; CHECK-LABEL: define i32 @scmp_zext_to_sub(
+; CHECK-SAME: i1 [[A:%.*]], i1 [[B:%.*]]) {
+; CHECK-NEXT:    [[ZEXT_A:%.*]] = zext i1 [[A]] to i32
+; CHECK-NEXT:    [[ZEXT_B:%.*]] = zext i1 [[B]] to i32
+; CHECK-NEXT:    [[SCMP:%.*]] = sub nsw i32 [[ZEXT_A]], [[ZEXT_B]]
+; CHECK-NEXT:    ret i32 [[SCMP]]
+;
+  %zext_a = zext i1 %a to i32
+  %zext_b = zext i1 %b to i32
+  %scmp = call i32 @llvm.scmp(i32 %zext_a, i32 %zext_b)
+  ret i32 %scmp
+}
+
+define i8 @scmp_to_sub_trunc(i32 range(i32 -1, 2) %a) {
+; CHECK-LABEL: define i8 @scmp_to_sub_trunc(
+; CHECK-SAME: i32 range(i32 -1, 2) [[A:%.*]]) {
+; CHECK-NEXT:    [[SCMP1:%.*]] = sub nsw i32 [[A]], 0
+; CHECK-NEXT:    [[SCMP:%.*]] = trunc i32 [[SCMP1]] to i8
+; CHECK-NEXT:    ret i8 [[SCMP]]
+;
+  %scmp = call i8 @llvm.scmp(i32 %a, i32 0)
+  ret i8 %scmp
+}
+
+define i64 @scmp_to_sub_sext(i32 range(i32 -1, 2) %a) {
+; CHECK-LABEL: define i64 @scmp_to_sub_sext(
+; CHECK-SAME: i32 range(i32 -1, 2) [[A:%.*]]) {
+; CHECK-NEXT:    [[SCMP1:%.*]] = sub nsw i32 [[A]], 0
+; CHECK-NEXT:    [[SCMP:%.*]] = sext i32 [[SCMP1]] to i64
+; CHECK-NEXT:    ret i64 [[SCMP]]
+;
+  %scmp = call i64 @llvm.scmp(i32 %a, i32 0)
+  ret i64 %scmp
+}
+
+define i32 @scmp_to_sub_small_range(i32 range(i32 -1, 1) %a) {
+; CHECK-LABEL: define i32 @scmp_to_sub_small_range(
+; CHECK-SAME: i32 range(i32 -1, 1) [[A:%.*]]) {
+; CHECK-NEXT:    [[SCMP:%.*]] = sub nsw i32 [[A]], 0
+; CHECK-NEXT:    ret i32 [[SCMP]]
+;
+  %scmp = call i32 @llvm.scmp(i32 %a, i32 0)
+  ret i32 %scmp
+}
+
+define i32 @ucmp_to_sub(i32 range(i32 0, 3) %a) {
+; CHECK-LABEL: define i32 @ucmp_to_sub(
+; CHECK-SAME: i32 range(i32 0, 3) [[A:%.*]]) {
+; CHECK-NEXT:    [[UCMP:%.*]] = sub i32 [[A]], 1
+; CHECK-NEXT:    ret i32 [[UCMP]]
+;
+  %ucmp = call i32 @llvm.ucmp(i32 %a, i32 1)
+  ret i32 %ucmp
+}
+
+define i8 @ucmp_to_sub_trunc(i32 range(i32 0, 3) %a) {
+; CHECK-LABEL: define i8 @ucmp_to_sub_trunc(
+; CHECK-SAME: i32 range(i32 0, 3) [[A:%.*]]) {
+; CHECK-NEXT:    [[UCMP1:%.*]] = sub i32 [[A]], 1
+; CHECK-NEXT:    [[UCMP:%.*]] = trunc i32 [[UCMP1]] to i8
+; CHECK-NEXT:    ret i8 [[UCMP]]
+;
+  %ucmp = call i8 @llvm.ucmp(i32 %a, i32 1)
+  ret i8 %ucmp
+}
+
+define i64 @ucmp_to_sub_sext(i32 range(i32 0, 3) %a) {
+; CHECK-LABEL: define i64 @ucmp_to_sub_sext(
+; CHECK-SAME: i32 range(i32 0, 3) [[A:%.*]]) {
+; CHECK-NEXT:    [[UCMP1:%.*]] = sub i32 [[A]], 1
+; CHECK-NEXT:    [[UCMP:%.*]] = sext i32 [[UCMP1]] to i64
+; CHECK-NEXT:    ret i64 [[UCMP]]
+;
+  %ucmp = call i64 @llvm.ucmp(i32 %a, i32 1)
+  ret i64 %ucmp
+}
+
+; TODO: we can fold this into %a.
+define i32 @ucmp_to_sub_small_range(i32 range(i32 0, 2) %a) {
+; CHECK-LABEL: define i32 @ucmp_to_sub_small_range(
+; CHECK-SAME: i32 range(i32 0, 2) [[A:%.*]]) {
+; CHECK-NEXT:    [[UCMP:%.*]] = call i32 @llvm.ucmp.i32.i32(i32 [[A]], i32 0)
+; CHECK-NEXT:    ret i32 [[UCMP]]
+;
+  %ucmp = call i32 @llvm.ucmp(i32 %a, i32 0)
+  ret i32 %ucmp
+}
+
+define i32 @scmp_to_sub_large_range(i32 range(i32 -1, 3) %a) {
+; CHECK-LABEL: define i32 @scmp_to_sub_large_range(
+; CHECK-SAME: i32 range(i32 -1, 3) [[A:%.*]]) {
+; CHECK-NEXT:    [[SCMP:%.*]] = call i32 @llvm.scmp.i32.i32(i32 [[A]], i32 0)
+; CHECK-NEXT:    ret i32 [[SCMP]]
+;
+  %scmp = call i32 @llvm.scmp(i32 %a, i32 0)
+  ret i32 %scmp
+}
+
+define i32 @ucmp_to_sub_large_range(i32 range(i32 -1, 3) %a) {
+; CHECK-LABEL: define i32 @ucmp_to_sub_large_range(
+; CHECK-SAME: i32 range(i32 -1, 3) [[A:%.*]]) {
+; CHECK-NEXT:    [[UCMP:%.*]] = call i32 @llvm.ucmp.i32.i32(i32 [[A]], i32 0)
+; CHECK-NEXT:    ret i32 [[UCMP]]
+;
+  %ucmp = call i32 @llvm.ucmp(i32 %a, i32 0)
+  ret i32 %ucmp
+}
+
+define i32 @scmp_to_sub_wrap(i8 range(i8 127, -126) %a) {
+; CHECK-LABEL: define i32 @scmp_to_sub_wrap(
+; CHECK-SAME: i8 range(i8 127, -126) [[A:%.*]]) {
+; CHECK-NEXT:    [[SCMP:%.*]] = call i32 @llvm.scmp.i32.i8(i8 [[A]], i8 -128)
+; CHECK-NEXT:    ret i32 [[SCMP]]
+;
+  %scmp = call i32 @llvm.scmp(i8 %a, i8 -128)
+  ret i32 %scmp
+}
+
+define i32 @ucmp_to_sub_wrap(i8 range(i8 -1, 2) %a) {
+; CHECK-LABEL: define i32 @ucmp_to_sub_wrap(
+; CHECK-SAME: i8 range(i8 -1, 2) [[A:%.*]]) {
+; CHECK-NEXT:    [[UCMP:%.*]] = call i32 @llvm.ucmp.i32.i8(i8 [[A]], i8 0)
+; CHECK-NEXT:    ret i32 [[UCMP]]
+;
+  %ucmp = call i32 @llvm.ucmp(i8 %a, i8 0)
+  ret i32 %ucmp
+}
+
+; It is incorrect to convert a ucmp into sub when the input type is i1.
+define i32 @ucmp_to_sub_i1_rhs_const(i1 %a) {
+; CHECK-LABEL: define i32 @ucmp_to_sub_i1_rhs_const(
+; CHECK-SAME: i1 [[A:%.*]]) {
+; CHECK-NEXT:    [[UCMP:%.*]] = call i32 @llvm.ucmp.i32.i1(i1 [[A]], i1 false)
+; CHECK-NEXT:    ret i32 [[UCMP]]
+;
+  %ucmp = call i32 @llvm.ucmp(i1 %a, i1 false)
+  ret i32 %ucmp
+}
+
+; It is incorrect to convert a ucmp into sub when the input type is i1.
+define i32 @ucmp_to_sub_i1_lhs_const(i1 %a) {
+; CHECK-LABEL: define i32 @ucmp_to_sub_i1_lhs_const(
+; CHECK-SAME: i1 [[A:%.*]]) {
+; CHECK-NEXT:    [[UCMP:%.*]] = call i32 @llvm.ucmp.i32.i1(i1 false, i1 [[A]])
+; CHECK-NEXT:    ret i32 [[UCMP]]
+;
+  %ucmp = call i32 @llvm.ucmp(i1 false, i1 %a)
+  ret i32 %ucmp
+}
+
+; It is incorrect to convert a ucmp into sub when the input type is i1.
+define i32 @ucmp_to_sub_i1(i1 %a, i1 %b) {
+; CHECK-LABEL: define i32 @ucmp_to_sub_i1(
+; CHECK-SAME: i1 [[A:%.*]], i1 [[B:%.*]]) {
+; CHECK-NEXT:    [[UCMP:%.*]] = call i32 @llvm.ucmp.i32.i1(i1 [[A]], i1 [[B]])
+; CHECK-NEXT:    ret i32 [[UCMP]]
+;
+  %ucmp = call i32 @llvm.ucmp(i1 %a, i1 %b)
+  ret i32 %ucmp
+}
+
+; It is incorrect to convert a scmp into sub when the input type is i1.
+define i32 @scmp_to_sub_i1_rhs_const(i1 %a) {
+; CHECK-LABEL: define i32 @scmp_to_sub_i1_rhs_const(
+; CHECK-SAME: i1 [[A:%.*]]) {
+; CHECK-NEXT:    [[SCMP:%.*]] = call i32 @llvm.scmp.i32.i1(i1 [[A]], i1 false)
+; CHECK-NEXT:    ret i32 [[SCMP]]
+;
+  %scmp = call i32 @llvm.scmp(i1 %a, i1 false)
+  ret i32 %scmp
+}