-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[InstCombine]: Replace overflow calculation with intrinsic #168195
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
|
@llvm/pr-subscribers-llvm-transforms Author: None (kper) ChangesReplaces the manual overflow calculation with Closes #162717 Full diff: https://github.com/llvm/llvm-project/pull/168195.diff 3 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index fba1ccf2c8c9b..5c433277a8f7d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -12,6 +12,7 @@
#include "InstCombineInternal.h"
#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
@@ -22,12 +23,14 @@
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/Utils/Local.h"
#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/IR/CmpPredicate.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
@@ -7161,6 +7164,66 @@ Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) {
return nullptr;
}
+/// Fold icmp(ult, sub(add(sext(X), Cst1), sext(Y)), Cst2) -->
+/// extract(__builtin_ssub_overflow(X, Y), 1)
+Instruction *InstCombinerImpl::foldICmpsToSignedSubOverflow(Instruction &I) {
+ CmpPredicate Pred;
+ ConstantInt *Cst1, *Cst2;
+ Value *X, *Y;
+
+ /*
+ This transformation detects the pattern used to check for
+ a signed subtraction overflow.
+
+ The matched sequence performs the following steps:
+
+ 1. X = sext(x)
+ Y = sext(y)
+ // Sign-extend 32-bit operands to 64 bits.
+
+ 2. Shifted = X + Cst1
+ // Shift the signed range [-2^31, 2^31-1] by adding the minimum
+ // signed value (Cst1 = INT_MIN), producing an unsigned range
+ // [0, 2^32).
+
+ 3. Sub = Shifted - Y
+ // Compute the shifted subtraction result.
+
+ 4. icmp ult Sub, Cst2
+ // Check whether the result fits in [0, 2^32).
+ // If not, the subtraction overflowed.
+ */
+
+ auto SExtX = m_SExt(m_Value(X));
+ auto SExtY = m_SExt(m_Value(Y));
+ auto Shifted = m_Add(SExtX, m_ConstantInt(Cst1));
+ auto Sub = m_Sub(Shifted, SExtY);
+
+ if (!match(&I, m_ICmp(Pred, Sub, m_ConstantInt(Cst2))) ||
+ Pred != CmpInst::ICMP_ULT)
+ return nullptr;
+
+ const auto SignedMin =
+ APInt::getSignedMinValue(X->getType()->getScalarSizeInBits());
+ const auto ExpectedRange = SignedMin.getSExtValue() << 1;
+
+ // Cst1 must equal to SignedMin
+ // Cst2 must equal to ExpectedRange
+ if (SignedMin.getSExtValue() != Cst1->getValue().getSExtValue() ||
+ ExpectedRange != Cst2->getValue().getSExtValue())
+ return nullptr;
+
+ Module *M = I.getModule();
+ Function *F = Intrinsic::getOrInsertDeclaration(
+ M, Intrinsic::ssub_with_overflow, X->getType());
+
+ Builder.SetInsertPoint(&I);
+ auto *Call = Builder.CreateCall(F, {X, Y});
+ auto *Extract = Builder.CreateExtractValue(Call, 1);
+
+ return replaceInstUsesWith(I, Extract);
+}
+
/// If we have an icmp le or icmp ge instruction with a constant operand, turn
/// it into the appropriate icmp lt or icmp gt instruction. This transform
/// allows them to be folded in visitICmpInst.
@@ -7970,6 +8033,9 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
}
}
+ if (Instruction *R = foldICmpsToSignedSubOverflow(I))
+ return R;
+
return Changed ? &I : nullptr;
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 9bdd8cb71f7f3..5791932450711 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -791,6 +791,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
Instruction *foldICmpWithTrunc(ICmpInst &Cmp);
Instruction *foldICmpCommutative(CmpPredicate Pred, Value *Op0, Value *Op1,
ICmpInst &CxtI);
+ Instruction *foldICmpsToSignedSubOverflow(Instruction &I);
// Helpers of visitSelectInst().
Instruction *foldSelectOfBools(SelectInst &SI);
diff --git a/llvm/test/Transforms/InstCombine/icmp-fold-ssub-overflow.ll b/llvm/test/Transforms/InstCombine/icmp-fold-ssub-overflow.ll
new file mode 100644
index 0000000000000..1d22781db16f2
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/icmp-fold-ssub-overflow.ll
@@ -0,0 +1,103 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+; Fold
+;
+; int result = x - y;
+; return !(INT_MIN <= result && result <= INT_MAX);
+;
+; into
+;
+; __builtin_ssub_overflow(x, y)
+
+define i1 @idiomatic_check_sub_i16(i16 %x, i16 %y) {
+; CHECK-LABEL: @idiomatic_check_sub_i16(
+; CHECK-NEXT: [[TMP1:%.*]] = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 [[X:%.*]], i16 [[Y:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i16, i1 } [[TMP1]], 1
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %3 = sext i16 %x to i32
+ %4 = sext i16 %y to i32
+ %5 = add nsw i32 %3, -32768
+ %6 = sub nsw i32 %5, %4
+ %7 = icmp ult i32 %6, -65536
+ ret i1 %7
+}
+
+define i1 @idiomatic_check_sub_i16_no_flags(i16 %x, i16 %y) {
+; CHECK-LABEL: @idiomatic_check_sub_i16_no_flags(
+; CHECK-NEXT: [[TMP1:%.*]] = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 [[X:%.*]], i16 [[Y:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i16, i1 } [[TMP1]], 1
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %3 = sext i16 %x to i32
+ %4 = sext i16 %y to i32
+ %5 = add i32 %3, -32768
+ %6 = sub i32 %5, %4
+ %7 = icmp ult i32 %6, -65536
+ ret i1 %7
+}
+
+define i1 @idiomatic_check_sub_i32(i32 %x, i32 %y) {
+; CHECK-LABEL: @idiomatic_check_sub_i32(
+; CHECK-NEXT: [[TMP1:%.*]] = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 [[X:%.*]], i32 [[Y:%.*]])
+; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i1 } [[TMP1]], 1
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %3 = sext i32 %x to i64
+ %4 = sext i32 %y to i64
+ %5 = add nsw i64 %3, -2147483648
+ %6 = sub nsw i64 %5, %4
+ %7 = icmp ult i64 %6, -4294967296
+ ret i1 %7
+}
+
+define i1 @idiomatic_check_sub_i32_negative_test_1(i32 %x, i32 %y) {
+; CHECK-LABEL: @idiomatic_check_sub_i32_negative_test_1(
+; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[X:%.*]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[Y:%.*]] to i64
+; CHECK-NEXT: [[TMP3:%.*]] = sub nsw i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i64 [[TMP3]], -4294967296
+; CHECK-NEXT: ret i1 [[TMP4]]
+;
+ %3 = sext i32 %x to i64
+ %4 = sext i32 %y to i64
+ %5 = add nsw i64 %3, 0 ; Constant wrong
+ %6 = sub nsw i64 %5, %4
+ %7 = icmp ult i64 %6, -4294967296
+ ret i1 %7
+}
+
+define i1 @idiomatic_check_sub_i32_negative_test_2(i32 %x, i32 %y) {
+; CHECK-LABEL: @idiomatic_check_sub_i32_negative_test_2(
+; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[X:%.*]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[Y:%.*]] to i64
+; CHECK-NEXT: [[TMP3:%.*]] = add nsw i64 [[TMP1]], -2147483648
+; CHECK-NEXT: [[TMP4:%.*]] = sub nsw i64 [[TMP3]], [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], -4294967295
+; CHECK-NEXT: ret i1 [[TMP5]]
+;
+ %3 = sext i32 %x to i64
+ %4 = sext i32 %y to i64
+ %5 = add nsw i64 %3, -2147483648
+ %6 = sub nsw i64 %5, %4
+ %7 = icmp ult i64 %6, -4294967295 ; Constant wrong
+ ret i1 %7
+}
+
+define i1 @idiomatic_check_sub_i32_negative_test_3(i32 %x, i32 %y) {
+; CHECK-LABEL: @idiomatic_check_sub_i32_negative_test_3(
+; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[X:%.*]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[Y:%.*]] to i64
+; CHECK-NEXT: [[TMP3:%.*]] = add nsw i64 [[TMP1]], -2147483648
+; CHECK-NEXT: [[TMP4:%.*]] = sub nsw i64 [[TMP3]], [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = icmp slt i64 [[TMP4]], -4294967296
+; CHECK-NEXT: ret i1 [[TMP5]]
+;
+ %3 = sext i32 %x to i64
+ %4 = sext i32 %y to i64
+ %5 = add nsw i64 %3, -2147483648
+ %6 = sub nsw i64 %5, %4
+ %7 = icmp slt i64 %6, -4294967296 ; wrong Condition
+ ret i1 %7
+}
|
| if (SignedMin.getSExtValue() != Cst1->getValue().getSExtValue() || | ||
| ExpectedRange != Cst2->getValue().getSExtValue()) | ||
| return nullptr; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I use getSExtValue because the variables have different bitwidths and I wasn't able to compare them directly.
Replaces the manual overflow calculation with
ssub_with_overflowintrinsic.alive: https://alive2.llvm.org/ce/z/NUUUhw
Closes #162717