From edded5af5494adfc53187719fa3f3b0be7a4a20e Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Fri, 18 Oct 2024 13:44:57 -0700 Subject: [PATCH 1/2] [SLP][NFC]Add a test with the incorrect casting of the abs argument, NFC (cherry picked from commit 825f9cb1b31aa91d23eba803003897490de74a20) --- .../abs-overflow-incorrect-minbws.ll | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 llvm/test/Transforms/SLPVectorizer/abs-overflow-incorrect-minbws.ll diff --git a/llvm/test/Transforms/SLPVectorizer/abs-overflow-incorrect-minbws.ll b/llvm/test/Transforms/SLPVectorizer/abs-overflow-incorrect-minbws.ll new file mode 100644 index 0000000000000..a936b076138d0 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/abs-overflow-incorrect-minbws.ll @@ -0,0 +1,31 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S --passes=slp-vectorizer < %s | FileCheck %s + +define i32 @test(i32 %n) { +; CHECK-LABEL: define i32 @test( +; CHECK-SAME: i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[N]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i32> [[TMP1]], <i32 1, i32 2> +; CHECK-NEXT: [[TMP3:%.*]] = mul <2 x i32> [[TMP2]], <i32 273837369, i32 273837369> +; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i32> @llvm.abs.v2i32(<2 x i32> [[TMP3]], i1 false) +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP4]], i32 1 +; CHECK-NEXT: [[RES1:%.*]] = add i32 [[TMP5]], [[TMP6]] +; CHECK-NEXT: ret i32 [[RES1]] +; +entry: + %n1 = add i32 %n, 1 + %zn1 = zext nneg i32 %n1 to i64 + %m1 = mul nuw nsw i64 %zn1, 273837369 + %a1 = call i64 @llvm.abs.i64(i64 %m1, i1 true) + %t1 = trunc i64 %a1 to i32 + %n2 = add i32 %n, 2 + %zn2 = zext nneg i32 %n2 to i64 + %m2 = mul nuw nsw i64 %zn2, 273837369 + %a2 = call i64
@llvm.abs.i64(i64 %m2, i1 true) + %t2 = trunc i64 %a2 to i32 + %res1 = add i32 %t1, %t2 + ret i32 %res1 +} From 9f72c9837c553063ab0cbacc1a472a73c0ec2a4b Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Fri, 18 Oct 2024 13:54:30 -0700 Subject: [PATCH 2/2] [SLP]Check that operand of abs does not overflow before making it part of minbitwidth transformation Need to check that the operand of the abs intrinsic can be safely truncated before making it part of the minbitwidth transformation. Fixes #112577 (cherry picked from commit 709abacdc350d63c61888607edb28ce272daa0a0) --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 16 ++++++++++++++++ .../abs-overflow-incorrect-minbws.ll | 6 ++++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index ab2b96cdc42db..746ba51a981fe 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -15440,9 +15440,25 @@ bool BoUpSLP::collectValuesToDemote( MaskedValueIsZero(I->getOperand(1), Mask, SimplifyQuery(*DL))); }); }; + auto AbsChecker = [&](unsigned BitWidth, unsigned OrigBitWidth) { + assert(BitWidth <= OrigBitWidth && "Unexpected bitwidths!"); + return all_of(E.Scalars, [&](Value *V) { + auto *I = cast<Instruction>(V); + unsigned SignBits = OrigBitWidth - BitWidth; + APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth - 1); + unsigned Op0SignBits = + ComputeNumSignBits(I->getOperand(0), *DL, 0, AC, nullptr, DT); + return SignBits <= Op0SignBits && + ((SignBits != Op0SignBits && + !isKnownNonNegative(I->getOperand(0), SimplifyQuery(*DL))) || + MaskedValueIsZero(I->getOperand(0), Mask, SimplifyQuery(*DL))); + }); + }; if (ID != Intrinsic::abs) { Operands.push_back(getOperandEntry(&E, 1)); CallChecker = CompChecker; + } else { + CallChecker = AbsChecker; } InstructionCost BestCost = std::numeric_limits<InstructionCost::CostType>::max(); diff --git
a/llvm/test/Transforms/SLPVectorizer/abs-overflow-incorrect-minbws.ll b/llvm/test/Transforms/SLPVectorizer/abs-overflow-incorrect-minbws.ll index a936b076138d0..51b635837d3b5 100644 --- a/llvm/test/Transforms/SLPVectorizer/abs-overflow-incorrect-minbws.ll +++ b/llvm/test/Transforms/SLPVectorizer/abs-overflow-incorrect-minbws.ll @@ -8,8 +8,10 @@ define i32 @test(i32 %n) { ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[N]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i32> [[TMP1]], <i32 1, i32 2> -; CHECK-NEXT: [[TMP3:%.*]] = mul <2 x i32> [[TMP2]], <i32 273837369, i32 273837369> -; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i32> @llvm.abs.v2i32(<2 x i32> [[TMP3]], i1 false) +; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = mul nuw nsw <2 x i64> [[TMP3]], <i64 273837369, i64 273837369> +; CHECK-NEXT: [[TMP8:%.*]] = call <2 x i64> @llvm.abs.v2i64(<2 x i64> [[TMP7]], i1 true) +; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP8]] to <2 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP4]], i32 1 ; CHECK-NEXT: [[RES1:%.*]] = add i32 [[TMP5]], [[TMP6]]