diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index a2200f283168d..61c4df68799c6 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6449,14 +6449,11 @@ void BoUpSLP::reorderTopToBottom() { assert(SLPReVec && "Only supported by REVEC."); // ShuffleVectorInst does not do reorderOperands (and it should not // because ShuffleVectorInst supports only a limited set of - // patterns). Only do reorderNodeWithReuses if all of the users are - // not ShuffleVectorInst. - if (isa<ShuffleVectorInst>(TE->UserTreeIndex.UserTE->getMainOp())) + // patterns). Only do reorderNodeWithReuses if the user is not + // ShuffleVectorInst. + if (TE->UserTreeIndex && TE->UserTreeIndex.UserTE->hasState() && + isa<ShuffleVectorInst>(TE->UserTreeIndex.UserTE->getMainOp())) continue; - assert((!TE->UserTreeIndex || - !isa<ShuffleVectorInst>( - TE->UserTreeIndex.UserTE->getMainOp())) && - "Does not know how to reorder."); } // Update ordering of the operands with the smaller VF than the given // one. 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/revec-reduced-value-replace-extractelement.ll b/llvm/test/Transforms/SLPVectorizer/X86/revec-reduced-value-replace-extractelement.ll new file mode 100644 index 0000000000000..bdd5971c0c91a --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/revec-reduced-value-replace-extractelement.ll @@ -0,0 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S --passes=slp-vectorizer -slp-threshold=-99999 < %s -mtriple=x86_64-unknown-linux-gnu -slp-revec | FileCheck %s + +define void @test() { +; CHECK-LABEL: define void @test() { +; CHECK-NEXT: [[BB:.*]]: +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 0 to i32 +; CHECK-NEXT: br label %[[BB1:.*]] +; CHECK: [[BB1]]: +; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB]] ], [ [[TMP4:%.*]], %[[BB1]] ] +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = call i8 @llvm.vector.reduce.mul.v4i8(<4 x i8> zeroinitializer) +; CHECK-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i32 +; CHECK-NEXT: [[OP_RDX:%.*]] = mul i32 [[TMP3]], [[TMP1]] +; CHECK-NEXT: [[OP_RDX1:%.*]] = mul i32 [[OP_RDX]], [[TRUNC]] +; CHECK-NEXT: [[TMP4]] = insertelement <2 x i32> <i32 0, i32 poison>, i32 [[OP_RDX1]], i32 1 +; CHECK-NEXT: br label %[[BB1]] +; +bb: + br label %bb1 + +bb1: + %phi = phi i32 [ 0, %bb ], [ %mul9, %bb1 ] + %phi2 = phi i32 [ 0, %bb ], [ 0, %bb1 ] + %trunc = trunc i64 0 to i32 + %mul = mul i32 0, %trunc + %mul3 = mul i32 %trunc, %phi + %mul4 = mul i32 %mul3, %mul + %mul5 = mul i32 %mul4, %mul + %trunc6 = trunc i64 0 to i32 + %mul7 = mul i32 0, %trunc6 + %mul8 = mul i32 %mul5, %mul7 + %mul9 = mul i32 %mul8, %mul7 + br label %bb1 +}