diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index b62c8f1631ff7..cdff33cbcc6e7 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -10657,10 +10657,11 @@ class InstructionsCompatibilityAnalysis { /// Checks if the opcode is supported as the main opcode for copyable /// elements. static bool isSupportedOpcode(const unsigned Opcode) { - return Opcode == Instruction::Add || Opcode == Instruction::LShr || - Opcode == Instruction::Shl || Opcode == Instruction::SDiv || - Opcode == Instruction::UDiv || Opcode == Instruction::And || - Opcode == Instruction::Or || Opcode == Instruction::Xor; + return Opcode == Instruction::Add || Opcode == Instruction::Sub || + Opcode == Instruction::LShr || Opcode == Instruction::Shl || + Opcode == Instruction::SDiv || Opcode == Instruction::UDiv || + Opcode == Instruction::And || Opcode == Instruction::Or || + Opcode == Instruction::Xor; } /// Identifies the best candidate value, which represents main opcode @@ -10720,8 +10721,12 @@ class InstructionsCompatibilityAnalysis { for (const auto &P : Candidates) { if (P.second.size() < BestOpcodeNum) continue; + // If have inner dependencies - skip. + if (any_of(P.second, + [&](Instruction *I) { return Operands.contains(I); })) + continue; for (Instruction *I : P.second) { - if (IsSupportedInstruction(I, AnyUndef) && !Operands.contains(I)) { + if (IsSupportedInstruction(I, AnyUndef)) { MainOp = I; BestOpcodeNum = P.second.size(); break; @@ -10981,6 +10986,7 @@ class InstructionsCompatibilityAnalysis { getWidenedType(S.getMainOp()->getType(), VL.size()); switch (MainOpcode) { case Instruction::Add: + case Instruction::Sub: case Instruction::LShr: case Instruction::Shl: case Instruction::SDiv: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minbw-node-used-twice.ll b/llvm/test/Transforms/SLPVectorizer/X86/minbw-node-used-twice.ll index 55f2b238c07df..24899900ebb3a 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/minbw-node-used-twice.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/minbw-node-used-twice.ll @@ -4,15 +4,8 @@ define i8 @test() { ; CHECK-LABEL: define i8 @test() { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[SUB_I_I79_PEEL_I:%.*]] = sub i16 0, 1 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i16> , i16 [[SUB_I_I79_PEEL_I]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i16> [[TMP0]] to <2 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i32> zeroinitializer, [[TMP2]] -; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i16> -; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i16> [[TMP3]], [[TMP0]] -; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <2 x i16> [[TMP4]], [[TMP0]] -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0 -; CHECK-NEXT: [[CONV13_I89_PEEL_I:%.*]] = zext i1 [[TMP5]] to i8 +; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <2 x i16> , +; CHECK-NEXT: [[CONV13_I89_PEEL_I:%.*]] = zext i1 false to i8 ; CHECK-NEXT: ret i8 [[CONV13_I89_PEEL_I]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/parent-node-non-schedulable.ll b/llvm/test/Transforms/SLPVectorizer/X86/parent-node-non-schedulable.ll index 7c8cb02f28c63..60e13d0b4cb6a 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/parent-node-non-schedulable.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/parent-node-non-schedulable.ll @@ -6,12 +6,12 @@ define void @test(ptr %0, i64 %1, i64 %2, i1 %3, i64 %4, i64 %5) { ; CHECK-SAME: ptr [[TMP0:%.*]], i64 [[TMP1:%.*]], i64 [[TMP2:%.*]], i1 [[TMP3:%.*]], i64 [[TMP4:%.*]], i64 [[TMP5:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i32 240 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP0]], i32 128 +; CHECK-NEXT: [[TMP13:%.*]] = load <2 x i64>, ptr [[TMP7]], align 4 +; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr null, align 4 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i64> poison, i64 [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i64> , i64 [[TMP2]], i32 3 ; CHECK-NEXT: [[TMP12:%.*]] = add <4 x i64> [[TMP10]], [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = load <2 x i64>, ptr [[TMP7]], align 4 -; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr null, align 4 ; CHECK-NEXT: [[TMP15:%.*]] = load <2 x i64>, ptr [[TMP8]], align 4 ; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x i64> [[TMP13]], <2 x i64> [[TMP15]], <6 x i32> ; CHECK-NEXT: [[TMP17:%.*]] = insertelement <6 x i64> poison, i64 [[TMP14]], i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll b/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll index 3e0a3741d6bbc..2a0e7889f0f34 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll @@ -183,7 +183,7 @@ define void @addsub1(ptr noalias %dst, ptr noalias %src) { ; CHECK-LABEL: @addsub1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[SRC:%.*]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[TMP0]], +; CHECK-NEXT: [[TMP1:%.*]] = sub nsw <4 x i32> [[TMP0]], ; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[DST:%.*]], align 4 ; CHECK-NEXT: ret void ;