diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 4454eb3e34d98..bf7bc570000dd 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -2386,6 +2386,9 @@ class BoUpSLP { /// the whole vector (it is mixed with constants or loop invariant values). /// Note: This modifies the 'IsUsed' flag, so a cleanUsed() must follow. bool shouldBroadcast(Value *Op, unsigned OpIdx, unsigned Lane) { + // Small number of loads - try load matching. + if (isa(Op) && getNumLanes() == 2 && getNumOperands() == 2) + return false; bool OpAPO = getData(OpIdx, Lane).APO; bool IsInvariant = L && L->isLoopInvariant(Op); unsigned Cnt = 0; @@ -2511,23 +2514,23 @@ class BoUpSLP { Value *OpLane0 = getValue(OpIdx, FirstLane); // Keep track if we have instructions with all the same opcode on one // side. - if (isa(OpLane0)) - ReorderingModes[OpIdx] = ReorderingMode::Load; - else if (auto *OpILane0 = dyn_cast(OpLane0)) { + if (auto *OpILane0 = dyn_cast(OpLane0)) { // Check if OpLane0 should be broadcast. if (shouldBroadcast(OpLane0, OpIdx, FirstLane) || !canBeVectorized(OpILane0, OpIdx, FirstLane)) ReorderingModes[OpIdx] = ReorderingMode::Splat; + else if (isa(OpILane0)) + ReorderingModes[OpIdx] = ReorderingMode::Load; else ReorderingModes[OpIdx] = ReorderingMode::Opcode; - } else if (isa(OpLane0)) + } else if (isa(OpLane0)) { ReorderingModes[OpIdx] = ReorderingMode::Constant; - else if (isa(OpLane0)) + } else if (isa(OpLane0)) { // Our best hope is a Splat. It may save some cost in some cases. ReorderingModes[OpIdx] = ReorderingMode::Splat; - else - // NOTE: This should be unreachable. - ReorderingModes[OpIdx] = ReorderingMode::Failed; + } else { + llvm_unreachable("Unexpected value kind."); + } } // Check that we don't have same operands. No need to reorder if operands diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/loads-ordering.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/loads-ordering.ll index 928cbe3655411..1e7cc9c268cfa 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/loads-ordering.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/loads-ordering.ll @@ -8,19 +8,13 @@ define fastcc void @rephase(ptr %phases_in, ptr %157, i64 %158) { ; CHECK-NEXT: [[IND_END11:%.*]] = getelementptr i8, ptr [[TMP0]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr [[TMP0]], align 8 ; CHECK-NEXT: [[IMAG_247:%.*]] = getelementptr i8, ptr [[IND_END11]], i64 408 -; CHECK-NEXT: [[MUL35_248:%.*]] = fmul double [[TMP2]], 0.000000e+00 -; CHECK-NEXT: store double [[MUL35_248]], ptr [[IMAG_247]], align 8 -; CHECK-NEXT: [[ARRAYIDX23_1_249:%.*]] = getelementptr i8, ptr [[IND_END11]], i64 416 -; CHECK-NEXT: [[MUL_1_250:%.*]] = fmul double [[TMP2]], 0.000000e+00 -; CHECK-NEXT: store double [[MUL_1_250]], ptr [[ARRAYIDX23_1_249]], align 8 ; CHECK-NEXT: [[IMAG_1_251:%.*]] = getelementptr i8, ptr [[IND_END11]], i64 424 -; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr [[IMAG_1_251]], align 8 -; CHECK-NEXT: [[MUL35_1_252:%.*]] = fmul double [[TMP2]], [[TMP3]] -; CHECK-NEXT: store double [[MUL35_1_252]], ptr [[IMAG_1_251]], align 8 -; CHECK-NEXT: [[ARRAYIDX23_2_253:%.*]] = getelementptr i8, ptr [[IND_END11]], i64 432 -; CHECK-NEXT: [[TMP4:%.*]] = load double, ptr [[ARRAYIDX23_2_253]], align 8 -; CHECK-NEXT: [[MUL_2_254:%.*]] = fmul double [[TMP2]], [[TMP4]] -; CHECK-NEXT: store double [[MUL_2_254]], ptr [[ARRAYIDX23_2_253]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[IMAG_1_251]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x double> poison, double [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = call <4 x double> @llvm.vector.insert.v4f64.v2f64(<4 x double> , <2 x double> [[TMP3]], i64 2) +; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x double> [[TMP5]], [[TMP6]] +; CHECK-NEXT: store <4 x double> [[TMP7]], ptr [[IMAG_247]], align 8 ; CHECK-NEXT: store double [[TMP2]], ptr [[PHASES_IN]], align 8 ; CHECK-NEXT: ret void ;