diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index a2200f283168d..b73229d23e1fc 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -795,7 +795,7 @@ isFixedVectorShuffle(ArrayRef VL, SmallVectorImpl &Mask, } /// \returns True if Extract{Value,Element} instruction extracts element Idx. -static std::optional getExtractIndex(Instruction *E) { +static std::optional getExtractIndex(const Instruction *E) { unsigned Opcode = E->getOpcode(); assert((Opcode == Instruction::ExtractElement || Opcode == Instruction::ExtractValue) && @@ -22690,8 +22690,38 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { if (NodeI1 != NodeI2) return NodeI1->getDFSNumIn() < NodeI2->getDFSNumIn(); InstructionsState S = getSameOpcode({I1, I2}, *TLI); - if (S && !S.isAltShuffle()) + if (S && !S.isAltShuffle()) { + const auto *E1 = dyn_cast(I1); + const auto *E2 = dyn_cast(I2); + if (!E1 || !E2) + continue; + + // Sort on ExtractElementInsts primarily by vector operands. Prefer + // program order of the vector operands. + const auto *V1 = dyn_cast(E1->getVectorOperand()); + const auto *V2 = dyn_cast(E2->getVectorOperand()); + if (V1 != V2) { + if (!V1 || !V2) + continue; + if (V1->getParent() != V2->getParent()) + continue; + return V1->comesBefore(V2); + } + // If we have the same vector operand, try to sort by constant + // index. + std::optional Id1 = getExtractIndex(E1); + std::optional Id2 = getExtractIndex(E2); + // Bring constants to the top + if (Id1 && !Id2) + return true; + if (!Id1 && Id2) + return false; + // First elements come first. + if (Id1 && Id2) + return *Id1 < *Id2; + continue; + } return I1->getOpcode() < I2->getOpcode(); } if (I1) diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/extract-ordering.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/extract-ordering.ll index 67ef5ba3732f4..c585a7f08ad0c 100644 --- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/extract-ordering.ll +++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/extract-ordering.ll @@ -6,59 +6,51 @@ define protected amdgpu_kernel void @myfun(i32 %in, ptr addrspace(1) %aptr1, ptr ; GFX9-SAME: i32 [[IN:%.*]], ptr addrspace(1) [[APTR1:%.*]], ptr addrspace(1) [[BPTR1:%.*]], ptr addrspace(1) [[APTR2:%.*]], ptr addrspace(1) [[BPTR2:%.*]]) #[[ATTR0:[0-9]+]] { ; GFX9-NEXT: [[ENTRY:.*]]: ; GFX9-NEXT: [[VEC1:%.*]] = load <8 x i16>, ptr addrspace(1) [[APTR1]], align 16 -; GFX9-NEXT: [[EL0:%.*]] = extractelement <8 x i16> [[VEC1]], i64 0 -; GFX9-NEXT: [[EL3:%.*]] = extractelement <8 x i16> [[VEC1]], i64 3 ; GFX9-NEXT: [[BVEC1:%.*]] = load <8 x i16>, ptr addrspace(1) [[BPTR1]], align 16 -; GFX9-NEXT: [[BEL0:%.*]] = extractelement <8 x i16> [[BVEC1]], i64 0 -; GFX9-NEXT: [[BEL3:%.*]] = extractelement <8 x i16> [[BVEC1]], i64 3 -; GFX9-NEXT: [[TMP0:%.*]] = shufflevector <8 x i16> [[VEC1]], <8 x i16> poison, <2 x i32> +; GFX9-NEXT: [[TMP0:%.*]] = shufflevector <8 x i16> [[VEC1]], <8 x i16> poison, <2 x i32> +; GFX9-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[VEC1]], <8 x i16> poison, <2 x i32> ; GFX9-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[VEC1]], <8 x i16> poison, <2 x i32> ; GFX9-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[VEC1]], <8 x i16> poison, <2 x i32> -; GFX9-NEXT: [[TMP4:%.*]] = shufflevector <8 x i16> [[BVEC1]], <8 x i16> poison, <2 x i32> +; GFX9-NEXT: [[TMP4:%.*]] = shufflevector <8 x i16> [[BVEC1]], <8 x i16> poison, <2 x i32> +; GFX9-NEXT: [[TMP5:%.*]] = shufflevector <8 x i16> [[BVEC1]], <8 x i16> poison, <2 x i32> ; GFX9-NEXT: [[TMP6:%.*]] = shufflevector <8 x i16> [[BVEC1]], <8 x i16> poison, <2 x i32> ; GFX9-NEXT: [[TMP7:%.*]] = shufflevector <8 x i16> [[BVEC1]], <8 x i16> poison, <2 x i32> ; GFX9-NEXT: br label %[[DO_BODY:.*]] ; GFX9: [[DO_BODY]]: -; GFX9-NEXT: [[A_THREAD_BUF3:%.*]] = phi i16 [ [[EL3]], %[[ENTRY]] ], [ [[NEWEL3:%.*]], %[[DO_BODY]] ] -; GFX9-NEXT: [[B_THREAD_BUF3:%.*]] = phi i16 [ [[BEL3]], %[[ENTRY]] ], [ [[BNEWEL3:%.*]], %[[DO_BODY]] ] ; GFX9-NEXT: [[ADD:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[NEWADD:%.*]], %[[DO_BODY]] ] -; GFX9-NEXT: [[A_THREAD_BUF0:%.*]] = phi i16 [ [[EL0]], %[[ENTRY]] ], [ [[NEWEL0:%.*]], %[[DO_BODY]] ] -; GFX9-NEXT: [[B_THREAD_BUF0:%.*]] = phi i16 [ [[BEL0]], %[[ENTRY]] ], [ [[BNEWEL0:%.*]], %[[DO_BODY]] ] -; GFX9-NEXT: [[TMP9:%.*]] = phi <2 x i16> [ [[TMP0]], %[[ENTRY]] ], [ [[TMP25:%.*]], %[[DO_BODY]] ] +; GFX9-NEXT: [[TMP8:%.*]] = phi <2 x i16> [ [[TMP0]], %[[ENTRY]] ], [ [[TMP30:%.*]], %[[DO_BODY]] ] +; GFX9-NEXT: [[TMP9:%.*]] = phi <2 x i16> [ [[TMP1]], %[[ENTRY]] ], [ [[TMP31:%.*]], %[[DO_BODY]] ] ; GFX9-NEXT: [[TMP10:%.*]] = phi <2 x i16> [ [[TMP2]], %[[ENTRY]] ], [ [[TMP32:%.*]], %[[DO_BODY]] ] ; GFX9-NEXT: [[TMP11:%.*]] = phi <2 x i16> [ [[TMP3]], %[[ENTRY]] ], [ [[TMP33:%.*]], %[[DO_BODY]] ] -; GFX9-NEXT: [[TMP13:%.*]] = phi <2 x i16> [ [[TMP4]], %[[ENTRY]] ], [ [[TMP30:%.*]], %[[DO_BODY]] ] +; GFX9-NEXT: [[TMP12:%.*]] = phi <2 x i16> [ [[TMP4]], %[[ENTRY]] ], [ [[TMP34:%.*]], %[[DO_BODY]] ] +; GFX9-NEXT: [[TMP13:%.*]] = phi <2 x i16> [ [[TMP5]], %[[ENTRY]] ], [ [[TMP35:%.*]], %[[DO_BODY]] ] ; GFX9-NEXT: [[TMP14:%.*]] = phi <2 x i16> [ [[TMP6]], %[[ENTRY]] ], [ [[TMP36:%.*]], %[[DO_BODY]] ] ; GFX9-NEXT: [[TMP15:%.*]] = phi <2 x i16> [ [[TMP7]], %[[ENTRY]] ], [ [[TMP37:%.*]], %[[DO_BODY]] ] -; GFX9-NEXT: [[A_THREAD_VEC0:%.*]] = insertelement <8 x i16> poison, i16 [[A_THREAD_BUF0]], i64 0 +; GFX9-NEXT: [[TMP16:%.*]] = shufflevector <2 x i16> [[TMP8]], <2 x i16> poison, <8 x i32> ; GFX9-NEXT: [[TMP17:%.*]] = shufflevector <2 x i16> [[TMP9]], <2 x i16> poison, <8 x i32> -; GFX9-NEXT: [[TMP16:%.*]] = shufflevector <8 x i16> [[A_THREAD_VEC0]], <8 x i16> [[TMP17]], <8 x i32> -; GFX9-NEXT: [[A_THREAD_VEC3:%.*]] = insertelement <8 x i16> [[TMP16]], i16 [[A_THREAD_BUF3]], i64 3 +; GFX9-NEXT: [[TMP18:%.*]] = shufflevector <2 x i16> [[TMP8]], <2 x i16> [[TMP9]], <8 x i32> ; GFX9-NEXT: [[TMP19:%.*]] = shufflevector <2 x i16> [[TMP10]], <2 x i16> poison, <8 x i32> -; GFX9-NEXT: [[TMP20:%.*]] = shufflevector <8 x i16> [[A_THREAD_VEC3]], <8 x i16> [[TMP19]], <8 x i32> +; GFX9-NEXT: [[TMP20:%.*]] = shufflevector <8 x i16> [[TMP18]], <8 x i16> [[TMP19]], <8 x i32> ; GFX9-NEXT: [[TMP21:%.*]] = shufflevector <2 x i16> [[TMP11]], <2 x i16> poison, <8 x i32> ; GFX9-NEXT: [[TMP22:%.*]] = shufflevector <8 x i16> [[TMP20]], <8 x i16> [[TMP21]], <8 x i32> -; GFX9-NEXT: [[B_THREAD_VEC0:%.*]] = insertelement <8 x i16> poison, i16 [[B_THREAD_BUF0]], i64 0 +; GFX9-NEXT: [[TMP23:%.*]] = shufflevector <2 x i16> [[TMP12]], <2 x i16> poison, <8 x i32> ; GFX9-NEXT: [[TMP24:%.*]] = shufflevector <2 x i16> [[TMP13]], <2 x i16> poison, <8 x i32> -; GFX9-NEXT: [[TMP23:%.*]] = shufflevector <8 x i16> [[B_THREAD_VEC0]], <8 x i16> [[TMP24]], <8 x i32> -; GFX9-NEXT: [[B_THREAD_VEC3:%.*]] = insertelement <8 x i16> [[TMP23]], i16 [[B_THREAD_BUF3]], i64 3 +; GFX9-NEXT: [[TMP25:%.*]] = shufflevector <2 x i16> [[TMP12]], <2 x i16> [[TMP13]], <8 x i32> ; GFX9-NEXT: [[TMP26:%.*]] = shufflevector <2 x i16> [[TMP14]], <2 x i16> poison, <8 x i32> -; GFX9-NEXT: [[TMP27:%.*]] = shufflevector <8 x i16> [[B_THREAD_VEC3]], <8 x i16> [[TMP26]], <8 x i32> +; GFX9-NEXT: [[TMP27:%.*]] = shufflevector <8 x i16> [[TMP25]], <8 x i16> [[TMP26]], <8 x i32> ; GFX9-NEXT: [[TMP28:%.*]] = shufflevector <2 x i16> [[TMP15]], <2 x i16> poison, <8 x i32> ; GFX9-NEXT: [[TMP29:%.*]] = shufflevector <8 x i16> [[TMP27]], <8 x i16> [[TMP28]], <8 x i32> ; GFX9-NEXT: [[RES:%.*]] = add <8 x i16> [[TMP22]], [[TMP29]] ; GFX9-NEXT: [[VEC2:%.*]] = load <8 x i16>, ptr addrspace(1) [[APTR2]], align 16 -; GFX9-NEXT: [[NEWEL0]] = extractelement <8 x i16> [[VEC2]], i64 0 -; GFX9-NEXT: [[NEWEL3]] = extractelement <8 x i16> [[VEC2]], i64 3 ; GFX9-NEXT: [[BVEC2:%.*]] = load <8 x i16>, ptr addrspace(1) [[BPTR2]], align 16 -; GFX9-NEXT: [[BNEWEL0]] = extractelement <8 x i16> [[BVEC2]], i64 0 -; GFX9-NEXT: [[BNEWEL3]] = extractelement <8 x i16> [[BVEC2]], i64 3 ; GFX9-NEXT: [[NEWADD]] = add i32 [[ADD]], 1 ; GFX9-NEXT: [[COND:%.*]] = icmp sgt i32 [[NEWADD]], [[IN]] -; GFX9-NEXT: [[TMP25]] = shufflevector <8 x i16> [[VEC2]], <8 x i16> poison, <2 x i32> +; GFX9-NEXT: [[TMP30]] = shufflevector <8 x i16> [[VEC2]], <8 x i16> poison, <2 x i32> +; GFX9-NEXT: [[TMP31]] = shufflevector <8 x i16> [[VEC2]], <8 x i16> poison, <2 x i32> ; GFX9-NEXT: [[TMP32]] = shufflevector <8 x i16> [[VEC2]], <8 x i16> poison, <2 x i32> ; GFX9-NEXT: [[TMP33]] = shufflevector <8 x i16> [[VEC2]], <8 x i16> poison, <2 x i32> -; GFX9-NEXT: [[TMP30]] = shufflevector <8 x i16> [[BVEC2]], <8 x i16> poison, <2 x i32> +; GFX9-NEXT: [[TMP34]] = shufflevector <8 x i16> [[BVEC2]], <8 x i16> poison, <2 x i32> +; GFX9-NEXT: [[TMP35]] = shufflevector <8 x i16> [[BVEC2]], <8 x i16> poison, <2 x i32> ; GFX9-NEXT: [[TMP36]] = shufflevector <8 x i16> [[BVEC2]], <8 x i16> poison, <2 x i32> ; GFX9-NEXT: [[TMP37]] = shufflevector <8 x i16> [[BVEC2]], <8 x i16> poison, <2 x i32> ; GFX9-NEXT: br i1 [[COND]], label %[[DO_BODY]], label %[[END:.*]] diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll index 3b63c1e35610f..a3a4ab948519f 100644 --- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll +++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll @@ -49,19 +49,19 @@ bb1: define <4 x half> @phis_reverse(i1 %cmp1, <4 x half> %in1, <4 x half> %in2) { ; CHECK-LABEL: @phis_reverse( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x half> [[IN1:%.*]], <4 x half> poison, <2 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[IN1]], <4 x half> poison, <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[IN1:%.*]], <4 x half> poison, <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x half> [[IN1]], <4 x half> poison, <2 x i32> ; CHECK-NEXT: br i1 [[CMP1:%.*]], label [[BB1:%.*]], label [[BB0:%.*]] ; CHECK: bb0: -; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x half> [[IN2:%.*]], <4 x half> poison, <2 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x half> [[IN2]], <4 x half> poison, <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x half> [[IN2:%.*]], <4 x half> poison, <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x half> [[IN2]], <4 x half> poison, <2 x i32> ; CHECK-NEXT: br label [[BB1]] ; CHECK: bb1: -; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x half> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP2]], [[BB0]] ] -; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x half> [ [[TMP1]], [[ENTRY]] ], [ [[TMP3]], [[BB0]] ] +; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x half> [ [[TMP1]], [[ENTRY:%.*]] ], [ [[TMP3]], [[BB0]] ] +; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x half> [ [[TMP2]], [[ENTRY]] ], [ [[TMP9]], [[BB0]] ] ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x half> [[TMP5]], <2 x half> poison, <4 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x half> [[TMP4]], <2 x half> poison, <4 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x half> [[TMP6]], <4 x half> [[TMP7]], <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x half> [[TMP5]], <2 x half> [[TMP4]], <4 x i32> ; CHECK-NEXT: ret <4 x half> [[TMP8]] ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll index dd7a21198ac1f..651f565412830 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll @@ -141,7 +141,7 @@ define ptr @test4() { ; POWEROF2-NEXT: [[TMP1:%.*]] = fadd <8 x float> zeroinitializer, zeroinitializer ; POWEROF2-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <2 x i32> ; POWEROF2-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <2 x i32> -; POWEROF2-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <2 x i32> +; POWEROF2-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <2 x i32> ; POWEROF2-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> [[TMP2]], i64 0) ; POWEROF2-NEXT: [[TMP6:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP5]], <2 x float> [[TMP3]], i64 2) ; POWEROF2-NEXT: br label [[TMP8:%.*]] @@ -156,10 +156,10 @@ define ptr @test4() { ; POWEROF2-NEXT: [[TMP13:%.*]] = fmul <2 x float> [[TMP12]], zeroinitializer ; POWEROF2-NEXT: [[TMP14:%.*]] = call <2 x float> @llvm.vector.extract.v2f32.v4f32(<4 x float> [[TMP10]], i64 2) ; POWEROF2-NEXT: [[TMP15:%.*]] = fmul <2 x float> zeroinitializer, [[TMP14]] -; POWEROF2-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[TMP9]], i32 1 -; POWEROF2-NEXT: [[TMP17:%.*]] = fmul float 0.000000e+00, [[TMP16]] ; POWEROF2-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[TMP9]], i32 0 -; POWEROF2-NEXT: [[TMP19:%.*]] = fmul float [[TMP18]], 0.000000e+00 +; POWEROF2-NEXT: [[TMP17:%.*]] = fmul float 0.000000e+00, [[TMP18]] +; POWEROF2-NEXT: [[TMP30:%.*]] = extractelement <2 x float> [[TMP9]], i32 1 +; POWEROF2-NEXT: [[TMP19:%.*]] = fmul float [[TMP30]], 0.000000e+00 ; POWEROF2-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[TMP13]], i32 0 ; POWEROF2-NEXT: [[TMP21:%.*]] = fadd reassoc nsz float [[TMP20]], [[TMP17]] ; POWEROF2-NEXT: [[TMP22:%.*]] = extractelement <2 x float> [[TMP15]], i32 0