diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 1c9d43ce4c062..97510a3091f69 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -4066,9 +4066,12 @@ static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, const SDLoc &dl, unsigned vectorWidth) { EVT VT = Vec.getValueType(); EVT ElVT = VT.getVectorElementType(); - unsigned Factor = VT.getSizeInBits() / vectorWidth; - EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT, - VT.getVectorNumElements() / Factor); + unsigned ResultNumElts = + (VT.getVectorNumElements() * vectorWidth) / VT.getSizeInBits(); + EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT, ResultNumElts); + + assert(ResultVT.getSizeInBits() == vectorWidth && + "Illegal subvector extraction"); // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits(); diff --git a/llvm/test/CodeGen/X86/isel-extract-subvector-non-pow2-elems.ll b/llvm/test/CodeGen/X86/isel-extract-subvector-non-pow2-elems.ll new file mode 100644 index 0000000000000..d699b1a182845 --- /dev/null +++ b/llvm/test/CodeGen/X86/isel-extract-subvector-non-pow2-elems.ll @@ -0,0 +1,21 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; Ensure assertion is not hit when folding concat of two contiguous extract_subvector operations +; from a source with a non-power-of-two vector length. 
+; RUN: llc -mtriple=x86_64 -mattr=+avx2 < %s | FileCheck %s
+
+define void @foo(ptr %pDst) {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vmovups %ymm0, 16(%rdi)
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+entry:
+  %0 = shufflevector <12 x float> zeroinitializer, <12 x float> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 7> ; lanes 4-7 of the 12-lane source. NOTE(review): mask operand was missing and has been restored; confirm the indices.
+  %1 = shufflevector <12 x float> zeroinitializer, <12 x float> zeroinitializer, <4 x i32> <i32 8, i32 9, i32 10, i32 11> ; lanes 8-11, contiguous with %0. NOTE(review): restored mask; confirm the indices.
+  %2 = getelementptr i8, ptr %pDst, i64 16 ; destination of the first 16-byte store
+  %3 = getelementptr i8, ptr %pDst, i64 32 ; immediately follows the 16 bytes written through %2
+  store <4 x float> %0, ptr %2, align 1
+  store <4 x float> %1, ptr %3, align 1 ; adjacent to the store above; the expected output is one 32-byte ymm store
+  ret void
+}