diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp index 989090b80e1c8..5f7aa53034248 100644 --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -504,25 +504,26 @@ void llvm::processShuffleMasks( unsigned SzSrc = Sz / NumOfSrcRegs; for (unsigned I = 0; I < NumOfDestRegs; ++I) { auto &RegMasks = Res[I]; - RegMasks.assign(NumOfSrcRegs, {}); + RegMasks.assign(2 * NumOfSrcRegs, {}); // Check that the values in dest registers are in the one src // register. for (unsigned K = 0; K < SzDest; ++K) { int Idx = I * SzDest + K; if (Idx == Sz) break; - if (Mask[Idx] >= Sz || Mask[Idx] == PoisonMaskElem) + if (Mask[Idx] >= 2 * Sz || Mask[Idx] == PoisonMaskElem) continue; - int SrcRegIdx = Mask[Idx] / SzSrc; + int MaskIdx = Mask[Idx] % Sz; + int SrcRegIdx = MaskIdx / SzSrc + (Mask[Idx] >= Sz ? NumOfSrcRegs : 0); // Add a cost of PermuteTwoSrc for each new source register permute, // if we have more than one source registers. if (RegMasks[SrcRegIdx].empty()) RegMasks[SrcRegIdx].assign(SzDest, PoisonMaskElem); - RegMasks[SrcRegIdx][K] = Mask[Idx] % SzSrc; + RegMasks[SrcRegIdx][K] = MaskIdx % SzSrc; } } // Process split mask. - for (unsigned I = 0; I < NumOfUsedRegs; ++I) { + for (unsigned I : seq(NumOfUsedRegs)) { auto &Dest = Res[I]; int NumSrcRegs = count_if(Dest, [](ArrayRef Mask) { return !Mask.empty(); }); @@ -567,7 +568,7 @@ void llvm::processShuffleMasks( int FirstIdx = -1; SecondIdx = -1; MutableArrayRef FirstMask, SecondMask; - for (unsigned I = 0; I < NumOfDestRegs; ++I) { + for (unsigned I : seq(2 * NumOfSrcRegs)) { SmallVectorImpl &RegMask = Dest[I]; if (RegMask.empty()) continue; diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-splat-codesize.ll b/llvm/test/Analysis/CostModel/X86/shuffle-splat-codesize.ll index 5d629022c148f..39c935fff6b76 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-splat-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-splat-codesize.ll @@ -483,7 +483,7 @@ define void @test_upper_vXf32(<2 x float> %a64, <2 x float> %b64, <4 x float> %a ; SSE-LABEL: 'test_upper_vXf32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-splat-latency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-splat-latency.ll index 3d743c17715e2..2a89924dc7780 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-splat-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-splat-latency.ll @@ -483,7 +483,7 @@ define void @test_upper_vXf32(<2 x float> %a64, <2 x float> %b64, <4 x float> %a ; SSE-LABEL: 'test_upper_vXf32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-splat-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/shuffle-splat-sizelatency.ll index 53262d8e4f564..848e7b4e611a7 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-splat-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-splat-sizelatency.ll @@ -483,7 +483,7 @@ define void @test_upper_vXf32(<2 x float> %a64, <2 x float> %b64, <4 x float> %a ; SSE-LABEL: 'test_upper_vXf32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-splat.ll b/llvm/test/Analysis/CostModel/X86/shuffle-splat.ll index 6913c753f36fa..4c6d1ccd5ca34 100644 --- a/llvm/test/Analysis/CostModel/X86/shuffle-splat.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-splat.ll @@ -483,7 +483,7 @@ define void @test_upper_vXf32(<2 x float> %a64, <2 x float> %b64, <4 x float> %a ; SSE-LABEL: 'test_upper_vXf32' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ;