Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion llvm/lib/Target/X86/X86TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1775,7 +1775,7 @@ InstructionCost X86TTIImpl::getShuffleCost(
}

// For 2-input shuffles, we must account for splitting the 2 inputs into many.
-if (Kind == TTI::SK_PermuteTwoSrc && LT.first != 1) {
+if (Kind == TTI::SK_PermuteTwoSrc && !IsInLaneShuffle && LT.first != 1) {
// We assume that source and destination have the same vector type.
InstructionCost NumOfDests = LT.first;
InstructionCost NumOfShufflesPerDest = LT.first * 2 - 1;
Expand Down

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

56 changes: 28 additions & 28 deletions llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll

Large diffs are not rendered by default.

56 changes: 28 additions & 28 deletions llvm/test/Analysis/CostModel/X86/shuffle-transpose-codesize.ll

Large diffs are not rendered by default.

56 changes: 28 additions & 28 deletions llvm/test/Analysis/CostModel/X86/shuffle-transpose-latency.ll

Large diffs are not rendered by default.

56 changes: 28 additions & 28 deletions llvm/test/Analysis/CostModel/X86/shuffle-transpose-sizelatency.ll

Large diffs are not rendered by default.

56 changes: 28 additions & 28 deletions llvm/test/Analysis/CostModel/X86/shuffle-transpose.ll

Large diffs are not rendered by default.

20 changes: 7 additions & 13 deletions llvm/test/Transforms/PhaseOrdering/X86/pr94546.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,11 @@
; RUN: opt -mtriple=x86_64-- -mcpu=x86-64-v3 -passes="default<O3>" -S < %s | FileCheck %s --check-prefixes=AVX,AVX2

define <4 x double> @PR94546(<4 x double> %a, <4 x double> %b) {
-; SSE2-LABEL: @PR94546(
-; SSE2-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 6>
-; SSE2-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 7>
-; SSE2-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
-; SSE2-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 1>
-; SSE2-NEXT: ret <4 x double> [[TMP4]]
-;
-; SSE4-LABEL: @PR94546(
-; SSE4-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 poison, i32 poison, i32 6>
-; SSE4-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 poison, i32 poison, i32 7>
-; SSE4-NEXT: [[TMP3:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
-; SSE4-NEXT: ret <4 x double> [[TMP3]]
+; SSE-LABEL: @PR94546(
+; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 poison, i32 poison, i32 6>
+; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 poison, i32 poison, i32 7>
+; SSE-NEXT: [[TMP3:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
+; SSE-NEXT: ret <4 x double> [[TMP3]]
;
; AVX-LABEL: @PR94546(
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 poison, i32 poison, i32 6>
Expand Down Expand Up @@ -50,4 +43,5 @@ define <4 x double> @PR94546(<4 x double> %a, <4 x double> %b) {
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; AVX1: {{.*}}
; AVX2: {{.*}}
-; SSE: {{.*}}
+; SSE2: {{.*}}
+; SSE4: {{.*}}