Skip to content

Commit 0f4fca8

Browse files
committed
[X86] canCreateUndefOrPoisonForTargetNode/isGuaranteedNotToBeUndefOrPoisonForTargetNode - add X86ISD::VPERMV handling
X86ISD::VPERMV shuffles can't create undef/poison themselves, allowing us to fold freeze(vpermps(x,y)) -> vpermps(freeze(x),freeze(y))
1 parent c731291 commit 0f4fca8

File tree

2 files changed

+5
-13
lines changed

2 files changed

+5
-13
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -45189,6 +45189,7 @@ bool X86TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
4518945189
case X86ISD::UNPCKL:
4519045190
case X86ISD::UNPCKH:
4519145191
case X86ISD::VPERMILPI:
45192+
case X86ISD::VPERMV:
4519245193
case X86ISD::VPERMV3: {
4519345194
SmallVector<int, 8> Mask;
4519445195
SmallVector<SDValue, 2> Ops;
@@ -45255,6 +45256,7 @@ bool X86TargetLowering::canCreateUndefOrPoisonForTargetNode(
4525545256
case X86ISD::UNPCKL:
4525645257
case X86ISD::UNPCKH:
4525745258
case X86ISD::VPERMILPI:
45259+
case X86ISD::VPERMV:
4525845260
case X86ISD::VPERMV3:
4525945261
return false;
4526045262
// SSE comparisons handle all icmp/fcmp cases.

llvm/test/CodeGen/X86/vector-shuffle-combining-avx2.ll

Lines changed: 3 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -934,19 +934,9 @@ entry:
934934
}
935935

936936
define <8 x float> @freeze_permps(<8 x float> %a0) {
937-
; AVX2-LABEL: freeze_permps:
938-
; AVX2: # %bb.0:
939-
; AVX2-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0]
940-
; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
941-
; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
942-
; AVX2-NEXT: ret{{[l|q]}}
943-
;
944-
; AVX512-LABEL: freeze_permps:
945-
; AVX512: # %bb.0:
946-
; AVX512-NEXT: vpmovsxbd {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0]
947-
; AVX512-NEXT: vpermps %ymm0, %ymm1, %ymm0
948-
; AVX512-NEXT: vpermps %ymm0, %ymm1, %ymm0
949-
; AVX512-NEXT: ret{{[l|q]}}
937+
; CHECK-LABEL: freeze_permps:
938+
; CHECK: # %bb.0:
939+
; CHECK-NEXT: ret{{[l|q]}}
950940
%s0 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
951941
%f0 = freeze <8 x float> %s0
952942
%s1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %f0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)

0 commit comments

Comments
 (0)