Skip to content

Commit da17f95

Browse files
RKSimon authored and Lukacma committed
[DAG] combineTruncationShuffle - ensure the *_EXTEND_VECTOR_INREG node didn't come from a smaller type (llvm#164160)
The *_EXTEND_VECTOR_INREG source vector must be the same size as the destination. We already have a similar TODO to handle more types. Fixes llvm#164107
1 parent 035d558 commit da17f95

File tree

2 files changed

+48
-0
lines changed

2 files changed

+48
-0
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26876,6 +26876,8 @@ static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
2687626876
// TODO: handle more extension/truncation cases as cases arise.
2687726877
if (EltSizeInBits != ExtSrcSizeInBits)
2687826878
return SDValue();
26879+
if (VT.getSizeInBits() != N00.getValueSizeInBits())
26880+
return SDValue();
2687926881

2688026882
// We can remove *extend_vector_inreg only if the truncation happens at
2688126883
// the same scale as the extension.

llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll

Lines changed: 46 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -624,6 +624,52 @@ define void @PR48908(<4 x double> %v0, <4 x double> %v1, <4 x double> %v2, ptr n
624624
ret void
625625
}
626626

627+
define i32 @PR164107(<16 x i1> %0) {
628+
; AVX1-LABEL: PR164107:
629+
; AVX1: # %bb.0:
630+
; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0
631+
; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0
632+
; AVX1-NEXT: vpmovsxwq %xmm0, %xmm0
633+
; AVX1-NEXT: vmovd %xmm0, %eax
634+
; AVX1-NEXT: ret{{[l|q]}}
635+
;
636+
; AVX2-LABEL: PR164107:
637+
; AVX2: # %bb.0:
638+
; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
639+
; AVX2-NEXT: vpsllw $15, %xmm0, %xmm0
640+
; AVX2-NEXT: vpsraw $15, %xmm0, %xmm0
641+
; AVX2-NEXT: vpmovsxwq %xmm0, %ymm0
642+
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
643+
; AVX2-NEXT: vmovd %xmm0, %eax
644+
; AVX2-NEXT: vzeroupper
645+
; AVX2-NEXT: ret{{[l|q]}}
646+
;
647+
; AVX512-LABEL: PR164107:
648+
; AVX512: # %bb.0:
649+
; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
650+
; AVX512-NEXT: vpslld $31, %zmm0, %zmm0
651+
; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k1
652+
; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 {%k1} {z} = -1
653+
; AVX512-NEXT: vpbroadcastq %xmm0, %zmm0
654+
; AVX512-NEXT: vptestmq %zmm0, %zmm0, %k1
655+
; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 {%k1} {z} = -1
656+
; AVX512-NEXT: vextracti32x4 $3, %zmm0, %xmm0
657+
; AVX512-NEXT: vpbroadcastw %xmm0, %xmm0
658+
; AVX512-NEXT: vpmovsxwq %xmm0, %zmm0
659+
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
660+
; AVX512-NEXT: vmovd %xmm0, %eax
661+
; AVX512-NEXT: vzeroupper
662+
; AVX512-NEXT: ret{{[l|q]}}
663+
%cmp = shufflevector <16 x i1> %0, <16 x i1> zeroinitializer, <16 x i32> zeroinitializer
664+
%sext = sext <16 x i1> %cmp to <16 x i64>
665+
%bc.1 = bitcast <16 x i64> %sext to <64 x i16>
666+
%vecinit15.i = shufflevector <64 x i16> %bc.1, <64 x i16> zeroinitializer, <16 x i32> <i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56>
667+
%conv16.i = sext <16 x i16> %vecinit15.i to <16 x i64>
668+
%bc.2 = bitcast <16 x i64> %conv16.i to <32 x i32>
669+
%conv22.i = extractelement <32 x i32> %bc.2, i64 4
670+
ret i32 %conv22.i
671+
}
672+
627673
define <4 x i64> @concat_self_v4i64(<2 x i64> %x) {
628674
; AVX1-LABEL: concat_self_v4i64:
629675
; AVX1: # %bb.0:

0 commit comments

Comments
 (0)