Skip to content

Commit be653f6

Browse files
RKSimon authored and tstellar committed
[X86] combineX86ShuffleChain - don't fold to truncate(concat(V1,V2)) if it was already a PACK op
Fixes #55050 (cherry picked from commit e8305c0)
1 parent 5252880 commit be653f6

File tree

2 files changed

+49
-1
lines changed

2 files changed

+49
-1
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37558,7 +37558,11 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
3755837558
(RootVT.is128BitVector() && Subtarget.hasVLX())) &&
3755937559
(MaskEltSizeInBits > 8 || Subtarget.hasBWI()) &&
3756037560
isSequentialOrUndefInRange(Mask, 0, NumMaskElts, 0, 2)) {
37561-
if (Depth == 0 && Root.getOpcode() == ISD::TRUNCATE)
37561+
// Bail if this was already a truncation or PACK node.
37562+
// We sometimes fail to match PACK if we demand known undef elements.
37563+
if (Depth == 0 && (Root.getOpcode() == ISD::TRUNCATE ||
37564+
Root.getOpcode() == X86ISD::PACKSS ||
37565+
Root.getOpcode() == X86ISD::PACKUS))
3756237566
return SDValue(); // Nothing to do!
3756337567
ShuffleSrcVT = MVT::getIntegerVT(MaskEltSizeInBits * 2);
3756437568
ShuffleSrcVT = MVT::getVectorVT(ShuffleSrcVT, NumMaskElts / 2);

llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll

Lines changed: 44 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -174,3 +174,47 @@ define <8 x i32> @PR46393(<8 x i16> %a0, i8 %a1) {
174174
%sel = select <8 x i1> %mask, <8 x i32> %shl, <8 x i32> zeroinitializer
175175
ret <8 x i32> %sel
176176
}
177+
178+
define i64 @PR55050() {
179+
; X86-LABEL: PR55050:
180+
; X86: # %bb.0: # %entry
181+
; X86-NEXT: xorl %eax, %eax
182+
; X86-NEXT: testb %al, %al
183+
; X86-NEXT: jne .LBB10_2
184+
; X86-NEXT: # %bb.1: # %if
185+
; X86-NEXT: xorl %eax, %eax
186+
; X86-NEXT: .LBB10_2: # %exit
187+
; X86-NEXT: movl %eax, %edx
188+
; X86-NEXT: retl
189+
;
190+
; X64-LABEL: PR55050:
191+
; X64: # %bb.0: # %entry
192+
; X64-NEXT: xorl %eax, %eax
193+
; X64-NEXT: testb %al, %al
194+
; X64-NEXT: xorl %eax, %eax
195+
; X64-NEXT: retq
196+
entry:
197+
%i275 = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> undef, <16 x i8> zeroinitializer)
198+
%i277 = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> undef, <16 x i8> zeroinitializer)
199+
br i1 undef, label %exit, label %if
200+
201+
if:
202+
%i298 = bitcast <2 x i64> %i275 to <4 x i32>
203+
%i299 = bitcast <2 x i64> %i277 to <4 x i32>
204+
%i300 = shufflevector <4 x i32> %i298, <4 x i32> %i299, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
205+
%i339 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %i300, <4 x i32> undef)
206+
%i354 = shufflevector <8 x i16> %i339, <8 x i16> undef, <8 x i32> <i32 0, i32 undef, i32 2, i32 undef, i32 4, i32 undef, i32 6, i32 undef>
207+
%i356 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %i354, <8 x i16> undef)
208+
%i357 = shufflevector <16 x i8> %i356, <16 x i8> zeroinitializer, <16 x i32> <i32 6, i32 5, i32 4, i32 16, i32 2, i32 1, i32 0, i32 16, i32 10, i32 9, i32 8, i32 16, i32 16, i32 16, i32 16, i32 16>
209+
%i361 = extractelement <16 x i8> %i357, i64 8
210+
%i360 = and i8 %i361, 63
211+
%i379 = zext i8 %i360 to i64
212+
br label %exit
213+
214+
exit:
215+
%res = phi i64 [ %i379, %if ], [ 0, %entry ]
216+
ret i64 %res
217+
}
218+
declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>)
219+
declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>)
220+
declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>)

0 commit comments

Comments
 (0)