Skip to content

Commit 574dee3

Browse files
RKSimon authored and github-actions[bot] committed
Automerge: [X86] vector-shuffle-combining-ssse3.ll - add tests showing the failure to merge logical shifts with non-uniform shift amounts into shuffles
2 parents 3394ec9 + 9f94e36 commit 574dee3

File tree

1 file changed

+69
-0
lines changed

1 file changed

+69
-0
lines changed

llvm/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll

Lines changed: 69 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -763,6 +763,75 @@ define <16 x i8> @combine_and_pshufb_or_pshufb(<16 x i8> %a0, <16 x i8> %a1) {
763763
ret <16 x i8> %4
764764
}
765765

766+
; Per-lane logical right shift by byte-multiple amounts (24, 0, 8, 16) feeding
; a byte shuffle that rotates the bytes of lanes 0-2 and keeps lane 3 in place.
; The SSE and AVX1 checks below expect a single pshufb with zeroed bytes, while
; the AVX2 and AVX512F checks still expect a separate vpsrlvd ahead of the
; vpshufb (the shift is not merged into the shuffle there).
define <16 x i8> @combine_lshr_pshufb(<4 x i32> %a0) {
; SSE-LABEL: combine_lshr_pshufb:
; SSE: # %bb.0:
; SSE-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,xmm0[3,5,6,7,4,10,11],zero,xmm0[9,14,15],zero,zero
; SSE-NEXT: retq
;
; AVX1-LABEL: combine_lshr_pshufb:
; AVX1: # %bb.0:
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,xmm0[3,5,6,7,4,10,11],zero,xmm0[9,14,15],zero,zero
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_lshr_pshufb:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,2,3,0,5,6,7,4,9,10,11,8,12,13,14,15]
; AVX2-NEXT: retq
;
; AVX512F-LABEL: combine_lshr_pshufb:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,2,3,0,5,6,7,4,9,10,11,8,12,13,14,15]
; AVX512F-NEXT: retq
  %srl = lshr <4 x i32> %a0, <i32 24, i32 0, i32 8, i32 16>
  %bytes = bitcast <4 x i32> %srl to <16 x i8>
  %res = shufflevector <16 x i8> %bytes, <16 x i8> poison, <16 x i32> <i32 1, i32 2, i32 3, i32 0, i32 5, i32 6, i32 7, i32 4, i32 9, i32 10, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %res
}
793+
794+
; Per-lane left shift by byte-multiple amounts (0, 8, 16, 16) feeding a byte
; shuffle that rotates the bytes of lanes 0-2 and keeps lane 3 in place.
; Every check prefix below still expects a separate multiply (pmuludq, pmulld,
; vpmulld) or variable shift (vpsllvd) ahead of the pshufb/vpshufb, i.e. the
; shift is not merged into the shuffle on any of the tested targets.
define <16 x i8> @combine_shl_pshufb(<4 x i32> %a0) {
; SSSE3-LABEL: combine_shl_pshufb:
; SSSE3: # %bb.0:
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSSE3-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,2,3,0,5,6,7,4,9,10,11,8,12,13,14,15]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: combine_shl_pshufb:
; SSE41: # %bb.0:
; SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,2,3,0,5,6,7,4,9,10,11,8,12,13,14,15]
; SSE41-NEXT: retq
;
; AVX1-LABEL: combine_shl_pshufb:
; AVX1: # %bb.0:
; AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,2,3,0,5,6,7,4,9,10,11,8,12,13,14,15]
; AVX1-NEXT: retq
;
; AVX2-LABEL: combine_shl_pshufb:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,2,3,0,5,6,7,4,9,10,11,8,12,13,14,15]
; AVX2-NEXT: retq
;
; AVX512F-LABEL: combine_shl_pshufb:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,2,3,0,5,6,7,4,9,10,11,8,12,13,14,15]
; AVX512F-NEXT: retq
  %shl = shl <4 x i32> %a0, <i32 0, i32 8, i32 16, i32 16>
  %bc = bitcast <4 x i32> %shl to <16 x i8>
  %shuffle = shufflevector <16 x i8> %bc, <16 x i8> poison, <16 x i32> <i32 1, i32 2, i32 3, i32 0, i32 5, i32 6, i32 7, i32 4, i32 9, i32 10, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %shuffle
}
834+
766835
define <16 x i8> @constant_fold_pshufb() {
767836
; SSE-LABEL: constant_fold_pshufb:
768837
; SSE: # %bb.0:

0 commit comments

Comments
 (0)