@@ -98,6 +98,61 @@ define <16 x i8> @combine_shuffle_vrotli_v4i32(<4 x i32> %a0) {
9898}
; Declaration of the funnel-shift-left intrinsic for <4 x i32>: takes three
; <4 x i32> operands and returns a <4 x i32>.
9999declare <4 x i32 > @llvm.fshl.v4i32 (<4 x i32 >, <4 x i32 >, <4 x i32 >)
100100
; Test: two 128-bit word permutes (llvm.x86.avx512.permvar.hi.128 with constant
; index vectors), one on %x and one on %y, whose results are concatenated into a
; single <16 x i16> by an identity shufflevector (mask 0..15).
; The expected assembly is a single two-source word permute (vpermt2w) driven by
; one 16-entry index constant: entries 0-7 are the indices applied to %x, and
; entries 8-15 are the indices applied to %y plus 16 (e.g. 5 -> 21, 2 -> 18).
; NOTE(review): the expectation lines look auto-generated (update_llc_test_checks
; style) - presumably they should be regenerated rather than hand-edited; confirm
; against the file header, which is not visible here.
101+ define <16 x i16 > @concat2_permw_v8i16 (<8 x i16 > %x , <8 x i16 > %y ) nounwind {
102+ ; CHECK-LABEL: concat2_permw_v8i16:
103+ ; CHECK: # %bb.0:
104+ ; CHECK-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
105+ ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
106+ ; CHECK-NEXT: vpmovsxbw {{.*#+}} ymm2 = [7,0,6,1,5,2,4,3,21,18,20,19,23,16,22,17]
107+ ; CHECK-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
108+ ; CHECK-NEXT: ret{{[l|q]}}
; Permute the eight words of %x with a constant index vector.
109+ %lo = tail call <8 x i16 > @llvm.x86.avx512.permvar.hi.128 (<8 x i16 > %x , <8 x i16 > <i16 7 , i16 0 , i16 6 , i16 1 , i16 5 , i16 2 , i16 4 , i16 3 >)
; Permute the eight words of %y with a different constant index vector.
110+ %hi = tail call <8 x i16 > @llvm.x86.avx512.permvar.hi.128 (<8 x i16 > %y , <8 x i16 > <i16 5 , i16 2 , i16 4 , i16 3 , i16 7 , i16 0 , i16 6 , i16 1 >)
; Identity mask 0..15: %lo becomes elements 0-7, %hi becomes elements 8-15.
111+ %res = shufflevector <8 x i16 > %lo , <8 x i16 > %hi , <16 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 , i32 8 , i32 9 , i32 10 , i32 11 , i32 12 , i32 13 , i32 14 , i32 15 >
112+ ret <16 x i16 > %res
113+ }
114+
; Test: four 128-bit word permutes (llvm.x86.avx512.permvar.hi.128 with constant
; index vectors) on %x, %y, %z and %w. The results are concatenated pairwise into
; two <16 x i16> values and then into one <32 x i16> value, all with identity
; shufflevector masks.
; The expected assembly uses two 256-bit two-source word permutes (vpermi2w), one
; per pair, and joins the two 256-bit results with vinserti64x4 into zmm0.
; Each permute's index constant is the first source's indices followed by the
; second source's indices plus 16.
; In the 32-bit expectations the fourth vector argument is loaded from the stack
; (vmovdqa 8(%ebp), %xmm3) after a frame-pointer setup that aligns %esp to 16
; bytes; in the 64-bit expectations all four arguments are in xmm0-xmm3.
; NOTE(review): the expectation lines look auto-generated (update_llc_test_checks
; style) - presumably they should be regenerated rather than hand-edited; confirm
; against the file header, which is not visible here.
115+ define <32 x i16 > @concat4_permw_v8i16 (<8 x i16 > %x , <8 x i16 > %y , <8 x i16 > %z , <8 x i16 > %w ) nounwind {
116+ ; X86-LABEL: concat4_permw_v8i16:
117+ ; X86: # %bb.0:
118+ ; X86-NEXT: pushl %ebp
119+ ; X86-NEXT: movl %esp, %ebp
120+ ; X86-NEXT: andl $-16, %esp
121+ ; X86-NEXT: subl $16, %esp
122+ ; X86-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
123+ ; X86-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
124+ ; X86-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
125+ ; X86-NEXT: vmovdqa 8(%ebp), %xmm3
126+ ; X86-NEXT: vpmovsxbw {{.*#+}} ymm4 = [6,1,7,0,4,3,5,2,20,19,21,18,22,17,23,16]
127+ ; X86-NEXT: vpermi2w %ymm3, %ymm2, %ymm4
128+ ; X86-NEXT: vpmovsxbw {{.*#+}} ymm2 = [7,0,6,1,5,2,4,3,21,18,20,19,23,16,22,17]
129+ ; X86-NEXT: vpermi2w %ymm1, %ymm0, %ymm2
130+ ; X86-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm0
131+ ; X86-NEXT: movl %ebp, %esp
132+ ; X86-NEXT: popl %ebp
133+ ; X86-NEXT: retl
134+ ;
135+ ; X64-LABEL: concat4_permw_v8i16:
136+ ; X64: # %bb.0:
137+ ; X64-NEXT: # kill: def $xmm3 killed $xmm3 def $ymm3
138+ ; X64-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
139+ ; X64-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
140+ ; X64-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
141+ ; X64-NEXT: vpmovsxbw {{.*#+}} ymm4 = [6,1,7,0,4,3,5,2,20,19,21,18,22,17,23,16]
142+ ; X64-NEXT: vpermi2w %ymm3, %ymm2, %ymm4
143+ ; X64-NEXT: vpmovsxbw {{.*#+}} ymm2 = [7,0,6,1,5,2,4,3,21,18,20,19,23,16,22,17]
144+ ; X64-NEXT: vpermi2w %ymm1, %ymm0, %ymm2
145+ ; X64-NEXT: vinserti64x4 $1, %ymm4, %zmm2, %zmm0
146+ ; X64-NEXT: retq
; Four independent word permutes, each with its own constant index vector.
147+ %px = tail call <8 x i16 > @llvm.x86.avx512.permvar.hi.128 (<8 x i16 > %x , <8 x i16 > <i16 7 , i16 0 , i16 6 , i16 1 , i16 5 , i16 2 , i16 4 , i16 3 >)
148+ %py = tail call <8 x i16 > @llvm.x86.avx512.permvar.hi.128 (<8 x i16 > %y , <8 x i16 > <i16 5 , i16 2 , i16 4 , i16 3 , i16 7 , i16 0 , i16 6 , i16 1 >)
149+ %pz = tail call <8 x i16 > @llvm.x86.avx512.permvar.hi.128 (<8 x i16 > %z , <8 x i16 > <i16 6 , i16 1 , i16 7 , i16 0 , i16 4 , i16 3 , i16 5 , i16 2 >)
150+ %pw = tail call <8 x i16 > @llvm.x86.avx512.permvar.hi.128 (<8 x i16 > %w , <8 x i16 > <i16 4 , i16 3 , i16 5 , i16 2 , i16 6 , i16 1 , i16 7 , i16 0 >)
; Pairwise concatenation (identity masks): %px:%py and %pz:%pw.
151+ %lo = shufflevector <8 x i16 > %px , <8 x i16 > %py , <16 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 , i32 8 , i32 9 , i32 10 , i32 11 , i32 12 , i32 13 , i32 14 , i32 15 >
152+ %hi = shufflevector <8 x i16 > %pz , <8 x i16 > %pw , <16 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 , i32 8 , i32 9 , i32 10 , i32 11 , i32 12 , i32 13 , i32 14 , i32 15 >
; Final concatenation (identity mask 0..31): %lo is elements 0-15, %hi is 16-31.
153+ %res = shufflevector <16 x i16 > %lo , <16 x i16 > %hi , <32 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 , i32 8 , i32 9 , i32 10 , i32 11 , i32 12 , i32 13 , i32 14 , i32 15 , i32 16 , i32 17 , i32 18 , i32 19 , i32 20 , i32 21 , i32 22 , i32 23 , i32 24 , i32 25 , i32 26 , i32 27 , i32 28 , i32 29 , i32 30 , i32 31 >
154+ ret <32 x i16 > %res
155+ }
101156
102157define <8 x i32 > @concat_vrotli_v4i32 (<4 x i32 > %a0 , <4 x i32 > %a1 ) {
103158; CHECK-LABEL: concat_vrotli_v4i32:
@@ -204,11 +259,11 @@ define i64 @PR55050() {
204259; X86-NEXT: xorl %edx, %edx
205260; X86-NEXT: xorl %eax, %eax
206261; X86-NEXT: testb %dl, %dl
207- ; X86-NEXT: jne .LBB12_2
262+ ; X86-NEXT: jne .LBB14_2
208263; X86-NEXT: # %bb.1: # %if
209264; X86-NEXT: xorl %eax, %eax
210265; X86-NEXT: xorl %edx, %edx
211- ; X86-NEXT: .LBB12_2 : # %exit
266+ ; X86-NEXT: .LBB14_2 : # %exit
212267; X86-NEXT: retl
213268;
214269; X64-LABEL: PR55050:
0 commit comments