@@ -150,59 +150,32 @@ define <4 x i32> @insert_v4i32_01x3(<4 x i32> %a) {
150150define <8 x i32 > @insert_v8i32_x12345x7 (<8 x i32 > %a ) {
151151; SSE2-LABEL: insert_v8i32_x12345x7:
152152; SSE2: # %bb.0:
153- ; SSE2-NEXT: movss {{.*#+}} xmm2 = [NaN,0.0E+0,0.0E+0,0.0E+0]
154- ; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
155- ; SSE2-NEXT: movl $-1, %eax
156- ; SSE2-NEXT: movd %eax, %xmm2
157- ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
158- ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
153+ ; SSE2-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
154+ ; SSE2-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
159155; SSE2-NEXT: retq
160156;
161157; SSE3-LABEL: insert_v8i32_x12345x7:
162158; SSE3: # %bb.0:
163- ; SSE3-NEXT: movss {{.*#+}} xmm2 = [NaN,0.0E+0,0.0E+0,0.0E+0]
164- ; SSE3-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
165- ; SSE3-NEXT: movl $-1, %eax
166- ; SSE3-NEXT: movd %eax, %xmm2
167- ; SSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
168- ; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
159+ ; SSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
160+ ; SSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
169161; SSE3-NEXT: retq
170162;
171163; SSSE3-LABEL: insert_v8i32_x12345x7:
172164; SSSE3: # %bb.0:
173- ; SSSE3-NEXT: movss {{.*#+}} xmm2 = [NaN,0.0E+0,0.0E+0,0.0E+0]
174- ; SSSE3-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
175- ; SSSE3-NEXT: movl $-1, %eax
176- ; SSSE3-NEXT: movd %eax, %xmm2
177- ; SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
178- ; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
165+ ; SSSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
166+ ; SSSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
179167; SSSE3-NEXT: retq
180168;
181169; SSE41-LABEL: insert_v8i32_x12345x7:
182170; SSE41: # %bb.0:
183- ; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
184- ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3,4,5,6,7]
185- ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5],xmm1[6,7]
171+ ; SSE41-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
172+ ; SSE41-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
186173; SSE41-NEXT: retq
187174;
188- ; AVX1-LABEL: insert_v8i32_x12345x7:
189- ; AVX1: # %bb.0:
190- ; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
191- ; AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
192- ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
193- ; AVX1-NEXT: retq
194- ;
195- ; AVX2-LABEL: insert_v8i32_x12345x7:
196- ; AVX2: # %bb.0:
197- ; AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
198- ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
199- ; AVX2-NEXT: retq
200- ;
201- ; AVX512-LABEL: insert_v8i32_x12345x7:
202- ; AVX512: # %bb.0:
203- ; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
204- ; AVX512-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
205- ; AVX512-NEXT: retq
175+ ; AVX-LABEL: insert_v8i32_x12345x7:
176+ ; AVX: # %bb.0:
177+ ; AVX-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
178+ ; AVX-NEXT: retq
206179 %1 = insertelement <8 x i32 > %a , i32 -1 , i32 0
207180 %2 = insertelement <8 x i32 > %1 , i32 -1 , i32 6
208181 ret <8 x i32 > %2
@@ -211,35 +184,27 @@ define <8 x i32> @insert_v8i32_x12345x7(<8 x i32> %a) {
211184define <8 x i16 > @insert_v8i16_x12345x7 (<8 x i16 > %a ) {
212185; SSE2-LABEL: insert_v8i16_x12345x7:
213186; SSE2: # %bb.0:
214- ; SSE2-NEXT: movl $65535, %eax # imm = 0xFFFF
215- ; SSE2-NEXT: pinsrw $0, %eax, %xmm0
216- ; SSE2-NEXT: pinsrw $6, %eax, %xmm0
187+ ; SSE2-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
217188; SSE2-NEXT: retq
218189;
219190; SSE3-LABEL: insert_v8i16_x12345x7:
220191; SSE3: # %bb.0:
221- ; SSE3-NEXT: movl $65535, %eax # imm = 0xFFFF
222- ; SSE3-NEXT: pinsrw $0, %eax, %xmm0
223- ; SSE3-NEXT: pinsrw $6, %eax, %xmm0
192+ ; SSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
224193; SSE3-NEXT: retq
225194;
226195; SSSE3-LABEL: insert_v8i16_x12345x7:
227196; SSSE3: # %bb.0:
228- ; SSSE3-NEXT: movl $65535, %eax # imm = 0xFFFF
229- ; SSSE3-NEXT: pinsrw $0, %eax, %xmm0
230- ; SSSE3-NEXT: pinsrw $6, %eax, %xmm0
197+ ; SSSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
231198; SSSE3-NEXT: retq
232199;
233200; SSE41-LABEL: insert_v8i16_x12345x7:
234201; SSE41: # %bb.0:
235- ; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
236- ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
202+ ; SSE41-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
237203; SSE41-NEXT: retq
238204;
239205; AVX-LABEL: insert_v8i16_x12345x7:
240206; AVX: # %bb.0:
241- ; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
242- ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
207+ ; AVX-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
243208; AVX-NEXT: retq
244209 %1 = insertelement <8 x i16 > %a , i16 -1 , i32 0
245210 %2 = insertelement <8 x i16 > %1 , i16 -1 , i32 6
@@ -249,60 +214,32 @@ define <8 x i16> @insert_v8i16_x12345x7(<8 x i16> %a) {
249214define <16 x i16 > @insert_v16i16_x12345x789ABCDEx (<16 x i16 > %a ) {
250215; SSE2-LABEL: insert_v16i16_x12345x789ABCDEx:
251216; SSE2: # %bb.0:
252- ; SSE2-NEXT: movl $65535, %eax # imm = 0xFFFF
253- ; SSE2-NEXT: pinsrw $0, %eax, %xmm0
254- ; SSE2-NEXT: pinsrw $6, %eax, %xmm0
255- ; SSE2-NEXT: pinsrw $7, %eax, %xmm1
217+ ; SSE2-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
218+ ; SSE2-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
256219; SSE2-NEXT: retq
257220;
258221; SSE3-LABEL: insert_v16i16_x12345x789ABCDEx:
259222; SSE3: # %bb.0:
260- ; SSE3-NEXT: movl $65535, %eax # imm = 0xFFFF
261- ; SSE3-NEXT: pinsrw $0, %eax, %xmm0
262- ; SSE3-NEXT: pinsrw $6, %eax, %xmm0
263- ; SSE3-NEXT: pinsrw $7, %eax, %xmm1
223+ ; SSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
224+ ; SSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
264225; SSE3-NEXT: retq
265226;
266227; SSSE3-LABEL: insert_v16i16_x12345x789ABCDEx:
267228; SSSE3: # %bb.0:
268- ; SSSE3-NEXT: movl $65535, %eax # imm = 0xFFFF
269- ; SSSE3-NEXT: pinsrw $0, %eax, %xmm0
270- ; SSSE3-NEXT: pinsrw $6, %eax, %xmm0
271- ; SSSE3-NEXT: pinsrw $7, %eax, %xmm1
229+ ; SSSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
230+ ; SSSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
272231; SSSE3-NEXT: retq
273232;
274233; SSE41-LABEL: insert_v16i16_x12345x789ABCDEx:
275234; SSE41: # %bb.0:
276- ; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
277- ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3,4,5],xmm2[6],xmm0[7]
278- ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6],xmm2[7]
235+ ; SSE41-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
236+ ; SSE41-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
279237; SSE41-NEXT: retq
280238;
281- ; AVX1-LABEL: insert_v16i16_x12345x789ABCDEx:
282- ; AVX1: # %bb.0:
283- ; AVX1-NEXT: vmovss {{.*#+}} xmm1 = [65535,0,0,0]
284- ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
285- ; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
286- ; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
287- ; AVX1-NEXT: retq
288- ;
289- ; AVX2-LABEL: insert_v16i16_x12345x789ABCDEx:
290- ; AVX2: # %bb.0:
291- ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
292- ; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
293- ; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
294- ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm2[7],ymm0[8,9,10,11,12,13,14],ymm2[15]
295- ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
296- ; AVX2-NEXT: retq
297- ;
298- ; AVX512-LABEL: insert_v16i16_x12345x789ABCDEx:
299- ; AVX512: # %bb.0:
300- ; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
301- ; AVX512-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
302- ; AVX512-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
303- ; AVX512-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm2[7],ymm0[8,9,10,11,12,13,14],ymm2[15]
304- ; AVX512-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
305- ; AVX512-NEXT: retq
239+ ; AVX-LABEL: insert_v16i16_x12345x789ABCDEx:
240+ ; AVX: # %bb.0:
241+ ; AVX-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
242+ ; AVX-NEXT: retq
306243 %1 = insertelement <16 x i16 > %a , i16 -1 , i32 0
307244 %2 = insertelement <16 x i16 > %1 , i16 -1 , i32 6
308245 %3 = insertelement <16 x i16 > %2 , i16 -1 , i32 15
@@ -313,33 +250,26 @@ define <16 x i8> @insert_v16i8_x123456789ABCDEx(<16 x i8> %a) {
313250; SSE2-LABEL: insert_v16i8_x123456789ABCDEx:
314251; SSE2: # %bb.0:
315252; SSE2-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
316- ; SSE2-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
317253; SSE2-NEXT: retq
318254;
319255; SSE3-LABEL: insert_v16i8_x123456789ABCDEx:
320256; SSE3: # %bb.0:
321257; SSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
322- ; SSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
323258; SSE3-NEXT: retq
324259;
325260; SSSE3-LABEL: insert_v16i8_x123456789ABCDEx:
326261; SSSE3: # %bb.0:
327262; SSSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
328- ; SSSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
329263; SSSE3-NEXT: retq
330264;
331265; SSE41-LABEL: insert_v16i8_x123456789ABCDEx:
332266; SSE41: # %bb.0:
333- ; SSE41-NEXT: movl $255, %eax
334- ; SSE41-NEXT: pinsrb $0, %eax, %xmm0
335- ; SSE41-NEXT: pinsrb $15, %eax, %xmm0
267+ ; SSE41-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
336268; SSE41-NEXT: retq
337269;
338270; AVX-LABEL: insert_v16i8_x123456789ABCDEx:
339271; AVX: # %bb.0:
340- ; AVX-NEXT: movl $255, %eax
341- ; AVX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
342- ; AVX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
272+ ; AVX-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
343273; AVX-NEXT: retq
344274 %1 = insertelement <16 x i8 > %a , i8 -1 , i32 0
345275 %2 = insertelement <16 x i8 > %1 , i8 -1 , i32 15
@@ -350,69 +280,31 @@ define <32 x i8> @insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx(<32 x i8> %a) {
350280; SSE2-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
351281; SSE2: # %bb.0:
352282; SSE2-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
353- ; SSE2-NEXT: movaps {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255]
354- ; SSE2-NEXT: orps %xmm2, %xmm0
355283; SSE2-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
356- ; SSE2-NEXT: orps %xmm2, %xmm1
357284; SSE2-NEXT: retq
358285;
359286; SSE3-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
360287; SSE3: # %bb.0:
361288; SSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
362- ; SSE3-NEXT: movaps {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255]
363- ; SSE3-NEXT: orps %xmm2, %xmm0
364289; SSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
365- ; SSE3-NEXT: orps %xmm2, %xmm1
366290; SSE3-NEXT: retq
367291;
368292; SSSE3-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
369293; SSSE3: # %bb.0:
370294; SSSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
371- ; SSSE3-NEXT: movaps {{.*#+}} xmm2 = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255]
372- ; SSSE3-NEXT: orps %xmm2, %xmm0
373295; SSSE3-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
374- ; SSSE3-NEXT: orps %xmm2, %xmm1
375296; SSSE3-NEXT: retq
376297;
377298; SSE41-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
378299; SSE41: # %bb.0:
379- ; SSE41-NEXT: movl $255, %eax
380- ; SSE41-NEXT: pinsrb $0, %eax, %xmm0
381- ; SSE41-NEXT: pinsrb $15, %eax, %xmm0
382- ; SSE41-NEXT: pinsrb $14, %eax, %xmm1
383- ; SSE41-NEXT: pinsrb $15, %eax, %xmm1
300+ ; SSE41-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
301+ ; SSE41-NEXT: orps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
384302; SSE41-NEXT: retq
385303;
386- ; AVX1-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
387- ; AVX1: # %bb.0:
388- ; AVX1-NEXT: vmovss {{.*#+}} xmm1 = [255,0,0,0]
389- ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0
390- ; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
391- ; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
392- ; AVX1-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
393- ; AVX1-NEXT: retq
394- ;
395- ; AVX2-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
396- ; AVX2: # %bb.0:
397- ; AVX2-NEXT: movl $255, %eax
398- ; AVX2-NEXT: vpinsrb $0, %eax, %xmm0, %xmm1
399- ; AVX2-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
400- ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
401- ; AVX2-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
402- ; AVX2-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
403- ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
404- ; AVX2-NEXT: retq
405- ;
406- ; AVX512-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
407- ; AVX512: # %bb.0:
408- ; AVX512-NEXT: movl $255, %eax
409- ; AVX512-NEXT: vpinsrb $0, %eax, %xmm0, %xmm1
410- ; AVX512-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
411- ; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
412- ; AVX512-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
413- ; AVX512-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
414- ; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
415- ; AVX512-NEXT: retq
304+ ; AVX-LABEL: insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx:
305+ ; AVX: # %bb.0:
306+ ; AVX-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
307+ ; AVX-NEXT: retq
416308 %1 = insertelement <32 x i8 > %a , i8 -1 , i32 0
417309 %2 = insertelement <32 x i8 > %1 , i8 -1 , i32 15
418310 %3 = insertelement <32 x i8 > %2 , i8 -1 , i32 30
0 commit comments