@@ -262,54 +262,37 @@ define <4 x float> @merge_4f32_f32_45zz(ptr %ptr) nounwind uwtable noinline ssp
262262define <4 x float > @merge_4f32_f32_012u (ptr %ptr ) nounwind uwtable noinline ssp {
263263; SSE2-LABEL: merge_4f32_f32_012u:
264264; SSE2: # %bb.0:
265- ; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
266265; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
267- ; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
268- ; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
269- ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
270- ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
266+ ; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
267+ ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
271268; SSE2-NEXT: retq
272269;
273270; SSE41-LABEL: merge_4f32_f32_012u:
274271; SSE41: # %bb.0:
275- ; SSE41-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
276- ; SSE41-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
277- ; SSE41-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
278- ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
279- ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
280- ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2,0]
272+ ; SSE41-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
273+ ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
281274; SSE41-NEXT: retq
282275;
283276; AVX-LABEL: merge_4f32_f32_012u:
284277; AVX: # %bb.0:
285- ; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
286- ; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
287- ; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
288- ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
289- ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
290- ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2,0]
278+ ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
279+ ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
291280; AVX-NEXT: retq
292281;
293282; X86-SSE1-LABEL: merge_4f32_f32_012u:
294283; X86-SSE1: # %bb.0:
295284; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
296- ; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
285+ ; X86-SSE1-NEXT: xorps %xmm0, %xmm0
286+ ; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
297287; X86-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
298- ; X86-SSE1-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
299- ; X86-SSE1-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
300- ; X86-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
301- ; X86-SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
288+ ; X86-SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
302289; X86-SSE1-NEXT: retl
303290;
304291; X86-SSE41-LABEL: merge_4f32_f32_012u:
305292; X86-SSE41: # %bb.0:
306293; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
307- ; X86-SSE41-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
308- ; X86-SSE41-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
309- ; X86-SSE41-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
310- ; X86-SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
311- ; X86-SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
312- ; X86-SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2,0]
294+ ; X86-SSE41-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
295+ ; X86-SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
313296; X86-SSE41-NEXT: retl
314297 %ptr1 = getelementptr inbounds float , ptr %ptr , i64 1
315298 %ptr2 = getelementptr inbounds float , ptr %ptr , i64 2
@@ -326,54 +309,37 @@ define <4 x float> @merge_4f32_f32_012u(ptr %ptr) nounwind uwtable noinline ssp
326309define <4 x float > @merge_4f32_f32_019u (ptr %ptr ) nounwind uwtable noinline ssp {
327310; SSE2-LABEL: merge_4f32_f32_019u:
328311; SSE2: # %bb.0:
329- ; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
330312; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
331- ; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
332- ; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
333- ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
334- ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
313+ ; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
314+ ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
335315; SSE2-NEXT: retq
336316;
337317; SSE41-LABEL: merge_4f32_f32_019u:
338318; SSE41: # %bb.0:
339- ; SSE41-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
340- ; SSE41-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
341- ; SSE41-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
342- ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
343- ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
344- ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2,0]
319+ ; SSE41-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
320+ ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
345321; SSE41-NEXT: retq
346322;
347323; AVX-LABEL: merge_4f32_f32_019u:
348324; AVX: # %bb.0:
349- ; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
350- ; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
351- ; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
352- ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
353- ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
354- ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2,0]
325+ ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
326+ ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
355327; AVX-NEXT: retq
356328;
357329; X86-SSE1-LABEL: merge_4f32_f32_019u:
358330; X86-SSE1: # %bb.0:
359331; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
360- ; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
332+ ; X86-SSE1-NEXT: xorps %xmm0, %xmm0
333+ ; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
361334; X86-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
362- ; X86-SSE1-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
363- ; X86-SSE1-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
364- ; X86-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
365- ; X86-SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
335+ ; X86-SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
366336; X86-SSE1-NEXT: retl
367337;
368338; X86-SSE41-LABEL: merge_4f32_f32_019u:
369339; X86-SSE41: # %bb.0:
370340; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
371- ; X86-SSE41-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
372- ; X86-SSE41-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
373- ; X86-SSE41-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
374- ; X86-SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
375- ; X86-SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
376- ; X86-SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2,0]
341+ ; X86-SSE41-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
342+ ; X86-SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
377343; X86-SSE41-NEXT: retl
378344 %ptr1 = getelementptr inbounds float , ptr %ptr , i64 1
379345 %ptr2 = getelementptr inbounds float , ptr %ptr , i64 9
0 commit comments