@@ -342,42 +342,50 @@ define void @freeze_buildvector_single_repeated_maybe_poison_operand(ptr %origin
342342define void @freeze_two_frozen_buildvectors (ptr %origin0 , ptr %origin1 , ptr %dst0 , ptr %dst1 ) nounwind {
343343; X86-LABEL: freeze_two_frozen_buildvectors:
344344; X86: # %bb.0:
345+ ; X86-NEXT: pushl %esi
345346; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
346347; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
347348; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
349+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
350+ ; X86-NEXT: movl (%esi), %esi
351+ ; X86-NEXT: andl $15, %esi
348352; X86-NEXT: movl (%edx), %edx
349353; X86-NEXT: andl $15, %edx
350- ; X86-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0
351- ; X86-NEXT: vbroadcastss {{.*#+}} xmm1 = [7,7,7,7]
352- ; X86-NEXT: vpand %xmm1, %xmm0, %xmm0
354+ ; X86-NEXT: vmovd %esi, %xmm0
355+ ; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
356+ ; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1
357+ ; X86-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
358+ ; X86-NEXT: vbroadcastss {{.*#+}} xmm2 = [7,7,7,7]
359+ ; X86-NEXT: vpand %xmm2, %xmm0, %xmm0
353360; X86-NEXT: vmovdqa %xmm0, (%ecx)
354361; X86-NEXT: vmovd %edx, %xmm0
355362; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
356- ; X86-NEXT: vpxor %xmm2, %xmm2, %xmm2
357- ; X86-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5],xmm2[6,7]
358- ; X86-NEXT: vpand %xmm1, %xmm0, %xmm0
363+ ; X86-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7]
364+ ; X86-NEXT: vpand %xmm2, %xmm0, %xmm0
359365; X86-NEXT: vmovdqa %xmm0, (%eax)
366+ ; X86-NEXT: popl %esi
360367; X86-NEXT: retl
361368;
362369; X64-LABEL: freeze_two_frozen_buildvectors:
363370; X64: # %bb.0:
364- ; X64-NEXT: movl (%rdi), %eax
365- ; X64-NEXT: andl $15, %eax
366- ; X64-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
367- ; X64-NEXT: vpbroadcastd {{.*#+}} xmm1 = [7,7,7,7]
368- ; X64-NEXT: vpand %xmm1, %xmm0, %xmm0
371+ ; X64-NEXT: movl (%rsi), %eax
372+ ; X64-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
373+ ; X64-NEXT: vpbroadcastd %xmm0, %xmm0
374+ ; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
375+ ; X64-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
376+ ; X64-NEXT: vpbroadcastd {{.*#+}} xmm2 = [7,7,7,7]
377+ ; X64-NEXT: vpand %xmm2, %xmm0, %xmm0
369378; X64-NEXT: vmovdqa %xmm0, (%rdx)
370379; X64-NEXT: vmovd %eax, %xmm0
371380; X64-NEXT: vpbroadcastd %xmm0, %xmm0
372- ; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2
373- ; X64-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm0[2],xmm2[3]
374- ; X64-NEXT: vpand %xmm1, %xmm0, %xmm0
381+ ; X64-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2],xmm1[3]
382+ ; X64-NEXT: vpand %xmm2, %xmm0, %xmm0
375383; X64-NEXT: vmovdqa %xmm0, (%rcx)
376384; X64-NEXT: retq
377385 %i0.src = load i32 , ptr %origin0
378386 %i0 = and i32 %i0.src , 15
379387 %i1.src = load i32 , ptr %origin1
380- %i1 = and i32 %i0 .src , 15
388+ %i1 = and i32 %i1 .src , 15
381389 %i2 = insertelement <4 x i32 > poison, i32 %i0 , i64 1
382390 %i3 = and <4 x i32 > %i2 , <i32 7 , i32 7 , i32 7 , i32 7 >
383391 %i4 = freeze <4 x i32 > %i3
@@ -392,41 +400,43 @@ define void @freeze_two_frozen_buildvectors(ptr %origin0, ptr %origin1, ptr %dst
392400define void @freeze_two_buildvectors_only_one_frozen (ptr %origin0 , ptr %origin1 , ptr %dst0 , ptr %dst1 ) nounwind {
393401; X86-LABEL: freeze_two_buildvectors_only_one_frozen:
394402; X86: # %bb.0:
403+ ; X86-NEXT: pushl %esi
395404; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
396405; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
397406; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
398- ; X86-NEXT: movl (%edx), %edx
399- ; X86-NEXT: andl $15, %edx
400- ; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0
401- ; X86-NEXT: vmovd %edx, %xmm1
402- ; X86-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[0,0,1,1]
403- ; X86-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5,6,7]
404- ; X86-NEXT: vbroadcastss {{.*#+}} xmm2 = [7,7,7,7]
405- ; X86-NEXT: vpand %xmm2, %xmm0, %xmm0
407+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
408+ ; X86-NEXT: movl (%esi), %esi
409+ ; X86-NEXT: andl $15, %esi
410+ ; X86-NEXT: vmovd %esi, %xmm0
411+ ; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
412+ ; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1
413+ ; X86-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
414+ ; X86-NEXT: vbroadcastss {{.*#+}} xmm1 = [7,7,7,7]
415+ ; X86-NEXT: vpand %xmm1, %xmm0, %xmm0
416+ ; X86-NEXT: vbroadcastss (%edx), %xmm2
406417; X86-NEXT: vmovdqa %xmm0, (%ecx)
407- ; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
408- ; X86-NEXT: vpand %xmm2, %xmm0, %xmm0
418+ ; X86-NEXT: vpand %xmm1, %xmm2, %xmm0
409419; X86-NEXT: vmovdqa %xmm0, (%eax)
420+ ; X86-NEXT: popl %esi
410421; X86-NEXT: retl
411422;
412423; X64-LABEL: freeze_two_buildvectors_only_one_frozen:
413424; X64: # %bb.0:
414- ; X64-NEXT: movl (%rdi), %eax
415- ; X64-NEXT: andl $15, %eax
416- ; X64-NEXT: vpxor %xmm0, %xmm0, %xmm0
417- ; X64-NEXT: vmovd %eax, %xmm1
418- ; X64-NEXT: vpbroadcastd %xmm1, %xmm1
419- ; X64-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
420- ; X64-NEXT: vpbroadcastd {{.*#+}} xmm2 = [7,7,7,7]
421- ; X64-NEXT: vpand %xmm2, %xmm0, %xmm0
422- ; X64-NEXT: vmovdqa %xmm0, (%rdx)
423- ; X64-NEXT: vpand %xmm2, %xmm1, %xmm0
424- ; X64-NEXT: vmovdqa %xmm0, (%rcx)
425+ ; X64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
426+ ; X64-NEXT: vbroadcastss %xmm0, %xmm0
427+ ; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
428+ ; X64-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3]
429+ ; X64-NEXT: vbroadcastss {{.*#+}} xmm1 = [7,7,7,7]
430+ ; X64-NEXT: vandps %xmm1, %xmm0, %xmm0
431+ ; X64-NEXT: vbroadcastss (%rsi), %xmm2
432+ ; X64-NEXT: vmovaps %xmm0, (%rdx)
433+ ; X64-NEXT: vandps %xmm1, %xmm2, %xmm0
434+ ; X64-NEXT: vmovaps %xmm0, (%rcx)
425435; X64-NEXT: retq
426436 %i0.src = load i32 , ptr %origin0
427437 %i0 = and i32 %i0.src , 15
428438 %i1.src = load i32 , ptr %origin1
429- %i1 = and i32 %i0 .src , 15
439+ %i1 = and i32 %i1 .src , 15
430440 %i2 = insertelement <4 x i32 > poison, i32 %i0 , i64 1
431441 %i3 = and <4 x i32 > %i2 , <i32 7 , i32 7 , i32 7 , i32 7 >
432442 %i4 = freeze <4 x i32 > %i3
@@ -440,34 +450,40 @@ define void @freeze_two_buildvectors_only_one_frozen(ptr %origin0, ptr %origin1,
440450define void @freeze_two_buildvectors_one_undef_elt (ptr %origin0 , ptr %origin1 , ptr %dst0 , ptr %dst1 ) nounwind {
441451; X86-LABEL: freeze_two_buildvectors_one_undef_elt:
442452; X86: # %bb.0:
453+ ; X86-NEXT: pushl %esi
443454; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
444455; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
445456; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
446- ; X86-NEXT: movl (%edx), %edx
447- ; X86-NEXT: andl $15, %edx
448- ; X86-NEXT: vmovddup {{.*#+}} xmm0 = [7,0,7,0]
449- ; X86-NEXT: # xmm0 = mem[0,0]
450- ; X86-NEXT: vmovd %edx, %xmm1
451- ; X86-NEXT: vpand %xmm0, %xmm1, %xmm2
452- ; X86-NEXT: vmovdqa %xmm2, (%ecx)
453- ; X86-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
454- ; X86-NEXT: vpand %xmm0, %xmm1, %xmm0
457+ ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
458+ ; X86-NEXT: movl (%esi), %esi
459+ ; X86-NEXT: andl $15, %esi
460+ ; X86-NEXT: vmovd %esi, %xmm0
461+ ; X86-NEXT: vmovddup {{.*#+}} xmm1 = [7,0,7,0]
462+ ; X86-NEXT: # xmm1 = mem[0,0]
463+ ; X86-NEXT: vpand %xmm1, %xmm0, %xmm0
464+ ; X86-NEXT: vmovddup {{.*#+}} xmm2 = mem[0,0]
465+ ; X86-NEXT: vmovdqa %xmm0, (%ecx)
466+ ; X86-NEXT: vpand %xmm1, %xmm2, %xmm0
455467; X86-NEXT: vmovdqa %xmm0, (%eax)
468+ ; X86-NEXT: popl %esi
456469; X86-NEXT: retl
457470;
458471; X64-LABEL: freeze_two_buildvectors_one_undef_elt:
459472; X64: # %bb.0:
460473; X64-NEXT: movq (%rdi), %rax
474+ ; X64-NEXT: andl $15, %eax
461475; X64-NEXT: vmovd %eax, %xmm0
462- ; X64-NEXT: vpbroadcastd %xmm0, %xmm0
463- ; X64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
476+ ; X64-NEXT: vpmovsxbq {{.*#+}} xmm1 = [7,7]
477+ ; X64-NEXT: vpand %xmm1, %xmm0, %xmm0
478+ ; X64-NEXT: vpbroadcastq (%rsi), %xmm2
464479; X64-NEXT: vmovdqa %xmm0, (%rdx)
480+ ; X64-NEXT: vpand %xmm1, %xmm2, %xmm0
465481; X64-NEXT: vmovdqa %xmm0, (%rcx)
466482; X64-NEXT: retq
467483 %i0.src = load i64 , ptr %origin0
468484 %i0 = and i64 %i0.src , 15
469485 %i1.src = load i64 , ptr %origin1
470- %i1 = and i64 %i0 .src , 15
486+ %i1 = and i64 %i1 .src , 15
471487 %i2 = insertelement <2 x i64 > poison, i64 %i0 , i64 0
472488 %i3 = and <2 x i64 > %i2 , <i64 7 , i64 7 >
473489 %i4 = freeze <2 x i64 > %i3
0 commit comments