Skip to content
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions llvm/lib/Target/X86/X86FixupInstTuning.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -221,8 +221,41 @@ bool X86FixupInstTuningPass::processInstruction(
auto ProcessUNPCKPS = [&](unsigned NewOpc) -> bool {
return ProcessUNPCKToIntDomain(NewOpc);
};

// Replace a BLEND with immediate 1 (select only element 0 from the first
// source) by the equivalent scalar MOV, when tuning prefers it or we are
// optimizing for size (the MOV encoding drops the immediate byte).
auto ProcessBLENDToMOV = [&](unsigned MovOpc) -> bool {
auto &SelMask = MI.getOperand(NumOperands - 1);
// Only a blend mask of exactly 1 is a pure "move scalar" pattern.
if (!SelMask.isImm() || SelMask.getImm() != 1)
return false;

// With optsize the smaller MOV encoding always wins; otherwise defer to
// the scheduler-model preference check.
const bool ForOptSize = MF.getFunction().hasOptSize();
if (!ForOptSize && !NewOpcPreferable(MovOpc))
return false;

// Rewrite in place: swap the opcode and drop the now-meaningless immediate.
MI.setDesc(TII->get(MovOpc));
MI.removeOperand(NumOperands - 1);
return true;
};
switch (Opc) {
case X86::VBLENDPSrri:
case X86::VBLENDPSYrri:
case X86::VBLENDMPSZ128rrkz:
case X86::VBLENDMPSZ256rrkz:
case X86::VBLENDMPSZrrkz: {
int Imm = MI.getOperand(NumOperands - 1).getImm();
if (Imm != 1)
return false;
return ProcessBLENDToMOV(X86::VMOVSSrr);
}
case X86::VBLENDPDrri:
case X86::VBLENDPDYrri:
case X86::VBLENDMPDZ128rrkz:
case X86::VBLENDMPDZ256rrkz:
case X86::VBLENDMPDZrrkz: {
int Imm = MI.getOperand(NumOperands - 1).getImm();
if (Imm != 1)
return false;
Review comment (Collaborator): Drop the redundant `Imm != 1` check here and just `return ProcessBLENDToMOV(X86::VMOVSDrr);` — the `ProcessBLENDToMOV` lambda already validates that the immediate equals 1.

return ProcessBLENDToMOV(X86::VMOVSDrr);
}
case X86::VPERMILPDri:
return ProcessVPERMILPDri(X86::VSHUFPDrri);
case X86::VPERMILPDYri:
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/2012-01-12-extract-sv.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ define void @endless_loop() {
; AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3,4,5,6],ymm1[7]
; AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
; AVX1-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
; AVX1-NEXT: vmovaps %ymm0, (%eax)
; AVX1-NEXT: vmovaps %ymm1, (%eax)
; AVX1-NEXT: vzeroupper
Expand All @@ -21,7 +21,7 @@ define void @endless_loop() {
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vbroadcastss (%eax), %xmm0
; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; AVX2-NEXT: vmovss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; AVX2-NEXT: vbroadcastss %xmm0, %ymm0
; AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3,4,5,6],ymm0[7]
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/avx-insertelt.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ define <8 x float> @insert_f32_firstelt_of_low_subvector(<8 x float> %x, float %
; ALL-LABEL: insert_f32_firstelt_of_low_subvector:
; ALL: # %bb.0:
; ALL-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6,7]
; ALL-NEXT: vmovss {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
; ALL-NEXT: retq
%i0 = insertelement <8 x float> %x, float %s, i32 0
ret <8 x float> %i0
Expand Down Expand Up @@ -94,7 +94,7 @@ define <8 x float> @insert_f32_firstelt_of_high_subvector(<8 x float> %x, float
; AVX-LABEL: insert_f32_firstelt_of_high_subvector:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
; AVX-NEXT: vmovss {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT: retq
;
Expand Down Expand Up @@ -202,9 +202,9 @@ define <4 x i64> @insert_i64_firstelt_of_high_subvector(<4 x i64> %x, i64 %s) {
define <8 x float> @insert_f32_firstelts(<8 x float> %x, float %s) {
; AVX-LABEL: insert_f32_firstelts:
; AVX: # %bb.0:
; AVX-NEXT: vblendps {{.*#+}} xmm2 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT: vmovss {{.*#+}} xmm2 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX-NEXT: retq
;
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1843,7 +1843,7 @@ define <2 x double> @test_mm_cvtu64_sd(<2 x double> %__A, i64 %__B) {
; X86-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-NEXT: vshufpd {{.*#+}} xmm2 = xmm1[1,0]
; X86-NEXT: vaddsd %xmm1, %xmm2, %xmm1
; X86-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X86-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X86-NEXT: retl
;
; X64-LABEL: test_mm_cvtu64_sd:
Expand Down Expand Up @@ -1891,7 +1891,7 @@ define <4 x float> @test_mm_cvtu64_ss(<4 x float> %__A, i64 %__B) {
; X86-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; X86-NEXT: fstps {{[0-9]+}}(%esp)
; X86-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; X86-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: .cfi_def_cfa %esp, 4
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10483,7 +10483,7 @@ define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ss_rm(<4 x float> %x0, <4 x
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ss_rm:
; CHECK: ## %bb.0:
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
; CHECK-NEXT: vblendps $1, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01]
; CHECK-NEXT: vmovss %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x10,0xc1]
; CHECK-NEXT: ## xmm0 = xmm1[0],xmm0[1,2,3]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
%q = load float, ptr %ptr_b
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/avx512-intrinsics.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6505,7 +6505,7 @@ define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ss_rm(<4 x float> %x0, <4 x
; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ss_rm:
; CHECK: # %bb.0:
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; CHECK-NEXT: ret{{[l|q]}}
%q = load float, ptr %ptr_b
%vecinit.i = insertelement <4 x float> undef, float %q, i32 0
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/avx512copy-intrinsics.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ define <4 x i32> @test_mm_move_epi32(<4 x i32> %a0) nounwind {
; NOAVX512MOVZXC-LABEL: test_mm_move_epi32:
; NOAVX512MOVZXC: # %bb.0:
; NOAVX512MOVZXC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf0,0x57,0xc9]
; NOAVX512MOVZXC-NEXT: vblendps $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x01]
; NOAVX512MOVZXC-NEXT: vmovss %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf2,0x10,0xc0]
; NOAVX512MOVZXC-NEXT: # xmm0 = xmm0[0],xmm1[1,2,3]
; NOAVX512MOVZXC-NEXT: retq # encoding: [0xc3]
%res = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/X86/build-vector-512.ll
Original file line number Diff line number Diff line change
Expand Up @@ -578,7 +578,7 @@ define <16 x float> @test_buildvector_16f32_2_var(float %a0, float %a1) {
; AVX-32-NEXT: vpmovsxbd {{.*#+}} xmm1 = [0,17,0,0]
; AVX-32-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-32-NEXT: vpermi2ps %zmm0, %zmm2, %zmm1
; AVX-32-NEXT: vblendps {{.*#+}} xmm3 = xmm2[0],xmm0[1,2,3]
; AVX-32-NEXT: vmovss {{.*#+}} xmm3 = xmm2[0],xmm0[1,2,3]
; AVX-32-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX-32-NEXT: vinsertps {{.*#+}} xmm3 = xmm0[0,1,2],xmm2[0]
; AVX-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[2,3]
Expand Down Expand Up @@ -626,7 +626,7 @@ define <16 x float> @test_buildvector_16f32_2_load(ptr %p0, ptr %p1) {
; AVX-32-NEXT: vbroadcastss (%ecx), %xmm1
; AVX-32-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-32-NEXT: vpermi2ps %zmm1, %zmm2, %zmm0
; AVX-32-NEXT: vblendps {{.*#+}} xmm3 = xmm2[0],xmm1[1,2,3]
; AVX-32-NEXT: vmovss {{.*#+}} xmm3 = xmm2[0],xmm1[1,2,3]
; AVX-32-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX-32-NEXT: vinsertps {{.*#+}} xmm3 = xmm1[0,1,2],xmm2[0]
; AVX-32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
Expand All @@ -640,7 +640,7 @@ define <16 x float> @test_buildvector_16f32_2_load(ptr %p0, ptr %p1) {
; AVX-64-NEXT: vbroadcastss (%rdi), %xmm1
; AVX-64-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; AVX-64-NEXT: vpermi2ps %zmm1, %zmm2, %zmm0
; AVX-64-NEXT: vblendps {{.*#+}} xmm3 = xmm2[0],xmm1[1,2,3]
; AVX-64-NEXT: vmovss {{.*#+}} xmm3 = xmm2[0],xmm1[1,2,3]
; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX-64-NEXT: vinsertps {{.*#+}} xmm3 = xmm1[0,1,2],xmm2[0]
; AVX-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/X86/buildvec-extract.ll
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ define <2 x i64> @extract0_i32_zext_insert0_i64_zero(<4 x i32> %x) {
; AVX-LABEL: extract0_i32_zext_insert0_i64_zero:
; AVX: # %bb.0:
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: retq
%e = extractelement <4 x i32> %x, i32 0
%z = zext i32 %e to i64
Expand Down Expand Up @@ -85,7 +85,7 @@ define <2 x i64> @extract1_i32_zext_insert0_i64_zero(<4 x i32> %x) {
; AVX: # %bb.0:
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: retq
%e = extractelement <4 x i32> %x, i32 1
%z = zext i32 %e to i64
Expand Down Expand Up @@ -130,7 +130,7 @@ define <2 x i64> @extract2_i32_zext_insert0_i64_zero(<4 x i32> %x) {
; AVX: # %bb.0:
; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: retq
%e = extractelement <4 x i32> %x, i32 2
%z = zext i32 %e to i64
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/X86/canonicalize-vars-f16-type.ll
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ define void @v_test_canonicalize__half(half addrspace(1)* %out) nounwind {
; AVX512-NEXT: vcvtph2ps %xmm1, %xmm1
; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0
; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX512-NEXT: vpextrw $0, %xmm0, (%rdi)
; AVX512-NEXT: retq
Expand Down Expand Up @@ -149,7 +149,7 @@ define half @complex_canonicalize_fmul_half(half %a, half %b) nounwind {
; AVX512-NEXT: vcvtph2ps %xmm2, %xmm2
; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
; AVX512-NEXT: vsubss %xmm1, %xmm0, %xmm0
Expand Down Expand Up @@ -235,12 +235,12 @@ define void @v_test_canonicalize_v2half(<2 x half> addrspace(1)* %out) nounwind
; AVX512-NEXT: vcvtph2ps %xmm2, %xmm2
; AVX512-NEXT: vmulss %xmm1, %xmm2, %xmm2
; AVX512-NEXT: vxorps %xmm3, %xmm3, %xmm3
; AVX512-NEXT: vblendps {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3]
; AVX512-NEXT: vmovss {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3]
; AVX512-NEXT: vcvtps2ph $4, %xmm2, %xmm2
; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3]
; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3]
; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; AVX512-NEXT: vmovd %xmm0, (%rdi)
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/coalesce_commute_movsd.ll
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,12 @@ define <4 x float> @insert_f32(float %a0, <4 x float> %a1) {
;
; AVX-LABEL: insert_f32:
; AVX: # %bb.0:
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: retq
;
; AVX512-LABEL: insert_f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX512-NEXT: retq
%1 = insertelement <4 x float> %a1, float %a0, i32 0
ret <4 x float> %1
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/X86/combine-and.ll
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ define <4 x i32> @test1(<4 x i32> %A) {
; AVX-LABEL: test1:
; AVX: # %bb.0:
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: retq
%1 = and <4 x i32> %A, <i32 -1, i32 0, i32 0, i32 0>
ret <4 x i32> %1
Expand Down Expand Up @@ -195,7 +195,7 @@ define <4 x i32> @test11(<4 x i32> %A) {
; AVX-LABEL: test11:
; AVX: # %bb.0:
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT: retq
%1 = and <4 x i32> %A, <i32 0, i32 -1, i32 -1, i32 -1>
ret <4 x i32> %1
Expand Down
80 changes: 60 additions & 20 deletions llvm/test/CodeGen/X86/combine-or-shuffle.ll
Original file line number Diff line number Diff line change
Expand Up @@ -86,10 +86,20 @@ define <4 x i32> @test4(<4 x i32> %a, <4 x i32> %b) {
; SSE4-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; SSE4-NEXT: retq
;
; AVX-LABEL: test4:
; AVX: # %bb.0:
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: retq
; AVX1-LABEL: test4:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: test4:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX2-NEXT: retq
;
; AVX512-LABEL: test4:
; AVX512: # %bb.0:
; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX512-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 1, i32 2, i32 3>
%or = or <4 x i32> %shuf1, %shuf2
Expand All @@ -108,10 +118,20 @@ define <4 x i32> @test5(<4 x i32> %a, <4 x i32> %b) {
; SSE4-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE4-NEXT: retq
;
; AVX-LABEL: test5:
; AVX: # %bb.0:
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT: retq
; AVX1-LABEL: test5:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: test5:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX2-NEXT: retq
;
; AVX512-LABEL: test5:
; AVX512: # %bb.0:
; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX512-NEXT: retq
%shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 1, i32 2, i32 3>
%shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4>
%or = or <4 x i32> %shuf1, %shuf2
Expand Down Expand Up @@ -241,10 +261,20 @@ define <4 x i32> @test11(<4 x i32> %a, <4 x i32> %b) {
; SSE4-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; SSE4-NEXT: retq
;
; AVX-LABEL: test11:
; AVX: # %bb.0:
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: retq
; AVX1-LABEL: test11:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: test11:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX2-NEXT: retq
;
; AVX512-LABEL: test11:
; AVX512: # %bb.0:
; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX512-NEXT: retq
%and1 = and <4 x i32> %a, <i32 -1, i32 0, i32 0, i32 0>
%and2 = and <4 x i32> %b, <i32 0, i32 -1, i32 -1, i32 -1>
%or = or <4 x i32> %and1, %and2
Expand All @@ -263,10 +293,20 @@ define <4 x i32> @test12(<4 x i32> %a, <4 x i32> %b) {
; SSE4-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE4-NEXT: retq
;
; AVX-LABEL: test12:
; AVX: # %bb.0:
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT: retq
; AVX1-LABEL: test12:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: test12:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX2-NEXT: retq
;
; AVX512-LABEL: test12:
; AVX512: # %bb.0:
; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX512-NEXT: retq
%and1 = and <4 x i32> %a, <i32 0, i32 -1, i32 -1, i32 -1>
%and2 = and <4 x i32> %b, <i32 -1, i32 0, i32 0, i32 0>
%or = or <4 x i32> %and1, %and2
Expand Down Expand Up @@ -395,18 +435,18 @@ define <4 x i32> @test18(<4 x i32> %a, <4 x i32> %b) {
; AVX1-LABEL: test18:
; AVX1: # %bb.0:
; AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
; AVX1-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,0,1,1]
; AVX1-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
; AVX1-NEXT: vmovss {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test18:
; AVX2: # %bb.0:
; AVX2-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
; AVX2-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,0,1,1]
; AVX2-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
; AVX2-NEXT: vmovss {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
; AVX2-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1343,7 +1343,7 @@ define <2 x double> @test_fminimumnum_vector_nan(<2 x double> %x) {
; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; AVX-NEXT: vmovhpd {{.*#+}} xmm2 = xmm1[0],mem[0]
; AVX-NEXT: vminpd %xmm0, %xmm2, %xmm0
; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; AVX-NEXT: retq
;
; AVX10_2-LABEL: test_fminimumnum_vector_nan:
Expand Down
Loading
Loading