diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 9d8c123185a7c..f8e1553d07039 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -7777,6 +7777,12 @@ def : Pat<(v2f64 (X86Movsd
           (VCVTSS2SDZrr_Int VR128X:$dst, VR128X:$src)>,
           Requires<[HasAVX512]>;
 
+def : Pat<(v2f64 (X86Movsd
+            (v2f64 VR128X:$dst),
+            (v2f64 (scalar_to_vector (f64 (any_fpextend (loadf32 addr:$src))))))),
+          (VCVTSS2SDZrm_Int VR128X:$dst, ssmem:$src)>,
+          Requires<[HasAVX512]>;
+
 //===----------------------------------------------------------------------===//
 // AVX-512 Vector convert from signed/unsigned integer to float/double
 // and from float/double to signed/unsigned integer
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 6aadb788c851e..71ca383d59767 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -1425,6 +1425,11 @@ def : Pat<(v2f64 (X86Movsd
                     (f64 (any_fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))),
           (VCVTSS2SDrr_Int VR128:$dst, VR128:$src)>;
 
+def : Pat<(v2f64 (X86Movsd
+            (v2f64 VR128:$dst),
+            (v2f64 (scalar_to_vector (f64 (any_fpextend (loadf32 addr:$src))))))),
+          (VCVTSS2SDrm_Int VR128:$dst, ssmem:$src)>;
+
 def : Pat<(v4f32 (X86Movss
                    (v4f32 VR128:$dst),
                    (v4f32 (scalar_to_vector (f32 (any_sint_to_fp GR64:$src)))))),
@@ -1479,6 +1484,11 @@ def : Pat<(v2f64 (X86Movsd
                     (f64 (any_fpextend (f32 (extractelt VR128:$src, (iPTR 0))))))))),
           (CVTSS2SDrr_Int VR128:$dst, VR128:$src)>;
 
+def : Pat<(v2f64 (X86Movsd
+            (v2f64 VR128:$dst),
+            (v2f64 (scalar_to_vector (f64 (any_fpextend (loadf32 addr:$src))))))),
+          (CVTSS2SDrm_Int VR128:$dst, ssmem:$src)>;
+
 def : Pat<(v2f64 (X86Movsd
                    (v2f64 VR128:$dst),
                    (v2f64 (scalar_to_vector (f64 (any_sint_to_fp GR64:$src)))))),
diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
index f6b0df153c260..0f4b1d915e78f 100644
--- a/llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
+++ b/llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll
@@ -711,58 +711,34 @@ define <2 x double> @test_x86_sse2_cvtss2sd_load(<2 x double> %a0, ptr %p1) {
 ; X86-SSE-LABEL: test_x86_sse2_cvtss2sd_load:
 ; X86-SSE:       ## %bb.0:
 ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X86-SSE-NEXT:    ## encoding: [0xf3,0x0f,0x10,0x08]
-; X86-SSE-NEXT:    cvtss2sd %xmm1, %xmm1 ## encoding: [0xf3,0x0f,0x5a,0xc9]
-; X86-SSE-NEXT:    movsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x10,0xc1]
-; X86-SSE-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
+; X86-SSE-NEXT:    cvtss2sd (%eax), %xmm0 ## encoding: [0xf3,0x0f,0x5a,0x00]
 ; X86-SSE-NEXT:    retl ## encoding: [0xc3]
 ;
 ; X86-AVX1-LABEL: test_x86_sse2_cvtss2sd_load:
 ; X86-AVX1:       ## %bb.0:
 ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-AVX1-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X86-AVX1-NEXT:    ## encoding: [0xc5,0xfa,0x10,0x08]
-; X86-AVX1-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf2,0x5a,0xc9]
-; X86-AVX1-NEXT:    vblendps $3, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
-; X86-AVX1-NEXT:    ## xmm0 = xmm1[0,1],xmm0[2,3]
+; X86-AVX1-NEXT:    vcvtss2sd (%eax), %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5a,0x00]
 ; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
 ;
 ; X86-AVX512-LABEL: test_x86_sse2_cvtss2sd_load:
 ; X86-AVX512:       ## %bb.0:
 ; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-AVX512-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X86-AVX512-NEXT:    ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x08]
-; X86-AVX512-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf2,0x5a,0xc9]
-; X86-AVX512-NEXT:    vblendps $3, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
-; X86-AVX512-NEXT:    ## xmm0 = xmm1[0,1],xmm0[2,3]
+; X86-AVX512-NEXT:    vcvtss2sd (%eax), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5a,0x00]
 ; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
 ;
 ; X64-SSE-LABEL: test_x86_sse2_cvtss2sd_load:
 ; X64-SSE:       ## %bb.0:
-; X64-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X64-SSE-NEXT:    ## encoding: [0xf3,0x0f,0x10,0x0f]
-; X64-SSE-NEXT:    cvtss2sd %xmm1, %xmm1 ## encoding: [0xf3,0x0f,0x5a,0xc9]
-; X64-SSE-NEXT:    movsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x10,0xc1]
-; X64-SSE-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
+; X64-SSE-NEXT:    cvtss2sd (%rdi), %xmm0 ## encoding: [0xf3,0x0f,0x5a,0x07]
 ; X64-SSE-NEXT:    retq ## encoding: [0xc3]
 ;
 ; X64-AVX1-LABEL: test_x86_sse2_cvtss2sd_load:
 ; X64-AVX1:       ## %bb.0:
-; X64-AVX1-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X64-AVX1-NEXT:    ## encoding: [0xc5,0xfa,0x10,0x0f]
-; X64-AVX1-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf2,0x5a,0xc9]
-; X64-AVX1-NEXT:    vblendps $3, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
-; X64-AVX1-NEXT:    ## xmm0 = xmm1[0,1],xmm0[2,3]
+; X64-AVX1-NEXT:    vcvtss2sd (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5a,0x07]
 ; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
 ;
 ; X64-AVX512-LABEL: test_x86_sse2_cvtss2sd_load:
 ; X64-AVX512:       ## %bb.0:
-; X64-AVX512-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; X64-AVX512-NEXT:    ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x0f]
-; X64-AVX512-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf2,0x5a,0xc9]
-; X64-AVX512-NEXT:    vblendps $3, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
-; X64-AVX512-NEXT:    ## xmm0 = xmm1[0,1],xmm0[2,3]
+; X64-AVX512-NEXT:    vcvtss2sd (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5a,0x07]
 ; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
   %a1 = load <4 x float>, ptr %p1
   %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
@@ -774,46 +750,34 @@ define <2 x double> @test_x86_sse2_cvtss2sd_load_optsize(<2 x double> %a0, ptr %
 ; X86-SSE-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
 ; X86-SSE:       ## %bb.0:
 ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-SSE-NEXT:    cvtss2sd (%eax), %xmm1 ## encoding: [0xf3,0x0f,0x5a,0x08]
-; X86-SSE-NEXT:    movsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x10,0xc1]
-; X86-SSE-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
+; X86-SSE-NEXT:    cvtss2sd (%eax), %xmm0 ## encoding: [0xf3,0x0f,0x5a,0x00]
 ; X86-SSE-NEXT:    retl ## encoding: [0xc3]
 ;
 ; X86-AVX1-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
 ; X86-AVX1:       ## %bb.0:
 ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-AVX1-NEXT:    vcvtss2sd (%eax), %xmm1, %xmm1 ## encoding: [0xc5,0xf2,0x5a,0x08]
-; X86-AVX1-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x10,0xc1]
-; X86-AVX1-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
+; X86-AVX1-NEXT:    vcvtss2sd (%eax), %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5a,0x00]
 ; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
 ;
 ; X86-AVX512-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
 ; X86-AVX512:       ## %bb.0:
 ; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-AVX512-NEXT:    vcvtss2sd (%eax), %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf2,0x5a,0x08]
-; X86-AVX512-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0xc1]
-; X86-AVX512-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
+; X86-AVX512-NEXT:    vcvtss2sd (%eax), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5a,0x00]
 ; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
 ;
 ; X64-SSE-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
 ; X64-SSE:       ## %bb.0:
-; X64-SSE-NEXT:    cvtss2sd (%rdi), %xmm1 ## encoding: [0xf3,0x0f,0x5a,0x0f]
-; X64-SSE-NEXT:    movsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x10,0xc1]
-; X64-SSE-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
+; X64-SSE-NEXT:    cvtss2sd (%rdi), %xmm0 ## encoding: [0xf3,0x0f,0x5a,0x07]
 ; X64-SSE-NEXT:    retq ## encoding: [0xc3]
 ;
 ; X64-AVX1-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
 ; X64-AVX1:       ## %bb.0:
-; X64-AVX1-NEXT:    vcvtss2sd (%rdi), %xmm1, %xmm1 ## encoding: [0xc5,0xf2,0x5a,0x0f]
-; X64-AVX1-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x10,0xc1]
-; X64-AVX1-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
+; X64-AVX1-NEXT:    vcvtss2sd (%rdi), %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5a,0x07]
 ; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
 ;
 ; X64-AVX512-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
 ; X64-AVX512:       ## %bb.0:
-; X64-AVX512-NEXT:    vcvtss2sd (%rdi), %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf2,0x5a,0x0f]
-; X64-AVX512-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0xc1]
-; X64-AVX512-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
+; X64-AVX512-NEXT:    vcvtss2sd (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5a,0x07]
 ; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
   %a1 = load <4 x float>, ptr %p1
   %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
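
Note: the new patterns key off the generic X86Movsd + any_fpextend(loadf32) DAG, so they are not limited to the auto-upgraded llvm.x86.sse2.cvtss2sd path exercised above. As a minimal standalone sketch (hypothetical, not part of this patch; the function name is made up), plain IR that fpextends a scalar load into the low lane should also select the memory form:

; Expected to fold to cvtss2sd (%rdi), %xmm0 (or the VEX/EVEX forms):
; the fpext-of-load feeding lane 0 lowers to X86Movsd over a
; scalar_to_vector, which is what the new patterns match.
define <2 x double> @cvtss2sd_mem_fold(<2 x double> %a0, ptr %p) {
  %f = load float, ptr %p
  %d = fpext float %f to double
  %v = insertelement <2 x double> %a0, double %d, i32 0
  ret <2 x double> %v
}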