11; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
22; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86
33; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=X64
4+ ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX
45
56define i32 @combine_f32 (float %x ) nounwind {
67; X86-LABEL: combine_f32:
@@ -12,6 +13,11 @@ define i32 @combine_f32(float %x) nounwind {
1213; X64: # %bb.0: # %entry
1314; X64-NEXT: cvtss2si %xmm0, %eax
1415; X64-NEXT: retq
16+ ;
17+ ; AVX-LABEL: combine_f32:
18+ ; AVX: # %bb.0: # %entry
19+ ; AVX-NEXT: vcvtss2si %xmm0, %eax
20+ ; AVX-NEXT: retq
1521entry:
1622 %0 = tail call float @llvm.rint.f32 (float %x )
1723 %1 = fptosi float %0 to i32
@@ -28,6 +34,11 @@ define i32 @combine_f64(double %x) nounwind {
2834; X64: # %bb.0: # %entry
2935; X64-NEXT: cvtsd2si %xmm0, %eax
3036; X64-NEXT: retq
37+ ;
38+ ; AVX-LABEL: combine_f64:
39+ ; AVX: # %bb.0: # %entry
40+ ; AVX-NEXT: vcvtsd2si %xmm0, %eax
41+ ; AVX-NEXT: retq
3142entry:
3243 %0 = tail call double @llvm.rint.f32 (double %x )
3344 %1 = fptosi double %0 to i32
@@ -44,8 +55,98 @@ define <4 x i32> @combine_v4f32(<4 x float> %x) nounwind {
4455; X64: # %bb.0: # %entry
4556; X64-NEXT: cvtps2dq %xmm0, %xmm0
4657; X64-NEXT: retq
58+ ;
59+ ; AVX-LABEL: combine_v4f32:
60+ ; AVX: # %bb.0: # %entry
61+ ; AVX-NEXT: vcvtps2dq %xmm0, %xmm0
62+ ; AVX-NEXT: retq
4763entry:
4864 %0 = tail call <4 x float > @llvm.rint.v4f32 (<4 x float > %x )
4965 %1 = fptosi <4 x float > %0 to <4 x i32 >
5066 ret <4 x i32 > %1
5167}
68+
; Checks that llvm.rint.v2f64 followed by fptosi to <2 x i32> is combined into
; cvtsd2si conversions (which round per the current MXCSR mode) instead of an
; explicit round followed by a truncating convert.
; SSE2 (X86/X64) has no packed f64->i32 form in this lowering, so each lane is
; extracted (unpckhpd for the high lane), converted with scalar cvtsd2si, and
; the two i32 results are reassembled with movd + punpckldq.
; AVX instead shuffles the high lane out (vshufpd), converts both lanes with
; vcvtsd2si, and rebuilds the vector with vmovd + vpinsrd.
69+ define <2 x i32 > @combine_v2f64 (<2 x double > %x ) nounwind {
70+ ; X86-LABEL: combine_v2f64:
71+ ; X86: # %bb.0: # %entry
72+ ; X86-NEXT: cvtsd2si %xmm0, %eax
73+ ; X86-NEXT: movd %eax, %xmm1
74+ ; X86-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
75+ ; X86-NEXT: cvtsd2si %xmm0, %eax
76+ ; X86-NEXT: movd %eax, %xmm0
77+ ; X86-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
78+ ; X86-NEXT: movdqa %xmm1, %xmm0
79+ ; X86-NEXT: retl
80+ ;
81+ ; X64-LABEL: combine_v2f64:
82+ ; X64: # %bb.0: # %entry
83+ ; X64-NEXT: cvtsd2si %xmm0, %eax
84+ ; X64-NEXT: movd %eax, %xmm1
85+ ; X64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
86+ ; X64-NEXT: cvtsd2si %xmm0, %eax
87+ ; X64-NEXT: movd %eax, %xmm0
88+ ; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
89+ ; X64-NEXT: movdqa %xmm1, %xmm0
90+ ; X64-NEXT: retq
91+ ;
92+ ; AVX-LABEL: combine_v2f64:
93+ ; AVX: # %bb.0: # %entry
94+ ; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
95+ ; AVX-NEXT: vcvtsd2si %xmm1, %eax
96+ ; AVX-NEXT: vcvtsd2si %xmm0, %ecx
97+ ; AVX-NEXT: vmovd %ecx, %xmm0
98+ ; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
99+ ; AVX-NEXT: retq
100+ entry:
101+ %0 = tail call <2 x double > @llvm.rint.v2f64 (<2 x double > %x )
102+ %1 = fptosi <2 x double > %0 to <2 x i32 >
103+ ret <2 x i32 > %1
104+ }
105+
; Checks that llvm.rint.v4f64 + fptosi to <4 x i32> is combined into
; MXCSR-rounding conversions with no separate round step.
; SSE2 (X86/X64): the <4 x double> arrives split across %xmm0/%xmm1, so all
; four lanes are converted individually with scalar cvtsd2si (unpckhpd to
; reach each high lane) and the i32 results are reassembled via
; movd + punpckldq + punpcklqdq.
; AVX: the whole <4 x double> is in %ymm0, so a single packed vcvtpd2dq does
; the conversion; vzeroupper is emitted before returning since the upper ymm
; half was used.
106+ define <4 x i32 > @combine_v4f64 (<4 x double > %x ) nounwind {
107+ ; X86-LABEL: combine_v4f64:
108+ ; X86: # %bb.0: # %entry
109+ ; X86-NEXT: cvtsd2si %xmm1, %eax
110+ ; X86-NEXT: movd %eax, %xmm2
111+ ; X86-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
112+ ; X86-NEXT: cvtsd2si %xmm1, %eax
113+ ; X86-NEXT: movd %eax, %xmm1
114+ ; X86-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
115+ ; X86-NEXT: cvtsd2si %xmm0, %eax
116+ ; X86-NEXT: movd %eax, %xmm1
117+ ; X86-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
118+ ; X86-NEXT: cvtsd2si %xmm0, %eax
119+ ; X86-NEXT: movd %eax, %xmm0
120+ ; X86-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
121+ ; X86-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
122+ ; X86-NEXT: movdqa %xmm1, %xmm0
123+ ; X86-NEXT: retl
124+ ;
125+ ; X64-LABEL: combine_v4f64:
126+ ; X64: # %bb.0: # %entry
127+ ; X64-NEXT: cvtsd2si %xmm1, %eax
128+ ; X64-NEXT: movd %eax, %xmm2
129+ ; X64-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
130+ ; X64-NEXT: cvtsd2si %xmm1, %eax
131+ ; X64-NEXT: movd %eax, %xmm1
132+ ; X64-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
133+ ; X64-NEXT: cvtsd2si %xmm0, %eax
134+ ; X64-NEXT: movd %eax, %xmm1
135+ ; X64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
136+ ; X64-NEXT: cvtsd2si %xmm0, %eax
137+ ; X64-NEXT: movd %eax, %xmm0
138+ ; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
139+ ; X64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
140+ ; X64-NEXT: movdqa %xmm1, %xmm0
141+ ; X64-NEXT: retq
142+ ;
143+ ; AVX-LABEL: combine_v4f64:
144+ ; AVX: # %bb.0: # %entry
145+ ; AVX-NEXT: vcvtpd2dq %ymm0, %xmm0
146+ ; AVX-NEXT: vzeroupper
147+ ; AVX-NEXT: retq
148+ entry:
149+ %0 = tail call <4 x double > @llvm.rint.v4f64 (<4 x double > %x )
150+ %1 = fptosi <4 x double > %0 to <4 x i32 >
151+ ret <4 x i32 > %1
152+ }
0 commit comments