Skip to content

Commit 0e5d6af

Browse files
committed
[CRT:MATH] Implement x86 sse2 math functions
These are just wrappers around the normal functions and lack any optimization.
1 parent 5c6912f commit 0e5d6af

File tree

3 files changed

+300
-32
lines changed

3 files changed

+300
-32
lines changed

dll/win32/ucrtbase/ucrtbase.spec

Lines changed: 32 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -133,27 +133,27 @@
133133
@ cdecl __iswcsym(long)
134134
@ cdecl __iswcsymf(long)
135135
@ stdcall -arch=arm __jump_unwind(ptr ptr) ntdll.__jump_unwind
136-
@ cdecl -stub -arch=i386 -norelay __libm_sse2_acos()
137-
@ cdecl -stub -arch=i386 -norelay __libm_sse2_acosf()
138-
@ cdecl -stub -arch=i386 -norelay __libm_sse2_asin()
139-
@ cdecl -stub -arch=i386 -norelay __libm_sse2_asinf()
140-
@ cdecl -stub -arch=i386 -norelay __libm_sse2_atan()
141-
@ cdecl -stub -arch=i386 -norelay __libm_sse2_atan2()
142-
@ cdecl -stub -arch=i386 -norelay __libm_sse2_atanf()
143-
@ cdecl -stub -arch=i386 -norelay __libm_sse2_cos()
144-
@ cdecl -stub -arch=i386 -norelay __libm_sse2_cosf()
145-
@ cdecl -stub -arch=i386 -norelay __libm_sse2_exp()
146-
@ cdecl -stub -arch=i386 -norelay __libm_sse2_expf()
147-
@ cdecl -stub -arch=i386 -norelay __libm_sse2_log()
148-
@ cdecl -stub -arch=i386 -norelay __libm_sse2_log10()
149-
@ cdecl -stub -arch=i386 -norelay __libm_sse2_log10f()
150-
@ cdecl -stub -arch=i386 -norelay __libm_sse2_logf()
151-
@ cdecl -stub -arch=i386 -norelay __libm_sse2_pow()
152-
@ cdecl -stub -arch=i386 -norelay __libm_sse2_powf()
153-
@ cdecl -stub -arch=i386 -norelay __libm_sse2_sin()
154-
@ cdecl -stub -arch=i386 -norelay __libm_sse2_sinf()
155-
@ cdecl -stub -arch=i386 -norelay __libm_sse2_tan()
156-
@ cdecl -stub -arch=i386 -norelay __libm_sse2_tanf()
136+
@ cdecl -arch=i386 -norelay __libm_sse2_acos()
137+
@ cdecl -arch=i386 -norelay __libm_sse2_acosf()
138+
@ cdecl -arch=i386 -norelay __libm_sse2_asin()
139+
@ cdecl -arch=i386 -norelay __libm_sse2_asinf()
140+
@ cdecl -arch=i386 -norelay __libm_sse2_atan()
141+
@ cdecl -arch=i386 -norelay __libm_sse2_atan2()
142+
@ cdecl -arch=i386 -norelay __libm_sse2_atanf()
143+
@ cdecl -arch=i386 -norelay __libm_sse2_cos()
144+
@ cdecl -arch=i386 -norelay __libm_sse2_cosf()
145+
@ cdecl -arch=i386 -norelay __libm_sse2_exp()
146+
@ cdecl -arch=i386 -norelay __libm_sse2_expf()
147+
@ cdecl -arch=i386 -norelay __libm_sse2_log()
148+
@ cdecl -arch=i386 -norelay __libm_sse2_log10()
149+
@ cdecl -arch=i386 -norelay __libm_sse2_log10f()
150+
@ cdecl -arch=i386 -norelay __libm_sse2_logf()
151+
@ cdecl -arch=i386 -norelay __libm_sse2_pow()
152+
@ cdecl -arch=i386 -norelay __libm_sse2_powf()
153+
@ cdecl -arch=i386 -norelay __libm_sse2_sin()
154+
@ cdecl -arch=i386 -norelay __libm_sse2_sinf()
155+
@ cdecl -arch=i386 -norelay __libm_sse2_tan()
156+
@ cdecl -arch=i386 -norelay __libm_sse2_tanf()
157157
@ cdecl __p___argc()
158158
@ cdecl __p___argv()
159159
@ cdecl __p___wargv()
@@ -602,17 +602,17 @@
602602
@ cdecl _ldunscale(ptr ptr) _dunscale
603603
@ cdecl _lfind(ptr ptr ptr long ptr)
604604
@ cdecl _lfind_s(ptr ptr ptr long ptr ptr)
605-
@ cdecl -stub -arch=i386 -norelay _libm_sse2_acos_precise() #__libm_sse2_acos
606-
@ cdecl -stub -arch=i386 -norelay _libm_sse2_asin_precise() #__libm_sse2_asin
607-
@ cdecl -stub -arch=i386 -norelay _libm_sse2_atan_precise() #__libm_sse2_atan
608-
@ cdecl -stub -arch=i386 -norelay _libm_sse2_cos_precise() #__libm_sse2_cos
609-
@ cdecl -stub -arch=i386 -norelay _libm_sse2_exp_precise() #__libm_sse2_exp
610-
@ cdecl -stub -arch=i386 -norelay _libm_sse2_log10_precise() #__libm_sse2_log10
611-
@ cdecl -stub -arch=i386 -norelay _libm_sse2_log_precise() #__libm_sse2_log
612-
@ cdecl -stub -arch=i386 -norelay _libm_sse2_pow_precise() #__libm_sse2_pow
613-
@ cdecl -stub -arch=i386 -norelay _libm_sse2_sin_precise() #__libm_sse2_sin
614-
@ cdecl -stub -arch=i386 -norelay _libm_sse2_sqrt_precise() #__libm_sse2_sqrt
615-
@ cdecl -stub -arch=i386 -norelay _libm_sse2_tan_precise() #__libm_sse2_tan
605+
@ cdecl -arch=i386 -norelay _libm_sse2_acos_precise()
606+
@ cdecl -arch=i386 -norelay _libm_sse2_asin_precise()
607+
@ cdecl -arch=i386 -norelay _libm_sse2_atan_precise()
608+
@ cdecl -arch=i386 -norelay _libm_sse2_cos_precise()
609+
@ cdecl -arch=i386 -norelay _libm_sse2_exp_precise()
610+
@ cdecl -arch=i386 -norelay _libm_sse2_log10_precise()
611+
@ cdecl -arch=i386 -norelay _libm_sse2_log_precise()
612+
@ cdecl -arch=i386 -norelay _libm_sse2_pow_precise()
613+
@ cdecl -arch=i386 -norelay _libm_sse2_sin_precise()
614+
@ cdecl -arch=i386 -norelay _libm_sse2_sqrt_precise()
615+
@ cdecl -arch=i386 -norelay _libm_sse2_tan_precise()
616616
@ cdecl _loaddll(str)
617617
@ cdecl -arch=win64 _local_unwind(ptr ptr) ntdll._local_unwind
618618
@ cdecl -arch=i386 _local_unwind2(ptr long)

sdk/lib/crt/math/i386/libm_sse2.c

Lines changed: 267 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,267 @@
1+
/*
2+
* PROJECT: ReactOS CRT
3+
* LICENSE: MIT (https://spdx.org/licenses/MIT)
4+
* PURPOSE: Simplified implementation of __libm_sse2_*
5+
* COPYRIGHT: Copyright 2025 Timo Kreuzer <[email protected]>
6+
*/
7+
8+
#include <emmintrin.h>
9+
#include <math.h>
10+
11+
/*
 * Compiler setup for the wrappers in this file.
 *
 * MSVC proper (not clang): the math routines called below are listed in
 * #pragma function -- presumably so that real calls are emitted rather than
 * intrinsic expansions (confirm against the MSVC documentation) -- and
 * __ATTRIBUTE_SSE2__ expands to nothing.
 * Every other compiler: __ATTRIBUTE_SSE2__ tags each function with
 * target("sse2").
 */
#if defined(_MSC_VER) && !defined(__clang__)
12+
#pragma function(acos,asin,atan,atan2,cos)
13+
#pragma function(exp,log,log10,pow,sin,tan)
14+
#define __ATTRIBUTE_SSE2__
15+
#else
16+
#define __ATTRIBUTE_SSE2__ __attribute__((__target__("sse2")))
17+
#endif
18+
19+
/* Turns off GCC's -Wuninitialized diagnostic from this point on. */
#ifdef __GNUC__
20+
#pragma GCC diagnostic ignored "-Wuninitialized"
21+
#endif
22+
23+
/*
 * Double-precision arc cosine taking and returning an SSE2 vector.
 *
 * The scalar is pulled out of Xmm0 with _mm_cvtsd_f64, handed to acos(),
 * and the result is repacked with _mm_set_sd.
 */
__ATTRIBUTE_SSE2__ __m128d __libm_sse2_acos(__m128d Xmm0)
{
    return _mm_set_sd(acos(_mm_cvtsd_f64(Xmm0)));
}
29+
30+
/*
 * Single-precision arc cosine taking and returning an SSE2 vector.
 *
 * The low float of Xmm0 is widened to double, acos() is evaluated in double
 * precision, and the result is narrowed back to float with _mm_cvtpd_ps.
 */
__ATTRIBUTE_SSE2__ __m128 __libm_sse2_acosf(__m128 Xmm0)
{
    /* _mm_cvtss_sd consumes its first operand, so feed it a zeroed vector
       rather than reading the variable that is still being initialized. */
    __m128d Xmm0d = _mm_cvtss_sd(_mm_setzero_pd(), Xmm0);
    double x = _mm_cvtsd_f64(Xmm0d);
    double result = acos(x);
    __m128d result128 = _mm_set_sd(result);
    return _mm_cvtpd_ps(result128);
}
38+
39+
/*
 * Double-precision arc sine taking and returning an SSE2 vector.
 *
 * Unpacks the scalar from Xmm0, forwards it to asin(), and wraps the
 * result with _mm_set_sd.
 */
__ATTRIBUTE_SSE2__ __m128d __libm_sse2_asin(__m128d Xmm0)
{
    const double arg = _mm_cvtsd_f64(Xmm0);
    return _mm_set_sd(asin(arg));
}
45+
46+
/*
 * Single-precision arc sine taking and returning an SSE2 vector.
 *
 * The low float of Xmm0 is widened to double, asin() is evaluated in double
 * precision, and the result is narrowed back to float with _mm_cvtpd_ps.
 */
__ATTRIBUTE_SSE2__ __m128 __libm_sse2_asinf(__m128 Xmm0)
{
    /* _mm_cvtss_sd consumes its first operand, so feed it a zeroed vector
       rather than reading the variable that is still being initialized. */
    __m128d Xmm0d = _mm_cvtss_sd(_mm_setzero_pd(), Xmm0);
    double x = _mm_cvtsd_f64(Xmm0d);
    double result = asin(x);
    __m128d result128 = _mm_set_sd(result);
    return _mm_cvtpd_ps(result128);
}
54+
55+
/*
 * Double-precision arc tangent taking and returning an SSE2 vector.
 *
 * Extracts the scalar from Xmm0 via _mm_cvtsd_f64, calls atan(), and
 * returns the value packed by _mm_set_sd.
 */
__ATTRIBUTE_SSE2__ __m128d __libm_sse2_atan(__m128d Xmm0)
{
    return _mm_set_sd(atan(_mm_cvtsd_f64(Xmm0)));
}
61+
62+
/*
 * Single-precision arc tangent taking and returning an SSE2 vector.
 *
 * The low float of Xmm0 is widened to double, atan() is evaluated in double
 * precision, and the result is narrowed back to float with _mm_cvtpd_ps.
 */
__ATTRIBUTE_SSE2__ __m128 __libm_sse2_atanf(__m128 Xmm0)
{
    /* _mm_cvtss_sd consumes its first operand, so feed it a zeroed vector
       rather than reading the variable that is still being initialized. */
    __m128d Xmm0d = _mm_cvtss_sd(_mm_setzero_pd(), Xmm0);
    double x = _mm_cvtsd_f64(Xmm0d);
    double result = atan(x);
    __m128d result128 = _mm_set_sd(result);
    return _mm_cvtpd_ps(result128);
}
70+
71+
/*
 * Double-precision two-argument arc tangent on SSE2 vectors.
 *
 * The scalar from Xmm0 becomes the first argument of atan2() and the scalar
 * from Xmm1 the second; the locals are named after atan2's (y, x) order.
 */
__ATTRIBUTE_SSE2__ __m128d __libm_sse2_atan2(__m128d Xmm0, __m128d Xmm1)
{
    const double y = _mm_cvtsd_f64(Xmm0);
    const double x = _mm_cvtsd_f64(Xmm1);

    return _mm_set_sd(atan2(y, x));
}
78+
79+
/*
 * Double-precision cosine taking and returning an SSE2 vector.
 *
 * Reads the scalar out of Xmm0, evaluates cos() on it, and hands back the
 * value packed by _mm_set_sd.
 */
__ATTRIBUTE_SSE2__ __m128d __libm_sse2_cos(__m128d Xmm0)
{
    const double angle = _mm_cvtsd_f64(Xmm0);
    return _mm_set_sd(cos(angle));
}
85+
86+
/*
 * Single-precision cosine taking and returning an SSE2 vector.
 *
 * The low float of Xmm0 is widened to double, cos() is evaluated in double
 * precision, and the result is narrowed back to float with _mm_cvtpd_ps.
 */
__ATTRIBUTE_SSE2__ __m128 __libm_sse2_cosf(__m128 Xmm0)
{
    /* _mm_cvtss_sd consumes its first operand, so feed it a zeroed vector
       rather than reading the variable that is still being initialized. */
    __m128d Xmm0d = _mm_cvtss_sd(_mm_setzero_pd(), Xmm0);
    double x = _mm_cvtsd_f64(Xmm0d);
    double result = cos(x);
    __m128d result128 = _mm_set_sd(result);
    return _mm_cvtpd_ps(result128);
}
94+
95+
/*
 * Double-precision exponential taking and returning an SSE2 vector.
 *
 * Takes the scalar from Xmm0, passes it through exp(), and repacks the
 * result with _mm_set_sd.
 */
__ATTRIBUTE_SSE2__ __m128d __libm_sse2_exp(__m128d Xmm0)
{
    return _mm_set_sd(exp(_mm_cvtsd_f64(Xmm0)));
}
101+
102+
/*
 * Single-precision exponential taking and returning an SSE2 vector.
 *
 * The low float of Xmm0 is widened to double, exp() is evaluated in double
 * precision, and the result is narrowed back to float with _mm_cvtpd_ps.
 */
__ATTRIBUTE_SSE2__ __m128 __libm_sse2_expf(__m128 Xmm0)
{
    /* _mm_cvtss_sd consumes its first operand, so feed it a zeroed vector
       rather than reading the variable that is still being initialized. */
    __m128d Xmm0d = _mm_cvtss_sd(_mm_setzero_pd(), Xmm0);
    double x = _mm_cvtsd_f64(Xmm0d);
    double result = exp(x);
    __m128d result128 = _mm_set_sd(result);
    return _mm_cvtpd_ps(result128);
}
110+
111+
/*
 * Double-precision log() wrapper taking and returning an SSE2 vector.
 *
 * The scalar in Xmm0 is extracted, log() is applied, and the value is
 * returned packed by _mm_set_sd.
 */
__ATTRIBUTE_SSE2__ __m128d __libm_sse2_log(__m128d Xmm0)
{
    const double value = _mm_cvtsd_f64(Xmm0);
    return _mm_set_sd(log(value));
}
117+
118+
/*
 * Single-precision log() wrapper taking and returning an SSE2 vector.
 *
 * The low float of Xmm0 is widened to double, log() is evaluated in double
 * precision, and the result is narrowed back to float with _mm_cvtpd_ps.
 */
__ATTRIBUTE_SSE2__ __m128 __libm_sse2_logf(__m128 Xmm0)
{
    /* _mm_cvtss_sd consumes its first operand, so feed it a zeroed vector
       rather than reading the variable that is still being initialized. */
    __m128d Xmm0d = _mm_cvtss_sd(_mm_setzero_pd(), Xmm0);
    double x = _mm_cvtsd_f64(Xmm0d);
    double result = log(x);
    __m128d result128 = _mm_set_sd(result);
    return _mm_cvtpd_ps(result128);
}
126+
127+
/*
 * Double-precision log10() wrapper taking and returning an SSE2 vector.
 *
 * Unpacks the scalar from Xmm0, calls log10(), and wraps the result with
 * _mm_set_sd.
 */
__ATTRIBUTE_SSE2__ __m128d __libm_sse2_log10(__m128d Xmm0)
{
    return _mm_set_sd(log10(_mm_cvtsd_f64(Xmm0)));
}
133+
134+
/*
 * Single-precision log10() wrapper taking and returning an SSE2 vector.
 *
 * The low float of Xmm0 is widened to double, log10() is evaluated in double
 * precision, and the result is narrowed back to float with _mm_cvtpd_ps.
 */
__ATTRIBUTE_SSE2__ __m128 __libm_sse2_log10f(__m128 Xmm0)
{
    /* _mm_cvtss_sd consumes its first operand, so feed it a zeroed vector
       rather than reading the variable that is still being initialized. */
    __m128d Xmm0d = _mm_cvtss_sd(_mm_setzero_pd(), Xmm0);
    double x = _mm_cvtsd_f64(Xmm0d);
    double result = log10(x);
    __m128d result128 = _mm_set_sd(result);
    return _mm_cvtpd_ps(result128);
}
142+
143+
/*
 * Double-precision power function on SSE2 vectors.
 *
 * The scalar from Xmm0 is the first argument of pow() and the scalar from
 * Xmm1 the second; the result is packed with _mm_set_sd.
 */
__ATTRIBUTE_SSE2__ __m128d __libm_sse2_pow(__m128d Xmm0, __m128d Xmm1)
{
    const double base = _mm_cvtsd_f64(Xmm0);
    const double exponent = _mm_cvtsd_f64(Xmm1);

    return _mm_set_sd(pow(base, exponent));
}
150+
151+
/*
 * Single-precision power function on SSE2 vectors.
 *
 * Both operands are read as floats with _mm_cvtss_f32, passed to powf() in
 * (Xmm0, Xmm1) order, and the float result is packed with _mm_set_ss.
 */
__ATTRIBUTE_SSE2__ __m128 __libm_sse2_powf(__m128 Xmm0, __m128 Xmm1)
{
    const float base = _mm_cvtss_f32(Xmm0);
    const float exponent = _mm_cvtss_f32(Xmm1);

    return _mm_set_ss(powf(base, exponent));
}
158+
159+
/*
 * Double-precision sine taking and returning an SSE2 vector.
 *
 * Extracts the scalar from Xmm0, evaluates sin(), and returns the value
 * packed by _mm_set_sd.
 */
__ATTRIBUTE_SSE2__ __m128d __libm_sse2_sin(__m128d Xmm0)
{
    const double angle = _mm_cvtsd_f64(Xmm0);
    return _mm_set_sd(sin(angle));
}
165+
166+
/*
 * Single-precision sine taking and returning an SSE2 vector.
 *
 * The low float of Xmm0 is widened to double, sin() is evaluated in double
 * precision, and the result is narrowed back to float with _mm_cvtpd_ps.
 */
__ATTRIBUTE_SSE2__ __m128 __libm_sse2_sinf(__m128 Xmm0)
{
    /* _mm_cvtss_sd consumes its first operand, so feed it a zeroed vector
       rather than reading the variable that is still being initialized. */
    __m128d Xmm0d = _mm_cvtss_sd(_mm_setzero_pd(), Xmm0);
    double x = _mm_cvtsd_f64(Xmm0d);
    double result = sin(x);
    __m128d result128 = _mm_set_sd(result);
    return _mm_cvtpd_ps(result128);
}
174+
175+
/*
 * Double-precision tangent taking and returning an SSE2 vector.
 *
 * Pulls the scalar out of Xmm0, calls tan(), and repacks the result with
 * _mm_set_sd.
 */
__ATTRIBUTE_SSE2__ __m128d __libm_sse2_tan(__m128d Xmm0)
{
    return _mm_set_sd(tan(_mm_cvtsd_f64(Xmm0)));
}
181+
182+
/*
 * Single-precision tangent taking and returning an SSE2 vector.
 *
 * The low float of Xmm0 is widened to double, tan() is evaluated in double
 * precision, and the result is narrowed back to float with _mm_cvtpd_ps.
 */
__ATTRIBUTE_SSE2__ __m128 __libm_sse2_tanf(__m128 Xmm0)
{
    /* _mm_cvtss_sd consumes its first operand, so feed it a zeroed vector
       rather than reading the variable that is still being initialized. */
    __m128d Xmm0d = _mm_cvtss_sd(_mm_setzero_pd(), Xmm0);
    double x = _mm_cvtsd_f64(Xmm0d);
    double result = tan(x);
    __m128d result128 = _mm_set_sd(result);
    return _mm_cvtpd_ps(result128);
}
190+
191+
/*
 * "Precise" arc cosine entry point on an SSE2 vector.
 *
 * Plain forwarding wrapper: the scalar from Xmm0 goes to acos() and the
 * result is packed with _mm_set_sd; nothing else is done here.
 */
__ATTRIBUTE_SSE2__ __m128d _libm_sse2_acos_precise(__m128d Xmm0)
{
    const double arg = _mm_cvtsd_f64(Xmm0);
    return _mm_set_sd(acos(arg));
}
197+
198+
/*
 * "Precise" arc sine entry point on an SSE2 vector.
 *
 * Plain forwarding wrapper: the scalar from Xmm0 goes to asin() and the
 * result is packed with _mm_set_sd; nothing else is done here.
 */
__ATTRIBUTE_SSE2__ __m128d _libm_sse2_asin_precise(__m128d Xmm0)
{
    return _mm_set_sd(asin(_mm_cvtsd_f64(Xmm0)));
}
204+
205+
/*
 * "Precise" arc tangent entry point on an SSE2 vector.
 *
 * Plain forwarding wrapper: the scalar from Xmm0 goes to atan() and the
 * result is packed with _mm_set_sd; nothing else is done here.
 */
__ATTRIBUTE_SSE2__ __m128d _libm_sse2_atan_precise(__m128d Xmm0)
{
    const double arg = _mm_cvtsd_f64(Xmm0);
    return _mm_set_sd(atan(arg));
}
211+
212+
/*
 * "Precise" cosine entry point on an SSE2 vector.
 *
 * Plain forwarding wrapper: the scalar from Xmm0 goes to cos() and the
 * result is packed with _mm_set_sd; nothing else is done here.
 */
__ATTRIBUTE_SSE2__ __m128d _libm_sse2_cos_precise(__m128d Xmm0)
{
    return _mm_set_sd(cos(_mm_cvtsd_f64(Xmm0)));
}
218+
219+
/*
 * "Precise" exponential entry point on an SSE2 vector.
 *
 * Plain forwarding wrapper: the scalar from Xmm0 goes to exp() and the
 * result is packed with _mm_set_sd; nothing else is done here.
 */
__ATTRIBUTE_SSE2__ __m128d _libm_sse2_exp_precise(__m128d Xmm0)
{
    const double arg = _mm_cvtsd_f64(Xmm0);
    return _mm_set_sd(exp(arg));
}
225+
226+
/*
 * "Precise" log() entry point on an SSE2 vector.
 *
 * Plain forwarding wrapper: the scalar from Xmm0 goes to log() and the
 * result is packed with _mm_set_sd; nothing else is done here.
 */
__ATTRIBUTE_SSE2__ __m128d _libm_sse2_log_precise(__m128d Xmm0)
{
    return _mm_set_sd(log(_mm_cvtsd_f64(Xmm0)));
}
232+
233+
/*
 * "Precise" log10() entry point on an SSE2 vector.
 *
 * Plain forwarding wrapper: the scalar from Xmm0 goes to log10() and the
 * result is packed with _mm_set_sd; nothing else is done here.
 */
__ATTRIBUTE_SSE2__ __m128d _libm_sse2_log10_precise(__m128d Xmm0)
{
    const double arg = _mm_cvtsd_f64(Xmm0);
    return _mm_set_sd(log10(arg));
}
239+
240+
/*
 * "Precise" power entry point on SSE2 vectors.
 *
 * Plain forwarding wrapper: the scalars from Xmm0 and Xmm1 are passed to
 * pow() in that order and the result is packed with _mm_set_sd.
 */
__ATTRIBUTE_SSE2__ __m128d _libm_sse2_pow_precise(__m128d Xmm0, __m128d Xmm1)
{
    const double base = _mm_cvtsd_f64(Xmm0);
    const double exponent = _mm_cvtsd_f64(Xmm1);

    return _mm_set_sd(pow(base, exponent));
}
247+
248+
/*
 * "Precise" sine entry point on an SSE2 vector.
 *
 * Plain forwarding wrapper: the scalar from Xmm0 goes to sin() and the
 * result is packed with _mm_set_sd; nothing else is done here.
 */
__ATTRIBUTE_SSE2__ __m128d _libm_sse2_sin_precise(__m128d Xmm0)
{
    return _mm_set_sd(sin(_mm_cvtsd_f64(Xmm0)));
}
254+
255+
/*
 * "Precise" square root entry point on an SSE2 vector.
 *
 * Plain forwarding wrapper: the scalar from Xmm0 goes to sqrt() and the
 * result is packed with _mm_set_sd; nothing else is done here.
 */
__ATTRIBUTE_SSE2__ __m128d _libm_sse2_sqrt_precise(__m128d Xmm0)
{
    const double arg = _mm_cvtsd_f64(Xmm0);
    return _mm_set_sd(sqrt(arg));
}
261+
262+
/*
 * "Precise" tangent entry point on an SSE2 vector.
 *
 * Plain forwarding wrapper: the scalar from Xmm0 goes to tan() and the
 * result is packed with _mm_set_sd; nothing else is done here.
 */
__ATTRIBUTE_SSE2__ __m128d _libm_sse2_tan_precise(__m128d Xmm0)
{
    return _mm_set_sd(tan(_mm_cvtsd_f64(Xmm0)));
}

sdk/lib/crt/math/math.cmake

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ if(ARCH STREQUAL "i386")
2424
math/i386/cisin.c
2525
math/i386/cisqrt.c
2626
math/i386/ldexp.c
27+
math/i386/libm_sse2.c
2728
)
2829
list(APPEND LIBCNTPR_MATH_ASM_SOURCE
2930
math/i386/alldiv_asm.s

0 commit comments

Comments
 (0)