Skip to content

Commit ac9cd43

Browse files
Alcanderiancaic99
andauthored
Feature: port some math functions to abacus (#1780)
* Feature: port some libm source file to abacus: sincos, exp, cexp. and apply to ForceStress * Fix: fix stress_func_ewa, stress_func_loc `double` to `FPTYPE` * Update CMakeLists.txt Co-authored-by: Chun Cai <[email protected]> * Doc: add `Build math library from source` into install.md * Doc: add doc for libm * Refactor: use cmake to control libm compilation Co-authored-by: Chun Cai <[email protected]>
1 parent 461b51a commit ac9cd43

File tree

17 files changed

+2726
-33
lines changed

17 files changed

+2726
-33
lines changed

CMakeLists.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ option(INFO "Enable gathering of math library information" OFF)
2323
option(ENABLE_COVERAGE "Enable coverage build." OFF)
2424
option(ENABLE_LIBRI "Enable EXX with LibRI." OFF)
2525
option(USE_ELPA "Enable ELPA" ON)
26+
option(USE_ABACUS_LIBM "Build libmath from source to speed up." ON)
2627
option(GIT_SUBMODULE "Check submodules during build" ON)
2728
# Do not enable it if generated code will run on different CPUs
2829
option(ENABLE_NATIVE_OPTIMIZATION "Enable compilation optimization for the native machine's CPU type" OFF)
@@ -92,6 +93,15 @@ if(NOT CMAKE_BUILD_TYPE)
9293
add_compile_options(-O3 -g)
9394
endif()
9495

96+
# Force turn off USE_ABACUS_LIBM on Intel Compiler
97+
if (("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel") OR
98+
("${CMAKE_CXX_COMPILER_ID}" STREQUAL "IntelLLVM"))
99+
set(USE_ABACUS_LIBM OFF)
100+
endif()
101+
if (USE_ABACUS_LIBM)
102+
add_definitions(-DUSE_ABACUS_LIBM)
103+
endif()
104+
95105
if (ENABLE_NATIVE_OPTIMIZATION)
96106
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=native -mtune=native")
97107
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native -mtune=native")

docs/advanced/install.md

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,19 @@ To build NVIDIA GPU support for ABACUS, define `USE_CUDA` flag. You can also spe
7979
cmake -B build -DUSE_CUDA=1
8080
```
8181

82+
## Build math library from source
83+
84+
> Note: This flag is **enabled by default**. It will get better performance than the standard implementation on `gcc` and `clang`. But it **will be disabled** when using `Intel Compiler` since the math functions will get wrong results and the performance is also unexpectly poor.
85+
86+
To build math functions from source code, instead of using c++ standard implementation, define `USE_ABACUS_LIBM` flag.
87+
88+
Currently supported math functions:
89+
`sin`, `cos`, `sincos`, `exp`, `cexp`
90+
91+
```bash
92+
cmake -B build -DUSE_ABACUS_LIBM=1
93+
```
94+
8295
## Build ABACUS with make
8396

8497
> Note: We suggest using CMake to configure and compile.
@@ -258,4 +271,4 @@ To use new EXX, you need two libraries: LibRI and LibComm and need to define `LI
258271
```makefile
259272
make LIBRI_DIR=/public/software/LibRI LIBCOMM_DIR=/public/software/LibComm
260273
```
261-
directly.
274+
directly.

source/module_base/CMakeLists.txt

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,12 @@
1+
if (USE_ABACUS_LIBM)
2+
list (APPEND LIBM_SRC
3+
libm/branred.cpp
4+
libm/cexp.cpp
5+
libm/exp.cpp
6+
libm/sincos.cpp
7+
)
8+
endif()
9+
110
add_library(
211
base
312
OBJECT
@@ -34,6 +43,7 @@ add_library(
3443
tool_title.cpp
3544
ylm.cpp
3645
abfs-vector3_order.cpp
46+
${LIBM_SRC}
3747
)
3848

3949
if(ENABLE_COVERAGE)
@@ -43,4 +53,7 @@ endif()
4353
if(BUILD_TESTING)
4454
add_subdirectory(test)
4555
add_subdirectory(kernels/test)
56+
if (USE_ABACUS_LIBM)
57+
add_subdirectory(libm/test)
58+
endif()
4659
endif()

source/module_base/libm/LICENCE

Lines changed: 502 additions & 0 deletions
Large diffs are not rendered by default.

source/module_base/libm/README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Math fucntions source code
2+
3+
These math functions is ported form glibc-2.36/math (https://github.com/bminor/glibc/tree/release/2.36/master/math)
4+
5+
The new version of sin/cos/exp implementation is much faster than glibc-2.27.
6+
7+
In order to avoid the performance impact of different versions of glibc installed on systems and different versions of math functions provided by compilers, we choose to migrate these source codes.
Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
//==========================================================
2+
// AUTHOR : [email protected]
3+
// DATE : 2023-01-06
4+
//==========================================================
5+
6+
#include <math.h>
7+
#include <endian.h>
8+
9+
namespace ModuleBase
10+
{
11+
namespace libm
12+
{
13+
14+
typedef int int4;
15+
typedef union { unsigned int u[2]; int4 i[2]; double x; double d; } mynumber;
16+
17+
#define max(x, y) (((y) > (x)) ? (y) : (x))
18+
#define min(x, y) (((y) < (x)) ? (y) : (x))
19+
20+
#if (__BYTE_ORDER == __BIG_ENDIAN)
21+
22+
#define HIGH_HALF 0
23+
#define LOW_HALF 1
24+
25+
static const mynumber
26+
27+
/**/ t576 = {{0x63f00000, 0x00000000}}, /* 2 ^ 576 */
28+
/**/ tm600 = {{0x1a700000, 0x00000000}}, /* 2 ^- 600 */
29+
/**/ tm24 = {{0x3e700000, 0x00000000}}, /* 2 ^- 24 */
30+
/**/ big = {{0x43380000, 0x00000000}}, /* 6755399441055744 */
31+
/**/ big1 = {{0x43580000, 0x00000000}}, /* 27021597764222976 */
32+
/**/ hp0 = {{0x3FF921FB, 0x54442D18}} ,/* 1.5707963267948966 */
33+
/**/ hp1 = {{0x3C91A626, 0x33145C07}} ,/* 6.123233995736766e-17 */
34+
/**/ mp1 = {{0x3FF921FB, 0x58000000}}, /* 1.5707963407039642 */
35+
/**/ mp2 = {{0xBE4DDE97, 0x40000000}}; /*-1.3909067675399456e-08 */
36+
37+
#endif
38+
39+
#if (__BYTE_ORDER == __LITTLE_ENDIAN)
40+
41+
#define HIGH_HALF 1
42+
#define LOW_HALF 0
43+
44+
static const mynumber
45+
46+
/**/ t576 = {{0x00000000, 0x63f00000}}, /* 2 ^ 576 */
47+
/**/ tm600 = {{0x00000000, 0x1a700000}}, /* 2 ^- 600 */
48+
/**/ tm24 = {{0x00000000, 0x3e700000}}, /* 2 ^- 24 */
49+
/**/ big = {{0x00000000, 0x43380000}}, /* 6755399441055744 */
50+
/**/ big1 = {{0x00000000, 0x43580000}}, /* 27021597764222976 */
51+
/**/ hp0 = {{0x54442D18, 0x3FF921FB}}, /* 1.5707963267948966 */
52+
/**/ hp1 = {{0x33145C07, 0x3C91A626}}, /* 6.123233995736766e-17 */
53+
/**/ mp1 = {{0x58000000, 0x3FF921FB}}, /* 1.5707963407039642 */
54+
/**/ mp2 = {{0x40000000, 0xBE4DDE97}}; /*-1.3909067675399456e-08 */
55+
56+
#endif
57+
58+
static const double toverp[75] = { /* 2/ PI base 24*/
59+
10680707.0, 7228996.0, 1387004.0, 2578385.0, 16069853.0,
60+
12639074.0, 9804092.0, 4427841.0, 16666979.0, 11263675.0,
61+
12935607.0, 2387514.0, 4345298.0, 14681673.0, 3074569.0,
62+
13734428.0, 16653803.0, 1880361.0, 10960616.0, 8533493.0,
63+
3062596.0, 8710556.0, 7349940.0, 6258241.0, 3772886.0,
64+
3769171.0, 3798172.0, 8675211.0, 12450088.0, 3874808.0,
65+
9961438.0, 366607.0, 15675153.0, 9132554.0, 7151469.0,
66+
3571407.0, 2607881.0, 12013382.0, 4155038.0, 6285869.0,
67+
7677882.0, 13102053.0, 15825725.0, 473591.0, 9065106.0,
68+
15363067.0, 6271263.0, 9264392.0, 5636912.0, 4652155.0,
69+
7056368.0, 13614112.0, 10155062.0, 1944035.0, 9527646.0,
70+
15080200.0, 6658437.0, 6231200.0, 6832269.0, 16767104.0,
71+
5075751.0, 3212806.0, 1398474.0, 7579849.0, 6349435.0,
72+
12618859.0, 4703257.0, 12806093.0, 14477321.0, 2786137.0,
73+
12875403.0, 9837734.0, 14528324.0, 13719321.0, 343717.0 };
74+
75+
/* CN = 1+2**27 = '41a0000002000000' IEEE double format. Use it to split a
76+
double for better accuracy. */
77+
#define CN 134217729.0
78+
static const double split = CN; /* 2^27 + 1 */
79+
80+
/*******************************************************************/
81+
/* Routine branred() performs range reduction of a double number */
82+
/* x into Double length number a+aa,such that */
83+
/* x=n*pi/2+(a+aa), abs(a+aa)<pi/4, n=0,+-1,+-2,.... */
84+
/* Routine return integer (n mod 4) */
85+
/*******************************************************************/
86+
int
87+
__branred(double x, double *a, double *aa)
88+
{
89+
int i,k;
90+
mynumber u,gor;
91+
double r[6],s,t,sum,b,bb,sum1,sum2,b1,bb1,b2,bb2,x1,x2,t1,t2;
92+
93+
x*=tm600.x;
94+
t=x*split; /* split x to two numbers */
95+
x1=t-(t-x);
96+
x2=x-x1;
97+
sum=0;
98+
u.x = x1;
99+
k = (u.i[HIGH_HALF]>>20)&2047;
100+
k = (k-450)/24;
101+
if (k<0)
102+
k=0;
103+
gor.x = t576.x;
104+
gor.i[HIGH_HALF] -= ((k*24)<<20);
105+
for (i=0;i<6;i++)
106+
{ r[i] = x1*toverp[k+i]*gor.x; gor.x *= tm24.x; }
107+
for (i=0;i<3;i++) {
108+
s=(r[i]+big.x)-big.x;
109+
sum+=s;
110+
r[i]-=s;
111+
}
112+
t=0;
113+
for (i=0;i<6;i++)
114+
t+=r[5-i];
115+
bb=(((((r[0]-t)+r[1])+r[2])+r[3])+r[4])+r[5];
116+
s=(t+big.x)-big.x;
117+
sum+=s;
118+
t-=s;
119+
b=t+bb;
120+
bb=(t-b)+bb;
121+
s=(sum+big1.x)-big1.x;
122+
sum-=s;
123+
b1=b;
124+
bb1=bb;
125+
sum1=sum;
126+
sum=0;
127+
128+
u.x = x2;
129+
k = (u.i[HIGH_HALF]>>20)&2047;
130+
k = (k-450)/24;
131+
if (k<0)
132+
k=0;
133+
gor.x = t576.x;
134+
gor.i[HIGH_HALF] -= ((k*24)<<20);
135+
for (i=0;i<6;i++)
136+
{ r[i] = x2*toverp[k+i]*gor.x; gor.x *= tm24.x; }
137+
for (i=0;i<3;i++) {
138+
s=(r[i]+big.x)-big.x;
139+
sum+=s;
140+
r[i]-=s;
141+
}
142+
t=0;
143+
for (i=0;i<6;i++)
144+
t+=r[5-i];
145+
bb=(((((r[0]-t)+r[1])+r[2])+r[3])+r[4])+r[5];
146+
s=(t+big.x)-big.x;
147+
sum+=s;
148+
t-=s;
149+
b=t+bb;
150+
bb=(t-b)+bb;
151+
s=(sum+big1.x)-big1.x;
152+
sum-=s;
153+
154+
b2=b;
155+
bb2=bb;
156+
sum2=sum;
157+
158+
sum=sum1+sum2;
159+
b=b1+b2;
160+
bb = (fabs(b1)>fabs(b2))? (b1-b)+b2 : (b2-b)+b1;
161+
if (b > 0.5)
162+
{b-=1.0; sum+=1.0;}
163+
else if (b < -0.5)
164+
{b+=1.0; sum-=1.0;}
165+
s=b+(bb+bb1+bb2);
166+
t=((b-s)+bb)+(bb1+bb2);
167+
b=s*split;
168+
t1=b-(b-s);
169+
t2=s-t1;
170+
b=s*hp0.x;
171+
bb=(((t1*mp1.x-b)+t1*mp2.x)+t2*mp1.x)+(t2*mp2.x+s*hp1.x+t*hp0.x);
172+
s=b+bb;
173+
t=(b-s)+bb;
174+
*a=s;
175+
*aa=t;
176+
return ((int) sum)&3; /* return quater of unit circle */
177+
}
178+
179+
};
180+
};

0 commit comments

Comments
 (0)