Skip to content

Commit 7951eda

Browse files
committed
moved flags to specific functions
1 parent 86ab8aa commit 7951eda

File tree

2 files changed

+10
-12
lines changed

2 files changed

+10
-12
lines changed

CMakeLists.txt

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,18 +12,6 @@ if (CMAKE_CXX_COMPILER_ID IN_LIST GNU_LIKE_FRONTENDS)
1212
-funroll-loops
1313
-ffp-contract=fast
1414
-fexcess-precision=fast
15-
-fno-math-errno
16-
-fno-signed-zeros
17-
-fno-trapping-math
18-
-fassociative-math
19-
-freciprocal-math
20-
# These flags make the spreader 20% faster in 2D and up to 100% faster in 3D with no loss of accuracy.
21-
# Why not -ffast-math or -funsafe-math-optimizations?
22-
# It breaks the code, and influences other software that depends on finufft by changing the rounding mode.
23-
# GCC-13 recently fixed this issue:
24-
# https://github.com/llvm/llvm-project/issues/57589
25-
# https://gcc.gnu.org/gcc-13/changes.html
26-
# https://trofi.github.io/posts/302-Ofast-and-ffast-math-non-local-effects.html
2715
)
2816
set(FINUFFT_CXX_FLAGS_RELWITHDEBINFO -g ${FINUFFT_CXX_FLAGS_RELEASE})
2917
endif ()

src/spreadinterp.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1014,6 +1014,11 @@ FINUFFT_NEVER_INLINE static void interp_square_wrap(
10141014
target[1] = out[1];
10151015
}
10161016

1017+
#pragma GCC optimize("no-math-errno")
1018+
#pragma GCC optimize("no-signed-zeros")
1019+
#pragma GCC optimize("no-trapping-math")
1020+
#pragma GCC optimize("associative-math")
1021+
#pragma GCC optimize("reciprocal-math")
10171022
template<uint8_t ns, class simd_type>
10181023
void interp_square(FLT *FINUFFT_RESTRICT target, const FLT *du, const FLT *ker1,
10191024
const FLT *ker2, const BIGINT i1, const BIGINT i2, const UBIGINT N1,
@@ -1213,6 +1218,11 @@ FINUFFT_NEVER_INLINE static void interp_cube_wrapped(
12131218
target[1] = out[1];
12141219
}
12151220

1221+
#pragma GCC optimize("no-math-errno")
1222+
#pragma GCC optimize("no-signed-zeros")
1223+
#pragma GCC optimize("no-trapping-math")
1224+
#pragma GCC optimize("associative-math")
1225+
#pragma GCC optimize("reciprocal-math")
12161226
template<uint8_t ns, class simd_type>
12171227
void interp_cube(FLT *FINUFFT_RESTRICT target, const FLT *du, const FLT *ker1,
12181228
const FLT *ker2, const FLT *ker3, const BIGINT i1, const BIGINT i2,

0 commit comments

Comments
 (0)