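Move floating-point optimization flags from CMakeLists.txt to `#pragma GCC optimize` directives ahead of interp_square and interp_cube in src/spreadinterp.cpp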

DiamonDinoia · DiamonDinoia · commit 7951eda91348 · 2024-07-03T12:21:28.000-04:00
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -12,18 +12,6 @@ if (CMAKE_CXX_COMPILER_ID IN_LIST GNU_LIKE_FRONTENDS)
             -funroll-loops
             -ffp-contract=fast
             -fexcess-precision=fast
-            -fno-math-errno
-            -fno-signed-zeros
-            -fno-trapping-math
-            -fassociative-math
-            -freciprocal-math
-            # These flags make the spreader 20% faster in 2D and up to 100% faster in 3D with no loss of accuracy.
-            # Why no -fast-math or -funsafe-math-optimizations ?
-            # It breaks the code, and influences other software that depend on finufft bu changing the rounding mode.
-            # GCC-13 recently fixed this issue:
-            # https://github.com/llvm/llvm-project/issues/57589
-            # https://gcc.gnu.org/gcc-13/changes.html
-            # https://trofi.github.io/posts/302-Ofast-and-ffast-math-non-local-effects.html
     )
     set(FINUFFT_CXX_FLAGS_RELWITHDEBINFO -g ${FINUFFT_CXX_FLAGS_RELEASE})
 endif ()
diff --git a/src/spreadinterp.cpp b/src/spreadinterp.cpp
@@ -1014,6 +1014,11 @@ FINUFFT_NEVER_INLINE static void interp_square_wrap(
   target[1] = out[1];
 }
 
+#pragma GCC optimize("no-math-errno")
+#pragma GCC optimize("no-signed-zeros")
+#pragma GCC optimize("no-trapping-math")
+#pragma GCC optimize("associative-math")
+#pragma GCC optimize("reciprocal-math")
 template<uint8_t ns, class simd_type>
 void interp_square(FLT *FINUFFT_RESTRICT target, const FLT *du, const FLT *ker1,
                    const FLT *ker2, const BIGINT i1, const BIGINT i2, const UBIGINT N1,
@@ -1213,6 +1218,11 @@ FINUFFT_NEVER_INLINE static void interp_cube_wrapped(
   target[1] = out[1];
 }
 
+#pragma GCC optimize("no-math-errno")
+#pragma GCC optimize("no-signed-zeros")
+#pragma GCC optimize("no-trapping-math")
+#pragma GCC optimize("associative-math")
+#pragma GCC optimize("reciprocal-math")
 template<uint8_t ns, class simd_type>
 void interp_cube(FLT *FINUFFT_RESTRICT target, const FLT *du, const FLT *ker1,
                  const FLT *ker2, const FLT *ker3, const BIGINT i1, const BIGINT i2,