Skip to content

Commit 28889ca

Browse files
committed
disable EIGEN_FAST_MATH and use_fast_math
test=develop
1 parent d770b9b commit 28889ca

File tree

4 files changed

+16
-1
lines changed

4 files changed

+16
-1
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ option(WITH_INFERENCE "Compile fluid inference library" ON)
7272
option(WITH_INFERENCE_API_TEST "Test fluid inference high-level api interface" OFF)
7373
option(WITH_SYSTEM_BLAS "Use system blas library" OFF)
7474
option(PY_VERSION "Compile PaddlePaddle with python3 support" ${PY_VERSION})
75+
option(WITH_FAST_MATH "Make use of fast math library" OFF)
7576

7677
# PY_VERSION
7778
if(NOT PY_VERSION)

cmake/cuda.cmake

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,10 @@ list(APPEND CUDA_NVCC_FLAGS "-std=c++11")
175175
list(APPEND CUDA_NVCC_FLAGS "-Xcompiler -fPIC")
176176
endif(NOT WIN32)
177177

178-
list(APPEND CUDA_NVCC_FLAGS "--use_fast_math")
178+
if(WITH_FAST_MATH)
179+
# Make use of the fast math library; see https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html
180+
list(APPEND CUDA_NVCC_FLAGS "--use_fast_math")
181+
endif()
179182
# in cuda9, suppress cuda warning on eigen
180183
list(APPEND CUDA_NVCC_FLAGS "-w")
181184
# Set :expt-relaxed-constexpr to suppress Eigen warnings

cmake/external/eigen.cmake

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,14 @@ INCLUDE(ExternalProject)
33
SET(EIGEN_SOURCE_DIR ${THIRD_PARTY_PATH}/eigen3)
44
SET(EIGEN_INCLUDE_DIR ${EIGEN_SOURCE_DIR}/src/extern_eigen3)
55
INCLUDE_DIRECTORIES(${EIGEN_INCLUDE_DIR})
6+
if(NOT WITH_FAST_MATH)
7+
# EIGEN_FAST_MATH: https://eigen.tuxfamily.org/dox/TopicPreprocessorDirectives.html
8+
# enables some optimizations which might affect the accuracy of the result.
9+
# This currently enables the SSE vectorization of sin() and cos(),
10+
# and speeds up sqrt() for single precision.
11+
# Defined to 1 by default. Define it to 0 to disable.
12+
add_definitions(-DEIGEN_FAST_MATH=0)
13+
endif()
614

715
if(WITH_AMD_GPU)
816
ExternalProject_Add(

paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@ void SetConfig(AnalysisConfig *cfg) {
2727
cfg->device = 0;
2828
cfg->enable_ir_optim = true;
2929
cfg->specify_input_name = true;
30+
#ifdef PADDLE_WITH_MKLDNN
31+
cfg->_use_mkldnn = true;
32+
#endif
3033
}
3134

3235
void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {

0 commit comments

Comments
 (0)