File tree Expand file tree Collapse file tree 4 files changed +16
-1
lines changed
paddle/fluid/inference/tests/api Expand file tree Collapse file tree 4 files changed +16
-1
lines changed Original file line number Diff line number Diff line change @@ -72,6 +72,7 @@ option(WITH_INFERENCE "Compile fluid inference library" ON)
72
72
option (WITH_INFERENCE_API_TEST "Test fluid inference high-level api interface" OFF )
73
73
option (WITH_SYSTEM_BLAS "Use system blas library" OFF )
74
74
option (PY_VERSION "Compile PaddlePaddle with python3 support" ${PY_VERSION} )
75
+ option (WITH_FAST_MATH "Make use of fast math library" OFF )
75
76
76
77
# PY_VERSION
77
78
if (NOT PY_VERSION )
Original file line number Diff line number Diff line change @@ -175,7 +175,10 @@ list(APPEND CUDA_NVCC_FLAGS "-std=c++11")
175
175
list (APPEND CUDA_NVCC_FLAGS "-Xcompiler -fPIC" )
176
176
endif (NOT WIN32 )
177
177
178
- list (APPEND CUDA_NVCC_FLAGS "--use_fast_math" )
178
+ if (WITH_FAST_MATH )
179
+ # Opt in to nvcc's --use_fast_math, which trades floating-point accuracy for speed. See https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html
180
+ list (APPEND CUDA_NVCC_FLAGS "--use_fast_math" )
181
+ endif ()
179
182
# in cuda9, suppress cuda warning on eigen
180
183
list (APPEND CUDA_NVCC_FLAGS "-w" )
181
184
# Set :expt-relaxed-constexpr to suppress Eigen warnings
Original file line number Diff line number Diff line change @@ -3,6 +3,14 @@ INCLUDE(ExternalProject)
3
3
SET (EIGEN_SOURCE_DIR ${THIRD_PARTY_PATH} /eigen3 )
4
4
SET (EIGEN_INCLUDE_DIR ${EIGEN_SOURCE_DIR} /src/extern_eigen3 )
5
5
INCLUDE_DIRECTORIES (${EIGEN_INCLUDE_DIR} )
6
+ if (NOT WITH_FAST_MATH )
7
+ # EIGEN_FAST_MATH: https://eigen.tuxfamily.org/dox/TopicPreprocessorDirectives.html
8
+ # EIGEN_FAST_MATH enables some optimizations which might affect the accuracy of the result.
9
+ # This currently enables the SSE vectorization of sin() and cos(),
10
+ # and speeds up sqrt() for single precision.
11
+ # Defined to 1 by default. Define it to 0 to disable.
12
+ add_definitions (-DEIGEN_FAST_MATH=0 )
13
+ endif ()
6
14
7
15
if (WITH_AMD_GPU )
8
16
ExternalProject_Add (
Original file line number Diff line number Diff line change @@ -27,6 +27,9 @@ void SetConfig(AnalysisConfig *cfg) {
27
27
cfg->device = 0 ;
28
28
cfg->enable_ir_optim = true ;
29
29
cfg->specify_input_name = true ;
30
+ #ifdef PADDLE_WITH_MKLDNN
31
+ cfg->_use_mkldnn = true ;
32
+ #endif
30
33
}
31
34
32
35
void SetInput (std::vector<std::vector<PaddleTensor>> *inputs) {
You can’t perform that action at this time.
0 commit comments