Skip to content

Commit bcb8ea3

Browse files
committed
Merge remote-tracking branch 'ups/develop' into fea/jitkernel_peephole
test=develop
2 parents 8e18217 + 5f2e837 commit bcb8ea3

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

67 files changed

+2368
-284
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,5 +25,6 @@ third_party/
2525
bazel-*
2626
third_party/
2727

28+
build_*
2829
# clion workspace.
2930
cmake-build-*

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ option(WITH_INFERENCE "Compile fluid inference library" ON)
7272
option(WITH_INFERENCE_API_TEST "Test fluid inference high-level api interface" OFF)
7373
option(WITH_SYSTEM_BLAS "Use system blas library" OFF)
7474
option(PY_VERSION "Compile PaddlePaddle with python3 support" ${PY_VERSION})
75+
option(WITH_FAST_MATH "Make use of fast math library, might affect the precision to some extent" ON)
7576

7677
# PY_VERSION
7778
if(NOT PY_VERSION)

cmake/cuda.cmake

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,10 @@ list(APPEND CUDA_NVCC_FLAGS "-std=c++11")
175175
list(APPEND CUDA_NVCC_FLAGS "-Xcompiler -fPIC")
176176
endif(NOT WIN32)
177177

178-
list(APPEND CUDA_NVCC_FLAGS "--use_fast_math")
178+
if(WITH_FAST_MATH)
179+
# Only pass --use_fast_math to nvcc when WITH_FAST_MATH is enabled; the flag may reduce
# floating-point precision. See https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html
180+
list(APPEND CUDA_NVCC_FLAGS "--use_fast_math")
181+
endif()
179182
# in cuda9, suppress cuda warning on eigen
180183
list(APPEND CUDA_NVCC_FLAGS "-w")
181184
# Set :expt-relaxed-constexpr to suppress Eigen warnings

cmake/external/eigen.cmake

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,14 @@ INCLUDE(ExternalProject)
33
SET(EIGEN_SOURCE_DIR ${THIRD_PARTY_PATH}/eigen3)
44
SET(EIGEN_INCLUDE_DIR ${EIGEN_SOURCE_DIR}/src/extern_eigen3)
55
INCLUDE_DIRECTORIES(${EIGEN_INCLUDE_DIR})
6+
if(NOT WITH_FAST_MATH)
7+
# EIGEN_FAST_MATH: https://eigen.tuxfamily.org/dox/TopicPreprocessorDirectives.html
8+
# enables some optimizations which might affect the accuracy of the result.
9+
# It currently enables the SSE vectorization of sin() and cos(),
10+
# and speeds up sqrt() for single precision.
11+
# It is defined to 1 by default; it is defined to 0 here to disable it when WITH_FAST_MATH is off.
12+
add_definitions(-DEIGEN_FAST_MATH=0)
13+
endif()
614

715
if(WITH_AMD_GPU)
816
ExternalProject_Add(

cmake/flags.cmake

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,8 @@ if (APPLE)
157157
# On Mac OS X build fat binaries with x86_64 architectures by default.
158158
set (CMAKE_OSX_ARCHITECTURES "x86_64" CACHE STRING "Build architectures for OSX" FORCE)
159159
endif()
160+
# On Mac OS X, the `register` storage class specifier is deprecated and causes a
# warning error on the latest clang 10.0, so -Wno-deprecated-register is set to silence it.
# NOTE(review): set() replaces whatever COMMON_FLAGS already holds instead of appending to it;
# confirm that no earlier COMMON_FLAGS value is expected to survive on APPLE.
161+
set (COMMON_FLAGS -Wno-deprecated-register)
160162
endif(APPLE)
161163

162164
if(LINUX)

paddle/fluid/API.spec

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,9 @@ paddle.fluid.layers.argsort ArgSpec(args=['input', 'axis', 'name'], varargs=None
198198
paddle.fluid.layers.ones ArgSpec(args=['shape', 'dtype', 'force_cpu'], varargs=None, keywords=None, defaults=(False,))
199199
paddle.fluid.layers.zeros ArgSpec(args=['shape', 'dtype', 'force_cpu'], varargs=None, keywords=None, defaults=(False,))
200200
paddle.fluid.layers.reverse ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=None)
201+
paddle.fluid.layers.has_inf ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None)
202+
paddle.fluid.layers.has_nan ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None)
203+
paddle.fluid.layers.isfinite ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None)
201204
paddle.fluid.layers.While.__init__ ArgSpec(args=['self', 'cond', 'is_test', 'name'], varargs=None, keywords=None, defaults=(False, None))
202205
paddle.fluid.layers.While.block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
203206
paddle.fluid.layers.Switch.__init__ ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,))

paddle/fluid/framework/data_type.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ limitations under the License. */
1717
#include <typeindex>
1818
#include "paddle/fluid/framework/framework.pb.h"
1919
#include "paddle/fluid/platform/enforce.h"
20-
2120
#include "paddle/fluid/platform/float16.h"
2221

2322
namespace paddle {

paddle/fluid/framework/ir/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ pass_library(fc_lstm_fuse_pass inference)
3838
pass_library(embedding_fc_lstm_fuse_pass inference)
3939
pass_library(fc_gru_fuse_pass inference)
4040
pass_library(seq_concat_fc_fuse_pass inference)
41+
pass_library(conv_bn_fuse_pass inference)
4142

4243
cc_library(fuse_elewise_add_act_pass SRCS fuse_elewise_add_act_pass.cc DEPS pass graph_pattern_detector )
4344

0 commit comments

Comments
 (0)