Skip to content

Commit fbb75c6

Browse files
committed
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into feature/refine_gather_reduce
2 parents 4760ac4 + 9ced6da commit fbb75c6

File tree

106 files changed

+3022
-528
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

106 files changed

+3022
-528
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,7 @@ set(EXTERNAL_LIBS
179179

180180
if(WITH_GPU)
181181
include(cuda)
182+
include(tensorrt)
182183
endif(WITH_GPU)
183184

184185
if(WITH_AMD_GPU)

Dockerfile

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,13 @@ ENV PATH=${PATH}:${GOROOT}/bin:${GOPATH}/bin
4545
# install glide
4646
RUN curl -s -q https://glide.sh/get | sh
4747

48+
# Install TensorRT
49+
# Unnecessary files have been removed to keep the library small. It now contains only include and lib.
50+
RUN wget -qO- http://paddlepaddledeps.bj.bcebos.com/TensorRT-4.0.0.3.Ubuntu-16.04.4.x86_64-gnu.cuda-8.0.cudnn7.0.tar.gz | \
51+
tar -xz -C /usr/local && \
52+
cp -rf /usr/local/TensorRT/include /usr && \
53+
cp -rf /usr/local/TensorRT/lib /usr
54+
4855
# git credential to skip password typing
4956
RUN git config --global credential.helper store
5057

Dockerfile.android

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ RUN git config --global credential.helper store
2727
# Fix locales to en_US.UTF-8
2828
RUN localedef -i en_US -f UTF-8 en_US.UTF-8
2929

30-
RUN pip install --upgrade pip && \
30+
RUN pip install --upgrade pip==9.0.3 && \
3131
pip install -U 'protobuf==3.1.0' && \
3232
pip install -U wheel sphinx && \
3333
pip install pre-commit
File renamed without changes.

cmake/configure.cmake

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,16 @@ if(WITH_GPU)
8080
# Include cuda and cudnn
8181
include_directories(${CUDNN_INCLUDE_DIR})
8282
include_directories(${CUDA_TOOLKIT_INCLUDE})
83+
84+
# When TensorRT was located, verify the CUDA / cuDNN major versions it needs
# and add its header directory to the include path.
# The version variables are quoted so that an unset or empty value still
# produces a well-formed if() condition and reaches the FATAL_ERROR message
# below, instead of aborting with a generic "Unknown arguments" error.
if(TENSORRT_FOUND)
85+
if("${CUDA_VERSION_MAJOR}" VERSION_LESS 8)
86+
message(FATAL_ERROR "TensorRT needs CUDA >= 8.0 to compile")
87+
endif()
88+
if("${CUDNN_MAJOR_VERSION}" VERSION_LESS 7)
89+
message(FATAL_ERROR "TensorRT needs CUDNN >= 7.0 to compile")
90+
endif()
91+
include_directories(${TENSORRT_INCLUDE_DIR})
92+
endif()
8393
elseif(WITH_AMD_GPU)
8494
add_definitions(-DPADDLE_WITH_HIP)
8595
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D__HIP_PLATFORM_HCC__")

cmake/tensorrt.cmake

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# Locate the TensorRT header and library and report the detected major version.
# Sets TENSORRT_INCLUDE_DIR, TENSORRT_LIBRARY, TENSORRT_FOUND and, when found,
# TENSORRT_MAJOR_VERSION.
# Nothing is done when the build is configured without GPU support.
if(NOT WITH_GPU)
2+
return()
3+
endif()
4+
5+
# Root directory to search; defaults to /usr and can be overridden through the
# cache. The TENSORRT_ROOT environment variable is searched as well.
set(TENSORRT_ROOT "/usr" CACHE PATH "TENSORRT ROOT")
6+
# Look for NvInfer.h only under the configured roots (NO_DEFAULT_PATH disables
# CMake's default search locations).
find_path(TENSORRT_INCLUDE_DIR NvInfer.h
7+
PATHS ${TENSORRT_ROOT} ${TENSORRT_ROOT}/include
8+
$ENV{TENSORRT_ROOT} $ENV{TENSORRT_ROOT}/include
9+
NO_DEFAULT_PATH
10+
)
11+
12+
# Look for the nvinfer library (shared or static) under the same roots.
find_library(TENSORRT_LIBRARY NAMES libnvinfer.so libnvinfer.a
13+
PATHS ${TENSORRT_ROOT} ${TENSORRT_ROOT}/lib
14+
$ENV{TENSORRT_ROOT} $ENV{TENSORRT_ROOT}/lib
15+
NO_DEFAULT_PATH
16+
DOC "Path to TensorRT library.")
17+
18+
# TensorRT counts as found only when both the header and the library exist.
if(TENSORRT_INCLUDE_DIR AND TENSORRT_LIBRARY)
19+
set(TENSORRT_FOUND ON)
20+
else()
21+
set(TENSORRT_FOUND OFF)
22+
endif()
23+
24+
# Extract the major version from the NV_TENSORRT_MAJOR define in NvInfer.h:
# the first regex captures the whole "define ..." match, the second reduces it
# to the number. If the define is absent the result is an empty string.
if(TENSORRT_FOUND)
25+
file(READ ${TENSORRT_INCLUDE_DIR}/NvInfer.h TENSORRT_VERSION_FILE_CONTENTS)
26+
string(REGEX MATCH "define NV_TENSORRT_MAJOR +([0-9]+)" TENSORRT_MAJOR_VERSION
27+
"${TENSORRT_VERSION_FILE_CONTENTS}")
28+
string(REGEX REPLACE "define NV_TENSORRT_MAJOR +([0-9]+)" "\\1"
29+
TENSORRT_MAJOR_VERSION "${TENSORRT_MAJOR_VERSION}")
30+
31+
message(STATUS "Current TensorRT header is ${TENSORRT_INCLUDE_DIR}/NvInfer.h. "
32+
"Current TensorRT version is v${TENSORRT_MAJOR_VERSION}. ")
33+
endif()

doc/CMakeLists.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@ add_custom_target(paddle_apis ALL
33

44
add_custom_target(paddle_docs ALL
55
DEPENDS paddle_v2_docs paddle_v2_docs_cn
6-
paddle_fluid_docs paddle_fluid_docs_cn)
6+
paddle_fluid_docs paddle_fluid_docs_cn
7+
paddle_mobile_docs paddle_mobile_docs_cn)
78

89
add_subdirectory(v2)
910
add_subdirectory(fluid)
11+
add_subdirectory(mobile)

doc/fluid/api/initializer.rst

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,45 @@ Xavier
3333
:members:
3434
:noindex:
3535

36+
MSRA
37+
------
38+
39+
.. autoclass:: paddle.fluid.initializer.MSRA
40+
:members:
41+
:noindex:
42+
43+
ConstantInitializer
44+
-------------------
45+
46+
.. autoclass:: paddle.fluid.initializer.ConstantInitializer
47+
:members:
48+
:noindex:
49+
50+
UniformInitializer
51+
------------------
52+
53+
.. autoclass:: paddle.fluid.initializer.UniformInitializer
54+
:members:
55+
:noindex:
56+
57+
NormalInitializer
58+
-----------------
59+
60+
.. autoclass:: paddle.fluid.initializer.NormalInitializer
61+
:members:
62+
:noindex:
63+
64+
XavierInitializer
65+
-----------------
66+
67+
.. autoclass:: paddle.fluid.initializer.XavierInitializer
68+
:members:
69+
:noindex:
70+
MSRA
71+
------
72+
73+
MSRAInitializer
74+
-----------------
75+
.. autoclass:: paddle.fluid.initializer.MSRAInitializer
76+
:members:
77+
:noindex:

doc/fluid/api/layers.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -815,3 +815,8 @@ zeros
815815
.. autofunction:: paddle.fluid.layers.zeros
816816
:noindex:
817817

818+
topk
819+
----
820+
821+
.. autofunction:: paddle.fluid.layers.topk
822+
:noindex:

doc/fluid/design/concepts/parallel_executor.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ Running an operator can be asynchronized. There is a thread pool to execute an `
8484
8585
## Synchronize GPU Kernels
8686
87-
The GPU is a non-blocking device. The different streams need be synchronized when switing streams. In current implementation, the synchronization based on the following algorithm:
87+
The GPU is a non-blocking device. Different streams need to be synchronized when switching between them. In the current implementation, the synchronization is based on the following algorithm:
8888
8989
1. `OpHandle` will record the `DeviceContext` that it uses.
9090
2. In `OpHandle::Run`, if the `DeviceContext` of the current operator is different from the `DeviceContext` of any input variable, just wait for the operator that generates this input variable.

0 commit comments

Comments
 (0)