Skip to content

Commit 20a0937

Browse files
authored
[cherry-pick] NV JETSON support and auto_growth strategy for inference. (#21500)
* ADD NV JETSON SUPPORT test=release/1.6
* CHERRY_PICK: specify the auto growth allocator for inference. test=release/1.6
1 parent 3f1169f commit 20a0937

File tree

10 files changed

+52
-47
lines changed

10 files changed

+52
-47
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ option(WITH_BRPC_RDMA "Use brpc rdma as the rpc protocal" OFF)
6969
option(ON_INFER "Turn on inference optimization." OFF)
7070
################################ Internal Configurations #######################################
7171
option(WITH_AMD_GPU "Compile PaddlePaddle with AMD GPU" OFF)
72+
option(WITH_NV_JETSON "Compile PaddlePaddle with NV JETSON" OFF)
7273
option(WITH_NGRAPH "Compile PaddlePaddle with nGraph support." OFF)
7374
option(WITH_PROFILER "Compile PaddlePaddle with GPU profiler and gperftools" OFF)
7475
option(WITH_COVERAGE "Compile PaddlePaddle with code coverage" OFF)

cmake/cuda.cmake

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,20 @@ if(NOT WITH_GPU)
22
return()
33
endif()
44

5-
set(paddle_known_gpu_archs "30 35 50 52 60 61 70")
6-
set(paddle_known_gpu_archs7 "30 35 50 52")
7-
set(paddle_known_gpu_archs8 "30 35 50 52 60 61")
8-
set(paddle_known_gpu_archs9 "30 35 50 52 60 61 70")
9-
set(paddle_known_gpu_archs10 "30 35 50 52 60 61 70 75")
5+
6+
if (WITH_NV_JETSON)
7+
set(paddle_known_gpu_archs "53 62 72")
8+
set(paddle_known_gpu_archs7 "53")
9+
set(paddle_known_gpu_archs8 "53 62")
10+
set(paddle_known_gpu_archs9 "53 62")
11+
set(paddle_known_gpu_archs10 "53 62 72")
12+
else()
13+
set(paddle_known_gpu_archs "30 35 50 52 60 61 70")
14+
set(paddle_known_gpu_archs7 "30 35 50 52")
15+
set(paddle_known_gpu_archs8 "30 35 50 52 60 61")
16+
set(paddle_known_gpu_archs9 "30 35 50 52 60 61 70")
17+
set(paddle_known_gpu_archs10 "30 35 50 52 60 61 70 75")
18+
endif()
1019

1120
######################################################################################
1221
# A function for automatic detection of GPUs installed (if autodetection is enabled)

cmake/external/openblas.cmake

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ IF(NOT ${CBLAS_FOUND})
3333

3434
IF (NOT WIN32)
3535
SET(OPENBLAS_CC "${CMAKE_C_COMPILER} -Wno-unused-but-set-variable -Wno-unused-variable")
36-
SET(OPENBLAS_COMMIT "v0.2.20")
36+
SET(OPENBLAS_COMMIT "v0.3.7")
3737

3838
IF(APPLE)
3939
SET(OPENBLAS_CC "${CMAKE_C_COMPILER} -isysroot ${CMAKE_OSX_SYSROOT}")
@@ -54,7 +54,6 @@ IF(NOT ${CBLAS_FOUND})
5454
BUILD_IN_SOURCE 1
5555
BUILD_COMMAND ${CMAKE_MAKE_PROGRAM} ${COMMON_ARGS} ${OPTIONAL_ARGS}
5656
INSTALL_COMMAND ${CMAKE_MAKE_PROGRAM} install NO_SHARED=1 NO_LAPACK=1 PREFIX=<INSTALL_DIR>
57-
&& rm -r ${CBLAS_INSTALL_DIR}/lib/cmake ${CBLAS_INSTALL_DIR}/lib/pkgconfig
5857
UPDATE_COMMAND ""
5958
CONFIGURE_COMMAND ""
6059
)

cmake/flags.cmake

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,9 @@ set(GPU_COMMON_FLAGS
187187
-Wno-error=unused-function # Warnings in Numpy Header.
188188
-Wno-error=array-bounds # Warnings in Eigen::array
189189
)
190-
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64")
190+
if (NOT WITH_NV_JETSON)
191+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64")
192+
endif()
191193
endif(NOT WIN32)
192194

193195
if (APPLE)

paddle/fluid/framework/CMakeLists.txt

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -140,26 +140,28 @@ cc_library(op_call_stack SRCS op_call_stack.cc DEPS op_proto_maker enforce)
140140

141141
nv_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry)
142142

143-
py_proto_compile(framework_py_proto SRCS framework.proto data_feed.proto)
144-
py_proto_compile(trainer_py_proto SRCS trainer_desc.proto data_feed.proto)
143+
if(WITH_PYTHON)
144+
py_proto_compile(framework_py_proto SRCS framework.proto data_feed.proto)
145+
py_proto_compile(trainer_py_proto SRCS trainer_desc.proto data_feed.proto)
145146
#Generate an empty \
146147
#__init__.py to make framework_py_proto as a valid python module.
147-
add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
148-
add_dependencies(framework_py_proto framework_py_proto_init)
149-
if (NOT WIN32)
150-
add_custom_command(TARGET framework_py_proto POST_BUILD
148+
add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
149+
add_dependencies(framework_py_proto framework_py_proto_init)
150+
if (NOT WIN32)
151+
add_custom_command(TARGET framework_py_proto POST_BUILD
151152
COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto
152153
COMMAND cp *.py ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/
153154
COMMENT "Copy generated python proto into directory paddle/fluid/proto."
154155
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
155-
else(NOT WIN32)
156-
string(REPLACE "/" "\\" proto_dstpath "${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/")
157-
add_custom_command(TARGET framework_py_proto POST_BUILD
156+
else(NOT WIN32)
157+
string(REPLACE "/" "\\" proto_dstpath "${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/")
158+
add_custom_command(TARGET framework_py_proto POST_BUILD
158159
COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto
159160
COMMAND copy /Y *.py ${proto_dstpath}
160161
COMMENT "Copy generated python proto into directory paddle/fluid/proto."
161162
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
162-
endif(NOT WIN32)
163+
endif(NOT WIN32)
164+
endif()
163165

164166
cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor)
165167

paddle/fluid/framework/io/shell.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ namespace framework {
1919

2020
std::shared_ptr<FILE> shell_fopen(const std::string& path,
2121
const std::string& mode) {
22-
#if defined _WIN32 || defined __APPLE__
22+
#if defined _WIN32 || defined __APPLE__ || defined PADDLE_ARM
2323
return nullptr;
2424
#else
2525
if (shell_verbose()) {
@@ -44,7 +44,7 @@ std::shared_ptr<FILE> shell_fopen(const std::string& path,
4444
// The implementation is async signal safe
4545
// Mostly copy from CPython code
4646
static int close_open_fds_internal() {
47-
#if defined _WIN32 || defined __APPLE__
47+
#if defined _WIN32 || defined __APPLE__ || defined PADDLE_ARM
4848
return 0;
4949
#else
5050
struct linux_dirent {

paddle/fluid/framework/io/shell.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,11 @@
3131
#include "paddle/fluid/platform/port.h"
3232
#include "paddle/fluid/string/string_helper.h"
3333

34+
#if defined(__arm__) || defined(__aarch64__) || defined(__ARM_NEON) || \
35+
defined(__ARM_NEON__)
36+
#define PADDLE_ARM
37+
#endif
38+
3439
namespace paddle {
3540
namespace framework {
3641

paddle/fluid/inference/api/analysis_predictor.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -500,6 +500,8 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
500500
std::string flag = "--fraction_of_gpu_memory_to_use=" +
501501
std::to_string(fraction_of_gpu_memory);
502502
flags.push_back(flag);
503+
// use auto growth strategy here.
504+
flags.push_back("--allocator_strategy=auto_growth");
503505
flags.push_back("--cudnn_deterministic=True");
504506
VLOG(3) << "set flag: " << flag;
505507
framework::InitGflags(flags);

paddle/fluid/platform/CMakeLists.txt

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,27 @@
11
proto_library(profiler_proto SRCS profiler.proto DEPS framework_proto simple_threadpool)
2-
py_proto_compile(profiler_py_proto SRCS profiler.proto)
32
proto_library(error_codes_proto SRCS error_codes.proto)
43

5-
add_custom_target(profiler_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
6-
add_dependencies(profiler_py_proto profiler_py_proto_init)
74

8-
if (NOT WIN32)
9-
add_custom_command(TARGET profiler_py_proto POST_BUILD
5+
if (WITH_PYTHON)
6+
py_proto_compile(profiler_py_proto SRCS profiler.proto)
7+
add_custom_target(profiler_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
8+
add_dependencies(profiler_py_proto profiler_py_proto_init)
9+
10+
if (NOT WIN32)
11+
add_custom_command(TARGET profiler_py_proto POST_BUILD
1012
COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/profiler
1113
COMMAND cp *.py ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/profiler
1214
COMMENT "Copy generated python proto into directory paddle/fluid/proto/profiler."
1315
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
14-
else(NOT WIN32)
15-
string(REPLACE "/" "\\" proto_dstpath "${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/profiler/")
16-
add_custom_command(TARGET profiler_py_proto POST_BUILD
16+
else(NOT WIN32)
17+
string(REPLACE "/" "\\" proto_dstpath "${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/profiler/")
18+
add_custom_command(TARGET profiler_py_proto POST_BUILD
1719
COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/profiler
1820
COMMAND copy /Y *.py ${proto_dstpath}
1921
COMMENT "Copy generated python proto into directory paddle/fluid/proto/profiler."
2022
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
21-
endif(NOT WIN32)
23+
endif(NOT WIN32)
24+
endif()
2225

2326
cc_library(flags SRCS flags.cc DEPS gflags)
2427

paddle/fluid/platform/float16.h

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -38,24 +38,6 @@ limitations under the License. */
3838
#include <cuda_fp16.h>
3939
#endif
4040

41-
#if defined(__arm__) || defined(__aarch64__)
42-
#define PADDLE_ARM
43-
#endif
44-
45-
#if defined(__ARM_NEON) || defined(__ARM_NEON__)
46-
#define PADDLE_NEON
47-
#include <arm_neon.h>
48-
#endif
49-
50-
#if defined(PADDLE_NEON) && defined(PADDLE_ARM_FP16) && \
51-
(PADDLE_GNUC_VER >= 62 || PADDLE_CLANG_VER >= 37)
52-
#define PADDLE_WITH_NATIVE_FP16
53-
#endif
54-
55-
#ifndef PADDLE_ARM
56-
#include <immintrin.h>
57-
#endif // PADDLE_ARM
58-
5941
#if !defined(_WIN32)
6042
#define PADDLE_ALIGN(x) __attribute__((aligned(x)))
6143
#else

0 commit comments

Comments
 (0)