Skip to content

Commit 74dc997

Browse files
authored
Faster Transformer cmake promote (PaddlePaddle#879)
* update cmake * rm python deps when on_infer is no * refine encoder code
1 parent de4fc9d commit 74dc997

File tree

6 files changed

+124
-55
lines changed

6 files changed

+124
-55
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ __pycache__/
99
# Distribution / packaging
1010
.Python
1111
build/
12+
build*
1213
develop-eggs/
1314
dist/
1415
downloads/

paddlenlp/ops/CMakeLists.txt

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,16 @@ INCLUDE(ExternalProject)
2020

2121
set(CXX_STD "14" CACHE STRING "C++ standard")
2222

23-
option(ON_INFER "Compile with inference. " OFF)
24-
option(WITH_GPU "Compile with GPU/CPU, default use CPU." ON)
25-
option(USE_TENSORRT "Compile with TensorRT." OFF)
26-
option(WITH_TRANSFORMER "Compile with Transformer" ON)
27-
option(WITH_GPT "Compile with GPT" OFF)
28-
option(WITH_UNIFIED "Compile with Unified Transformer" ON)
29-
option(WITH_DECODER "Compile with Transformer Decoder" ON)
30-
option(WITH_ENCODER "Compile with Transformer Encoder" ON)
23+
option(ON_INFER "Compiled with inference. " OFF)
24+
option(WITH_GPU "Compiled with GPU/CPU, default use CPU." ON)
25+
option(WITH_MKL "Compile with MKL. Only works when ON_INFER is ON." ON)
26+
option(USE_TENSORRT "Compiled with TensorRT." OFF)
27+
option(WITH_TRANSFORMER "Compiled with Transformer." ON)
28+
option(WITH_GPT "Compiled with GPT." OFF)
29+
option(WITH_UNIFIED "Compiled with Unified Transformer." ON)
30+
option(WITH_SP "Compile with sentencepiece. Only works when WITH_GPT and ON_INFER are ON." ON)
31+
option(WITH_DECODER "Compile with Transformer Decoder" ON)
32+
option(WITH_ENCODER "Compile with Transformer Encoder" ON)
3133

3234
if(NOT WITH_GPU)
3335
message(FATAL_ERROR "Faster transformer custom op doesn't support CPU. Please add the flag -DWITH_GPU=ON to use GPU. ")
@@ -259,7 +261,7 @@ ExternalProject_Add(
259261
PATCH_COMMAND ${FT_PATCH_COMMAND}
260262
BINARY_DIR ${THIRD_PATH}/build/${THIRD_PARTY_NAME}
261263
INSTALL_COMMAND ""
262-
CMAKE_ARGS -DCMAKE_BUILD_TYPE=Release -DSM=${SM} -DBUILD_PD=ON -DPY_CMD=${PY_CMD}
264+
CMAKE_ARGS -DCMAKE_BUILD_TYPE=Release -DSM=${SM} -DBUILD_PD=ON -DPY_CMD=${PY_CMD} -DON_INFER=${ON_INFER} -DPADDLE_LIB=${PADDLE_LIB} -DWITH_MKL=${WITH_MKL} -DWITH_STATIC_LIB=${WITH_STATIC_LIB}
263265
)
264266
ExternalProject_Get_property(extern_${THIRD_PARTY_NAME} BINARY_DIR)
265267
ExternalProject_Get_property(extern_${THIRD_PARTY_NAME} SOURCE_DIR)
@@ -276,7 +278,7 @@ link_directories(
276278
${FT_LIB_PATH}
277279
)
278280

279-
if(ON_INFER AND WITH_GPT)
281+
if(ON_INFER AND WITH_GPT AND WITH_SP)
280282
ExternalProject_Add(
281283
extern_sentencepiece
282284
GIT_REPOSITORY https://github.com/google/sentencepiece.git
@@ -293,6 +295,8 @@ if(ON_INFER AND WITH_GPT)
293295
link_directories(
294296
${THIRD_PATH}/build/sentencepiece/src/
295297
)
298+
299+
add_definitions(-DGPT_ON_SENTENCEPIECE)
296300
endif()
297301

298302
add_subdirectory(faster_transformer)

paddlenlp/ops/faster_transformer/src/CMakeLists.txt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -145,9 +145,9 @@ if(ON_INFER)
145145
set(DEPS ${DEPS} shlwapi.lib)
146146
endif(NOT WIN32)
147147

148-
cuda_add_library(pd_infer_custom_op ${decoding_op_files} ${decoder_op_files} SHARED)
148+
cuda_add_library(decoding_infer_op ${decoding_op_files} ${decoder_op_files} SHARED)
149+
add_dependencies(decoding_infer_op extern_${THIRD_PARTY_NAME})
149150

150-
add_dependencies(pd_infer_custom_op extern_${THIRD_PARTY_NAME})
151151
string(REPLACE "/" ";" DEMO_PATH ${DEMO})
152152

153153
list(LENGTH DEMO_PATH PATH_LEN)
@@ -157,9 +157,9 @@ if(ON_INFER)
157157
string(REPLACE "." ";" DEMO_NAME ${DEMO_NAME})
158158
list(GET DEMO_NAME 0 DEMO_NAME)
159159
add_executable(${DEMO_NAME} ${DEMO})
160-
set(DEPS pd_infer_custom_op ${ft_lib_link} boost ${DEPS} cublas cudart)
160+
set(DEPS decoding_infer_op ${ft_lib_link} boost ${DEPS} cublas cudart)
161161

162-
if(WITH_GPT)
162+
if(WITH_GPT AND WITH_SP)
163163
set(DEPS ${DEPS} sentencepiece)
164164
endif()
165165

paddlenlp/ops/faster_transformer/src/demo/gpt.cc

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
#include <pthread.h>
2-
#include <sentencepiece_processor.h>
32
#include <algorithm>
43
#include <atomic>
54
#include <codecvt>
@@ -12,6 +11,10 @@
1211
#include <thread>
1312
#include <unordered_map>
1413

14+
#ifdef GPT_ON_SENTENCEPIECE
15+
#include <sentencepiece_processor.h>
16+
#endif
17+
1518
#include "helper.h"
1619

1720
#include <sys/time.h>

paddlenlp/ops/faster_transformer/src/fusion_encoder_op.cc

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -143,20 +143,7 @@ std::vector<paddle::DataType> EncoderInferDtype(
143143
// const paddle::DataType& sequence_id_offset,
144144
// const paddle::DataType& trt_seqlen_offset,
145145
// const paddle::DataType& amax_list) {
146-
switch (input) {
147-
case paddle::DataType::FLOAT16: {
148-
return {input};
149-
}
150-
case paddle::DataType::FLOAT32: {
151-
return {input};
152-
}
153-
default: {
154-
PD_THROW(
155-
"NOT supported data type. "
156-
"Only float16 and float32 are supported. ");
157-
break;
158-
}
159-
}
146+
return {input};
160147
}
161148

162149
PD_BUILD_OP(fusion_encoder)

paddlenlp/ops/patches/FasterTransformer/CMakeLists.txt

Lines changed: 100 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -110,33 +110,107 @@ endif()
110110
if(BUILD_PD)
111111
add_definitions(-DPADDLE_WITH_CUDA)
112112

113-
execute_process(COMMAND ${PYTHON_PATH} "-c" "from __future__ import print_function; import paddle; print(paddle.sysconfig.get_include())"
114-
RESULT_VARIABLE _INC_PYTHON_SUCCESS
115-
OUTPUT_VARIABLE _INC_PYTHON_VALUES)
116-
if (NOT _INC_PYTHON_SUCCESS MATCHES 0)
117-
message(FATAL_ERROR "Python config Error.")
118-
endif()
119-
string(REGEX REPLACE ";" "\\\\;" _INC_PYTHON_VALUES ${_INC_PYTHON_VALUES})
120-
string(REGEX REPLACE "\n" ";" _INC_PYTHON_VALUES ${_INC_PYTHON_VALUES})
121-
list(GET _INC_PYTHON_VALUES 0 PY_INCLUDE_DIR)
122-
123-
list(APPEND COMMON_HEADER_DIRS ${PY_INCLUDE_DIR})
124-
list(APPEND COMMON_HEADER_DIRS ${PY_INCLUDE_DIR}/third_party)
125-
126-
execute_process(COMMAND ${PYTHON_PATH} "-c" "from __future__ import print_function; import paddle; print(paddle.sysconfig.get_lib())"
127-
RESULT_VARIABLE _LIB_PYTHON_SUCCESS
128-
OUTPUT_VARIABLE _LIB_PYTHON_VALUES)
129-
if (NOT _LIB_PYTHON_SUCCESS MATCHES 0)
130-
message(FATAL_ERROR "Python config Error.")
131-
endif()
132-
string(REGEX REPLACE ";" "\\\\;" _LIB_PYTHON_VALUES ${_LIB_PYTHON_VALUES})
133-
string(REGEX REPLACE "\n" ";" _LIB_PYTHON_VALUES ${_LIB_PYTHON_VALUES})
134-
list(GET _LIB_PYTHON_VALUES 0 PY_LIB_DIR)
135-
list(APPEND COMMON_LIB_DIRS ${PY_LIB_DIR})
136-
137-
include_directories(${PY_INCLUDE_DIR})
138-
include_directories(${PY_INCLUDE_DIR}\third_party)
113+
if(ON_INFER)
114+
add_definitions(-DPADDLE_ON_INFERENCE)
115+
116+
link_directories(${COMMON_LIB_DIRS})
117+
118+
if(NOT WITH_STATIC_LIB)
119+
add_definitions("-DPADDLE_WITH_SHARED_LIB")
120+
else()
121+
# PD_INFER_DECL is mainly used to set the dllimport/dllexport attribute in dynamic library mode.
122+
# Set it to empty in static library mode to avoid compilation issues.
123+
add_definitions("/DPD_INFER_DECL=")
124+
endif()
125+
126+
macro(safe_set_static_flag)
127+
foreach(flag_var
128+
CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
129+
CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO)
130+
if(${flag_var} MATCHES "/MD")
131+
string(REGEX REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}")
132+
endif(${flag_var} MATCHES "/MD")
133+
endforeach(flag_var)
134+
endmacro()
135+
136+
if(NOT DEFINED PADDLE_LIB)
137+
message(FATAL_ERROR "please set PADDLE_LIB with -DPADDLE_LIB=/path/paddle/lib")
138+
endif()
139+
140+
include_directories("${PADDLE_LIB}/")
141+
set(PADDLE_LIB_THIRD_PARTY_PATH "${PADDLE_LIB}/third_party/install/")
142+
include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}protobuf/include")
143+
include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}glog/include")
144+
include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}gflags/include")
145+
include_directories("${PADDLE_LIB_THIRD_PARTY_PATH}xxhash/include")
146+
147+
link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}protobuf/lib")
148+
link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}glog/lib")
149+
link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}gflags/lib")
150+
link_directories("${PADDLE_LIB_THIRD_PARTY_PATH}xxhash/lib")
151+
link_directories("${PADDLE_LIB}/paddle/lib")
152+
153+
if(WITH_MKL)
154+
set(FLAG_OPENMP "-fopenmp")
155+
endif()
156+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 ${FLAG_OPENMP}")
157+
158+
if (USE_TENSORRT AND WITH_GPU)
159+
set(TENSORRT_ROOT "" CACHE STRING "The root directory of TensorRT library")
160+
if("${TENSORRT_ROOT}" STREQUAL "")
161+
message(FATAL_ERROR "The TENSORRT_ROOT is empty, you must assign it a value with CMake command. Such as: -DTENSORRT_ROOT=TENSORRT_ROOT_PATH ")
162+
endif()
163+
set(TENSORRT_INCLUDE_DIR ${TENSORRT_ROOT}/include)
164+
set(TENSORRT_LIB_DIR ${TENSORRT_ROOT}/lib)
165+
endif()
166+
167+
if (USE_TENSORRT AND WITH_GPU)
168+
include_directories("${TENSORRT_INCLUDE_DIR}")
169+
link_directories("${TENSORRT_LIB_DIR}")
170+
endif()
171+
172+
if(WITH_MKL)
173+
set(MATH_LIB_PATH "${PADDLE_LIB_THIRD_PARTY_PATH}mklml")
174+
include_directories("${MATH_LIB_PATH}/include")
175+
set(MKLDNN_PATH "${PADDLE_LIB_THIRD_PARTY_PATH}mkldnn")
176+
if(EXISTS ${MKLDNN_PATH})
177+
include_directories("${MKLDNN_PATH}/include")
178+
set(MKLDNN_LIB ${MKLDNN_PATH}/lib/libmkldnn.so.0)
179+
endif()
180+
else()
181+
set(OPENBLAS_LIB_PATH "${PADDLE_LIB_THIRD_PARTY_PATH}openblas")
182+
include_directories("${OPENBLAS_LIB_PATH}/include/openblas")
183+
endif()
184+
185+
else()
186+
execute_process(COMMAND ${PYTHON_PATH} "-c" "from __future__ import print_function; import paddle; print(paddle.sysconfig.get_include())"
187+
RESULT_VARIABLE _INC_PYTHON_SUCCESS
188+
OUTPUT_VARIABLE _INC_PYTHON_VALUES)
189+
if (NOT _INC_PYTHON_SUCCESS MATCHES 0)
190+
message(FATAL_ERROR "Python config Error.")
191+
endif()
192+
string(REGEX REPLACE ";" "\\\\;" _INC_PYTHON_VALUES ${_INC_PYTHON_VALUES})
193+
string(REGEX REPLACE "\n" ";" _INC_PYTHON_VALUES ${_INC_PYTHON_VALUES})
194+
list(GET _INC_PYTHON_VALUES 0 PY_INCLUDE_DIR)
195+
196+
list(APPEND COMMON_HEADER_DIRS ${PY_INCLUDE_DIR})
197+
list(APPEND COMMON_HEADER_DIRS ${PY_INCLUDE_DIR}/third_party)
198+
199+
execute_process(COMMAND ${PYTHON_PATH} "-c" "from __future__ import print_function; import paddle; print(paddle.sysconfig.get_lib())"
200+
RESULT_VARIABLE _LIB_PYTHON_SUCCESS
201+
OUTPUT_VARIABLE _LIB_PYTHON_VALUES)
202+
if (NOT _LIB_PYTHON_SUCCESS MATCHES 0)
203+
message(FATAL_ERROR "Python config Error.")
204+
endif()
205+
string(REGEX REPLACE ";" "\\\\;" _LIB_PYTHON_VALUES ${_LIB_PYTHON_VALUES})
206+
string(REGEX REPLACE "\n" ";" _LIB_PYTHON_VALUES ${_LIB_PYTHON_VALUES})
207+
list(GET _LIB_PYTHON_VALUES 0 PY_LIB_DIR)
208+
list(APPEND COMMON_LIB_DIRS ${PY_LIB_DIR})
209+
210+
include_directories(${PY_INCLUDE_DIR})
211+
include_directories(${PY_INCLUDE_DIR}/third_party)
139212

213+
endif()
140214
endif()
141215

142216
include_directories(

0 commit comments

Comments
 (0)